Master common web automation patterns with VisionAgent
Learn essential patterns for reliable web automation using VisionAgent. This tutorial covers real-world scenarios based on common automation challenges.
Prerequisites: Complete the "Your First Agent" tutorial before starting.
Learn different ways to launch applications and websites.
Copy
Ask AI
from askui import VisionAgent

with VisionAgent() as agent:
    # Open the website in the default browser.
    agent.tools.webbrowser.open_new("https://www.saucedemo.com")
    # Give the page time to finish loading before inspecting the screen.
    agent.wait(2)

    # Verify the page loaded. Without a response_schema, get() answers with
    # free-form text, and any non-empty string (even "No") is truthy in an
    # `if` -- so explicitly request a boolean answer.
    if agent.get("Is the Swag Labs login page visible?", response_schema=bool):
        print("✓ Application opened successfully")
Always add wait times after opening applications to ensure they’re fully loaded before interacting with them.
from askui import VisionAgent
from askui import locators as loc

with VisionAgent() as agent:
    # Click with implicit wait
    agent.click(loc.Text("Add to cart"))

    # Alternative: click with error handling, so a missing button is
    # reported instead of aborting the whole run.
    checkout_button = loc.Text("Checkout")
    try:
        agent.click(checkout_button)
        print("✓ Clicked checkout button")
    except Exception as e:
        print(f"✗ Could not find checkout button: {e}")
When text appears on multiple lines, use partial matching:
Copy
Ask AI
# Instead of searching for "Web Automation\nTesting",
# search for the beginning of the text.
heading_start = loc.Text("Web Automation", match_type="contains")
agent.click(heading_start)
Merged or Overlapping Text
When overlay text merges with background text:
Copy
Ask AI
# Use an AI element as fallback when the text locator does not match.
start_text = loc.Text("Start now")
agent.click(start_text.or_(loc.AiElement("start-button")))

# Or target only the beginning of the text.
agent.click(loc.Text("Start", match_type="contains"))
Missing Whitespace
Handle text with inconsistent spacing:
Copy
Ask AI
# Use a regex for flexible matching: \s* tolerates zero or more
# whitespace characters between the two words.
name_label = loc.Text(r"your\s*name", match_type="regex")
agent.click(name_label)
from askui import VisionAgent
from askui import locators as loc

with VisionAgent() as agent:
    # Click icon using AI element
    agent.click(loc.AiElement("shopping-cart"))

    # Alternative: use a prompt combined with a visual relation
    cart_by_prompt = loc.Prompt("cart icon").right_of(loc.Text("Product Name"))
    agent.click(cart_by_prompt)

    # Or use a generic element with a visual relation, narrowed by a prompt
    cart_by_element = (
        loc.Element()
        .right_of(loc.Text("Product Name"))
        .and_(loc.Prompt("cart icon"))
    )
    agent.click(cart_by_element)
Use spatial relationships to find elements precisely.
Copy
Ask AI
from askui import VisionAgent
from askui import locators as loc

with VisionAgent() as agent:
    # Click button below specific text
    # See docs: /04-reference/01-agent-frameworks/02-python/02-vision-agent-api/locators#below-of
    button_below_details = loc.Prompt("button").below_of(loc.Text("Product Details"))
    agent.click(button_below_details)

    # Click icon to the right of text
    # See docs: /04-reference/01-agent-frameworks/02-python/02-vision-agent-api/locators#right-of
    icon_next_to_quantity = loc.Prompt("icon").right_of(loc.Text("Quantity"))
    agent.click(icon_next_to_quantity)

    # Complex relationship: two relations chained on one element
    # See docs: /04-reference/01-agent-frameworks/02-python/02-vision-agent-api/locators#above-of
    # See docs: /04-reference/01-agent-frameworks/02-python/02-vision-agent-api/locators#left-of
    element_near_total = (
        loc.Element()
        .above_of(loc.Text("Total"))
        .left_of(loc.Text("$29.99"))
    )
    agent.click(element_near_total)
Visual relationships are powerful for targeting elements in dynamic layouts.
import time

from askui import VisionAgent
from askui import locators as loc

with VisionAgent() as agent:
    # Fixed wait
    agent.wait(2)

    # Check for element existence with a retry pattern.
    max_retries = 10
    for attempt in range(max_retries):
        try:
            agent.locate(loc.Text("Welcome"))
        except Exception:
            # `except Exception` (not a bare `except:`) so Ctrl+C and
            # SystemExit still stop the script.
            if attempt < max_retries - 1:
                agent.wait(1)
        else:
            print("✓ Element found")
            break
    else:
        # Loop finished without `break`: every attempt failed.
        print("✗ Element not found after retries")

    # Wait for a condition using get. The monotonic clock is unaffected by
    # system clock adjustments; response_schema=bool is required because a
    # plain text answer (even "No") would be truthy.
    deadline = time.monotonic() + 15
    while time.monotonic() < deadline:
        if agent.get("Is the shopping cart visible?", response_schema=bool):
            break
        agent.wait(1)

    # Wait for an element to disappear, bounded so that a stuck loading
    # indicator cannot hang the script forever.
    deadline = time.monotonic() + 30
    while time.monotonic() < deadline:
        try:
            agent.locate(loc.Text("Loading..."))
        except Exception:
            # Element no longer found
            break
        agent.wait(0.5)
    else:
        print("✗ 'Loading...' still visible after 30 seconds")
from askui import VisionAgent

with VisionAgent() as agent:
    # Single key presses
    for key in ("enter", "escape", "tab"):
        agent.keyboard(key)

    # Key combinations with modifiers
    agent.keyboard("a", modifier_keys=["control"])  # Select all
    agent.keyboard("c", modifier_keys=["control"])  # Copy
    agent.keyboard("v", modifier_keys=["control"])  # Paste

    # Page navigation
    agent.keyboard("pagedown")
    agent.keyboard("end")  # Go to end of page

    # Close popup/window (Alt+F4)
    agent.keyboard("f4", modifier_keys=["alt"])
Since VisionAgent focuses on core functionality, here are useful helper functions for common patterns:
Copy
Ask AI
import time

from askui import VisionAgent
from askui import locators as loc


class AutomationHelpers:
    """Common helper functions for VisionAgent automations."""

    @staticmethod
    def wait_until(agent, condition_func, timeout=10, check_interval=0.5):
        """Poll ``condition_func`` until it returns a truthy value or time runs out.

        Args:
            agent: Agent whose ``wait()`` is used to pause between checks.
            condition_func: Zero-argument callable; a truthy result ends the wait.
            timeout: Maximum number of seconds to keep polling.
            check_interval: Seconds to pause between two checks.

        Returns:
            True if the condition was met, False if the timeout expired first.
        """
        # Monotonic clock: immune to system clock adjustments mid-wait.
        deadline = time.monotonic() + timeout
        while True:
            # Check before looking at the deadline so the condition is always
            # evaluated at least once (even with timeout=0) and once more
            # after the final pause.
            if condition_func():
                return True
            if time.monotonic() >= deadline:
                return False
            agent.wait(check_interval)

    @staticmethod
    def element_exists(agent, locator):
        """Return True if ``agent.locate(locator)`` succeeds, False if it raises."""
        try:
            agent.locate(locator)
        except Exception:
            # `except Exception` (not a bare `except:`) so KeyboardInterrupt
            # and SystemExit still propagate.
            return False
        return True

    @staticmethod
    def wait_for_element(agent, locator, timeout=10, check_interval=0.5):
        """Wait for an element to appear.

        Returns:
            True if the element was located within ``timeout`` seconds,
            False otherwise.
        """
        return AutomationHelpers.wait_until(
            agent,
            lambda: AutomationHelpers.element_exists(agent, locator),
            timeout,
            check_interval,
        )

    @staticmethod
    def wait_for_element_gone(agent, locator, timeout=10, check_interval=0.5):
        """Wait for an element to disappear.

        Returns:
            True if the element could no longer be located within ``timeout``
            seconds, False otherwise.
        """
        return AutomationHelpers.wait_until(
            agent,
            lambda: not AutomationHelpers.element_exists(agent, locator),
            timeout,
            check_interval,
        )


# Usage example
with VisionAgent() as agent:
    # All helpers are static methods, so no instance is needed.

    # Wait for page to load
    if AutomationHelpers.wait_for_element(agent, loc.Text("Welcome"), timeout=15):
        print("✓ Page loaded")

    # Check if element exists
    if AutomationHelpers.element_exists(agent, loc.Text("Login")):
        agent.click(loc.Text("Login"))