Tool Use Agent
A cross-platform tool orchestration demo showing how waxell-observe traces Computer Use (Anthropic), A2A agent-to-agent communication (Google), and Composio third-party tool integrations. The parent orchestrator coordinates three specialized child agents, each exercising different @tool patterns and framework-specific mock objects that match the exact method signatures the real instrumentors wrap.
This example requires OPENAI_API_KEY, WAXELL_API_KEY, and WAXELL_API_URL. Use --dry-run to run without any API keys.
Architecture
Key Code
Tool decorators -- typed tool calls across frameworks
Each @waxell.tool() call specifies a tool_type that categorizes the span in the trace. The name parameter overrides the default (function name) for cleaner display.
# Computer Use tools -- call mock Anthropic Messages.create with computer_use tools
@waxell.tool(name="take_screenshot", tool_type="computer_use")
def take_screenshot(anthropic_client: MockAnthropicClient) -> dict:
    """Request a screenshot through the client's ``messages.create`` call.

    Sends a single user message together with one computer-use tool
    spec (1920x1080 display) and reports how many content blocks the
    response carried.

    Args:
        anthropic_client: Client object exposing ``messages.create``.

    Returns:
        A dict with ``"action"`` fixed to ``"screenshot"`` and
        ``"content_blocks"`` set to ``len(response.content)``.
    """
    display_tool = {
        "type": "computer_20241022",
        "name": "computer",
        "display_width_px": 1920,
        "display_height_px": 1080,
    }
    user_turn = {"role": "user", "content": "Take a screenshot"}

    response = anthropic_client.messages.create(
        model="claude-sonnet-4-5-20250929",
        messages=[user_turn],
        tools=[display_tool],
        max_tokens=1024,
    )

    block_count = len(response.content)
    return {"action": "screenshot", "content_blocks": block_count}
# A2A tools -- call mock Google A2A protocol methods
@waxell.tool(name="send_a2a_message", tool_type="a2a")
def send_a2a_message(a2a_client: MockA2AClient, content: str) -> dict:
    """Wrap ``content`` in a ``MockA2AMessage`` and send it via the client.

    Args:
        a2a_client: Client providing ``agent_name``, ``target_agent``
            and ``send_message``.
        content: Text placed in the outgoing message.

    Returns:
        A dict with the sender name, the receiver name, and the
        ``"status"`` entry of the value returned by ``send_message``.
    """
    outbound = MockA2AMessage(
        content=content,
        sender=a2a_client.agent_name,
    )
    ack = a2a_client.send_message(message=outbound)

    return {
        "sender": a2a_client.agent_name,
        "receiver": a2a_client.target_agent,
        "status": ack["status"],
    }
@waxell.tool(name="delegate_a2a_task", tool_type="a2a")
def delegate_a2a_task(a2a_client: MockA2AClient, task_id: str) -> dict:
    """Submit a task through the A2A client, then poll it once.

    Args:
        a2a_client: Client providing ``send_task`` and ``get_task``.
        task_id: Identifier given to the newly created ``MockA2ATask``.

    Returns:
        A dict with the task id reported by ``send_task``, the status
        reported by ``get_task``, and the number of artifacts on the
        polled task.
    """
    new_task = MockA2ATask(task_id=task_id, status="submitted")
    submitted = a2a_client.send_task(task=new_task)

    # Poll with the id echoed back by send_task, not the caller's argument.
    polled = a2a_client.get_task(task_id=submitted.task_id)

    return {
        "task_id": submitted.task_id,
        "poll_status": polled.status,
        "artifact_count": len(polled.artifacts),
    }
# Composio tools -- call mock ComposioToolSet methods
@waxell.tool(name="discover_tools", tool_type="composio")
def discover_tools(composio: MockComposioToolSet, tool_actions: list,
                   app_names: list) -> dict:
    """Count the tools and per-app actions the toolset reports.

    Args:
        composio: Toolset providing ``get_tools`` and ``get_actions``.
        tool_actions: Actions passed to ``get_tools``.
        app_names: App names; ``get_actions`` is called once per app.

    Returns:
        A dict with ``"tools_found"`` (length of the ``get_tools``
        result) and ``"actions_discovered"`` (total number of actions
        gathered across all apps).
    """
    tools = composio.get_tools(actions=tool_actions)

    # One get_actions call per app, flattened into a single list.
    discovered = [
        action
        for app_name in app_names
        for action in composio.get_actions(apps=[app_name])
    ]

    return {
        "tools_found": len(tools),
        "actions_discovered": len(discovered),
    }
@waxell.tool(name="execute_composio_action", tool_type="composio")
def execute_composio_action(composio: MockComposioToolSet,
                            action: str, params: dict) -> dict:
    """Run one action through the toolset and report its status.

    Args:
        composio: Toolset providing ``execute_action``.
        action: Name of the action to execute.
        params: Parameters forwarded to the action.

    Returns:
        A dict echoing ``action`` plus the ``"status"`` entry of the
        value returned by ``execute_action``.
    """
    outcome = composio.execute_action(action=action, params=params)
    status = outcome["status"]
    return {"action": action, "status": status}
Decision and reasoning -- framework selection and coverage evaluation
The orchestrator decides which frameworks to invoke based on query analysis, then evaluates coverage across all three.
@waxell.decision(name="choose_tool_framework",
                 options=["computer_use", "a2a", "composio", "all"])
async def choose_tool_framework(query: str, detected_frameworks: list) -> dict:
    """Pick which framework(s) the orchestrator should invoke.

    Args:
        query: Raw user query; checked case-insensitively for the
            substring ``"workflow"``.
        detected_frameworks: Framework names detected for the query.

    Returns:
        A dict with ``"chosen"`` and ``"reasoning"``. The all-frameworks
        branch also carries ``"confidence"``; the fallback branch does
        not include that key.
    """
    framework_count = len(detected_frameworks)
    spans_everything = framework_count >= 3 or "workflow" in query.lower()

    if spans_everything:
        return {
            "chosen": "all",
            "reasoning": "Query spans screen interaction, agent delegation, and tool execution",
            "confidence": 0.92,
        }

    # Exactly one detected framework is chosen directly; zero or two
    # detected frameworks fall back to "all".
    if framework_count == 1:
        chosen = detected_frameworks[0]
    else:
        chosen = "all"

    return {
        "chosen": chosen,
        "reasoning": f"Query targets {framework_count} framework(s)",
    }
@waxell.reasoning_dec(step="evaluate_workflow_coverage")
def evaluate_workflow_coverage(computer_use_results, a2a_results,
                               composio_results) -> dict:
    """Summarise how many actions each framework's child agent performed.

    Each argument is read with ``.get("total_actions", 0)``, so a
    result without that key counts as zero actions.

    Args:
        computer_use_results: Result mapping from the computer-use agent.
        a2a_results: Result mapping from the A2A agent.
        composio_results: Result mapping from the Composio agent.

    Returns:
        A dict with ``"thought"``, ``"evidence"`` (one line per
        framework) and ``"conclusion"`` strings.
    """
    cu_actions = computer_use_results.get("total_actions", 0)
    a2a_actions = a2a_results.get("total_actions", 0)
    composio_actions = composio_results.get("total_actions", 0)
    total_actions = sum((cu_actions, a2a_actions, composio_actions))

    evidence = [
        f"Computer Use: screenshot + click/type ({cu_actions} actions)",
        f"A2A: message + task delegation ({a2a_actions} actions)",
        f"Composio: discovery + execution ({composio_actions} actions)",
    ]

    return {
        "thought": f"Cross-platform workflow executed {total_actions} actions across 3 frameworks.",
        "evidence": evidence,
        "conclusion": f"Full coverage achieved with {total_actions} actions across all 3 frameworks.",
    }
Orchestrator -- coordinating three child agents
The parent agent runs preprocessing, framework selection, all three child agents, evaluation, and a synthesis LLM call -- producing 7 total tool calls across 3 frameworks.
@waxell.observe(agent_name="tool-use-orchestrator",
                workflow_name="tool-use-communication-pipeline")
async def run_tool_use_pipeline(query: str, dry_run: bool = False, waxell_ctx=None):
    """Drive the full pipeline: preprocess, decide, run children, score.

    This is an excerpt: the arguments of the synthesis LLM call are
    elided as ``...``, and ``dry_run`` / ``waxell_ctx`` are not
    referenced in the lines shown here.

    Args:
        query: User query forwarded to every stage.
        dry_run: Accepted by the signature; unused in this excerpt.
        waxell_ctx: Accepted by the signature; unused in this excerpt.
    """
    # Enrich the parent span before any work starts.
    waxell.tag("demo", "tool_use")
    waxell.tag("frameworks", "computer_use,a2a,composio")
    waxell.metadata(
        "child_agents",
        ["computer-use-agent", "a2a-agent", "composio-agent"],
    )

    # @step
    preprocessed = await preprocess_query(query)

    # @decision
    framework_choice = await choose_tool_framework(
        query=query,
        detected_frameworks=preprocessed["detected_frameworks"],
    )

    # Child @observe agents, awaited one after another.
    cu_result = await run_computer_use_agent(query=query)
    a2a_result = await run_a2a_agent(query=query)
    composio_result = await run_composio_agent(query=query)

    # @reasoning
    coverage = evaluate_workflow_coverage(
        computer_use_results=cu_result,
        a2a_results=a2a_result,
        composio_results=composio_result,
    )

    # Auto-instrumented synthesis call; arguments elided in this excerpt.
    response = await client.chat.completions.create(...)

    total_tool_calls = (
        cu_result["total_actions"]
        + a2a_result["total_actions"]
        + composio_result["total_actions"]
    )

    waxell.score("workflow_coverage", 1.0, comment="All 3 frameworks exercised")
    waxell.score(
        "total_tool_calls",
        float(total_tool_calls),
        data_type="numeric",
        comment="Total tool calls across all child agents",
    )
What this demonstrates
- @waxell.observe -- parent orchestrator with three child agents (computer-use, a2a, composio), auto-linked lineage
- @waxell.tool(tool_type="computer_use") -- records Anthropic computer use operations (screenshot, click, type)
- @waxell.tool(tool_type="a2a") -- records Google A2A inter-agent protocol calls (send_message, send_task, get_task)
- @waxell.tool(tool_type="composio") -- records Composio third-party tool operations (discover, execute)
- @waxell.tool(name="...") -- custom span names for clearer trace display
- @waxell.step_dec -- records preprocessing with framework detection logic
- @waxell.decision -- records framework selection with confidence score
- @waxell.reasoning_dec -- evaluates cross-framework workflow coverage
- waxell.score() -- attaches both float (workflow_coverage) and numeric (total_tool_calls) scores
- waxell.tag() / waxell.metadata() -- enriches spans with framework tags and child agent lists
- Auto-instrumented LLM call -- synthesis gpt-4o-mini call captured automatically
- Mock framework objects -- mock classes match exact wrapt targets for Computer Use, A2A, and Composio instrumentors
Run it
# Dry-run (no API keys needed)
cd dev/waxell-dev
python -m app.demos.tool_use_agent --dry-run
# Live (requires OPENAI_API_KEY, WAXELL_API_KEY, and WAXELL_API_URL)
python -m app.demos.tool_use_agent