Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion experiments/run_osworld.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def get_task_ids() -> set[str]:
def main():
n_jobs = 4
use_vmware = True
- relaunch = True
+ relaunch = False
agent_args = [
OSWORLD_CLAUDE,
# OSWORLD_OAI # performs poorly.
Expand Down
9 changes: 3 additions & 6 deletions src/agentlab/agents/tool_use_agent/tool_use_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,21 +7,18 @@
from typing import Any

import bgym
import numpy as np
import pandas as pd
from bgym import Benchmark as BgymBenchmark
from browsergym.core.observation import extract_screenshot
from browsergym.utils.obs import (
flatten_axtree_to_str,
flatten_dom_to_str,
overlay_som,
prune_html,
)
from PIL import Image

from agentlab.agents import agent_utils
from agentlab.benchmarks.abstract_env import AbstractBenchmark as AgentLabBenchmark
from bgym import Benchmark as BgymBenchmark
from agentlab.agents.agent_args import AgentArgs
from agentlab.benchmarks.abstract_env import AbstractBenchmark as AgentLabBenchmark
from agentlab.benchmarks.osworld import OSWorldActionSet
from agentlab.llm.base_api import BaseModelArgs
from agentlab.llm.llm_utils import image_to_png_base64_url
Expand Down Expand Up @@ -629,7 +626,7 @@ def get_action(self, obs: Any) -> float:
)

OSWORLD_OAI = ToolUseAgentArgs(
- model_args=OPENAI_MODEL_CONFIG,
+ model_args=GPT_4_1_MINI,
config=PromptConfig(
tag_screenshot=True,
goal=Goal(goal_as_system_msg=True),
Expand Down