-
Notifications
You must be signed in to change notification settings - Fork 131
Expand file tree
/
Copy pathswebench_multi_tools.yaml
More file actions
158 lines (134 loc) · 6.74 KB
/
swebench_multi_tools.yaml
File metadata and controls
158 lines (134 loc) · 6.74 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
# SWE-bench wrapper configuration for OpenHands
# Base SWE agent server entry. The innermost mapping is anchored as
# `swe_agents_config` so that `swe_agents_val` and `swe_agents_train` can pull
# in the same settings through a merge key.
swe_agents:
  responses_api_agents:
    swe_agents: &swe_agents_config
      entrypoint: app.py
      domain: coding
      description: Software engineering tasks with OpenHands agent harness.
      value: Improve agentic software engineering capabilities.

      # Agent framework configuration
      agent_framework: openhands
      agent_config: responses_api_agents/swe_agents/configs/oh_config.toml
      agent_max_turns: 100
      agent_framework_repo: https://github.com/sdevare-nv/nv-OpenHands.git
      agent_framework_commit: 6a5d7571d5e9a5ca4586dad62da97a89f8c07084 # pragma: allowlist secret

      # Container configuration
      # NOTE(review): `???` looks like the loader's "value must be supplied"
      # marker (OmegaConf style) - confirm against the config loader.
      container_formatter: ???
      container_folder_path: null
      # NOTE(review): timeout units are not stated here; presumably seconds.
      swebench_agent_timeout: 1800
      swebench_tests_timeout: 900
      apptainer_memory_limit_mb: 32768
      command_exec_timeout: 300

      dataset_path: ???

      # One entry per prompt/agent-class combination. The first four entries
      # use the prompts named after an agent (codex, opencode, terminus,
      # openhands) with `diversify_tool_names: false`; the remaining eleven
      # use strategy-named prompts with `diversify_tool_names: true`.
      agent_prompt_overrides:
        # Codex agent
        - user_prompt_template: responses_api_agents/swe_agents/prompts/codex/user_prompt.j2
          system_prompt_template: responses_api_agents/swe_agents/prompts/codex/system_prompt.j2
          agent_cls: CodexAgent
          diversify_tool_names: false
        # OpenCode agent
        - user_prompt_template: responses_api_agents/swe_agents/prompts/opencode/user_prompt.j2
          system_prompt_template: responses_api_agents/swe_agents/prompts/opencode/system_prompt.j2
          agent_cls: OpenCodeAgent
          diversify_tool_names: false
        # Terminus2 agent
        - user_prompt_template: responses_api_agents/swe_agents/prompts/terminus/user_prompt.j2
          system_prompt_template: responses_api_agents/swe_agents/prompts/terminus/system_prompt.j2
          agent_cls: Terminus2Agent
          diversify_tool_names: false
        # CodeAct agent
        - user_prompt_template: responses_api_agents/swe_agents/prompts/openhands/user_prompt.j2
          system_prompt_template: responses_api_agents/swe_agents/prompts/openhands/system_prompt.j2
          agent_cls: CodeActAgent
          diversify_tool_names: false
        # Plan and execute agent - CodeActAgent
        - user_prompt_template: responses_api_agents/swe_agents/prompts/plan_and_execute/user_prompt.j2
          system_prompt_template: responses_api_agents/swe_agents/prompts/plan_and_execute/system_prompt.j2
          agent_cls: CodeActAgent
          diversify_tool_names: true
        # Explore plan execute agent - OpenCodeAgent
        - user_prompt_template: responses_api_agents/swe_agents/prompts/explore_plan_execute/user_prompt.j2
          system_prompt_template: responses_api_agents/swe_agents/prompts/explore_plan_execute/system_prompt.j2
          agent_cls: OpenCodeAgent
          diversify_tool_names: true
        # Minimalist agent - CodeActAgent
        - user_prompt_template: responses_api_agents/swe_agents/prompts/minimalist/user_prompt.j2
          system_prompt_template: responses_api_agents/swe_agents/prompts/minimalist/system_prompt.j2
          agent_cls: CodeActAgent
          diversify_tool_names: true
        # Test driven agent - CodeActAgent
        - user_prompt_template: responses_api_agents/swe_agents/prompts/test_driven/user_prompt.j2
          system_prompt_template: responses_api_agents/swe_agents/prompts/test_driven/system_prompt.j2
          agent_cls: CodeActAgent
          diversify_tool_names: true
        # Hypothesis driven agent - CodexAgent
        - user_prompt_template: responses_api_agents/swe_agents/prompts/hypothesis_driven/user_prompt.j2
          system_prompt_template: responses_api_agents/swe_agents/prompts/hypothesis_driven/system_prompt.j2
          agent_cls: CodexAgent
          diversify_tool_names: true
        # Incremental agent - CodexAgent
        - user_prompt_template: responses_api_agents/swe_agents/prompts/incremental/user_prompt.j2
          system_prompt_template: responses_api_agents/swe_agents/prompts/incremental/system_prompt.j2
          agent_cls: CodexAgent
          diversify_tool_names: true
        # Root cause agent - OpenCodeAgent
        - user_prompt_template: responses_api_agents/swe_agents/prompts/root_cause/user_prompt.j2
          system_prompt_template: responses_api_agents/swe_agents/prompts/root_cause/system_prompt.j2
          agent_cls: OpenCodeAgent
          diversify_tool_names: true
        # Divide and conquer agent - OpenCodeAgent
        - user_prompt_template: responses_api_agents/swe_agents/prompts/divide_and_conquer/user_prompt.j2
          system_prompt_template: responses_api_agents/swe_agents/prompts/divide_and_conquer/system_prompt.j2
          agent_cls: OpenCodeAgent
          diversify_tool_names: true
        # Breadth first agent - CodeActAgent
        - user_prompt_template: responses_api_agents/swe_agents/prompts/breadth_first/user_prompt.j2
          system_prompt_template: responses_api_agents/swe_agents/prompts/breadth_first/system_prompt.j2
          agent_cls: CodeActAgent
          diversify_tool_names: true
        # Surgical agent - OpenCodeAgent
        - user_prompt_template: responses_api_agents/swe_agents/prompts/surgical/user_prompt.j2
          system_prompt_template: responses_api_agents/swe_agents/prompts/surgical/system_prompt.j2
          agent_cls: OpenCodeAgent
          diversify_tool_names: true
        # Verify first agent - CodexAgent
        - user_prompt_template: responses_api_agents/swe_agents/prompts/verify_first/user_prompt.j2
          system_prompt_template: responses_api_agents/swe_agents/prompts/verify_first/system_prompt.j2
          agent_cls: CodexAgent
          diversify_tool_names: true

      # Optional model server reference
      model_server:
        name: policy_model  # openai_model
        type: responses_api_models

      datasets:
        # Training dataset
        - name: train
          type: train
          jsonl_fpath: responses_api_agents/swe_agents/data/swegym_for_sweagent_and_openhands.jsonl
          gitlab_identifier:
            dataset_name: swegym_for_sweagent_and_openhands
            # Quoted so a future bump such as 0.1 cannot be read as a float.
            version: '0.0.2'
            artifact_fpath: swegym-converted.jsonl
          license: Apache 2.0
        # Validation dataset
        - name: validation
          type: validation
          jsonl_fpath: responses_api_agents/swe_agents/data/swebench_verified_for_sweagent_and_openhands.jsonl
          gitlab_identifier:
            dataset_name: swebench_verified_for_sweagent_and_openhands
            version: '0.0.1'
            artifact_fpath: swebench_verified_for_sweagent_and_openhands.jsonl
          license: TBD
        # Example dataset for quick testing
        - name: example
          type: example
          jsonl_fpath: responses_api_agents/swe_agents/data/example.jsonl
# `swe_agents_val` server entry: merges the anchored `swe_agents_config`
# mapping with no overrides. The merge is shallow, so any key added beside
# `<<` replaces the whole corresponding value from the base mapping.
swe_agents_val:
  responses_api_agents:
    swe_agents:
      <<: *swe_agents_config
# `swe_agents_train` server entry: merges the anchored `swe_agents_config`
# mapping with no overrides. The merge is shallow, so any key added beside
# `<<` replaces the whole corresponding value from the base mapping.
swe_agents_train:
  responses_api_agents:
    swe_agents:
      <<: *swe_agents_config