Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
80 commits
Select commit Hold shift + click to select a range
87ee99b
add task for role assignment
patricebechard Dec 5, 2025
94a18bc
add role and case tasks for servicenow
patricebechard Dec 5, 2025
f6d5306
minor formatting
patricebechard Dec 5, 2025
0ad112c
add json and evaluators for dynamic guidance tasks
patricebechard Dec 8, 2025
c9757c5
update dynamic guidance tasks for servicenow
patricebechard Dec 8, 2025
cdbc3d2
bug fix
patricebechard Dec 8, 2025
a4ecb8e
rename data files, bug fixes
patricebechard Dec 9, 2025
2d49fd4
extend form tasks into workspace form tasks
marquezo Dec 9, 2025
9e45699
add incident tasks with custom goal
marquezo Dec 10, 2025
1036218
update tasks for ritm, interaction
patricebechard Dec 10, 2025
27e6828
add validators and teardown for three incident related tasks
patricebechard Dec 10, 2025
46e2cd4
add validator and teardown for user group task
patricebechard Dec 10, 2025
b8a7f7b
add license task
patricebechard Dec 10, 2025
134ca00
add change request and customer account tasks
patricebechard Dec 10, 2025
3fd8767
add navigation task with custom goal
patricebechard Dec 10, 2025
9c7bb09
add configs for tasks
patricebechard Dec 12, 2025
d663bd7
add new service catalog tasks
patricebechard Dec 12, 2025
604db3d
add __TASKS__ variable at the bottom of files
patricebechard Dec 12, 2025
c660f35
fix task configs files
patricebechard Dec 12, 2025
20783a4
fix double quote in f string issue
patricebechard Dec 12, 2025
3748ac8
fix issues with new service catalog tasks
patricebechard Dec 12, 2025
daf38d3
bug fixes
patricebechard Dec 12, 2025
37bdc29
bugfix role tasks
patricebechard Dec 12, 2025
8a93460
add samples
chenemil Dec 12, 2025
41b5feb
add samples
chenemil Dec 12, 2025
b5b88b3
add samples
chenemil Dec 12, 2025
4544688
add samples
chenemil Dec 12, 2025
0c83b9c
add samples
chenemil Dec 12, 2025
d90ed66
add transfer order tasks (not solid implementation yet)
marquezo Dec 11, 2025
243ff12
add create user group tasks and refactor the validation
marquezo Dec 12, 2025
f64230c
add samples
chenemil Dec 12, 2025
5890636
add samples
chenemil Dec 12, 2025
4b977a7
add samples
chenemil Dec 12, 2025
fb7a948
add samples
chenemil Dec 12, 2025
4464742
add samples
chenemil Dec 12, 2025
13e7b24
add samples
chenemil Dec 12, 2025
5ee7dfb
add samples
chenemil Dec 12, 2025
a42bb4e
add deactivate_user_group json, fix bugs in service_catalog
patricebechard Dec 12, 2025
4f6bd0f
update service catalog example and packaging data
patricebechard Dec 13, 2025
c5d9412
add samples
chenemil Dec 12, 2025
e65d72a
add samples
chenemil Dec 12, 2025
9fb6631
add samples
chenemil Dec 12, 2025
8b17328
add samples
chenemil Dec 12, 2025
6c67d98
add samples
chenemil Dec 12, 2025
6f9d937
add samples
chenemil Dec 12, 2025
f79b857
minor bug fixes
patricebechard Dec 14, 2025
f271ef8
bug fix (double quote in fstring)
patricebechard Dec 14, 2025
712bf6a
fix logic in role tasks validation and teardown
patricebechard Dec 14, 2025
54afb5d
add more task examples to assign roles tasks
patricebechard Dec 14, 2025
f018f0d
fix validator for iphone ordering when replacement is 'yes'
patricebechard Dec 14, 2025
fe71de6
fix teardown for close_case
patricebechard Dec 15, 2025
c21d519
fix resolve incident teardown function
patricebechard Dec 15, 2025
f234d58
remove call to _get_form as it causes issues in workspace forms
marquezo Dec 15, 2025
fb0b77c
changes to validators and data files for various tasks
patricebechard Dec 16, 2025
1622dde
remove base tasks from workarena from dynamic guidance tasks
patricebechard Dec 16, 2025
3d595b8
minor bugfix for validator in order iphone
patricebechard Dec 17, 2025
28bcdaa
add additional assignee
Feb 6, 2026
e8129d6
order iphone
Feb 6, 2026
e7e20f0
change ritm
Feb 9, 2026
8c1606c
resolve incident
Feb 9, 2026
180927a
change chg approver
Feb 9, 2026
5443a73
order misc hardware
Feb 9, 2026
1bb1c03
order misc hardware with business justification
Feb 9, 2026
b25261d
order reset password
Feb 9, 2026
7e3f1b9
Merge branch 'main' into dynamic-guidance-tasks
patricebechard Feb 9, 2026
b9e7da4
make startup more robust with retries for login and for start page
patricebechard Feb 4, 2026
7c24c1f
formatting
patricebechard Feb 4, 2026
3dc154b
fix navigation validation
patricebechard Feb 9, 2026
9829159
bug fixes, add args and kwargs handling, change request calls to tabl…
patricebechard Feb 13, 2026
fcc67f7
early exit order tasks when agent fails to correctly create order
patricebechard Feb 16, 2026
5b31a69
make workspace tasks more robust
patricebechard Feb 16, 2026
0fdfbd5
fix task configs for order misc hardware
patricebechard Feb 16, 2026
78a792a
fix business justification and external address validation for order …
patricebechard Feb 16, 2026
7291c0c
fix corrupted samples
patricebechard Feb 16, 2026
341a40e
fix sample for get_case_status
patricebechard Feb 16, 2026
d1f437d
fix bugs in data for change_ritm_status
patricebechard Feb 16, 2026
d567c76
remove broken samples for update_ritm_quantity
patricebechard Feb 16, 2026
def6712
change deactivate user group validator
patricebechard Feb 17, 2026
4b292c3
revert changes to add_additional_assignee_to_incident data
patricebechard Feb 17, 2026
0468a0e
add update incident task
patricebechard Feb 17, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 52 additions & 1 deletion src/browsergym/workarena/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,22 @@
from .tasks.compositional.update_task import __TASKS__ as UPDATE_TASKS
from .tasks.dashboard import __TASKS__ as DASHBOARD_TASKS
from .tasks.form import __TASKS__ as FORM_TASKS
from .tasks.form_workspace import __TASKS__ as FORM_WORKSPACE_TASKS
from .tasks.knowledge import __TASKS__ as KB_TASKS
from .tasks.list import __TASKS__ as LIST_TASKS
from .tasks.navigation import __TASKS__ as NAVIGATION_TASKS
from .tasks.navigation import __DYNAMIC_GUIDANCE_TASKS__ as NAVIGATION_DYNAMIC_GUIDANCE_TASKS
from .tasks.service_catalog import __TASKS__ as SERVICE_CATALOG_TASKS
from .tasks.service_catalog import __DYNAMIC_GUIDANCE_TASKS__ as SERVICE_CATALOG_DYNAMIC_GUIDANCE_TASKS
from .tasks.case import __TASKS__ as CASE_TASKS
from .tasks.role import __TASKS__ as ROLE_TASKS
from .tasks.interaction import __TASKS__ as INTERACTION_TASKS
from .tasks.change_request import __TASKS__ as CHANGE_REQUEST_TASKS
from .tasks.customer_account import __TASKS__ as CUSTOMER_ACCOUNT_TASKS
from .tasks.incident import __TASKS__ as INCIDENT_TASKS
from .tasks.license import __TASKS__ as LICENSE_TASKS
from .tasks.ritm import __TASKS__ as RITM_TASKS
from .tasks.user_group import __TASKS__ as USER_GROUP_TASKS
from .tasks.compositional.base import CompositionalTask

ALL_WORKARENA_TASKS = [
Expand All @@ -55,6 +67,21 @@
and not issubclass(task, CompositionalBuildingBlockTask)
]

# Flat list of every dynamic guidance task class. The per-module task lists
# are concatenated in the order given below, and that order is preserved in
# the resulting list.
ALL_WORKARENA_DYNAMIC_GUIDANCE_TASKS = [
    task_class
    for task_group in (
        SERVICE_CATALOG_DYNAMIC_GUIDANCE_TASKS,
        NAVIGATION_DYNAMIC_GUIDANCE_TASKS,
        CASE_TASKS,
        ROLE_TASKS,
        INTERACTION_TASKS,
        CHANGE_REQUEST_TASKS,
        CUSTOMER_ACCOUNT_TASKS,
        INCIDENT_TASKS,
        LICENSE_TASKS,
        RITM_TASKS,
        FORM_WORKSPACE_TASKS,
        USER_GROUP_TASKS,
    )
    for task_class in task_group
]


# register the WorkArena benchmark
for task in ALL_WORKARENA_TASKS:
Expand All @@ -63,6 +90,13 @@
task,
)

# Register every dynamic guidance task class under the id reported by its
# own get_task_id().
for task in ALL_WORKARENA_DYNAMIC_GUIDANCE_TASKS:
    register_task(task.get_task_id(), task)

workarena_tasks_all = [task_class.get_task_id() for task_class in ALL_WORKARENA_TASKS]
workarena_tasks_atomic = [task_class.get_task_id() for task_class in ATOMIC_TASKS]

Expand Down Expand Up @@ -100,6 +134,17 @@
"workarena.servicenow.multi-chart-value-retrieval": "dashboard",
"workarena.servicenow.single-chart-value-retrieval": "dashboard",
"workarena.servicenow.single-chart-min-max-retrieval": "dashboard",
# dynamic guidance tasks
"workarena.servicenow.order-iphone": "service catalog",
"workarena.servicenow.order-mobile-phone": "service catalog",
"workarena.servicenow.order-software": "service catalog",
"workarena.servicenow.order-software-access": "service catalog",
"workarena.servicenow.order-reset-password": "service catalog",
"workarena.servicenow.order-packaging-and-shipping": "service catalog",
"workarena.servicenow.order-paper-supplies": "service catalog",
"workarena.servicenow.order-misc-hardware": "service catalog",
"workarena.servicenow.order-misc-hardware-with-business-justification": "service catalog",
"workarena.servicenow.order-reset-password": "service catalog",
}


Expand Down Expand Up @@ -129,7 +174,7 @@ def get_all_tasks_agents(filter="l2", meta_seed=42, n_seed_l1=10, is_agent_curri
raise Exception("Unsupported filter used.")
if len(filter) == 1:
level = filter[0]
if level not in ["l1", "l2", "l3"]:
if level not in ["l1", "l2", "l3", "dg"]:
raise Exception("Unsupported category of tasks.")
else:
rng = np.random.RandomState(meta_seed)
Expand All @@ -139,6 +184,12 @@ def get_all_tasks_agents(filter="l2", meta_seed=42, n_seed_l1=10, is_agent_curri
all_task_tuples.append((task, int(seed)))

return all_task_tuples
elif level == "dg":
for task in ALL_WORKARENA_DYNAMIC_GUIDANCE_TASKS:
for seed in rng.randint(0, 1000, n_seed_l1):
all_task_tuples.append((task, int(seed)))

return all_task_tuples

if len(filter) == 2:
level, filter_category = filter[0], filter[1]
Expand Down
2 changes: 2 additions & 0 deletions src/browsergym/workarena/api/requests.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,10 +125,12 @@ def get_request_items(instance: SNowInstance, sys_id: str) -> list[dict]:
"sysparm_fields": ",".join(
[
"sys_id",
"cat_item",
"short_description",
"quantity",
]
),
"sysparm_display_value": "true",
},
)["result"]

Expand Down
7 changes: 5 additions & 2 deletions src/browsergym/workarena/api/utils.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
import requests

import json
from ..instance import SNowInstance

import os
from requests.exceptions import HTTPError
from time import sleep

# ServiceNow API configuration
# Base HTTP headers for ServiceNow API requests (JSON in, JSON out).
SNOW_API_HEADERS = {"Content-Type": "application/json", "Accept": "application/json"}

# Optional extra headers can be supplied through the EXTRA_HTTP_HEADERS
# environment variable. Its value is parsed as JSON and merged into the base
# headers, so entries with the same name override the defaults above.
# The variable is read once so the emptiness check and the parsed value
# cannot disagree. Unset or empty means "no extra headers".
_extra_http_headers = os.environ.get("EXTRA_HTTP_HEADERS")
if _extra_http_headers:
    SNOW_API_HEADERS.update(json.loads(_extra_http_headers))


def table_api_call(
instance: SNowInstance,
Expand Down
108 changes: 107 additions & 1 deletion src/browsergym/workarena/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

# ServiceNow configuration
SNOW_DATA_LOOKBACK_MINUTES = 5
SNOW_BROWSER_TIMEOUT = 30000 # Milliseconds
SNOW_BROWSER_TIMEOUT = 60000 # Milliseconds
SNOW_JS_UTILS_FILEPATH = str(resources.files(utils).joinpath("js_utils.js"))
SNOW_SUPPORTED_RELEASES = ["washingtondc"]

Expand All @@ -19,6 +19,7 @@

# Path to the Menu navigation task configuration
ALL_MENU_PATH = str(resources.files(data_files).joinpath("task_configs/all_menu.json"))
ALL_MENU_CUSTOM_GOAL_PATH = str(resources.files(data_files).joinpath("task_configs/go_to_page.json"))

# Path to the dashboard/report retrieval task configurations
DASHBOARD_RETRIEVAL_MINMAX_CONFIG_PATH = str(
Expand Down Expand Up @@ -233,3 +234,108 @@
# Report date filter patch flag
REPORT_PATCH_FLAG = "WORKARENA_DATE_FILTER_PATCH"
REPORT_FILTER_PROPERTY = "workarena.report.filter.config"


def _task_config_path(filename):
    """Return the path, as a string, of *filename* inside the packaged ``task_configs`` directory."""
    return str(resources.files(data_files).joinpath(f"task_configs/{filename}"))


# Case tasks
GET_CASE_STATUS_CONFIG_PATH = _task_config_path("get_case_status.json")
GET_CASE_RESOLUTION_NOTES_CONFIG_PATH = _task_config_path("get_case_resnotes.json")
CLOSE_CASE_CONFIG_PATH = _task_config_path("close_case.json")
FIND_ASSET_UNDER_ACCOUNT_CREATE_CASE_CONFIG_PATH = _task_config_path(
    "find_asset_under_account_create_case.json"
)

# Role tasks
ASSIGN_ROLE_TO_USER_ADMIN_CONFIG_PATH = _task_config_path("assign_role_to_user_admin.json")
ASSIGN_ROLES_TO_USER_EXPLICIT_CONFIG_PATH = _task_config_path(
    "assign_roles_to_user_explicit.json"
)
ASSIGN_ROLES_TO_USER_IMPLICIT_CONFIG_PATH = _task_config_path(
    "assign_roles_to_user_implicit.json"
)

# License tasks
GET_NUMBER_LICENSES_CONFIG_PATH = _task_config_path("get_number_licenses.json")

# Change Request tasks
CHANGE_CHANGE_REQUEST_APPROVER_CONFIG_PATH = _task_config_path("change_chg_approver.json")

# Incident tasks
ADD_ADDITIONAL_ASSIGNEE_TO_INCIDENT_CONFIG_PATH = _task_config_path(
    "add_additional_assignee_to_incident.json"
)
RESOLVE_INCIDENT_CONFIG_PATH = _task_config_path("resolve_incident.json")
UPDATE_INCIDENT_CONFIG_PATH = _task_config_path("update_incident.json")

# Request Item tasks
CHANGE_RITM_STATUS_CONFIG_PATH = _task_config_path("change_ritm_status.json")
UPDATE_RITM_QUANTITY_CONFIG_PATH = _task_config_path("update_ritm_quantity.json")

# Interaction tasks
CREATE_INTERACTION_CONFIG_PATH = _task_config_path("create_interaction.json")

# Customer account tasks
FIND_CUSTOMER_ACCOUNT_MANAGER_CONFIG_PATH = _task_config_path(
    "find_customer_account_manager.json"
)

# User group tasks
DEACTIVATE_USER_GROUP_CONFIG_PATH = _task_config_path("deactivate_user_group.json")
CREATE_USER_GROUP_CONFIG_PATH = _task_config_path("create_user_group.json")
CREATE_USER_GROUP_ADD_USERS_CONFIG_PATH = _task_config_path("create_user_group_add_users.json")

# Service catalog tasks (dynamic guidance)
ORDER_IPHONE_TASK_CONFIG_PATH = _task_config_path("order_iphone.json")
ORDER_MOBILE_PHONE_TASK_CONFIG_PATH = _task_config_path("order_mobile_phone.json")
ORDER_MISC_HARDWARE_TASK_CONFIG_PATH = _task_config_path("order_misc_hardware.json")
ORDER_MISC_HARDWARE_WITH_BUSINESS_JUSTIFICATION_TASK_CONFIG_PATH = _task_config_path(
    "order_misc_hardware_with_business_justification.json"
)
ORDER_PACKAGING_AND_SHIPPING_TASK_CONFIG_PATH = _task_config_path(
    "order_packaging_and_shipping.json"
)
ORDER_RESET_PASSWORD_TASK_CONFIG_PATH = _task_config_path("order_reset_password.json")
# NOTE: the file name is "order_paper_and_supplies", not "order_paper_supplies".
ORDER_PAPER_SUPPLIES_TASK_CONFIG_PATH = _task_config_path("order_paper_and_supplies.json")
ORDER_SOFTWARE_TASK_CONFIG_PATH = _task_config_path("order_software.json")
ORDER_SOFTWARE_ACCESS_TASK_CONFIG_PATH = _task_config_path("order_software_access.json")
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
[
{
"additional_assignee_list": "Elmo Gabouer",
"incident_number": "INC0000002",
"goal": "Add Elmo Gabouer as an additional assignee on INC0000002"
},
{
"additional_assignee_list": "Pat Hoshaw",
"incident_number": "INC0000016",
"goal": "Add Pat Hoshaw as an additional assignee on INC0000016"
},
{
"additional_assignee_list": "Norman Betance",
"incident_number": "INC0000020",
"goal": "Add Norman Betance as an additional assignee on INC0000020"
},
{
"additional_assignee_list": "Chris Walls",
"incident_number": "INC0000046",
"goal": "Add Chris Walls as an additional assignee on INC0000046"
},
{
"additional_assignee_list": "Olivia Jenkins",
"incident_number": "INC0000047",
"goal": "Add Olivia Jenkins as an additional assignee on INC0000047"
}
]
Loading
Loading