Merged

Changes from all commits · 90 commits
a6ee4a4
Goal behavior fixes (#124)
daphne-cornelisse Nov 11, 2025
c75b549
Update drive.h
eugenevinitsky Nov 13, 2025
d130cad
Add mode to only control the self-driving car (SDC) (#130)
daphne-cornelisse Nov 13, 2025
c26b245
Merge pull request #129 from Emerge-Lab/eugenevinitsky-patch-1
eugenevinitsky Nov 14, 2025
fecbb2d
Fix incorrect obs dim in draw_agent_obs (#109)
eugenevinitsky Nov 15, 2025
0439aa5
Replace product distribution action space with joint distribution (#104)
eugenevinitsky Nov 16, 2025
97dcb3d
Replace default ent_coef and learning_rate hparams (#134)
daphne-cornelisse Nov 16, 2025
040d39d
Add new weights binary with joint action space. (#136)
daphne-cornelisse Nov 18, 2025
058adff
Add support for logging optional evals during training (#133)
daphne-cornelisse Nov 18, 2025
7317d4f
Test for ini parsing (python and C) (#116)
aje-valeo Nov 20, 2025
c697f17
Fix missing arg (#141)
daphne-cornelisse Nov 20, 2025
2d30fa3
Add WOSAC interaction + map metrics. Switch from np -> torch. (#138)
WaelDLZ Nov 21, 2025
f8021df
Multi map render support to wandb (#143)
mpragnay Nov 24, 2025
87033d0
Add mode for controlled experiments (#144)
daphne-cornelisse Nov 24, 2025
aef6501
Little optimizations to use less memory in interaction_features.py (#…
daphne-cornelisse Nov 24, 2025
8690940
Fix broken link
daphne-cornelisse Nov 24, 2025
9d6a311
Data processing script that works decent. (#150)
daphne-cornelisse Nov 25, 2025
99060ba
Pass `map_dir` to the env via `.ini` and enable evaluation on a diffe…
daphne-cornelisse Nov 25, 2025
6eaea31
Add sprites in headless rendering (#152)
daphne-cornelisse Nov 26, 2025
a6af21c
Faster file processing (#153)
nadarenator Nov 27, 2025
6ce4879
Add link to small clean eval dataset
daphne-cornelisse Nov 27, 2025
0eab9bd
Fix link typo
daphne-cornelisse Nov 27, 2025
225ef99
Gif for readme (#155)
daphne-cornelisse Nov 27, 2025
f44573e
Fix link?
daphne-cornelisse Nov 27, 2025
b11d5e1
Fix vertical spaces.
daphne-cornelisse Nov 27, 2025
9c8b017
Update README.md
daphne-cornelisse Nov 27, 2025
eed2e39
Several small improvements for release (#159)
daphne-cornelisse Nov 30, 2025
95ceedd
WIP changes (#156)
nadarenator Dec 1, 2025
9d249b9
Release note
daphne-cornelisse Dec 4, 2025
ff4f00c
Remove magic numbers in `drivenet.h`, set `MAX_AGENTS=32` by default …
daphne-cornelisse Dec 5, 2025
54affb0
Stable: Ensure all tests are passing (#168)
daphne-cornelisse Dec 6, 2025
a03e70a
Add option to zoom in on the map or show full map (#163)
Aditya-Gupta26 Dec 6, 2025
0cf5e3e
Add documentation (#170)
julianh65 Dec 8, 2025
1ce87a2
Add GitHub Actions workflow for docs deployment (#172)
julianh65 Dec 8, 2025
d03a69d
styling fixes (#173)
julianh65 Dec 9, 2025
217cd3b
Add clang format (#132)
aje-valeo Dec 12, 2025
9a4b09a
Add Sanity Command + Maps (#175)
julianh65 Dec 12, 2025
a8bce58
Documentation edits (#176)
daphne-cornelisse Dec 13, 2025
19b5eb6
Early environment resets based on agents' respawn status. (#167)
riccardosavorgnan Dec 13, 2025
43933b2
Speed up end-to-end training: 220K -> 320K on RTX 4080 by reducing # …
daphne-cornelisse Dec 14, 2025
0eeec10
Add pt. (#179)
daphne-cornelisse Dec 14, 2025
6fa0cbb
Docs edits (#178)
daphne-cornelisse Dec 15, 2025
712ac4f
There is a typo in torch.py
WaelDLZ Dec 15, 2025
76dcce1
Merge pull request #181 from Emerge-Lab/wbd/fix_a_typo_in_torchpy
eugenevinitsky Dec 15, 2025
57be883
Use num_maps for eval (#164)
WaelDLZ Dec 16, 2025
fae2b59
Fix small bug in `drive.c` and add binary weights cpt (#184)
daphne-cornelisse Dec 16, 2025
f349b24
Carla junction filter (#187)
mpragnay Dec 17, 2025
4900039
Working Carla Maps (#189)
mpragnay Dec 18, 2025
656d55a
collision fix (#192)
l1onh3art88 Dec 18, 2025
a033669
Fix Ego Speed Calculation (#166)
julianh65 Dec 19, 2025
273d8f2
Small bug fix that makes road edge not appear in agent view for jerk …
daphne-cornelisse Dec 19, 2025
685b3fd
add womd video (#195)
julianh65 Dec 20, 2025
6f0ca30
Add stop/remove collision behavior back (#169)
Aditya-Gupta26 Dec 20, 2025
61911a3
updated docs with multinode training cmd (#174)
mpragnay Dec 20, 2025
45588ca
Carla2d towns (#201)
mpragnay Dec 21, 2025
657281c
initial commit (#204)
julianh65 Dec 22, 2025
c0c5894
Fix goal resampling in Carla maps and make metrics suitable for resam…
daphne-cornelisse Dec 23, 2025
9a58142
Minor correction in resampling code (#183)
WaelDLZ Dec 23, 2025
b6ed82f
Allow human to drive with agents through classic and jerk dynamics mo…
daphne-cornelisse Dec 24, 2025
d4dabdb
Added WOSAC results on the 10k validation dataset (#185)
WaelDLZ Dec 25, 2025
459e875
Drive with agents in browser (#215)
daphne-cornelisse Dec 26, 2025
952069d
Fix demo (#217)
daphne-cornelisse Dec 26, 2025
7ca82c2
Do not randomly switch to another agent in FPV. (#219)
daphne-cornelisse Dec 27, 2025
38139bb
switch docs to mdbooks doc format (#218)
eugenevinitsky Dec 27, 2025
b89e157
Markdown edits and fix demo. (#221)
daphne-cornelisse Dec 28, 2025
bd20606
small fixes in the docs (#220)
eugenevinitsky Dec 28, 2025
07a7a1d
Release 2.0 (#214)
daphne-cornelisse Dec 30, 2025
cb12de1
Fix space and game files.
Dec 30, 2025
1d864db
Fix sup tags.
Dec 30, 2025
978d8a1
rebasing ada with pufferDrive 2
Jan 12, 2026
ce19279
Self play working
Jan 13, 2026
eb34531
Population play and self play rebased
Jan 14, 2026
ab2c29c
All features working
Jan 15, 2026
4d6136d
fixing co player features
Jan 15, 2026
64da48d
trying to pass tests
Jan 15, 2026
70b3148
fixing tests #2
Jan 15, 2026
299f4ff
fixing tests #3
Jan 15, 2026
ace6fee
attempting to fix tests #4
Jan 16, 2026
8f0eab0
attempting to fix tests #4
Jan 16, 2026
951062b
attempting to fix tests #4
Jan 16, 2026
ba472aa
fixing batch size > 1 bug
Jan 16, 2026
3082161
add back binary
m2kulkarni Jan 31, 2026
027bda6
Merge branch 'sync-upstream' of github.com:Emerge-Lab/Adaptive_Drivin…
m2kulkarni Jan 31, 2026
f289654
changed map dir
m2kulkarni Jan 31, 2026
4254f3f
moved maps
m2kulkarni Jan 31, 2026
1087d38
fix test API and config
m2kulkarni Jan 31, 2026
30a0636
Merge branch 'main' into sync-upstream
m2kulkarni Jan 31, 2026
0e829cf
Merge remote-tracking branch 'origin/main' into sync-upstream
m2kulkarni Jan 31, 2026
c657e25
fixing tests
m2kulkarni Feb 1, 2026
654970e
fix tests
m2kulkarni Feb 1, 2026
11 changes: 9 additions & 2 deletions .gitignore
@@ -161,13 +161,16 @@ pufferlib/ocean/impulse_wars/debug-*/
pufferlib/ocean/impulse_wars/release-*/
pufferlib/ocean/impulse_wars/benchmark/


# Ignore data files
data/
pufferlib/resources/drive/binaries/
pufferlib/resources/drive/binaries/*
pufferlib/resources/drive/binaries/training/
pufferlib/resources/drive/binaries/validation/

# But keep map_000.bin for the training test
!pufferlib/resources/drive/binaries/map_000.bin
!pufferlib/resources/drive/binaries/training/map_000.bin
pufferlib/resources/drive/sanity/sanity_binaries/

# Compiled drive binary in root
/drive
@@ -183,6 +186,10 @@ pufferlib/resources/drive/output_agent.gif
pufferlib/resources/drive/output.gif
# Local artifacts and outputs
artifacts/
# Local drive renders
pufferlib/resources/drive/output*.gif
emsdk/
docs/book/*
!docs/book/assets/
pufferlib/resources/drive/output*.mp4

# Local TODO tracking
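Gitignore negations are order- and directory-sensitive (git cannot re-include a file whose parent directory is itself excluded), so the `!...map_000.bin` re-include rules above are worth verifying locally. A minimal sketch, assuming it runs from the repository root with `git` on PATH:

```python
import subprocess

# Ask git which ignore rule (if any) matches each path.
# `git check-ignore -v` prints "source:line:pattern<TAB>path" for ignored
# paths and exits non-zero for paths that are not ignored.
paths = [
    "pufferlib/resources/drive/binaries/map_000.bin",
    "pufferlib/resources/drive/binaries/training/map_000.bin",
    "pufferlib/resources/drive/binaries/training/map_001.bin",
]
for path in paths:
    result = subprocess.run(
        ["git", "check-ignore", "-v", path],
        capture_output=True, text=True,
    )
    verdict = result.stdout.strip() if result.returncode == 0 else "not ignored"
    print(f"{path}: {verdict}")
```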
1 change: 1 addition & 0 deletions external/pyxodr
Submodule pyxodr added at cd4b83
135 changes: 68 additions & 67 deletions pufferlib/config/ocean/adaptive.ini
@@ -25,36 +25,38 @@ num_ego_agents = 512
action_type = discrete
; Options: classic, jerk
dynamics_model = classic
; Number of consecutive scenarios per episode (adaptive-specific)
k_scenarios = 2
reward_vehicle_collision = -0.5
reward_offroad_collision = -0.2
reward_ade = 0.0
reward_offroad_collision = -0.5
dt = 0.1
reward_goal = 1.0
reward_goal_post_respawn = 0.25
reward_goal_post_respawn = 0.25 # in case of reward conditioning, we scale the goal_weight by this number for post respawn
; Meters around goal to be considered "reached"
goal_radius = 2.0
; What to do when goal is reached. Options: 0:"respawn", 1:"generate_new_goals", 2:"stop"
; Max target speed in m/s for the agent to maintain towards the goal
goal_speed = 100.0
; What to do when the goal is reached. Options: 0:"respawn", 1:"generate_new_goals", 2:"stop"
goal_behavior = 0
; Determines the target distance to the new goal in the case of goal_behavior = generate_new_goals.
; Large numbers will select a goal point further away from the agent's current position.
goal_target_distance = 30.0
; Options: 0 - Ignore, 1 - Stop, 2 - Remove
collision_behavior = 0
; Options: 0 - Ignore, 1 - Stop, 2 - Remove
offroad_behavior = 0
; Number of steps before reset
scenario_length = 91
; Resample frequency = k_scenarios * scenario_length (adaptive-specific)
resample_frequency = 182
num_maps = 1000
; Which step of the trajectory to initialize the agents at upon reset
k_scenarios = 2
termination_mode = 1 # 0 - terminate at episode_length, 1 - terminate after all agents have been reset
map_dir = "resources/drive/binaries/training"
num_maps = 10000
; Determines which step of the trajectory to initialize the agents at upon reset
init_steps = 0
; Options: "control_vehicles", "control_agents", "control_tracks_to_predict"
; Options: "control_vehicles", "control_agents", "control_wosac", "control_sdc_only"
control_mode = "control_vehicles"
; Options: "created_all_valid", "create_only_controlled"
init_mode = "create_all_valid"
; train with co players
co_player_enabled = 1

co_player_enabled = True
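The config comments encode an invariant: `resample_frequency = k_scenarios * scenario_length`, i.e. 2 * 91 = 182 with the values in this diff. A minimal sketch of checking it, assuming the settings above live under an `[env]` section and the file is read with Python's `configparser` (the file mixes `;` and `#` comments, including inline ones, so both prefixes must be allowed explicitly):

```python
import configparser

# Parse adaptive.ini with both comment styles enabled.
cfg = configparser.ConfigParser(
    comment_prefixes=(";", "#"), inline_comment_prefixes=(";", "#")
)
cfg.read("pufferlib/config/ocean/adaptive.ini")

env = cfg["env"]  # assumed section name; not shown in this hunk
k_scenarios = env.getint("k_scenarios")          # 2
scenario_length = env.getint("scenario_length")  # 91
resample_frequency = env.getint("resample_frequency")

# Mirrors the comment "Resample frequency = k_scenarios * scenario_length".
assert resample_frequency == k_scenarios * scenario_length  # 2 * 91 == 182
```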

[env.conditioning]
; Options: "none", "reward", "entropy", "discount", "all"
@@ -71,10 +73,9 @@ discount_weight_lb = 0.80
discount_weight_ub = 0.98

[env.co_player_policy]
enabled = True
policy_name = Drive
rnn_name = Recurrent
policy_path = "experiments/puffer_drive_ewdjljwd.pt"
policy_path = "pufferlib/resources/drive/policies/varied_discount.pt"
input_size = 64
hidden_size = 256

@@ -87,39 +88,35 @@ hidden_size = 256
type = "all"
collision_weight_lb = -1.0
collision_weight_ub = 0.0
offroad_weight_lb = -0.4
offroad_weight_ub = 0.0
offroad_weight_lb = 0.0
offroad_weight_ub = -0.2
goal_weight_lb = 0.0
goal_weight_ub = 1.0
entropy_weight_lb = 0.0
entropy_weight_ub = 0.001
discount_weight_lb = 0.80
discount_weight_ub = 0.98

discount_weight_lb = 0.98
discount_weight_ub = 0.80

[train]
seed=42
total_timesteps = 2_000_000_000
# learning_rate = 0.02
# gamma = 0.985
anneal_lr = True
; Needs to be: num_agents * num_workers * BPTT horizon
batch_size = auto
; minibatch_size = 745472
; minibatch_multiplier = 512
; max_minibatch_size = 745472
minibatch_size = 372736
minibatch_multiplier = 256
max_minibatch_size = 372736
; BPTT horizon (overridden by pufferl.py for adaptive agents to k_scenarios * scenario_length)
bptt_horizon = 32
minibatch_size = 36400
max_minibatch_size = 36400
minibatch_multiplier = 400
bptt_horizon = 91
adam_beta1 = 0.9
adam_beta2 = 0.999
adam_eps = 1e-8
clip_coef = 0.2
ent_coef = 0.001
ent_coef = 0.005
gae_lambda = 0.95
gamma = 0.98
learning_rate = 0.001
learning_rate = 0.003
max_grad_norm = 1
prio_alpha = 0.8499999999999999
prio_beta0 = 0.8499999999999999
@@ -128,36 +125,40 @@ vf_clip_coef = 0.1999999999999999
vf_coef = 2
vtrace_c_clip = 1
vtrace_rho_clip = 1
checkpoint_interval = 1000
checkpoint_interval = 100
# Rendering options
render = True
render_interval = 1000
render_interval = 100
; If True, show exactly what the agent sees in agent observation
obs_only = True
; Show grid lines
show_grid = False
show_grid = True
; Draws lines from the ego agent to observed ORUs and road elements to show detection range
show_lasers = False
; Display human xy logs in the background
show_human_logs = True
; Options: str to path (e.g., "resources/drive/binaries/map_001.bin"), None
show_human_logs = False
; If True, zoom in on a part of the map. Otherwise, show full map
zoom_in = True
; Options: List[str to path], str to path (e.g., "resources/drive/training/binaries/map_001.bin"), None
render_map = none
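The new minibatch settings are internally consistent: `minibatch_size = minibatch_multiplier * bptt_horizon`, so each minibatch holds 400 whole 91-step scenario segments. A small arithmetic check (the `batch_size = auto` example uses illustrative agent/worker counts, not values from this block):

```python
# Sanity arithmetic for the [train] block above (values from this diff).
bptt_horizon = 91           # one full scenario per BPTT segment
minibatch_multiplier = 400  # segments per minibatch
minibatch_size = 36400

# Each minibatch is 400 whole 91-step scenario rollouts.
assert minibatch_size == minibatch_multiplier * bptt_horizon  # 400 * 91

# batch_size = auto is documented as num_agents * num_workers * bptt_horizon;
# e.g. 512 ego agents on a single worker would give 512 * 1 * 91 = 46_592.
num_ego_agents, num_workers = 512, 1  # example values, not from [train]
print(num_ego_agents * num_workers * bptt_horizon)
```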

[eval]
eval_interval = 1000
; Path to dataset used for evaluation
map_dir = "resources/drive/binaries/training"
; Evaluation will run on the first num_maps maps in the map_dir directory
num_maps = 20
backend = PufferEnv
# WOSAC (Waymo Open Sim Agents Challenge) evaluation settings
; WOSAC (Waymo Open Sim Agents Challenge) evaluation settings
; If True, enables evaluation on realism metrics each time we save a checkpoint
wosac_realism_eval = True
wosac_realism_eval = False
; Number of policy rollouts per scene
wosac_num_rollouts = 32
; When to start the simulation
wosac_init_steps = 10
; Total number of WOSAC agents to evaluate
wosac_num_agents = 256
; Control the tracks to predict
wosac_control_mode = "control_tracks_to_predict"
; Initialize from the tracks to predict
; Control everything valid at init in the scene
wosac_control_mode = "control_wosac"
; Create everything valid at init in the scene
wosac_init_mode = "create_all_valid"
; Stop when reaching the goal
wosac_goal_behavior = 2
@@ -167,24 +168,22 @@ wosac_sanity_check = False
; Only return aggregate results across all scenes
wosac_aggregate_results = True
; If True, enable human replay evaluation (pair policy-controlled agent with human replays)
human_replay_eval = True
human_replay_eval = False
; Control only the self-driving car
human_replay_control_mode = "control_sdc_only"
; This equals the number of scenarios, since we control one agent in each
human_replay_num_agents = 64
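A rough sense of the eval volume these settings imply, assuming each WOSAC rollout spans the remainder of a 91-step scenario after the init steps (a back-of-the-envelope sketch, not code from the repo):

```python
# Eval-volume arithmetic for the [eval] block above (values from this diff).
num_maps = 20              # eval runs on the first num_maps maps in map_dir
wosac_num_rollouts = 32    # policy rollouts per scene
scenario_length = 91       # from [env]
wosac_init_steps = 10      # simulation starts at step 10

rollouts = num_maps * wosac_num_rollouts
steps_per_rollout = scenario_length - wosac_init_steps
print(rollouts, steps_per_rollout, rollouts * steps_per_rollout)
# 640 rollouts of 81 simulated steps each ~= 51,840 env steps per WOSAC eval.
```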

[sweep.env.reward_vehicle_collision]
distribution = uniform
min = -0.5
max = 0.0
mean = -0.05
[sweep.train.learning_rate]
distribution = log_normal
min = 0.001
mean = 0.003
max = 0.005
scale = auto

[sweep.env.reward_offroad_collision]
distribution = uniform
min = -0.5
max = 0.0
mean = -0.05
[sweep.train.ent_coef]
distribution = log_normal
min = 0.001
mean = 0.005
max = 0.03
scale = auto
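The `[sweep.*]` blocks specify `log_normal` distributions by min/mean/max. A hypothetical sampler sketch, assuming `mean` is the median of the log-normal, `scale` is the sigma of the underlying normal, and min/max clip the draw (the actual sweep backend may parameterize this differently):

```python
import math
import random

def sample_log_normal(lo, mean, hi, scale=0.5):
    """Hypothetical sampler for a [sweep.*] log_normal spec."""
    draw = math.exp(random.gauss(math.log(mean), scale))
    return min(max(draw, lo), hi)

# [sweep.train.learning_rate]: min = 0.001, mean = 0.003, max = 0.005
print(sample_log_normal(0.001, 0.003, 0.005))
# [sweep.train.ent_coef]: min = 0.001, mean = 0.005, max = 0.03
print(sample_log_normal(0.001, 0.005, 0.03))
```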

[sweep.env.goal_radius]
@@ -194,16 +193,18 @@ max = 20.0
mean = 10.0
scale = auto

[sweep.env.reward_ade]
distribution = uniform
min = -0.1
max = 0.0
mean = -0.02
[sweep.train.gae_lambda]
distribution = log_normal
min = 0.95
mean = 0.98
max = 0.999
scale = auto

[sweep.env.reward_goal_post_respawn]
distribution = uniform
min = 0.0
max = 1.0
mean = 0.5
scale = auto
[controlled_exp.train.goal_speed]
values = [10, 20, 30, 3]

[controlled_exp.train.ent_coef]
values = [0.001, 0.005, 0.01]

[controlled_exp.train.seed]
values = [42, 55, 1]
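Unlike the sweep sections, the `[controlled_exp.*]` blocks enumerate explicit values. If the launcher takes the cross-product of the lists above (an assumption; it may pair them differently), that yields 4 * 3 * 3 = 36 runs:

```python
import itertools

# Cross-product of the controlled-experiment value lists above.
goal_speeds = [10, 20, 30, 3]
ent_coefs = [0.001, 0.005, 0.01]
seeds = [42, 55, 1]

runs = list(itertools.product(goal_speeds, ent_coefs, seeds))
print(len(runs))  # 36
for goal_speed, ent_coef, seed in runs[:3]:
    print(f"goal_speed={goal_speed} ent_coef={ent_coef} seed={seed}")
```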