Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
56 commits
Select commit Hold shift + click to select a range
f190996
initial race fixes
FinlaySanders Jun 22, 2026
3b1c32f
render changes
FinlaySanders Jun 22, 2026
567139b
remove eval vis hack
FinlaySanders Jun 23, 2026
327f05b
race improvements
FinlaySanders Jun 23, 2026
cbcca4e
better racing perf
FinlaySanders Jun 23, 2026
600150a
initial multi-task setup
FinlaySanders Jun 23, 2026
b155fc4
better stats layout
FinlaySanders Jun 23, 2026
9b8396d
better racing perf metric
FinlaySanders Jun 23, 2026
19afaee
stats ordering
FinlaySanders Jun 23, 2026
1e95a7f
multi task cleanup
FinlaySanders Jun 23, 2026
d8c8377
separate omega hypers improves perf
FinlaySanders Jun 23, 2026
d74d75f
experimental onehot task obs
FinlaySanders Jun 23, 2026
4783b97
update sweep config + metric
FinlaySanders Jun 23, 2026
ab71117
log layout
FinlaySanders Jun 23, 2026
1129279
rm
FinlaySanders Jun 23, 2026
d585338
fix sweep config
FinlaySanders Jun 23, 2026
eff81ce
more drones in the demo
FinlaySanders Jun 23, 2026
f8fb324
initial sphere task
FinlaySanders Jun 23, 2026
399441c
small fixes
FinlaySanders Jun 24, 2026
786a06e
initial v-table removal
FinlaySanders Jun 24, 2026
26cefd6
much cleaner dispatch
FinlaySanders Jun 24, 2026
11efa68
hover cleanup
FinlaySanders Jun 24, 2026
d005f0a
add cube task
FinlaySanders Jun 24, 2026
9de124f
config cleanup
FinlaySanders Jun 24, 2026
15e6bc9
flag task
FinlaySanders Jun 24, 2026
7e65a89
binding cleanup
FinlaySanders Jun 24, 2026
9dee0f9
cleaner race tracks
FinlaySanders Jun 24, 2026
b70edae
remove unused rewards
FinlaySanders Jun 24, 2026
5a4f034
small cleanup
FinlaySanders Jun 24, 2026
f2df2c5
random starting race gate
FinlaySanders Jun 24, 2026
01950a3
closed loop race tracks
FinlaySanders Jun 24, 2026
9ba7a5b
per task horizon
FinlaySanders Jun 24, 2026
96a2628
remove duplicate demo code
FinlaySanders Jun 24, 2026
e759cba
inline fn
FinlaySanders Jun 24, 2026
29844d4
weights bin
FinlaySanders Jun 24, 2026
3fd650f
3d model
FinlaySanders Jun 24, 2026
989caaf
c demo render speed fix
FinlaySanders Jun 24, 2026
75ffe56
race default in demo
FinlaySanders Jun 24, 2026
a167ff2
tricky logging fix
FinlaySanders Jun 25, 2026
eb99c3f
simple adr
FinlaySanders Jun 25, 2026
2c594da
dr floor
FinlaySanders Jun 26, 2026
f27413d
good hover config
FinlaySanders Jun 26, 2026
5739b09
action pen
FinlaySanders Jun 26, 2026
2cd89d8
vel pen
FinlaySanders Jun 26, 2026
e8d74dc
good sim2real config
FinlaySanders Jun 26, 2026
8ae04a3
sim2real config
FinlaySanders Jun 27, 2026
bc9cbe8
experimental hover rew changes
FinlaySanders Jun 27, 2026
e2c5796
minimal sweepable config
FinlaySanders Jun 27, 2026
6b7a0a4
conf
FinlaySanders Jun 27, 2026
35d54eb
conf
FinlaySanders Jun 27, 2026
fbe5d32
conf
FinlaySanders Jun 27, 2026
635d66b
remove adr
FinlaySanders Jun 27, 2026
f7baddf
cleanup
FinlaySanders Jun 27, 2026
322e333
sweep cleanup
FinlaySanders Jun 27, 2026
853985b
enable race
FinlaySanders Jun 27, 2026
d9c0f8a
split alpha dist
FinlaySanders Jun 27, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
128 changes: 67 additions & 61 deletions config/drone.ini
Original file line number Diff line number Diff line change
Expand Up @@ -7,109 +7,115 @@ num_buffers = 8
num_threads = 1

[env]
task = 0
num_drones = 64
max_rings = 10

# shared
alpha_dist = 1.4743813950542852
# multi-task step fractions
hover_frac = 1.0
race_frac = 1.0
sphere_frac = 0.0
cube_frac = 0.0
flag_frac = 0.0

# domain randomisation
dr = 0.05

# shared rewards
alpha_vel = 0.0
alpha_omega = 0.0
alpha_action = 0.0

# hover
alpha_hover = 0.04172693102535828
alpha_shaping = 2.4227498935775116
alpha_omega = 0.003412692849239442
hover_target_dist = 5
hover_dist = 0.022635883121253756
hover_vel = 0.011167386817995822
hover_omega = 0.04019810650173936
sphere_radius = 4.0
hover_target_dist = 2
hover_horizon = 1024
alpha_hover = 0.058713089382793605
hover_alpha_dist = 0.5936050283315333

# race
max_rings = 10
race_horizon = 2048
ring_reward = 1.0
collision_penalty = 0.0
time_penalty = 0.0
race_alpha_dist = 0.5936050283315333

[policy]
expansion_factor = 1
hidden_size = 64
num_layers = 2
hidden_size = 32
num_layers = 1

[train]
anneal_lr = 1
beta1 = 0.9605091218915885
beta2 = 0.9975342514897555
clip_coef = 0.2792041834527874
ent_coef = 0.009614464430623536
eps = 3.890998066747521e-12
gae_lambda = 0.8397140951280606
gamma = 0.9827341882497986
beta1 = 0.9205127440978271
beta2 = 0.9999599984257372
clip_coef = 0.24538079381694416
ent_coef = 5.3094837057682964e-05
eps = 4.831444291739578e-13
gae_lambda = 0.9861995160361671
gamma = 0.9891068682793783
gpus = 1
horizon = 64
learning_rate = 0.0036073110182039277
horizon = 128
learning_rate = 0.0037281345425369313
max_grad_norm = 0.1
min_lr_ratio = 0
minibatch_size = 4096
prio_alpha = 0.5664372304116252
prio_beta0 = 1
replay_ratio = 2.321476115575771
prio_alpha = 0.35078354102088527
prio_beta0 = 0.7847372909663064
replay_ratio = 3.0963958152520537
seed = 42
total_timesteps = 4.6927025e+07
vf_clip_coef = 5
vf_coef = 5
vtrace_c_clip = 5
vtrace_rho_clip = 3.2646604263658587
total_timesteps = 50_000_000
vf_clip_coef = 0.7067248245848509
vf_coef = 2.371511794283843
vtrace_c_clip = 4.84687280649928
vtrace_rho_clip = 1.7437113241448157

[sweep]
metric = perf

[sweep.train.total_timesteps]
distribution = log_normal
min = 3e7
max = 2e8
mean = 8e7
min = 1e7
max = 1e8
mean = 5e7
scale = auto

[sweep.env.alpha_dist]
# hover
[sweep.env.hover_alpha_dist]
distribution = log_normal
min = 0.1
max = 100.0
min = 0.001
max = 10.0
mean = 1.0
scale = auto

[sweep.env.alpha_omega]
distribution = log_normal
min = 0.0001
max = 1.0
mean = 0.001
scale = auto

[sweep.env.alpha_hover]
distribution = log_normal
min = 0.001
min = 0.0001
max = 1.0
mean = 0.01
scale = auto

[sweep.env.alpha_shaping]
# race
[sweep.env.race_alpha_dist]
distribution = log_normal
min = 0.01
min = 0.001
max = 10.0
mean = 1.0
scale = auto

[sweep.env.hover_dist]
[sweep.env.ring_reward]
distribution = log_normal
min = 0.001
max = 1.0
mean = 0.01
min = 0.1
max = 100.0
mean = 1.0
scale = auto

[sweep.env.hover_vel]
distribution = log_normal
min = 0.001
# fracs
[sweep.env.hover_frac]
distribution = uniform
min = 0.1
max = 1.0
mean = 0.01
scale = auto

[sweep.env.hover_omega]
distribution = log_normal
min = 0.01
max = 10.0
mean = 0.1
[sweep.env.race_frac]
distribution = uniform
min = 0.1
max = 1.0
scale = auto
128 changes: 102 additions & 26 deletions ocean/drone/binding.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,61 +9,137 @@
#define Env DroneEnv
#include "vecenv.h"

#include "task_hover.h"
#include "task_race.h"

// Task recorded by my_init (asserted equal across calls there) and dereferenced
// in my_log for its log keys.
// NOTE(review): env->task is also compared against enum values in my_init, so
// this pointer may be a leftover line from the diff rendering; confirm.
static const Task* LOG_TASK = NULL;
// Per-task fractions indexed by task id. Filled in my_init from the
// hover_frac / race_frac / sphere_frac / cube_frac / flag_frac kwargs and read
// in my_log to decide which task sections to emit on the first call.
static float task_fracs[NUM_TASKS];

// Builds the hover-task configuration from kwargs and stores it in
// env->task_config.
//
// A zeroed HoverConfig is allocated with calloc; the result is not checked and
// nothing in this function frees it. Each key is read via
// dict_get(kwargs, key)->value with no check that the lookup succeeded.
//
// NOTE(review): this span looks like a rendered diff with removed and added
// lines interleaved (no +/- markers); confirm the field list against the real
// file before relying on it.
static void hover_config(DroneEnv* env, Dict* kwargs) {
HoverConfig* cfg = (HoverConfig*)calloc(1, sizeof(HoverConfig));
cfg->target_dist = dict_get(kwargs, "hover_target_dist")->value;
cfg->hover_dist = dict_get(kwargs, "hover_dist")->value;
cfg->hover_omega = dict_get(kwargs, "hover_omega")->value;
cfg->hover_vel = dict_get(kwargs, "hover_vel")->value;
// First of two writes to alpha_dist; overwritten further down.
cfg->alpha_dist = dict_get(kwargs, "alpha_dist")->value;
cfg->alpha_hover = dict_get(kwargs, "alpha_hover")->value;
cfg->alpha_shaping = dict_get(kwargs, "alpha_shaping")->value;
cfg->alpha_omega = dict_get(kwargs, "alpha_omega")->value;
// Second write to alpha_dist: the "hover_alpha_dist" value is the one that
// remains after this function returns.
cfg->alpha_dist = dict_get(kwargs, "hover_alpha_dist")->value;
cfg->sphere_radius = dict_get(kwargs, "sphere_radius")->value;
// The horizon value is truncated to int.
cfg->horizon = (int)dict_get(kwargs, "hover_horizon")->value;
env->task_config = cfg;
}

// Builds the race-task configuration from kwargs and stores it in
// env->task_config.
//
// A zeroed RaceConfig is allocated with calloc; the result is not checked and
// nothing in this function frees it. Each key is read via
// dict_get(kwargs, key)->value with no check that the lookup succeeded.
//
// NOTE(review): this span looks like a rendered diff with removed and added
// lines interleaved (no +/- markers); confirm against the real file.
static void race_config(DroneEnv* env, Dict* kwargs) {
RaceConfig* cfg = (RaceConfig*)calloc(1, sizeof(RaceConfig));
// max_rings is truncated to int.
cfg->max_rings = (int)dict_get(kwargs, "max_rings")->value;
cfg->ring_reward = dict_get(kwargs, "ring_reward")->value;
cfg->collision_penalty = dict_get(kwargs, "collision_penalty")->value;
cfg->time_penalty = dict_get(kwargs, "time_penalty")->value;
// First of two writes to alpha_dist; overwritten on the next line.
cfg->alpha_dist = dict_get(kwargs, "alpha_dist")->value;
// Second write to alpha_dist: the "race_alpha_dist" value is the one kept.
cfg->alpha_dist = dict_get(kwargs, "race_alpha_dist")->value;
// The horizon value is truncated to int.
cfg->horizon = (int)dict_get(kwargs, "race_horizon")->value;
env->task_config = cfg;
}

// Initialises one environment from kwargs: agent count, shared reward
// coefficients, domain-randomisation amount, the per-task fractions, the task
// assigned to this env, and that task's config struct.
//
// NOTE(review): this span looks like a rendered diff with removed and added
// lines interleaved (no +/- markers). As shown, `if (task == 1) {` is never
// closed, and env->task is used both as a pointer (&TASK_RACE, ->init) and as
// an enum value (TASK_HOVER, (TaskType)t). Confirm against the real file.
void my_init(Env* env, Dict* kwargs) {
env->num_agents = (int)dict_get(kwargs, "num_drones")->value;

int task = (int)dict_get(kwargs, "task")->value;
if (task == 1) {
env->task = &TASK_RACE;
// Coefficients and dr stored directly on the env rather than in a task config.
env->alpha_vel = dict_get(kwargs, "alpha_vel")->value;
env->alpha_omega = dict_get(kwargs, "alpha_omega")->value;
env->alpha_action = dict_get(kwargs, "alpha_action")->value;
env->dr = dict_get(kwargs, "dr")->value;

// Raw (un-normalised) per-task fractions; written to the file-scope array on
// every call.
task_fracs[TASK_HOVER] = dict_get(kwargs, "hover_frac")->value;
task_fracs[TASK_RACE] = dict_get(kwargs, "race_frac")->value;
task_fracs[TASK_SPHERE] = dict_get(kwargs, "sphere_frac")->value;
task_fracs[TASK_CUBE] = dict_get(kwargs, "cube_frac")->value;
task_fracs[TASK_FLAG] = dict_get(kwargs, "flag_frac")->value;

// Sum of all fractions, used below to normalise them.
// NOTE(review): if every fraction is 0 the division below is 0/0 and the
// float-to-int casts operate on a non-finite value; confirm callers guarantee
// a positive sum.
float total = 0.0f;
for (int t = 0; t < NUM_TASKS; t++) {
total += task_fracs[t];
}

// NOTE(review): env->rng is read here as an integer index; presumably it holds
// a per-env seed/index at this point. Confirm against the caller.
int idx = (int)env->rng;
float cum = 0.0f;
// Default task if no iteration of the loop below selects one.
env->task = TASK_HOVER;
for (int t = 0; t < NUM_TASKS; t++) {
// cum is the running normalised cumulative fraction up to and including t.
cum += task_fracs[t] / total;
// Pick the first task t for which floor(cum * x) increases between x = idx
// and x = idx + 1.
if ((int)floorf((idx + 1) * cum) > (int)floorf(idx * cum)) {
env->task = (TaskType)t;
break;
}
}

// Only the race task gets a RaceConfig; every other task value takes the
// else branch and gets a HoverConfig.
if (env->task == TASK_RACE) {
race_config(env, kwargs);
} else {
env->task = &TASK_HOVER;
hover_config(env, kwargs);
}

env->task->init(env);

// will need changes for multi-task
// Requires every env initialised so far to share the same task.
assert(LOG_TASK == NULL || LOG_TASK == env->task);
LOG_TASK = env->task;

task_init(env);
init(env);
}

// Mean of an accumulated sum over n samples; yields 0 when n is not positive.
static inline float task_avg(float sum, float n) {
    if (!(n > 0.0f)) {
        return 0.0f;
    }
    return sum / n;
}

// Writes aggregate and per-task metrics from `log` into the `out` dictionary.
//
// Aggregate "perf" and "score" are the unweighted mean, over tasks whose
// log->task[t].n is positive, of that task's perf / n and score / n; both are
// 0 when no task has a positive n. Each per-task section is emitted when that
// task has a positive n, or on the first call when its configured fraction in
// task_fracs is positive.
//
// NOTE(review): this span looks like a rendered diff with removed and added
// lines interleaved (no +/- markers). As shown, "perf" and "score" are set
// twice (the later dict_set calls come after the earlier ones), and the
// LOG_TASK loop passes log->task[i] to dict_set while the rest of the function
// treats log->task[...] as a struct with n / perf / score / keys members.
// Confirm against the real file.
void my_log(Log* log, Dict* out) {
dict_set(out, "perf", log->perf);
dict_set(out, "score", log->score);
// Function-local flag: 1 only during the first call; cleared at the end.
static int first = 1;

float perf = 0.0f, score = 0.0f;
// Number of tasks that contributed to the perf/score sums.
int active = 0;
for (int t = 0; t < NUM_TASKS; t++) {
float n = log->task[t].n;
// Tasks with no recorded weight are left out of the average.
if (n <= 0.0f) continue;
perf += log->task[t].perf / n;
score += log->task[t].score / n;
active++;
}
dict_set(out, "perf", active > 0 ? perf / active : 0.0f);
dict_set(out, "score", active > 0 ? score / active : 0.0f);
dict_set(out, "episode_return", log->episode_return);
dict_set(out, "episode_length", log->episode_length);

for (int i = 0; i < LOG_TASK->num_log_keys; i++)
dict_set(out, LOG_TASK->log_keys[i], log->task[i]);
// Hover section: keys[0..3] are reported as ema_dist, ema_vel, ema_omega, oob.
// With n == 0 (first-call case) task_avg reports 0 for every averaged key.
if (log->task[TASK_HOVER].n > 0.0f || (first && task_fracs[TASK_HOVER] > 0.0f)) {
TaskLog* h = &log->task[TASK_HOVER];
dict_set(out, "hover/perf", task_avg(h->perf, h->n));
dict_set(out, "hover/score", task_avg(h->score, h->n));
dict_set(out, "hover/ema_dist", task_avg(h->keys[0], h->n));
dict_set(out, "hover/ema_vel", task_avg(h->keys[1], h->n));
dict_set(out, "hover/ema_omega", task_avg(h->keys[2], h->n));
dict_set(out, "hover/oob", task_avg(h->keys[3], h->n));
// n itself is reported, un-averaged, as the episode fraction.
dict_set(out, "hover/episode_frac", h->n);
}
// Race section: keys[0..3] are reported as rings_passed, ring_collisions,
// completed, oob.
if (log->task[TASK_RACE].n > 0.0f || (first && task_fracs[TASK_RACE] > 0.0f)) {
TaskLog* r = &log->task[TASK_RACE];
dict_set(out, "race/perf", task_avg(r->perf, r->n));
dict_set(out, "race/score", task_avg(r->score, r->n));
dict_set(out, "race/rings_passed", task_avg(r->keys[0], r->n));
dict_set(out, "race/ring_collisions", task_avg(r->keys[1], r->n));
dict_set(out, "race/completed", task_avg(r->keys[2], r->n));
dict_set(out, "race/oob", task_avg(r->keys[3], r->n));
dict_set(out, "race/episode_frac", r->n);
}
// Sphere section: same key layout as hover.
if (log->task[TASK_SPHERE].n > 0.0f || (first && task_fracs[TASK_SPHERE] > 0.0f)) {
TaskLog* s = &log->task[TASK_SPHERE];
dict_set(out, "sphere/perf", task_avg(s->perf, s->n));
dict_set(out, "sphere/score", task_avg(s->score, s->n));
dict_set(out, "sphere/ema_dist", task_avg(s->keys[0], s->n));
dict_set(out, "sphere/ema_vel", task_avg(s->keys[1], s->n));
dict_set(out, "sphere/ema_omega", task_avg(s->keys[2], s->n));
dict_set(out, "sphere/oob", task_avg(s->keys[3], s->n));
dict_set(out, "sphere/episode_frac", s->n);
}
// Cube section: same key layout as hover.
if (log->task[TASK_CUBE].n > 0.0f || (first && task_fracs[TASK_CUBE] > 0.0f)) {
TaskLog* c = &log->task[TASK_CUBE];
dict_set(out, "cube/perf", task_avg(c->perf, c->n));
dict_set(out, "cube/score", task_avg(c->score, c->n));
dict_set(out, "cube/ema_dist", task_avg(c->keys[0], c->n));
dict_set(out, "cube/ema_vel", task_avg(c->keys[1], c->n));
dict_set(out, "cube/ema_omega", task_avg(c->keys[2], c->n));
dict_set(out, "cube/oob", task_avg(c->keys[3], c->n));
dict_set(out, "cube/episode_frac", c->n);
}
// Flag section: same key layout as hover.
if (log->task[TASK_FLAG].n > 0.0f || (first && task_fracs[TASK_FLAG] > 0.0f)) {
TaskLog* f = &log->task[TASK_FLAG];
dict_set(out, "flag/perf", task_avg(f->perf, f->n));
dict_set(out, "flag/score", task_avg(f->score, f->n));
dict_set(out, "flag/ema_dist", task_avg(f->keys[0], f->n));
dict_set(out, "flag/ema_vel", task_avg(f->keys[1], f->n));
dict_set(out, "flag/ema_omega", task_avg(f->keys[2], f->n));
dict_set(out, "flag/oob", task_avg(f->keys[3], f->n));
dict_set(out, "flag/episode_frac", f->n);
}

// Later calls emit a task section only when that task has a positive n.
first = 0;
}
Loading
Loading