diff --git a/config/drone.ini b/config/drone.ini index c416d22a00..ea1d8879ac 100644 --- a/config/drone.ini +++ b/config/drone.ini @@ -7,109 +7,115 @@ num_buffers = 8 num_threads = 1 [env] -task = 0 num_drones = 64 -max_rings = 10 -# shared -alpha_dist = 1.4743813950542852 +# multi-task step fractions +hover_frac = 1.0 +race_frac = 1.0 +sphere_frac = 0.0 +cube_frac = 0.0 +flag_frac = 0.0 + +# domain randomisation +dr = 0.05 + +# shared rewards +alpha_vel = 0.0 +alpha_omega = 0.0 +alpha_action = 0.0 # hover -alpha_hover = 0.04172693102535828 -alpha_shaping = 2.4227498935775116 -alpha_omega = 0.003412692849239442 -hover_target_dist = 5 -hover_dist = 0.022635883121253756 -hover_vel = 0.011167386817995822 -hover_omega = 0.04019810650173936 +sphere_radius = 4.0 +hover_target_dist = 2 +hover_horizon = 1024 +alpha_hover = 0.058713089382793605 +hover_alpha_dist = 0.5936050283315333 # race +max_rings = 10 +race_horizon = 2048 ring_reward = 1.0 -collision_penalty = 0.0 -time_penalty = 0.0 +race_alpha_dist = 0.5936050283315333 [policy] expansion_factor = 1 -hidden_size = 64 -num_layers = 2 +hidden_size = 32 +num_layers = 1 [train] anneal_lr = 1 -beta1 = 0.9605091218915885 -beta2 = 0.9975342514897555 -clip_coef = 0.2792041834527874 -ent_coef = 0.009614464430623536 -eps = 3.890998066747521e-12 -gae_lambda = 0.8397140951280606 -gamma = 0.9827341882497986 +beta1 = 0.9205127440978271 +beta2 = 0.9999599984257372 +clip_coef = 0.24538079381694416 +ent_coef = 5.3094837057682964e-05 +eps = 4.831444291739578e-13 +gae_lambda = 0.9861995160361671 +gamma = 0.9891068682793783 gpus = 1 -horizon = 64 -learning_rate = 0.0036073110182039277 +horizon = 128 +learning_rate = 0.0037281345425369313 max_grad_norm = 0.1 min_lr_ratio = 0 minibatch_size = 4096 -prio_alpha = 0.5664372304116252 -prio_beta0 = 1 -replay_ratio = 2.321476115575771 +prio_alpha = 0.35078354102088527 +prio_beta0 = 0.7847372909663064 +replay_ratio = 3.0963958152520537 seed = 42 -total_timesteps = 4.6927025e+07 -vf_clip_coef = 5 -vf_coef = 5 -vtrace_c_clip = 5 -vtrace_rho_clip = 3.2646604263658587 +total_timesteps = 50_000_000 +vf_clip_coef = 0.7067248245848509 +vf_coef = 2.371511794283843 +vtrace_c_clip = 4.84687280649928 +vtrace_rho_clip = 1.7437113241448157 + +[sweep] +metric = perf [sweep.train.total_timesteps] distribution = log_normal -min = 3e7 -max = 2e8 -mean = 8e7 +min = 1e7 +max = 1e8 +mean = 5e7 scale = auto -[sweep.env.alpha_dist] +# hover +[sweep.env.hover_alpha_dist] distribution = log_normal -min = 0.1 -max = 100.0 +min = 0.001 +max = 10.0 mean = 1.0 scale = auto -[sweep.env.alpha_omega] -distribution = log_normal -min = 0.0001 -max = 1.0 -mean = 0.001 -scale = auto - [sweep.env.alpha_hover] distribution = log_normal -min = 0.001 +min = 0.0001 max = 1.0 mean = 0.01 scale = auto -[sweep.env.alpha_shaping] +# race +[sweep.env.race_alpha_dist] distribution = log_normal -min = 0.01 +min = 0.001 max = 10.0 mean = 1.0 scale = auto -[sweep.env.hover_dist] +[sweep.env.ring_reward] distribution = log_normal -min = 0.001 -max = 1.0 -mean = 0.01 +min = 0.1 +max = 100.0 +mean = 1.0 scale = auto -[sweep.env.hover_vel] -distribution = log_normal -min = 0.001 +# fracs +[sweep.env.hover_frac] +distribution = uniform +min = 0.1 max = 1.0 -mean = 0.01 scale = auto -[sweep.env.hover_omega] -distribution = log_normal -min = 0.01 -max = 10.0 -mean = 0.1 +[sweep.env.race_frac] +distribution = uniform +min = 0.1 +max = 1.0 scale = auto \ No newline at end of file diff --git a/ocean/drone/binding.c b/ocean/drone/binding.c index 605e2dccb4..bdf89e678c 100644 --- a/ocean/drone/binding.c +++ b/ocean/drone/binding.c @@ -9,21 +9,16 @@ #define Env DroneEnv #include "vecenv.h" -#include "task_hover.h" -#include "task_race.h" -static const Task* LOG_TASK = NULL; +static float task_fracs[NUM_TASKS]; static void hover_config(DroneEnv* env, Dict* kwargs) { HoverConfig* cfg = (HoverConfig*)calloc(1, sizeof(HoverConfig)); cfg->target_dist = dict_get(kwargs, "hover_target_dist")->value; - cfg->hover_dist = dict_get(kwargs, "hover_dist")->value; - cfg->hover_omega = dict_get(kwargs, "hover_omega")->value; - cfg->hover_vel = dict_get(kwargs, "hover_vel")->value; - cfg->alpha_dist = dict_get(kwargs, "alpha_dist")->value; cfg->alpha_hover = dict_get(kwargs, "alpha_hover")->value; - cfg->alpha_shaping = dict_get(kwargs, "alpha_shaping")->value; - cfg->alpha_omega = dict_get(kwargs, "alpha_omega")->value; + cfg->alpha_dist = dict_get(kwargs, "hover_alpha_dist")->value; + cfg->sphere_radius = dict_get(kwargs, "sphere_radius")->value; + cfg->horizon = (int)dict_get(kwargs, "hover_horizon")->value; env->task_config = cfg; } @@ -31,39 +26,120 @@ static void race_config(DroneEnv* env, Dict* kwargs) { RaceConfig* cfg = (RaceConfig*)calloc(1, sizeof(RaceConfig)); cfg->max_rings = (int)dict_get(kwargs, "max_rings")->value; cfg->ring_reward = dict_get(kwargs, "ring_reward")->value; - cfg->collision_penalty = dict_get(kwargs, "collision_penalty")->value; - cfg->time_penalty = dict_get(kwargs, "time_penalty")->value; - cfg->alpha_dist = dict_get(kwargs, "alpha_dist")->value; + cfg->alpha_dist = dict_get(kwargs, "race_alpha_dist")->value; + cfg->horizon = (int)dict_get(kwargs, "race_horizon")->value; env->task_config = cfg; } void my_init(Env* env, Dict* kwargs) { env->num_agents = (int)dict_get(kwargs, "num_drones")->value; - int task = (int)dict_get(kwargs, "task")->value; - if (task == 1) { - env->task = &TASK_RACE; + env->alpha_vel = dict_get(kwargs, "alpha_vel")->value; + env->alpha_omega = dict_get(kwargs, "alpha_omega")->value; + env->alpha_action = dict_get(kwargs, "alpha_action")->value; + env->dr = dict_get(kwargs, "dr")->value; + + task_fracs[TASK_HOVER] = dict_get(kwargs, "hover_frac")->value; + task_fracs[TASK_RACE] = dict_get(kwargs, "race_frac")->value; + task_fracs[TASK_SPHERE] = dict_get(kwargs, "sphere_frac")->value; + task_fracs[TASK_CUBE] = dict_get(kwargs, "cube_frac")->value; + task_fracs[TASK_FLAG] = dict_get(kwargs, "flag_frac")->value; + + float total = 0.0f; + for (int t = 0; t < NUM_TASKS; t++) { + total += task_fracs[t]; + } + + int idx = (int)env->rng; + float cum = 0.0f; + env->task = TASK_HOVER; + for (int t = 0; t < NUM_TASKS; t++) { + cum += task_fracs[t] / total; + if ((int)floorf((idx + 1) * cum) > (int)floorf(idx * cum)) { + env->task = (TaskType)t; + break; + } + } + + if (env->task == TASK_RACE) { race_config(env, kwargs); } else { - env->task = &TASK_HOVER; hover_config(env, kwargs); } - env->task->init(env); - - // will need changes for multi-task - assert(LOG_TASK == NULL || LOG_TASK == env->task); - LOG_TASK = env->task; - + task_init(env); init(env); } +static inline float task_avg(float sum, float n) { return n > 0.0f ? sum / n : 0.0f; } + void my_log(Log* log, Dict* out) { - dict_set(out, "perf", log->perf); - dict_set(out, "score", log->score); + static int first = 1; + + float perf = 0.0f, score = 0.0f; + int active = 0; + for (int t = 0; t < NUM_TASKS; t++) { + float n = log->task[t].n; + if (n <= 0.0f) continue; + perf += log->task[t].perf / n; + score += log->task[t].score / n; + active++; + } + dict_set(out, "perf", active > 0 ? perf / active : 0.0f); + dict_set(out, "score", active > 0 ? score / active : 0.0f); dict_set(out, "episode_return", log->episode_return); dict_set(out, "episode_length", log->episode_length); - for (int i = 0; i < LOG_TASK->num_log_keys; i++) - dict_set(out, LOG_TASK->log_keys[i], log->task[i]); + if (log->task[TASK_HOVER].n > 0.0f || (first && task_fracs[TASK_HOVER] > 0.0f)) { + TaskLog* h = &log->task[TASK_HOVER]; + dict_set(out, "hover/perf", task_avg(h->perf, h->n)); + dict_set(out, "hover/score", task_avg(h->score, h->n)); + dict_set(out, "hover/ema_dist", task_avg(h->keys[0], h->n)); + dict_set(out, "hover/ema_vel", task_avg(h->keys[1], h->n)); + dict_set(out, "hover/ema_omega", task_avg(h->keys[2], h->n)); + dict_set(out, "hover/oob", task_avg(h->keys[3], h->n)); + dict_set(out, "hover/episode_frac", h->n); + } + if (log->task[TASK_RACE].n > 0.0f || (first && task_fracs[TASK_RACE] > 0.0f)) { + TaskLog* r = &log->task[TASK_RACE]; + dict_set(out, "race/perf", task_avg(r->perf, r->n)); + dict_set(out, "race/score", task_avg(r->score, r->n)); + dict_set(out, "race/rings_passed", task_avg(r->keys[0], r->n)); + dict_set(out, "race/ring_collisions", task_avg(r->keys[1], r->n)); + dict_set(out, "race/completed", task_avg(r->keys[2], r->n)); + dict_set(out, "race/oob", task_avg(r->keys[3], r->n)); + dict_set(out, "race/episode_frac", r->n); + } + if (log->task[TASK_SPHERE].n > 0.0f || (first && task_fracs[TASK_SPHERE] > 0.0f)) { + TaskLog* s = &log->task[TASK_SPHERE]; + dict_set(out, "sphere/perf", task_avg(s->perf, s->n)); + dict_set(out, "sphere/score", task_avg(s->score, s->n)); + dict_set(out, "sphere/ema_dist", task_avg(s->keys[0], s->n)); + dict_set(out, "sphere/ema_vel", task_avg(s->keys[1], s->n)); + dict_set(out, "sphere/ema_omega", task_avg(s->keys[2], s->n)); + dict_set(out, "sphere/oob", task_avg(s->keys[3], s->n)); + dict_set(out, "sphere/episode_frac", s->n); + } + if (log->task[TASK_CUBE].n > 0.0f || (first && task_fracs[TASK_CUBE] > 0.0f)) { + TaskLog* c = &log->task[TASK_CUBE]; + dict_set(out, "cube/perf", task_avg(c->perf, c->n)); + dict_set(out, "cube/score", task_avg(c->score, c->n)); + dict_set(out, "cube/ema_dist", task_avg(c->keys[0], c->n)); + dict_set(out, "cube/ema_vel", task_avg(c->keys[1], c->n)); + dict_set(out, "cube/ema_omega", task_avg(c->keys[2], c->n)); + dict_set(out, "cube/oob", task_avg(c->keys[3], c->n)); + dict_set(out, "cube/episode_frac", c->n); + } + if (log->task[TASK_FLAG].n > 0.0f || (first && task_fracs[TASK_FLAG] > 0.0f)) { + TaskLog* f = &log->task[TASK_FLAG]; + dict_set(out, "flag/perf", task_avg(f->perf, f->n)); + dict_set(out, "flag/score", task_avg(f->score, f->n)); + dict_set(out, "flag/ema_dist", task_avg(f->keys[0], f->n)); + dict_set(out, "flag/ema_vel", task_avg(f->keys[1], f->n)); + dict_set(out, "flag/ema_omega", task_avg(f->keys[2], f->n)); + dict_set(out, "flag/oob", task_avg(f->keys[3], f->n)); + dict_set(out, "flag/episode_frac", f->n); + } + + first = 0; } \ No newline at end of file diff --git a/ocean/drone/drone.c b/ocean/drone/drone.c index d32c683dbb..71e5278e1a 100644 --- a/ocean/drone/drone.c +++ b/ocean/drone/drone.c @@ -1,13 +1,44 @@ #include "drone.h" #include "puffernet.h" #include "render.h" -#include "task_hover.h" #include #ifdef __EMSCRIPTEN__ #include #endif +static void setup_task(DroneEnv* env, int task) { + task_close(env); + env->task = task; + + if (task == TASK_RACE) { + RaceConfig* cfg = (RaceConfig*)calloc(1, sizeof(RaceConfig)); + cfg->max_rings = 10; + cfg->horizon = 2048; + env->task_config = cfg; + } else { + HoverConfig* cfg = (HoverConfig*)calloc(1, sizeof(HoverConfig)); + cfg->target_dist = 1.0f; + cfg->sphere_radius = 4.0f; + cfg->horizon = 1024; + env->task_config = cfg; + } + task_init(env); + c_reset(env); +} + +// we render at 60Hz, but drone frames are 100Hz +static void step_realtime(DroneEnv* env, PufferNet* net) { + static double accum = 0.0; + accum += GetFrameTime(); + if (accum > 0.25) accum = 0.25; + while (accum >= ACTION_DT) { + forward_puffernet(net, env->observations, env->actions); + c_step(env); + accum -= ACTION_DT; + } +} + #ifdef __EMSCRIPTEN__ typedef struct { DroneEnv* env; @@ -16,18 +47,22 @@ typedef struct { void emscriptenStep(void* e) { WebRenderArgs* args = (WebRenderArgs*)e; - forward_puffernet(args->net, args->env->observations, args->env->actions); - c_step(args->env); + if (IsKeyPressed(KEY_SPACE)) { + setup_task(args->env, (args->env->task + 1) % NUM_TASKS); + } + step_realtime(args->env, args->net); c_render(args->env); } #endif -int main() { +int main(int argc, char** argv) { srand(time(NULL)); + int task = argc > 1 ? atoi(argv[1]) : TASK_RACE; + DroneEnv* env = calloc(1, sizeof(DroneEnv)); - env->num_agents = 16; - env->task = &TASK_HOVER; + env->num_agents = 64; + env->dr = 0.05f; // static 5% flat DR for the demo env->observations = (float*)calloc(env->num_agents * DRONE_OBS_SIZE, sizeof(float)); env->actions = (float*)calloc(env->num_agents * 4, sizeof(float)); @@ -35,26 +70,11 @@ int main() { env->terminals = (float*)calloc(env->num_agents, sizeof(float)); init(env); - - // task config — hardcoded for demo - HoverConfig* cfg = (HoverConfig*)calloc(1, sizeof(HoverConfig)); - cfg->target_dist = 5.0f; - cfg->hover_dist = 0.1f; - cfg->hover_omega = 0.1f; - cfg->hover_vel = 0.1f; - cfg->alpha_dist = 0.782192f; - cfg->alpha_hover = 0.071445f; - cfg->alpha_shaping = 3.9754f; - cfg->alpha_omega = 0.00135588f; - env->task_config = cfg; - - env->task->init(env); - - c_reset(env); + setup_task(env, task); Weights* weights = load_weights("resources/drone/drone_weights.bin"); int logit_sizes[4] = {1, 1, 1, 1}; - PufferNet* net = make_puffernet(weights, env->num_agents, DRONE_OBS_SIZE, 64, 1, logit_sizes, 4); + PufferNet* net = make_puffernet(weights, env->num_agents, DRONE_OBS_SIZE, 32, 1, logit_sizes, 4); #ifdef __EMSCRIPTEN__ WebRenderArgs args = {.env = env, .net = net}; @@ -64,8 +84,8 @@ int main() { SetTargetFPS(60); while (!WindowShouldClose()) { - forward_puffernet(net, env->observations, env->actions); - c_step(env); + if (IsKeyPressed(KEY_SPACE)) setup_task(env, (env->task + 1) % NUM_TASKS); + step_realtime(env, net); c_render(env); } diff --git a/ocean/drone/drone.h b/ocean/drone/drone.h index 3aaaa67a7d..8c40fcf97b 100644 --- a/ocean/drone/drone.h +++ b/ocean/drone/drone.h @@ -12,7 +12,17 @@ #include "dronelib.h" -#define HORIZON 1024 +#define HORIZON 2048 + +typedef enum { + TASK_HOVER = 0, + TASK_RACE = 1, + TASK_SPHERE = 2, + TASK_CUBE = 3, + TASK_FLAG = 4, +} TaskType; + +#define NUM_TASKS 5 typedef struct { float dist; @@ -21,42 +31,24 @@ typedef struct { float omega; } StepCache; -#define MAX_TASK_LOG_ENTRIES 16 +typedef struct { + float n; + float perf; + float score; + float keys[4]; +} TaskLog; typedef struct Log Log; struct Log { - float score; - float perf; float episode_return; float episode_length; - float task[MAX_TASK_LOG_ENTRIES]; float n; + TaskLog task[NUM_TASKS]; }; -static inline void log_task_add(Log* log, int idx, float value) { - if (idx < 0 || idx >= MAX_TASK_LOG_ENTRIES) return; - log->task[idx] += value; -} - typedef struct DroneEnv DroneEnv; typedef struct Client Client; -typedef struct { - const char* name; - const char* log_keys[MAX_TASK_LOG_ENTRIES]; - int num_log_keys; - - void (*init)(DroneEnv* env); - void (*close)(DroneEnv* env); - void (*env_reset)(DroneEnv* env); - void (*reset)(DroneEnv* env, Drone* agent, int idx); - float (*reward)(DroneEnv* env, Drone* agent, int idx, StepCache* cache); - bool (*done)(DroneEnv* env, Drone* agent, int idx, StepCache* cache); - - void (*log)(DroneEnv* env, Drone* agent, int idx, Log* log, StepCache* cache); - void (*render)(DroneEnv* env, Client* client); -} Task; - struct DroneEnv { float* observations; float* actions; @@ -69,23 +61,35 @@ struct DroneEnv { Drone* agents; Log log; - const Task* task; + TaskType task; void* task_config; void* task_state; + // shared reward shaping + float alpha_vel; + float alpha_omega; + float alpha_action; + + // domain randomisation + float dr; + Client* client; }; +#include "tasklib.h" + void compute_observations(DroneEnv* env) { for (int i = 0; i < env->num_agents; i++) compute_drone_observations(&env->agents[i], env->observations + i * DRONE_OBS_SIZE); } -void reset_agent_base(Drone* agent, unsigned int* rng) { +void reset_agent_base(DroneEnv* env, int idx) { + Drone* agent = &env->agents[idx]; Target* target = agent->target; memset(agent, 0, sizeof(Drone)); agent->target = target; - init_drone(agent, rng, 0.05f); + + init_drone(agent, &env->rng, env->dr); } void init(DroneEnv* env) { @@ -102,16 +106,16 @@ void add_log(DroneEnv* env, int idx, StepCache* cache) { env->log.episode_length += agent->episode_length; env->log.n += 1.0f; - if (env->task->log) env->task->log(env, agent, idx, &env->log, cache); + task_log(env, agent, idx, &env->log, cache); } void c_reset(DroneEnv* env) { - if (env->task->env_reset) env->task->env_reset(env); + task_env_reset(env); for (int i = 0; i < env->num_agents; i++) { Drone* agent = &env->agents[i]; - reset_agent_base(agent, &env->rng); - env->task->reset(env, agent, i); + reset_agent_base(env, i); + task_reset(env, agent, i); agent->prev_pos = agent->state.pos; } @@ -135,8 +139,22 @@ void c_step(DroneEnv* env) { .omega = norm3(agent->state.omega), }; - float reward = env->task->reward(env, agent, i, &cache); - bool done = env->task->done(env, agent, i, &cache); + float reward = task_reward(env, agent, i, &cache); + reward -= env->alpha_vel * cache.vel; + reward -= env->alpha_omega * cache.omega; + + float* action = &env->actions[4 * i]; + if (agent->episode_length > 1) { + float da = 0.0f; + for (int k = 0; k < 4; k++) { + float d = action[k] - agent->prev_action[k]; + da += d * d; + } + reward -= env->alpha_action * da; + } + for (int k = 0; k < 4; k++) agent->prev_action[k] = action[k]; + + bool done = task_done(env, agent, i, &cache); agent->episode_return += reward; env->rewards[i] = reward; @@ -144,8 +162,8 @@ void c_step(DroneEnv* env) { if (done) { add_log(env, i, &cache); - reset_agent_base(agent, &env->rng); - env->task->reset(env, agent, i); + reset_agent_base(env, i); + task_reset(env, agent, i); agent->prev_pos = agent->state.pos; } } @@ -156,7 +174,7 @@ void c_step(DroneEnv* env) { void c_close_client(Client* client); void c_close(DroneEnv* env) { - if (env->task != NULL && env->task->close != NULL) env->task->close(env); + task_close(env); for (int i = 0; i < env->num_agents; i++) free(env->agents[i].target); diff --git a/ocean/drone/dronelib.h b/ocean/drone/dronelib.h index baad646042..3d9ed29b1f 100644 --- a/ocean/drone/dronelib.h +++ b/ocean/drone/dronelib.h @@ -39,7 +39,7 @@ #define MARGIN_X (GRID_X - 1) #define MARGIN_Y (GRID_Y - 1) #define MARGIN_Z (GRID_Z - 1) -#define RING_RADIUS 2.0f +#define RING_RADIUS 0.5f #define V_TARGET 0.05f #define DRONE_OBS_SIZE 19 @@ -113,6 +113,7 @@ typedef struct { State state; Params params; Vec3 prev_pos; + float prev_action[4]; // last action, for the shared action-change penalty Target* target; float episode_return; @@ -221,6 +222,11 @@ static inline Vec3 random_pos(unsigned int* rng) { }; } +static inline bool out_of_bounds(Vec3 p, float scale) { + return fabsf(p.x) > GRID_X * scale || fabsf(p.y) > GRID_Y * scale || + fabsf(p.z) > GRID_Z * scale; +} + // physics static inline float rpm_hover(const Params* p) { diff --git a/ocean/drone/render.h b/ocean/drone/render.h index 73e169ad78..09f63d08b2 100644 --- a/ocean/drone/render.h +++ b/ocean/drone/render.h @@ -14,17 +14,20 @@ #define R (Color){255, 0, 0, 255} #define W (Color){255, 255, 255, 255} #define B (Color){0, 0, 255, 255} -Color COLORS[64] = {W, B, B, R, R, B, B, W, B, W, B, R, R, B, W, B, B, B, W, R, R, W, - B, B, R, R, R, R, R, R, R, R, R, R, R, R, R, R, R, R, B, B, W, R, - R, W, B, B, B, W, B, R, R, B, W, B, W, B, B, R, R, B, B, W}; +Color COLORS[64] = {B, B, B, R, R, R, R, R, + B, B, B, W, W, W, W, W, + B, B, B, R, R, R, R, R, + B, B, B, W, W, W, W, W, + R, R, R, R, R, R, R, R, + W, W, W, W, W, W, W, W, + R, R, R, R, R, R, R, R, + W, W, W, W, W, W, W, W}; #undef R #undef W #undef B // 3D model config -#define MODEL_SCALE_DEFAULT 5.0f -#define MODEL_SCALE_NORMAL 1.0f -#define MINIMAL_SPHERE_SIZE 0.1f +#define MODEL_SCALE_NORMAL 3.0f #define NUM_PROPELLERS 4 static const int PROP_MESH_IDX[NUM_PROPELLERS] = {8, 6, 5, 7}; static const float PROP_DIRS[NUM_PROPELLERS] = {1.0f, -1.0f, 1.0f, -1.0f}; @@ -56,7 +59,6 @@ struct Client { float* prop_angles; Vec3 prop_centers[NUM_PROPELLERS]; float model_scale; - int render_mode; // 0 = default (5.0x), 1 = normal (1.0x), 2 = minimal (sphere only) }; // Convert dronelib Quat to raylib Matrix @@ -262,8 +264,7 @@ Client* make_client(DroneEnv* env) { client->follow_mode = false; client->target_fps = 100; client->model_loaded = false; - client->model_scale = MODEL_SCALE_DEFAULT; - client->render_mode = 0; + client->model_scale = MODEL_SCALE_NORMAL; // Load 3D model const char* model_paths[] = {"resources/crazyflie.glb", "resources/drone/crazyflie.glb", @@ -411,6 +412,16 @@ void DrawDronePrimitive(Client* client, Drone* agent, float* actions, Color body } } +// Task-specific overlays +static void render_task(DroneEnv* env, Client* client) { + (void)client; + if (env->task != TASK_RACE) return; + RaceConfig* cfg = (RaceConfig*)env->task_config; + RaceState* state = (RaceState*)env->task_state; + for (int i = 0; i < cfg->max_rings; i++) + DrawRing3D(state->ring_buffer[i], 0.1f, GREEN, BLUE); +} + void c_render(DroneEnv* env) { if (env->client == NULL) { env->client = make_client(env); @@ -432,12 +443,7 @@ void c_render(DroneEnv* env) { // Get selected drone position for camera Vec3 drone_pos = env->agents[client->selected_drone].state.pos; - // Calculate min zoom based on render mode and hover_dist - float min_zoom = (client->render_mode == 2) ? MINIMAL_SPHERE_SIZE - : (client->render_mode == 1) ? 1.0f - : 5.0f; - - handle_camera_controls(client, drone_pos, min_zoom); + handle_camera_controls(client, drone_pos, 1.0f); handle_drone_selection(client, env->num_agents, dt); handle_fps_control(client, dt); @@ -462,24 +468,6 @@ void c_render(DroneEnv* env) { client->follow_mode = !client->follow_mode; } - if (IsKeyPressed(KEY_Z)) { - client->render_mode = (client->render_mode + 1) % 3; - if (client->render_mode == 0) { - client->model_scale = MODEL_SCALE_DEFAULT; - } else if (client->render_mode == 1) { - client->model_scale = MODEL_SCALE_NORMAL; - } - // render_mode 2 = minimal, drone hidden - - float new_min_zoom = (client->render_mode == 2) ? MINIMAL_SPHERE_SIZE - : (client->render_mode == 1) ? 1.0f - : 5.0f; - if (client->camera_distance < new_min_zoom) { - client->camera_distance = new_min_zoom; - update_camera_position(client, drone_pos); - } - } - // Update camera position every frame when in follow mode if (client->follow_mode) { update_camera_position(client, drone_pos); @@ -515,15 +503,7 @@ void c_render(DroneEnv* env) { bool is_selected = (i == client->selected_drone); Color body_color = (inspect_mode && is_selected) ? PUFF_GREEN : COLORS[i % 64]; - if (client->render_mode == 2) { - // Minimal mode: draw small sphere matching hover_dist size - float sphere_size = MINIMAL_SPHERE_SIZE; - // Use a distinct color (yellow/orange) to differentiate from target - Color drone_sphere_color = (inspect_mode && is_selected) ? (Color){255, 200, 0, 255} - : (Color){255, 165, 0, 200}; - DrawSphere((Vector3){agent->state.pos.x, agent->state.pos.y, agent->state.pos.z}, - sphere_size, drone_sphere_color); - } else if (client->use_3d_model && client->model_loaded) { + if (client->use_3d_model && client->model_loaded) { DrawDroneModel(client, agent, i, dt, body_color); } else { DrawDronePrimitive(client, agent, &env->actions[4 * i], body_color); @@ -564,21 +544,11 @@ void c_render(DroneEnv* env) { } // Task-specific rendering - if (env->task->render) env->task->render(env, client); + render_task(env, client); - // Targets (shown in inspect mode) - size based on render mode + // Targets (shown in inspect mode) if (inspect_mode) { - float target_size; - if (client->render_mode == 2) { - // Minimal mode: target size matches hover_dist - target_size = MINIMAL_SPHERE_SIZE; - } else if (client->render_mode == 1) { - // 1.0x scale: target proportional to drone at normal scale - target_size = 0.1f; - } else { - // 5.0x scale: target proportional to drone at default scale - target_size = 0.5f; - } + float target_size = 0.1f; for (int i = 0; i < env->num_agents; i++) { Vec3 t = env->agents[i].target->pos; @@ -593,7 +563,7 @@ void c_render(DroneEnv* env) { // Heads up display int y = 10; - DrawText(TextFormat("Task: %s", env->task->name), 10, y, 20, WHITE); + DrawText(TextFormat("Task: %s", task_name(env->task)), 10, y, 20, WHITE); y += 25; DrawText(TextFormat("Tick: %d / %d", env->tick, HORIZON), 10, y, 20, WHITE); y += 25; @@ -602,13 +572,6 @@ void c_render(DroneEnv* env) { if (client->model_loaded) { DrawText(TextFormat("Render: %s (M)", client->use_3d_model ? "3D Model" : "Primitive"), 10, y, 18, client->use_3d_model ? PUFF_GREEN : LIGHTGRAY); - y += 22; - const char* mode_names[] = {"5.0x", "1.0x", "Minimal"}; - Color mode_color = (client->render_mode == 2) ? YELLOW - : (client->render_mode == 1) ? PUFF_GREEN - : LIGHTGRAY; - DrawText(TextFormat("Scale: %s (Z)", mode_names[client->render_mode]), 10, y, 18, - mode_color); } y += 22; DrawText(TextFormat("Follow: %s (F)", client->follow_mode ? "ON" : "OFF"), 10, y, 18, @@ -676,6 +639,8 @@ void c_render(DroneEnv* env) { y += 18; DrawText("Mouse wheel: Zoom in/out", 10, y, 16, LIGHTGRAY); y += 18; + DrawText("Space: Change task", 10, y, 16, LIGHTGRAY); + y += 18; DrawText(TextFormat("Tab: Inspect mode [%s]", inspect_mode ? "ON" : "OFF"), 10, y, 16, inspect_mode ? PUFF_GREEN : LIGHTGRAY); diff --git a/ocean/drone/task_hover.h b/ocean/drone/task_hover.h index ef3c5d5bb0..578be27e3b 100644 --- a/ocean/drone/task_hover.h +++ b/ocean/drone/task_hover.h @@ -6,21 +6,17 @@ #define HOVER_SCORE_DIST_SCALE 0.01f #define HOVER_SCORE_VEL_SCALE 0.01f -#define HOVER_SCORE_OMEGA_SCALE 0.1f +#define HOVER_SCORE_OMEGA_SCALE 0.01f typedef struct { float target_dist; - float hover_dist; - float hover_omega; - float hover_vel; - float alpha_dist; float alpha_hover; - float alpha_shaping; - float alpha_omega; + float alpha_dist; + float sphere_radius; + int horizon; } HoverConfig; typedef struct { - float* prev_potential; float* score; float* perf; float* ema_dist; @@ -32,7 +28,6 @@ typedef struct { static void hover_init(DroneEnv* env) { HoverState* state = (HoverState*)calloc(1, sizeof(HoverState)); - state->prev_potential = (float*)calloc(env->num_agents, sizeof(float)); state->score = (float*)calloc(env->num_agents, sizeof(float)); state->perf = (float*)calloc(env->num_agents, sizeof(float)); state->ema_dist = (float*)calloc(env->num_agents, sizeof(float)); @@ -44,7 +39,6 @@ static void hover_init(DroneEnv* env) { static void hover_close(DroneEnv* env) { HoverState* state = (HoverState*)env->task_state; if (state != NULL) { - free(state->prev_potential); free(state->score); free(state->perf); free(state->ema_dist); @@ -57,30 +51,14 @@ static void hover_close(DroneEnv* env) { // helpers -static inline void hover_set_target(unsigned int* rng, Drone* agent, float target_dist) { +static inline Vec3 random_ball_offset(unsigned int* rng, float radius) { float u = rndf(0.0f, 1.0f, rng); float v = rndf(0.0f, 1.0f, rng); float z = 2.0f * v - 1.0f; float a = 2.0f * (float)M_PI * u; float r_xy = sqrtf(fmaxf(0.0f, 1.0f - z * z)); Vec3 dir = (Vec3){r_xy * cosf(a), r_xy * sinf(a), z}; - - float rad = target_dist * cbrtf(rndf(0.0f, 1.0f, rng)); - Vec3 p = add3(agent->state.pos, scalmul3(dir, rad)); - - agent->target->pos = (Vec3){ - clampf(p.x, -MARGIN_X, MARGIN_X), - clampf(p.y, -MARGIN_Y, MARGIN_Y), - clampf(p.z, -MARGIN_Z, MARGIN_Z), - }; - agent->target->vel = (Vec3){0.0f, 0.0f, 0.0f}; -} - -static inline float hover_potential(float dist, float vel, float omega, HoverConfig* cfg) { - float d = 1.0f / (1.0f + dist / cfg->hover_dist); - float v = 1.0f / (1.0f + vel / cfg->hover_vel); - float w = 1.0f / (1.0f + omega / cfg->hover_omega); - return d * (0.7f + 0.15f * v + 0.15f * w); + return scalmul3(dir, radius * cbrtf(rndf(0.0f, 1.0f, rng))); } static inline float hover_score(float dist, float vel, float omega) { @@ -91,38 +69,90 @@ static inline float hover_score(float dist, float vel, float omega) { return 1.0f / (1.0f + 0.05f * penalty); } -// callbacks - -static void hover_reset(DroneEnv* env, Drone* agent, int idx) { - HoverConfig* cfg = (HoverConfig*)env->task_config; +static void hover_reset_to(DroneEnv* env, Drone* agent, int idx, Vec3 target, float spawn_dist) { HoverState* state = (HoverState*)env->task_state; - agent->state.pos = random_pos(&env->rng); - hover_set_target(&env->rng, agent, cfg->target_dist); + agent->target->pos = target; + agent->target->vel = (Vec3){0.0f, 0.0f, 0.0f}; + agent->target->normal = (Vec3){0.0f, 0.0f, 0.0f}; + + Vec3 p = add3(target, random_ball_offset(&env->rng, spawn_dist)); + agent->state.pos = (Vec3){ + clampf(p.x, -MARGIN_X, MARGIN_X), + clampf(p.y, -MARGIN_Y, MARGIN_Y), + clampf(p.z, -MARGIN_Z, MARGIN_Z), + }; float dist = norm3(sub3(agent->target->pos, agent->state.pos)); float vel = norm3(agent->state.vel); float omega = norm3(agent->state.omega); - state->score[idx] = 0.0f; state->perf[idx] = hover_score(dist, vel, omega); state->ema_dist[idx] = dist; state->ema_vel[idx] = vel; state->ema_omega[idx] = omega; - state->prev_potential[idx] = hover_potential(dist, vel, omega, cfg); +} + +static inline Vec3 sphere_slot(int idx, int num_agents, float radius) { + float phi = (float)M_PI * (sqrtf(5.0f) - 1.0f); + float y = 1.0f - 2.0f * ((float)idx / (float)num_agents); + float r = sqrtf(fmaxf(0.0f, 1.0f - y * y)); + float theta = phi * (float)idx; + return (Vec3){radius * cosf(theta) * r, radius * sinf(theta) * r, radius * y}; +} + +static inline float cube_axis(int i, int side, float radius) { + if (side <= 1) return 0.0f; + return radius * (2.0f * (float)i / (float)(side - 1) - 1.0f); +} + +static inline Vec3 cube_slot(int idx, int num_agents, float radius) { + float r = radius * 0.57735027f; + int side = (int)ceilf(cbrtf((float)num_agents)); + int x = idx % side; + int y = (idx / side) % side; + int z = idx / (side * side); + return (Vec3){cube_axis(x, side, r), cube_axis(y, side, r), cube_axis(z, side, r)}; +} + +static inline Vec3 flag_slot(int idx) { + float y = (float)(idx % 8) - 3.5f; + float z = 2.5f - 0.75f * (float)(idx / 8); + return (Vec3){0.0f, y, z}; +} + +// callbacks + +static void hover_reset(DroneEnv* env, Drone* agent, int idx) { + HoverConfig* cfg = (HoverConfig*)env->task_config; + hover_reset_to(env, agent, idx, random_pos(&env->rng), cfg->target_dist); +} + +static void sphere_reset(DroneEnv* env, Drone* agent, int idx) { + HoverConfig* cfg = (HoverConfig*)env->task_config; + Vec3 slot = sphere_slot(idx, env->num_agents, cfg->sphere_radius); + hover_reset_to(env, agent, idx, slot, cfg->target_dist); +} + +static void cube_reset(DroneEnv* env, Drone* agent, int idx) { + HoverConfig* cfg = (HoverConfig*)env->task_config; + Vec3 slot = cube_slot(idx, env->num_agents, cfg->sphere_radius); + hover_reset_to(env, agent, idx, slot, cfg->target_dist); +} + +static void flag_reset(DroneEnv* env, Drone* agent, int idx) { + HoverConfig* cfg = (HoverConfig*)env->task_config; + hover_reset_to(env, agent, idx, flag_slot(idx), cfg->target_dist); } static float hover_reward(DroneEnv* env, Drone* agent, int idx, StepCache* cache) { HoverConfig* cfg = (HoverConfig*)env->task_config; HoverState* state = (HoverState*)env->task_state; - float curr = hover_potential(cache->dist, cache->vel, cache->omega, cfg); - float reward = cfg->alpha_dist * (cache->prev_dist - cache->dist) + cfg->alpha_hover * curr + - cfg->alpha_shaping * (curr - state->prev_potential[idx]) - - cfg->alpha_omega * cache->omega; - state->prev_potential[idx] = curr; - float score = hover_score(cache->dist, cache->vel, cache->omega); + float reward = cfg->alpha_hover * score; + reward += cfg->alpha_dist * (cache->prev_dist - cache->dist); + state->score[idx] += score; state->perf[idx] = 0.98f * state->perf[idx] + 0.02f * score; state->ema_dist[idx] = 0.99f * state->ema_dist[idx] + 0.01f * cache->dist; @@ -133,32 +163,18 @@ static float hover_reward(DroneEnv* env, Drone* agent, int idx, StepCache* cache static bool hover_done(DroneEnv* env, Drone* agent, int idx, StepCache* cache) { HoverConfig* cfg = (HoverConfig*)env->task_config; - return cache->dist > (cfg->target_dist + 1.0f) || agent->episode_length >= HORIZON; + return cache->dist > (cfg->target_dist + 1.0f) || agent->episode_length >= cfg->horizon; } static void hover_log(DroneEnv* env, Drone* agent, int idx, Log* log, StepCache* cache) { HoverConfig* cfg = (HoverConfig*)env->task_config; HoverState* state = (HoverState*)env->task_state; - log->score += state->score[idx]; - log->perf += state->perf[idx]; - log_task_add(log, 0, state->ema_dist[idx]); - log_task_add(log, 1, state->ema_vel[idx]); - log_task_add(log, 2, state->ema_omega[idx]); - log_task_add(log, 3, cache->dist > (cfg->target_dist + 1.0f) ? 1.0f : 0.0f); -} - -// definition - -static const Task TASK_HOVER = { - .name = "hover", - .log_keys = {"ema_dist", "ema_vel", "ema_omega", "oob"}, - .num_log_keys = 4, - .init = hover_init, - .close = hover_close, - .env_reset = NULL, - .reset = hover_reset, - .reward = hover_reward, - .done = hover_done, - .log = hover_log, - .render = NULL, -}; + TaskLog* t = &log->task[env->task]; + t->n += 1.0f; + t->perf += state->perf[idx]; + t->score += state->score[idx]; + t->keys[0] += state->ema_dist[idx]; + t->keys[1] += state->ema_vel[idx]; + t->keys[2] += state->ema_omega[idx]; + t->keys[3] += cache->dist > (cfg->target_dist + 1.0f) ? 1.0f : 0.0f; +} diff --git a/ocean/drone/task_race.h b/ocean/drone/task_race.h index a00ba6311f..56798c662e 100644 --- a/ocean/drone/task_race.h +++ b/ocean/drone/task_race.h @@ -2,14 +2,20 @@ #include "drone.h" +#define RACE_OOB_SCALE 2.0f + +#define RACE_RING_MIN_DIST (5.0f * RING_RADIUS) +#define RACE_RING_MAX_DIST 8.0f +#define RACE_RING_SEPARATION (3.0f * RING_RADIUS) +#define RACE_MAX_PLACE_ATTEMPTS 100 + // types typedef struct { int max_rings; float ring_reward; - float collision_penalty; - float time_penalty; float alpha_dist; + int horizon; } RaceConfig; typedef struct { @@ -45,13 +51,49 @@ static void race_close(DroneEnv* env) { // helpers -static inline void reset_rings(unsigned int* rng, Target* ring_buffer, int num_rings) { - ring_buffer[0] = rndring(rng, RING_RADIUS); - for (int i = 1; i < num_rings; i++) { - do { - ring_buffer[i] = rndring(rng, RING_RADIUS); - } while (norm3(sub3(ring_buffer[i].pos, ring_buffer[i - 1].pos)) < 2.0f * RING_RADIUS); +static inline bool ring_overlaps(const Target* rings, int count, Vec3 pos) { + for (int i = 0; i < count; i++) + if (norm3(sub3(rings[i].pos, pos)) < RACE_RING_SEPARATION) return true; + return false; +} + +static inline bool in_gap_band(Vec3 a, Vec3 b) { + float d = norm3(sub3(a, b)); + return d >= RACE_RING_MIN_DIST && d <= RACE_RING_MAX_DIST; +} + +static inline Target gen_next_ring(unsigned int* rng, const Target* rings, int count, + const Target* close) { + const Target* prev = &rings[count - 1]; + Target best = rndring(rng, RING_RADIUS); + bool have_fallback = false; + for (int attempt = 0; attempt < RACE_MAX_PLACE_ATTEMPTS; attempt++) { + Target ring = rndring(rng, RING_RADIUS); + if (!in_gap_band(ring.pos, prev->pos)) continue; + if (ring_overlaps(rings, count, ring.pos)) continue; + if (!have_fallback) { best = ring; have_fallback = true; } + if (close != NULL && !in_gap_band(ring.pos, close->pos)) continue; + return ring; } + return best; +} + +static inline Vec3 path_normal(const Target* rings, int n, int i) { + if (n < 2) return (Vec3){0.0f, 0.0f, 1.0f}; + Vec3 dir = sub3(rings[(i + 1) % n].pos, rings[(i - 1 + n) % n].pos); + float len = norm3(dir); + return len > 1e-6f ? scalmul3(dir, 1.0f / len) : (Vec3){0.0f, 0.0f, 1.0f}; +} + +static inline void center_rings(Target* rings, int n) { + Vec3 lo = rings[0].pos, hi = rings[0].pos; + for (int i = 1; i < n; i++) { + lo.x = fminf(lo.x, rings[i].pos.x); hi.x = fmaxf(hi.x, rings[i].pos.x); + lo.y = fminf(lo.y, rings[i].pos.y); hi.y = fmaxf(hi.y, rings[i].pos.y); + lo.z = fminf(lo.z, rings[i].pos.z); hi.z = fmaxf(hi.z, rings[i].pos.z); + } + Vec3 mid = scalmul3(add3(lo, hi), 0.5f); + for (int i = 0; i < n; i++) rings[i].pos = sub3(rings[i].pos, mid); } static inline int check_ring(Drone* drone, Target* ring) { @@ -70,8 +112,10 @@ static inline int check_ring(Drone* drone, Target* ring) { Vec3 intersection = add3(drone->prev_pos, scalmul3(dir, t)); float d = norm3(sub3(intersection, ring->pos)); - if (d < (ring->radius - 0.5f) && valid_dir) return 1; - if (d < ring->radius + 0.5f) return -1; + // margins scale with radius + float margin = 0.1f * ring->radius; + if (d < (ring->radius - margin) && valid_dir) return 1; + if (d < ring->radius + margin) return -1; } return 0; } @@ -81,80 +125,77 @@ static inline int check_ring(Drone* drone, Target* ring) { static void race_env_reset(DroneEnv* env) { RaceConfig* cfg = (RaceConfig*)env->task_config; RaceState* state = (RaceState*)env->task_state; - reset_rings(&env->rng, state->ring_buffer, cfg->max_rings); + + state->ring_buffer[0] = rndring(&env->rng, RING_RADIUS); + for (int i = 1; i < cfg->max_rings; i++) { + const Target* close = (i == cfg->max_rings - 1) ? &state->ring_buffer[0] : NULL; + state->ring_buffer[i] = gen_next_ring(&env->rng, state->ring_buffer, i, close); + } + + center_rings(state->ring_buffer, cfg->max_rings); + + for (int i = 0; i < cfg->max_rings; i++) { + state->ring_buffer[i].normal = path_normal(state->ring_buffer, cfg->max_rings, i); + } } static void race_reset(DroneEnv* env, Drone* agent, int idx) { + RaceConfig* cfg = (RaceConfig*)env->task_config; RaceState* state = (RaceState*)env->task_state; - do { - agent->state.pos = random_pos(&env->rng); - } while (norm3(sub3(agent->state.pos, state->ring_buffer[0].pos)) < 2.0f * RING_RADIUS); + int g = (int)(rand_r(&env->rng) % cfg->max_rings); + Target* gate = &state->ring_buffer[g]; + + float back = rndf(1.0f, 3.0f, &env->rng); + Vec3 pos = sub3(gate->pos, scalmul3(gate->normal, back)); + pos = add3(pos, (Vec3){rndf(-0.3f, 0.3f, &env->rng), rndf(-0.3f, 0.3f, &env->rng), + rndf(-0.3f, 0.3f, &env->rng)}); + agent->state.pos = (Vec3){ + clampf(pos.x, -MARGIN_X, MARGIN_X), + clampf(pos.y, -MARGIN_Y, MARGIN_Y), + clampf(pos.z, -MARGIN_Z, MARGIN_Z), + }; - state->ring_idx[idx] = 0; + state->ring_idx[idx] = g; state->rings_passed[idx] = 0; state->collisions[idx] = 0.0f; - *agent->target = state->ring_buffer[0]; + *agent->target = *gate; } static float race_reward(DroneEnv* env, Drone* agent, int idx, StepCache* cache) { RaceConfig* cfg = (RaceConfig*)env->task_config; RaceState* state = (RaceState*)env->task_state; + // Distance-progress shaping toward the active gate; speed/omega/action penalties are shared (c_step). float reward = cfg->alpha_dist * (cache->prev_dist - cache->dist); int result = check_ring(agent, &state->ring_buffer[state->ring_idx[idx]]); if (result == 1) { state->rings_passed[idx]++; - state->ring_idx[idx]++; - if (state->ring_idx[idx] < cfg->max_rings) - *agent->target = state->ring_buffer[state->ring_idx[idx]]; + state->ring_idx[idx] = (state->ring_idx[idx] + 1) % cfg->max_rings; + *agent->target = state->ring_buffer[state->ring_idx[idx]]; reward += cfg->ring_reward; } else if (result == -1) { state->collisions[idx] += 1.0f; - reward -= cfg->collision_penalty; } - reward -= cfg->time_penalty; return reward; } static bool race_done(DroneEnv* env, Drone* agent, int idx, StepCache* cache) { RaceConfig* cfg = (RaceConfig*)env->task_config; - RaceState* state = (RaceState*)env->task_state; - return state->rings_passed[idx] >= cfg->max_rings || agent->episode_length >= HORIZON; + return out_of_bounds(agent->state.pos, RACE_OOB_SCALE) || agent->episode_length >= cfg->horizon; } static void race_log(DroneEnv* env, Drone* agent, int idx, Log* log, StepCache* cache) { RaceConfig* cfg = (RaceConfig*)env->task_config; RaceState* state = (RaceState*)env->task_state; - float completed = state->rings_passed[idx] >= cfg->max_rings ? 1.0f : 0.0f; - log->score += (float)state->rings_passed[idx]; - log->perf += completed; - log_task_add(log, 0, (float)state->rings_passed[idx]); - log_task_add(log, 1, state->collisions[idx]); - log_task_add(log, 2, completed); -} - -static void race_render(DroneEnv* env, Client* client) { - RaceConfig* cfg = (RaceConfig*)env->task_config; - RaceState* state = (RaceState*)env->task_state; - for (int i = 0; i < cfg->max_rings; i++) - DrawRing3D(state->ring_buffer[i], 0.2f, GREEN, BLUE); + TaskLog* t = &log->task[TASK_RACE]; + t->n += 1.0f; + t->perf += fminf((float)state->rings_passed[idx] / (float)cfg->max_rings, 1.0f); + t->score += (float)state->rings_passed[idx]; + t->keys[0] += (float)state->rings_passed[idx]; + t->keys[1] += state->collisions[idx]; + t->keys[2] += state->rings_passed[idx] >= cfg->max_rings ? 1.0f : 0.0f; + t->keys[3] += out_of_bounds(agent->state.pos, RACE_OOB_SCALE) ? 1.0f : 0.0f; } - -// definition - -static const Task TASK_RACE = { - .name = "race", - .log_keys = {"rings_passed", "ring_collisions", "completed"}, - .num_log_keys = 3, - .init = race_init, - .close = race_close, - .env_reset = race_env_reset, - .reset = race_reset, - .reward = race_reward, - .done = race_done, - .log = race_log, - .render = race_render, -}; diff --git a/ocean/drone/tasklib.h b/ocean/drone/tasklib.h new file mode 100644 index 0000000000..515badf90b --- /dev/null +++ b/ocean/drone/tasklib.h @@ -0,0 +1,68 @@ +#pragma once + +#include "task_hover.h" +#include "task_race.h" + + +const char* task_name(TaskType task) { + switch (task) { + case TASK_HOVER: return "hover"; + case TASK_RACE: return "race"; + case TASK_SPHERE: return "sphere"; + case TASK_CUBE: return "cube"; + case TASK_FLAG: return "flag"; + } + return "?"; +} + +void task_init(DroneEnv* env) { + switch (env->task) { + case TASK_RACE: race_init(env); break; + default: hover_init(env); break; + } +} + +void task_close(DroneEnv* env) { + switch (env->task) { + case TASK_RACE: race_close(env); break; + default: hover_close(env); break; + } +} + +void task_env_reset(DroneEnv* env) { + switch (env->task) { + case TASK_RACE: race_env_reset(env); break; + default: break; + } +} + +void task_reset(DroneEnv* env, Drone* agent, int idx) { + switch (env->task) { + case TASK_HOVER: hover_reset(env, agent, idx); break; + case TASK_SPHERE: sphere_reset(env, agent, idx); break; + case TASK_CUBE: cube_reset(env, agent, idx); break; + case TASK_FLAG: flag_reset(env, agent, idx); break; + case TASK_RACE: race_reset(env, agent, idx); break; + } +} + +float task_reward(DroneEnv* env, Drone* agent, int idx, StepCache* cache) { + switch (env->task) { + case TASK_RACE: return race_reward(env, agent, idx, cache); + default: return hover_reward(env, agent, idx, cache); + } +} + +bool task_done(DroneEnv* env, Drone* agent, int idx, StepCache* cache) { + switch (env->task) { + case TASK_RACE: return race_done(env, agent, idx, cache); + default: return hover_done(env, agent, idx, cache); + } +} + +void task_log(DroneEnv* env, Drone* agent, int idx, Log* log, StepCache* cache) { + switch (env->task) { + case TASK_RACE: race_log(env, agent, idx, log, cache); break; + default: hover_log(env, agent, idx, log, cache); break; + } +} diff --git a/pufferlib/sweep.py b/pufferlib/sweep.py index 36e27bf42a..2c9f618176 100644 --- a/pufferlib/sweep.py +++ b/pufferlib/sweep.py @@ -146,7 +146,9 @@ def _params_from_puffer_sweep(sweep_config, only_include=None): for name, param in sweep_config.items(): if name in ('method', 'metric', 'metric_distribution', 'goal', 'downsample', 'use_gpu', 'prune_pareto', - 'sweep_only', 'max_suggestion_cost', 'early_stop_quantile', 'gpus', 'max_runs'): + 'sweep_only', 'max_suggestion_cost', 'early_stop_quantile', 'gpus', 'max_runs', + 'match_enemy_model_path', 'match_num_games', 'match_enemy_hidden_size', + 'match_enemy_num_layers'): continue assert isinstance(param, dict), f'Param {name} is not a dict' diff --git a/resources/drone/crazyflie.glb b/resources/drone/crazyflie.glb new file mode 100644 index 0000000000..39a3772e43 Binary files /dev/null and b/resources/drone/crazyflie.glb differ diff --git a/resources/drone/drone.blend b/resources/drone/drone.blend new file mode 100644 index 0000000000..aa114fde3e Binary files /dev/null and b/resources/drone/drone.blend differ diff --git a/resources/drone/drone_weights.bin b/resources/drone/drone_weights.bin index df5a47c211..cc7dac039b 100644 Binary files a/resources/drone/drone_weights.bin and b/resources/drone/drone_weights.bin differ