diff --git a/configs/aarch64_defconfig b/configs/aarch64_defconfig index 447424ea5..a061970ef 100644 --- a/configs/aarch64_defconfig +++ b/configs/aarch64_defconfig @@ -162,6 +162,7 @@ BR2_PACKAGE_CURIOS_HTTPD=y BR2_PACKAGE_CURIOS_NFTABLES=y BR2_PACKAGE_GENCERT=y BR2_PACKAGE_STATD=y +BR2_PACKAGE_YANGERD=y BR2_PACKAGE_FACTORY=y BR2_PACKAGE_FINIT_PLUGIN_HOTPLUG=y BR2_PACKAGE_FINIT_PLUGIN_HOOK_SCRIPTS=y diff --git a/configs/aarch64_minimal_defconfig b/configs/aarch64_minimal_defconfig index c00579d02..99ad64997 100644 --- a/configs/aarch64_minimal_defconfig +++ b/configs/aarch64_minimal_defconfig @@ -131,6 +131,7 @@ BR2_PACKAGE_NETD=y BR2_PACKAGE_CONFD_TEST_MODE=y BR2_PACKAGE_GENCERT=y BR2_PACKAGE_STATD=y +BR2_PACKAGE_YANGERD=y BR2_PACKAGE_FACTORY=y BR2_PACKAGE_FINIT_PLUGIN_HOTPLUG=y BR2_PACKAGE_FINIT_PLUGIN_HOOK_SCRIPTS=y diff --git a/configs/arm_defconfig b/configs/arm_defconfig index c08ba158a..0789b6a67 100644 --- a/configs/arm_defconfig +++ b/configs/arm_defconfig @@ -149,6 +149,7 @@ BR2_PACKAGE_NETD=y BR2_PACKAGE_CONFD_TEST_MODE=y BR2_PACKAGE_GENCERT=y BR2_PACKAGE_STATD=y +BR2_PACKAGE_YANGERD=y BR2_PACKAGE_FACTORY=y BR2_PACKAGE_FINIT_PLUGIN_HOTPLUG=y BR2_PACKAGE_FINIT_PLUGIN_HOOK_SCRIPTS=y diff --git a/configs/arm_minimal_defconfig b/configs/arm_minimal_defconfig index b9090d2f9..b112f3bdf 100644 --- a/configs/arm_minimal_defconfig +++ b/configs/arm_minimal_defconfig @@ -129,6 +129,7 @@ BR2_PACKAGE_NETD=y BR2_PACKAGE_CONFD_TEST_MODE=y BR2_PACKAGE_GENCERT=y BR2_PACKAGE_STATD=y +BR2_PACKAGE_YANGERD=y BR2_PACKAGE_FACTORY=y BR2_PACKAGE_FINIT_PLUGIN_HOTPLUG=y BR2_PACKAGE_FINIT_PLUGIN_HOOK_SCRIPTS=y diff --git a/configs/riscv64_defconfig b/configs/riscv64_defconfig index b687676fc..fa400a510 100644 --- a/configs/riscv64_defconfig +++ b/configs/riscv64_defconfig @@ -181,6 +181,7 @@ BR2_PACKAGE_CONFD=y BR2_PACKAGE_NETD=y BR2_PACKAGE_GENCERT=y BR2_PACKAGE_STATD=y +BR2_PACKAGE_YANGERD=y BR2_PACKAGE_FACTORY=y BR2_PACKAGE_FINIT_PLUGIN_HOTPLUG=y 
BR2_PACKAGE_FINIT_PLUGIN_HOOK_SCRIPTS=y diff --git a/configs/x86_64_defconfig b/configs/x86_64_defconfig index 981cabf5b..cb2e8ba5c 100644 --- a/configs/x86_64_defconfig +++ b/configs/x86_64_defconfig @@ -156,6 +156,7 @@ BR2_PACKAGE_CURIOS_HTTPD=y BR2_PACKAGE_CURIOS_NFTABLES=y BR2_PACKAGE_GENCERT=y BR2_PACKAGE_STATD=y +BR2_PACKAGE_YANGERD=y BR2_PACKAGE_FACTORY=y BR2_PACKAGE_FINIT_PLUGIN_HOTPLUG=y BR2_PACKAGE_FINIT_PLUGIN_HOOK_SCRIPTS=y diff --git a/configs/x86_64_minimal_defconfig b/configs/x86_64_minimal_defconfig index 893163ec5..4afe6c951 100644 --- a/configs/x86_64_minimal_defconfig +++ b/configs/x86_64_minimal_defconfig @@ -128,6 +128,7 @@ BR2_PACKAGE_NETD=y BR2_PACKAGE_CONFD_TEST_MODE=y BR2_PACKAGE_GENCERT=y BR2_PACKAGE_STATD=y +BR2_PACKAGE_YANGERD=y BR2_PACKAGE_FACTORY=y BR2_PACKAGE_FINIT_PLUGIN_HOTPLUG=y BR2_PACKAGE_FINIT_PLUGIN_HOOK_SCRIPTS=y diff --git a/package/Config.in b/package/Config.in index 6998f5804..86de30fa0 100644 --- a/package/Config.in +++ b/package/Config.in @@ -13,6 +13,7 @@ source "$BR2_EXTERNAL_INFIX_PATH/package/curios-httpd/Config.in" source "$BR2_EXTERNAL_INFIX_PATH/package/curios-nftables/Config.in" source "$BR2_EXTERNAL_INFIX_PATH/package/gencert/Config.in" source "$BR2_EXTERNAL_INFIX_PATH/package/statd/Config.in" +source "$BR2_EXTERNAL_INFIX_PATH/package/yangerd/Config.in" source "$BR2_EXTERNAL_INFIX_PATH/package/factory/Config.in" source "$BR2_EXTERNAL_INFIX_PATH/package/faux/Config.in" source "$BR2_EXTERNAL_INFIX_PATH/package/finit/Config.in" diff --git a/package/yangerd/Config.in b/package/yangerd/Config.in new file mode 100644 index 000000000..1720c401b --- /dev/null +++ b/package/yangerd/Config.in @@ -0,0 +1,7 @@ +config BR2_PACKAGE_YANGERD + bool "yangerd" + depends on BR2_PACKAGE_HOST_GO_TARGET_ARCH_SUPPORTS + help + Operational data daemon for YANG/NETCONF/RESTCONF. + Replaces Python yanger scripts with a persistent Go daemon + serving operational data over a Unix socket IPC protocol. 
diff --git a/package/yangerd/yangerd.conf b/package/yangerd/yangerd.conf new file mode 100644 index 000000000..e472e68ca --- /dev/null +++ b/package/yangerd/yangerd.conf @@ -0,0 +1,3 @@ +service <> name:yangerd log:prio:daemon.notice,tag:yangerd \ + env:-/etc/default/yangerd \ + [2345] yangerd -- Operational data daemon diff --git a/package/yangerd/yangerd.mk b/package/yangerd/yangerd.mk new file mode 100644 index 000000000..b12ad4b51 --- /dev/null +++ b/package/yangerd/yangerd.mk @@ -0,0 +1,35 @@ +################################################################################ +# +# yangerd +# +################################################################################ + +YANGERD_VERSION = 1.0.0 +YANGERD_SITE = $(BR2_EXTERNAL_INFIX_PATH)/src/yangerd +YANGERD_SITE_METHOD = local +YANGERD_GOMOD = github.com/kernelkit/infix/src/yangerd +YANGERD_LICENSE = BSD-3-Clause +YANGERD_LICENSE_FILES = LICENSE +YANGERD_REDISTRIBUTE = NO + +YANGERD_BUILD_TARGETS = cmd/yangerd cmd/yangerctl +YANGERD_INSTALL_BINS = yangerd yangerctl + +define YANGERD_INSTALL_EXTRA + $(INSTALL) -D -m 0644 $(YANGERD_PKGDIR)/yangerd.conf \ + $(FINIT_D)/available/yangerd.conf + ln -sf ../available/yangerd.conf $(FINIT_D)/enabled/yangerd.conf + $(INSTALL) -d $(TARGET_DIR)/etc/default + echo '# yangerd build-time feature flags (generated by yangerd.mk)' \ + > $(TARGET_DIR)/etc/default/yangerd + echo 'YANGERD_ENABLE_WIFI=$(if $(BR2_PACKAGE_IW),true,false)' \ + >> $(TARGET_DIR)/etc/default/yangerd + echo 'YANGERD_ENABLE_CONTAINERS=$(if $(BR2_PACKAGE_PODMAN),true,false)' \ + >> $(TARGET_DIR)/etc/default/yangerd + echo 'YANGERD_ENABLE_GPS=$(if $(BR2_PACKAGE_GPSD),true,false)' \ + >> $(TARGET_DIR)/etc/default/yangerd + echo 'YANGERD_LOG_LEVEL=debug' >> $(TARGET_DIR)/etc/default/yangerd +endef +YANGERD_POST_INSTALL_TARGET_HOOKS += YANGERD_INSTALL_EXTRA + +$(eval $(golang-package)) diff --git a/src/confd/src/core.c b/src/confd/src/core.c index 956b5501c..4d4365bcd 100644 --- a/src/confd/src/core.c +++
b/src/confd/src/core.c @@ -670,6 +670,15 @@ static int change_cb(sr_session_ctx_t *session, uint32_t sub_id, const char *mod return SR_ERR_SYS; } + /* + Send sighup to yangerd to trigger a poll of polled values. + This will make sure that there is no stale data. + */ + if (systemf("initctl -b reload yangerd")) { + EMERG("Failed reloading yangerd"); + return SR_ERR_SYS; + } + AUDIT("The new configuration has been applied."); } diff --git a/src/statd/Makefile.am b/src/statd/Makefile.am index 727583daa..2e6d58d54 100644 --- a/src/statd/Makefile.am +++ b/src/statd/Makefile.am @@ -2,7 +2,7 @@ DISTCLEANFILES = *~ *.d ACLOCAL_AMFLAGS = -I m4 sbin_PROGRAMS = statd -statd_SOURCES = statd.c shared.c shared.h journal.c journal_retention.c journal.h avahi.c avahi.h +statd_SOURCES = statd.c shared.c shared.h journal.c journal_retention.c journal.h avahi.c avahi.h yangerd.c yangerd.h statd_CPPFLAGS = -D_DEFAULT_SOURCE -D_GNU_SOURCE statd_CFLAGS = -W -Wall -Wextra statd_CFLAGS += $(jansson_CFLAGS) $(libyang_CFLAGS) $(sysrepo_CFLAGS) diff --git a/src/statd/statd.c b/src/statd/statd.c index dac055836..8530d6964 100644 --- a/src/statd/statd.c +++ b/src/statd/statd.c @@ -21,23 +21,16 @@ #include #include #include -#include #include #include #include -#include #include "shared.h" #include "journal.h" #include "avahi.h" +#include "yangerd.h" -/* New kernel feature, not in sys/mman.h yet */ -#ifndef MFD_NOEXEC_SEAL -#define MFD_NOEXEC_SEAL 0x0008U -#endif - -#define YANGER_BINPATH YANGER_DIR"/yanger" #define XPATH_MAX PATH_MAX #define XPATH_IFACE_BASE "/ietf-interfaces:interfaces" #define XPATH_ROUTING_BASE "/ietf-routing:routing/control-plane-protocols/control-plane-protocol" @@ -59,6 +52,7 @@ TAILQ_HEAD(sub_head, sub); struct sub { struct ev_io watcher; sr_subscription_ctx_t *sr_sub; + char key[XPATH_MAX]; /* yangerd key, derived from the subscription xpath */ TAILQ_ENTRY(sub) entries; @@ -74,98 +68,60 @@ struct statd { struct mdns_ctx mdns; /* mDNS neighbor monitor */ }; -static int 
ly_add_yanger_data(const struct ly_ctx *ctx, struct lyd_node **parent, - char *yanger_args[]) +static int ly_add_yangerd_data(const struct ly_ctx *ctx, struct lyd_node **parent, + const char *path) { - FILE *stream; + char *json = NULL; + size_t len = 0; int err; - int fd; - - fd = memfd_create("yanger_tmpfile", MFD_CLOEXEC | MFD_NOEXEC_SEAL); - if (fd == -1) { - ERROR("Error, unable to create memfd"); - return SR_ERR_SYS; - } - /* Wrap the file descriptor in a FILE stream for fwrite */ - stream = fdopen(fd, "w+"); - if (stream == NULL) { - ERROR("Error, unable to fdopen memfd"); - close(fd); - return SR_ERR_SYS; - } - - err = fsystemv(yanger_args, NULL, stream, NULL); + err = yangerd_query(path, &json, &len); if (err) { - ERROR("Error, running yanger"); - fclose(stream); + free(json); + ERROR("yangerd: query failed for %s", path); return SR_ERR_SYS; } - fflush(stream); - - if (lseek(fd, 0, SEEK_SET) == (off_t)-1) { - ERROR("Error, unable reset stream (seek)"); - fclose(stream); - return SR_ERR_SYS; - } + NOTE("yangerd: got %zu bytes JSON for %s", len, path); - err = lyd_parse_data_fd(ctx, fd, LYD_JSON, LYD_PARSE_ONLY, 0, parent); + err = lyd_parse_data_mem(ctx, json, LYD_JSON, LYD_PARSE_ONLY, 0, parent); if (err) ERROR("Error, parsing yanger data (%d): %s", err, ly_errmsg(ctx)); - fclose(stream); - /* Note: fclose() already closes the underlying fd from fdopen() */ - + free(json); return err; } -static char *xpath_extract(const char *xpath, const char *key) +static const char *xpath_to_yangerd_path(const char *xpath, char *buf, size_t bufsz) { - char *res = NULL; - const char *ptr; - const char *end; - - /* (also checks if key exist) */ - ptr = strstr(xpath, key); - if (!ptr) - return NULL; + const char *start, *slash; + size_t len; - ptr += strlen(key); - - end = strchr(ptr, '\''); - if (!end) { - ERROR("Can't find end quote for %s (sanity check)", key); - return NULL; + if (!xpath || !*xpath || !strcmp(xpath, "*") || !strcmp(xpath, "/*")) { + buf[0] = '\0'; + 
return buf; } - if ((end - ptr) >= XPATH_MAX) { - ERROR("Value for %s is to long (sanity check)", key); - return NULL; - } + start = xpath; + if (*start == '/') + start++; - res = calloc((end - ptr) + 1, sizeof(char)); - if (!res) - return NULL; + slash = strchr(start, '/'); + len = slash ? (size_t)(slash - start) : strlen(start); - strncpy(res, ptr, end - ptr); - res[end - ptr] = '\0'; + if (len >= bufsz) + len = bufsz - 1; - return res; + memcpy(buf, start, len); + buf[len] = '\0'; + + return buf; } -static int sr_iface_cb(sr_session_ctx_t *session, uint32_t, const char *model, +static int sr_iface_cb(sr_session_ctx_t *session, uint32_t, const char *, const char *, const char *xpath, uint32_t, struct lyd_node **parent, __attribute__((unused)) void *priv) { - char *yanger_args[5] = { - YANGER_BINPATH, - (char *)model, - NULL, - NULL, - NULL - }; - char *ifname = NULL; const struct ly_ctx *ctx; sr_conn_ctx_t *con; int err; @@ -184,34 +140,25 @@ static int sr_iface_cb(sr_session_ctx_t *session, uint32_t, const char *model, return SR_ERR_INTERNAL; } - ifname = xpath_extract(xpath, "[name='"); - if (ifname) { - yanger_args[2] = "-p"; - yanger_args[3] = ifname; - } - err = ly_add_yanger_data(ctx, parent, yanger_args); + err = ly_add_yangerd_data(ctx, parent, "ietf-interfaces:interfaces"); if (err) - ERROR("Error adding interface yanger data"); + ERROR("Error adding interface data (err %d)", err); sr_release_context(con); - return SR_ERR_OK; + return err ? 
SR_ERR_INTERNAL : SR_ERR_OK; } -static int sr_generic_cb(sr_session_ctx_t *session, uint32_t, const char *model, +static int sr_generic_cb(sr_session_ctx_t *session, uint32_t, const char *, const char *, const char *xpath, uint32_t, - struct lyd_node **parent, __attribute__((unused)) void *priv) + struct lyd_node **parent, void *priv) { - char *yanger_args[5] = { - YANGER_BINPATH, - (char *)model, - NULL - }; + struct sub *sub = priv; const struct ly_ctx *ctx; sr_conn_ctx_t *con; sr_error_t err; - DEBUG("Incoming generic query for xpath: %s", xpath); + DEBUG("Incoming generic query for xpath: %s -> key %s", xpath, sub->key); con = sr_session_get_connection(session); if (!con) { @@ -225,9 +172,9 @@ static int sr_generic_cb(sr_session_ctx_t *session, uint32_t, const char *model, return SR_ERR_INTERNAL; } - err = ly_add_yanger_data(ctx, parent, yanger_args); + err = ly_add_yangerd_data(ctx, parent, sub->key); if (err) - ERROR("Error adding yanger data"); + ERROR("Error adding data for %s", sub->key); sr_release_context(con); @@ -238,11 +185,6 @@ static int sr_ospf_cb(sr_session_ctx_t *session, uint32_t, const char *, const char *, const char *xpath, uint32_t, struct lyd_node **parent, __attribute__((unused)) void *priv) { - char *yanger_args[5] = { - YANGER_BINPATH, - "ietf-ospf", - NULL - }; const struct ly_ctx *ctx; sr_conn_ctx_t *con; sr_error_t err; @@ -261,9 +203,9 @@ static int sr_ospf_cb(sr_session_ctx_t *session, uint32_t, const char *, return SR_ERR_INTERNAL; } - err = ly_add_yanger_data(ctx, parent, yanger_args); + err = ly_add_yangerd_data(ctx, parent, "ietf-routing:routing"); if (err) - ERROR("Error adding yanger data"); + ERROR("Error adding OSPF data"); sr_release_context(con); @@ -274,11 +216,6 @@ static int sr_rip_cb(sr_session_ctx_t *session, uint32_t, const char *, const char *, const char *xpath, uint32_t, struct lyd_node **parent, __attribute__((unused)) void *priv) { - char *yanger_args[5] = { - YANGER_BINPATH, - "ietf-rip", - NULL - }; const 
struct ly_ctx *ctx; sr_conn_ctx_t *con; sr_error_t err; @@ -297,9 +234,9 @@ static int sr_rip_cb(sr_session_ctx_t *session, uint32_t, const char *, return SR_ERR_INTERNAL; } - err = ly_add_yanger_data(ctx, parent, yanger_args); + err = ly_add_yangerd_data(ctx, parent, "ietf-routing:routing"); if (err) - ERROR("Error adding yanger data"); + ERROR("Error adding RIP data"); sr_release_context(con); @@ -310,11 +247,6 @@ static int sr_bfd_cb(sr_session_ctx_t *session, uint32_t, const char *, const char *, const char *xpath, uint32_t, struct lyd_node **parent, __attribute__((unused)) void *priv) { - char *yanger_args[5] = { - YANGER_BINPATH, - "ietf-bfd-ip-sh", - NULL - }; const struct ly_ctx *ctx; sr_conn_ctx_t *con; sr_error_t err; @@ -333,9 +265,9 @@ static int sr_bfd_cb(sr_session_ctx_t *session, uint32_t, const char *, return SR_ERR_INTERNAL; } - err = ly_add_yanger_data(ctx, parent, yanger_args); + err = ly_add_yangerd_data(ctx, parent, "ietf-routing:routing"); if (err) - ERROR("Error adding yanger data"); + ERROR("Error adding BFD data"); sr_release_context(con); @@ -379,7 +311,16 @@ static int subscribe(struct statd *statd, char *model, char *xpath, sub = malloc(sizeof(struct sub)); memset(sub, 0, sizeof(struct sub)); - DEBUG("Subscribe to events for \"%s\"", xpath); + /* + * Derive the yangerd key from the (static) subscription xpath here, + * once. The generic callback must NOT derive it from the runtime + * request xpath sysrepo hands it -- that is unreliable and yields a + * bare "system-state" for /ietf-system:system-state, which yangerd + * (keyed "ietf-system:system-state") cannot match. 
+ */ + xpath_to_yangerd_path(xpath, sub->key, sizeof(sub->key)); + + DEBUG("Subscribe to events for \"%s\" (key \"%s\")", xpath, sub->key); err = sr_oper_get_subscribe(statd->sr_ses, model, xpath, cb, sub, SR_SUBSCR_DEFAULT | SR_SUBSCR_NO_THREAD | SR_SUBSCR_DONE_ONLY, &sub->sr_sub); diff --git a/src/statd/yangerd.c b/src/statd/yangerd.c new file mode 100644 index 000000000..328dff5ed --- /dev/null +++ b/src/statd/yangerd.c @@ -0,0 +1,246 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "yangerd.h" + +static const char *yangerd_socket_path(void) +{ + const char *env; + + env = getenv("YANGERD_SOCKET"); + if (env && *env) + return env; + + return YANGERD_SOCKET_DEFAULT; +} + +static int yangerd_connect(void) +{ + struct sockaddr_un addr = { .sun_family = AF_UNIX }; + struct timeval tv = { .tv_sec = YANGERD_TIMEOUT_SEC }; + const char *path; + int fd; + + path = yangerd_socket_path(); + if (strlen(path) >= sizeof(addr.sun_path)) { + ERROR("yangerd socket path too long: %s", path); + return -1; + } + strncpy(addr.sun_path, path, sizeof(addr.sun_path) - 1); + + fd = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0); + if (fd < 0) { + ERROR("yangerd: socket(): %s", strerror(errno)); + return -1; + } + + setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)); + setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv)); + + if (connect(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) { + DEBUG("yangerd: connect(%s): %s", path, strerror(errno)); + close(fd); + return -1; + } + + return fd; +} + +static int yangerd_write_all(int fd, const void *buf, size_t len) +{ + const unsigned char *p = buf; + + while (len > 0) { + ssize_t n = write(fd, p, len); + + if (n < 0) { + if (errno == EINTR) + continue; + return -1; + } + p += n; + len -= n; + } + + return 0; +} + +static int yangerd_read_all(int fd, void *buf, size_t len) +{ + unsigned char *p = buf; + + while (len > 
0) { + ssize_t n = read(fd, p, len); + + if (n < 0) { + if (errno == EINTR) + continue; + return -1; + } + if (n == 0) { + errno = ECONNRESET; + return -1; + } + p += n; + len -= n; + } + + return 0; +} + +static int yangerd_send_request(int fd, const char *path) +{ + json_t *req; + char *json_str; + size_t json_len; + unsigned char hdr[5]; + int rc = -1; + + req = json_pack("{s:s, s:s}", "method", "get", "path", path); + if (!req) + return -1; + + json_str = json_dumps(req, JSON_COMPACT); + json_decref(req); + if (!json_str) + return -1; + + json_len = strlen(json_str); + if (json_len > YANGERD_MAX_PAYLOAD) { + free(json_str); + return -1; + } + + hdr[0] = YANGERD_PROTO_VERSION; + hdr[1] = (json_len >> 24) & 0xff; + hdr[2] = (json_len >> 16) & 0xff; + hdr[3] = (json_len >> 8) & 0xff; + hdr[4] = (json_len >> 0) & 0xff; + + if (yangerd_write_all(fd, hdr, sizeof(hdr)) < 0) + goto out; + if (yangerd_write_all(fd, json_str, json_len) < 0) + goto out; + + rc = 0; +out: + free(json_str); + return rc; +} + +static int yangerd_recv_response(int fd, char **buf, size_t *len) +{ + unsigned char hdr[5]; + uint32_t payload_len; + json_error_t jerr; + json_t *resp; + json_t *status; + json_t *data; + char *body; + char *data_str; + + *buf = NULL; + *len = 0; + + if (yangerd_read_all(fd, hdr, sizeof(hdr)) < 0) + return -1; + + if (hdr[0] != YANGERD_PROTO_VERSION) { + ERROR("yangerd: protocol version mismatch: got %u, want %u", + hdr[0], YANGERD_PROTO_VERSION); + return -1; + } + + payload_len = ((uint32_t)hdr[1] << 24) | + ((uint32_t)hdr[2] << 16) | + ((uint32_t)hdr[3] << 8) | + ((uint32_t)hdr[4]); + + if (payload_len > YANGERD_MAX_PAYLOAD) { + ERROR("yangerd: payload too large: %u", payload_len); + return -1; + } + + body = malloc(payload_len + 1); + if (!body) + return -1; + + if (yangerd_read_all(fd, body, payload_len) < 0) { + free(body); + return -1; + } + body[payload_len] = '\0'; + + resp = json_loads(body, 0, &jerr); + free(body); + if (!resp) { + ERROR("yangerd: invalid 
response JSON: %s", jerr.text); + return -1; + } + + status = json_object_get(resp, "status"); + if (!json_is_string(status) || strcmp(json_string_value(status), "ok")) { + json_t *msg = json_object_get(resp, "message"); + + ERROR("yangerd: request failed: %s", + json_is_string(msg) ? json_string_value(msg) : "unknown"); + json_decref(resp); + return -1; + } + + data = json_object_get(resp, "data"); + if (!data || json_is_null(data)) { + json_decref(resp); + *buf = strdup("{}"); + *len = *buf ? 2 : 0; + return *buf ? 0 : -1; + } + + data_str = json_dumps(data, JSON_COMPACT); + json_decref(resp); + if (!data_str) + return -1; + + *buf = data_str; + *len = strlen(data_str); + + return 0; +} + +int yangerd_query(const char *path, char **buf, size_t *len) +{ + int fd; + int rc; + + *buf = NULL; + *len = 0; + + fd = yangerd_connect(); + if (fd < 0) + return -1; + + if (yangerd_send_request(fd, path) < 0) { + ERROR("yangerd: failed sending request for %s", path); + close(fd); + return -1; + } + + rc = yangerd_recv_response(fd, buf, len); + if (rc < 0) + ERROR("yangerd: failed reading response for %s", path); + + close(fd); + + return rc; +} diff --git a/src/statd/yangerd.h b/src/statd/yangerd.h new file mode 100644 index 000000000..e1421a292 --- /dev/null +++ b/src/statd/yangerd.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ + +#ifndef STATD_YANGERD_H_ +#define STATD_YANGERD_H_ + +#include <stddef.h> + +#define YANGERD_SOCKET_DEFAULT "/run/yangerd.sock" +#define YANGERD_TIMEOUT_SEC 5 +#define YANGERD_MAX_PAYLOAD (4 << 20) /* 4 MiB, matches Go side */ +#define YANGERD_PROTO_VERSION 0x01 + +/** + * yangerd_query() - Query yangerd daemon for operational YANG data + * @path: YANG model path, e.g. "ietf-interfaces:interfaces" + * @buf: Output pointer to malloc'd JSON string (caller must free) + * @len: Output length of JSON data + * + * Connects to the yangerd Unix socket, sends a "get" request for @path, + * reads the framed response, and extracts the "data" field as a JSON + * string.
The socket path defaults to %YANGERD_SOCKET_DEFAULT but can + * be overridden with the YANGERD_SOCKET environment variable. + * + * Return: 0 on success, -1 on error (buf is set to NULL). + */ +int yangerd_query(const char *path, char **buf, size_t *len); + +#endif diff --git a/src/yangerd/.gitignore b/src/yangerd/.gitignore new file mode 100644 index 000000000..d5b437a9f --- /dev/null +++ b/src/yangerd/.gitignore @@ -0,0 +1,3 @@ +# Build artifacts (root-level binaries only; do not match cmd/ source dirs) +/yangerd +/yangerctl diff --git a/src/yangerd/LICENSE b/src/yangerd/LICENSE new file mode 100644 index 000000000..bf7aa8c9e --- /dev/null +++ b/src/yangerd/LICENSE @@ -0,0 +1,27 @@ +Copyright (c) 2025 The KernelKit Authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of copyright holders nor the names of + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/src/yangerd/cmd/yangerctl/main.go b/src/yangerd/cmd/yangerctl/main.go new file mode 100644 index 000000000..a4ef55d30 --- /dev/null +++ b/src/yangerd/cmd/yangerctl/main.go @@ -0,0 +1,110 @@ +package main + +import ( + "encoding/json" + "fmt" + "os" + "time" + + "github.com/kernelkit/infix/src/yangerd/internal/ipc" +) + +const defaultSocket = "/run/yangerd.sock" +const defaultTimeout = 5 * time.Second + +func main() { + socket := defaultSocket + timeout := defaultTimeout + + args := os.Args[1:] + for len(args) > 0 && len(args[0]) > 0 && args[0][0] == '-' { + switch args[0] { + case "--socket": + if len(args) < 2 { + die("--socket requires an argument") + } + socket = args[1] + args = args[2:] + case "--timeout": + if len(args) < 2 { + die("--timeout requires an argument") + } + d, err := time.ParseDuration(args[1]) + if err != nil { + die("invalid duration: %v", err) + } + timeout = d + args = args[2:] + default: + die("unknown flag: %s", args[0]) + } + } + + if len(args) == 0 { + usage() + } + + client := ipc.NewClient(socket, timeout) + + switch args[0] { + case "get": + if len(args) < 2 { + die("get requires a path argument") + } + resp, err := client.Get(args[1]) + if err != nil { + die("get: %v", err) + } + printResponse(resp) + case "dump": + resp, err := client.Get("/") + if err != nil { + die("dump: %v", err) + } + printResponse(resp) + case "health": + resp, err := client.Health() + if err != nil { + die("health: %v", err) + } + 
printResponse(resp) + default: + die("unknown command: %s", args[0]) + } +} + +func printResponse(resp *ipc.Response) { + if resp.Code == 503 { + fmt.Fprintf(os.Stderr, "yangerd is starting up\n") + os.Exit(3) + } + if resp.Status == "error" { + fmt.Fprintf(os.Stderr, "error %d: %s\n", resp.Code, resp.Message) + if resp.Code == 404 { + os.Exit(2) + } + os.Exit(1) + } + + var out []byte + if resp.Data != nil { + out, _ = json.MarshalIndent(json.RawMessage(resp.Data), "", " ") + } else { + out, _ = json.MarshalIndent(resp, "", " ") + } + fmt.Println(string(out)) +} + +func usage() { + fmt.Fprintf(os.Stderr, "Usage: yangerctl [--socket path] [--timeout dur] <command> [args]\n\n") + fmt.Fprintf(os.Stderr, "Commands:\n") + fmt.Fprintf(os.Stderr, " get <path> Query a YANG subtree\n") + fmt.Fprintf(os.Stderr, " dump Dump entire tree\n") + fmt.Fprintf(os.Stderr, " health Show daemon health\n") + os.Exit(1) +} + +func die(format string, args ...interface{}) { + fmt.Fprintf(os.Stderr, "yangerctl: "+format+"\n", args...)
+ os.Exit(1) +} diff --git a/src/yangerd/cmd/yangerd/main.go b/src/yangerd/cmd/yangerd/main.go new file mode 100644 index 000000000..65077290b --- /dev/null +++ b/src/yangerd/cmd/yangerd/main.go @@ -0,0 +1,434 @@ +package main + +import ( + "context" + "encoding/json" + "log" + "log/slog" + "os" + "os/signal" + "path/filepath" + "strings" + "sync" + "sync/atomic" + "syscall" + "time" + + "github.com/kernelkit/infix/src/yangerd/internal/bridgebatch" + "github.com/kernelkit/infix/src/yangerd/internal/collector" + "github.com/kernelkit/infix/src/yangerd/internal/config" + "github.com/kernelkit/infix/src/yangerd/internal/containermonitor" + "github.com/kernelkit/infix/src/yangerd/internal/dbusmonitor" + "github.com/kernelkit/infix/src/yangerd/internal/ethmonitor" + "github.com/kernelkit/infix/src/yangerd/internal/frrvty" + "github.com/kernelkit/infix/src/yangerd/internal/fswatcher" + "github.com/kernelkit/infix/src/yangerd/internal/ipbatch" + "github.com/kernelkit/infix/src/yangerd/internal/ipc" + "github.com/kernelkit/infix/src/yangerd/internal/iwmonitor" + "github.com/kernelkit/infix/src/yangerd/internal/lldpmonitor" + "github.com/kernelkit/infix/src/yangerd/internal/monitor" + "github.com/kernelkit/infix/src/yangerd/internal/sysreaders" + "github.com/kernelkit/infix/src/yangerd/internal/tree" + "github.com/kernelkit/infix/src/yangerd/internal/wgquery" + "github.com/kernelkit/infix/src/yangerd/internal/zapiwatcher" +) + +// osFileChecker implements iface.FileChecker using the real filesystem. 
+type osFileChecker struct{} + +func (osFileChecker) Exists(path string) bool { + _, err := os.Stat(path) + return err == nil +} + +func (osFileChecker) ReadFile(path string) (string, error) { + b, err := os.ReadFile(path) + if err != nil { + return "", err + } + return string(b), nil +} + +func main() { + cfg := config.Load() + log.SetFlags(0) + + t := tree.New() + ready := &atomic.Bool{} + + srv := ipc.NewServer(t, ready) + if err := srv.Listen(cfg.Socket); err != nil { + log.Fatalf("listen %s: %v", cfg.Socket, err) + } + defer os.Remove(cfg.Socket) + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + var wg sync.WaitGroup + cmd := collector.ExecRunner{} + fs := collector.OSFileReader{} + collectors := []collector.Collector{ + collector.NewSystemCollector(cmd, fs, cfg.PollSystem), + collector.NewRoutingCollector(cmd, cfg.PollRouting), + collector.NewNTPCollector(cmd, cfg.PollNTP), + collector.NewHardwareCollector(cmd, fs, cfg.PollHardware, cfg.EnableWifi, cfg.EnableGPS), + } + pokeCh := make(chan struct{}, len(collectors)) + collector.RunAll(ctx, &wg, t, collectors, pokeCh) + + inst := collector.DBusInstaller{} + t.RegisterProvider("ietf-system:system-state", func() json.RawMessage { + live := collector.LiveSystemState(fs) + installerOverlay := collector.MergeInstaller(t.GetCached("ietf-system:system-state"), inst) + if installerOverlay == nil { + return live + } + var base map[string]json.RawMessage + if json.Unmarshal(live, &base) != nil { + return live + } + var overlay map[string]json.RawMessage + if json.Unmarshal(installerOverlay, &overlay) != nil { + return live + } + for k, v := range overlay { + base[k] = v + } + merged, err := json.Marshal(base) + if err != nil { + return live + } + return merged + }) + + if data := collector.BootPlatform(fs); data != nil { + t.Merge("ietf-system:system-state", data) + } + if data := collector.BootSoftware(ctx, cmd); data != nil { + t.Merge("ietf-system:system-state", data) + } + + slogLog := 
slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: slogLevel(cfg.LogLevel)})) + + linkBatch, err := ipbatch.New(ctx, slogLog, ipbatch.WithStats(), ipbatch.WithDetails()) + if err != nil { + log.Fatalf("start link batch: %v", err) + } + defer linkBatch.Close() + + addrBatch, err := ipbatch.New(ctx, slogLog, ipbatch.WithDetails()) + if err != nil { + log.Fatalf("start addr batch: %v", err) + } + defer addrBatch.Close() + + neighBatch, err := ipbatch.New(ctx, slogLog) + if err != nil { + log.Fatalf("start neigh batch: %v", err) + } + defer neighBatch.Close() + + brBatch, err := bridgebatch.New(ctx, slogLog) + if err != nil { + log.Fatalf("start bridge batch: %v", err) + } + defer brBatch.Close() + + nlmon := monitor.New(linkBatch, addrBatch, neighBatch, brBatch, t, osFileChecker{}, slogLog) + + ethMon, err := ethmonitor.New(slogLog, cmd) + if err != nil { + slogLog.Warn("ethmonitor unavailable, continuing without it", "err", err) + } else { + ethMon.SetOnUpdate(nlmon.SetEthernetData) + nlmon.SetEthRefresh(ethMon.RefreshInterface) + wg.Add(1) + go func() { + defer wg.Done() + if err := ethMon.Run(ctx); err != nil && ctx.Err() == nil { + slogLog.Error("ethmonitor exited", "err", err) + } + }() + } + + wg.Add(1) + go func() { + defer wg.Done() + ticker := time.NewTicker(10 * time.Second) + defer ticker.Stop() + <-nlmon.WaitReady() + for { + links := nlmon.Links() + for ifname, data := range wgquery.Query(links) { + nlmon.SetWireguardData(ifname, data) + } + select { + case <-ctx.Done(): + return + case <-ticker.C: + } + } + }() + + wg.Add(1) + go func() { + defer wg.Done() + ticker := time.NewTicker(cfg.PollSTP) + defer ticker.Stop() + <-nlmon.WaitReady() + for { + nlmon.RefreshSTP() + select { + case <-ctx.Done(): + return + case <-ticker.C: + } + } + }() + + wg.Add(1) + go func() { + defer wg.Done() + for { + if err := nlmon.Run(ctx); err != nil { + if ctx.Err() != nil { + return + } + slogLog.Error("nlmonitor exited, restarting in 1s", "err", err) + 
select { + case <-ctx.Done(): + return + case <-time.After(time.Second): + } + } else { + return + } + } + }() + + if cfg.EnableWifi { + iwmon := iwmonitor.New(slogLog) + iwmon.SetOnUpdate(nlmon.SetWifiData) + iwmon.SetOnPhyChange(func() { + select { + case pokeCh <- struct{}{}: + default: + } + }) + wg.Add(1) + go func() { + defer wg.Done() + if err := iwmon.Run(ctx); err != nil && ctx.Err() == nil { + slogLog.Error("iwmonitor exited", "err", err) + } + }() + } + + if cfg.EnableLLDP { + lldpmon := lldpmonitor.New(t, slogLog) + wg.Add(1) + go func() { + defer wg.Done() + if err := lldpmon.Run(ctx); err != nil && ctx.Err() == nil { + slogLog.Error("lldpmonitor exited", "err", err) + } + }() + } + + if cfg.EnableContainers { + ctrmon := containermonitor.New(t, cmd, fs, slogLog) + wg.Add(1) + go func() { + defer wg.Done() + if err := ctrmon.Run(ctx); err != nil && ctx.Err() == nil { + slogLog.Error("containermonitor exited", "err", err) + } + }() + } + + zapi := zapiwatcher.New(t, frrvty.New(""), slogLog) + wg.Add(1) + go func() { + defer wg.Done() + if err := zapi.Run(ctx); err != nil && ctx.Err() == nil { + slogLog.Error("zapiwatcher exited", "err", err) + } + }() + + if cfg.EnableDHCP || cfg.EnableFirewall { + dbusMon := dbusmonitor.New(t, slogLog) + wg.Add(1) + go func() { + defer wg.Done() + if err := dbusMon.Run(ctx); err != nil && ctx.Err() == nil { + slogLog.Error("dbusmonitor exited", "err", err) + } + }() + } + + fsw, err := fswatcher.New(t, slogLog) + if err != nil { + log.Fatalf("start fswatcher: %v", err) + } + + fwdAgg := sysreaders.NewForwardingAggregator() + forwardingPaths := []string{ + "/proc/sys/net/ipv4/conf/*/forwarding", + "/proc/sys/net/ipv6/conf/*/forwarding", + } + for _, pattern := range forwardingPaths { + matches, globErr := filepath.Glob(pattern) + if globErr != nil { + slogLog.Warn("fswatcher glob failed", "pattern", pattern, "err", globErr) + continue + } + for _, path := range matches { + if err := fsw.Watch(path, 
fswatcher.WatchHandler{ + TreeKey: routingTreeKey, + ReadFunc: fwdAgg.HandleForwardingChange, + Debounce: 100 * time.Millisecond, + UseMerge: true, + }); err != nil { + slogLog.Warn("fswatcher watch failed", "path", path, "err", err) + } + } + } + if err := fsw.Watch("/etc/hostname", fswatcher.WatchHandler{ + TreeKey: "ietf-system:system", + ReadFunc: sysreaders.ReadHostname, + Debounce: 200 * time.Millisecond, + UseMerge: true, + }); err != nil { + slogLog.Warn("fswatcher watch failed", "path", "/etc/hostname", "err", err) + } + if err := fsw.WatchSymlink("/etc/localtime", fswatcher.WatchHandler{ + TreeKey: "ietf-system:system", + ReadFunc: sysreaders.ReadTimezone, + Debounce: 200 * time.Millisecond, + UseMerge: true, + }); err != nil { + slogLog.Warn("fswatcher watch failed", "path", "/etc/localtime", "err", err) + } + usersHandler := fswatcher.WatchHandler{ + TreeKey: "ietf-system:system", + ReadFunc: sysreaders.ReadUsers, + Debounce: 200 * time.Millisecond, + UseMerge: true, + } + if err := fsw.Watch("/etc/shadow", usersHandler); err != nil { + slogLog.Warn("fswatcher watch failed", "path", "/etc/shadow", "err", err) + } + if err := fsw.WatchDir(sysreaders.SSHDKeysDir, usersHandler); err != nil { + slogLog.Warn("fswatcher watch failed", "path", sysreaders.SSHDKeysDir, "err", err) + } + bootOrderHandler := fswatcher.WatchHandler{ + TreeKey: "ietf-system:system-state", + ReadFunc: makeBootOrderReader(t, cmd), + Debounce: 200 * time.Millisecond, + UseMerge: true, + } + // Watch the parent directory, not the file: fw_setenv (U-Boot) and + // grub-editenv may rewrite the env via a temp file + rename, which + // gives it a new inode that a direct file watch never sees. Watching + // the directory catches the Create/Rename (and still catches in-place + // writes), so a boot-order change after a RAUC install is reflected + // without waiting for a reboot. 
+ for _, path := range []string{"/mnt/aux/grub/grubenv", "/mnt/aux/uboot.env"} { + if err := fsw.WatchSymlink(path, bootOrderHandler); err != nil { + slogLog.Debug("fswatcher boot-order watch skipped", "path", path, "err", err) + } + } + dnsHandler := fswatcher.WatchHandler{ + TreeKey: "ietf-system:system-state", + ReadFunc: sysreaders.ReadDNSResolver, + Debounce: 200 * time.Millisecond, + UseMerge: true, + } + for _, path := range []string{"/etc/resolv.conf.head", "/var/lib/misc/resolv.conf"} { + if err := fsw.WatchSymlink(path, dnsHandler); err != nil { + slogLog.Warn("fswatcher dns watch failed", "path", path, "err", err) + } + } + // Container operational data is handled by containermonitor (a + // `podman events` stream), not the fswatcher. + fsw.InitialRead() + wg.Add(1) + go func() { + defer wg.Done() + if err := fsw.Run(ctx); err != nil && ctx.Err() == nil { + slogLog.Error("fswatcher exited", "err", err) + } + }() + + go func() { + <-nlmon.WaitReady() + ready.Store(true) + }() + + sigCh := make(chan os.Signal, 1) + signal.Notify(sigCh, syscall.SIGTERM, syscall.SIGINT, syscall.SIGHUP) + + go func() { + for sig := range sigCh { + if sig == syscall.SIGHUP { + log.Printf("SIGHUP: triggering immediate re-poll") + for range len(collectors) { + pokeCh <- struct{}{} + } + continue + } + cancel() + return + } + }() + + if err := srv.Serve(ctx); err != nil { + log.Fatalf("serve: %v", err) + } + + wg.Wait() +} + +func slogLevel(s string) slog.Level { + switch strings.ToLower(s) { + case "debug": + return slog.LevelDebug + case "warn", "warning": + return slog.LevelWarn + case "error": + return slog.LevelError + default: + return slog.LevelInfo + } +} + +const routingTreeKey = "ietf-routing:routing" + +func makeBootOrderReader(t *tree.Tree, cmd collector.CommandRunner) func(string) (json.RawMessage, error) { + return func(_ string) (json.RawMessage, error) { + bootOrder := collector.ReadBootOrder(context.TODO(), cmd) + + raw := t.Get("ietf-system:system-state") + var 
state map[string]interface{} + if raw != nil { + json.Unmarshal(raw, &state) + } + if state == nil { + state = make(map[string]interface{}) + } + + sw, _ := state["infix-system:software"].(map[string]interface{}) + if sw == nil { + sw = make(map[string]interface{}) + } + + if bootOrder != nil { + sw["boot-order"] = bootOrder + } else { + delete(sw, "boot-order") + } + + return json.Marshal(map[string]interface{}{"infix-system:software": sw}) + } +} diff --git a/src/yangerd/doc/yangerd-design.md b/src/yangerd/doc/yangerd-design.md new file mode 100644 index 000000000..95fc203bc --- /dev/null +++ b/src/yangerd/doc/yangerd-design.md @@ -0,0 +1,5965 @@ +# yangerd — Design Document + +**Status:** DRAFT +**Date:** 2026-02-24 +**Author:** (Engineering Team) + +## Table of Contents + +- [Revision History](#revision-history) +- [1. Introduction](#1-introduction) + - [1.1 Purpose](#11-purpose) + - [1.2 Problem Statement](#12-problem-statement) + - [1.3 Solution Summary](#13-solution-summary) + - [1.4 Relationship to Infix Components](#14-relationship-to-infix-components) +- [2. 
Requirements & Constraints](#2-requirements--constraints) + - [2.1 Functional Requirements](#21-functional-requirements) + - [2.2 Non-Functional Requirements](#22-non-functional-requirements) + - [2.3 Explicit Scope Boundaries](#23-explicit-scope-boundaries-what-this-is-not) + - [Not a sysrepo plugin](#not-a-sysrepo-plugin) + - [Not a NETCONF or RESTCONF server](#not-a-netconf-or-restconf-server) + - [Not CGo](#not-cgo) + - [Not a replacement for confd](#not-a-replacement-for-confd) + - [Not a YANG validator](#not-a-yang-validator) + - [Not a push or streaming daemon](#not-a-push-or-streaming-daemon) + - [Not responsible for container namespace data (Phase 1)](#not-responsible-for-container-namespace-data-phase-1) + - [2.4 Hard Constraints](#24-hard-constraints) + - [2.5 YANG-Model JSON Output Compatibility](#25-yang-model-json-output-compatibility) + - [2.5.1 Top-level JSON Structure Per Module](#251-top-level-json-structure-per-module) + - [2.5.2 Concrete JSON Output Examples](#252-concrete-json-output-examples) + - [2.5.3 Structural Rules](#253-structural-rules) + - [2.5.4 Field Transformation Reference](#254-field-transformation-reference) + - [2.5.5 Validation Strategy](#255-validation-strategy) +- [3. Architecture Overview](#3-architecture-overview) + - [3.1 Component Diagram](#31-component-diagram) + - [3.2 Data Flow Diagrams](#32-data-flow-diagrams) + - [3.3 Component Responsibilities](#33-component-responsibilities) +- [4. 
Detailed Design](#4-detailed-design) + - [4.1 Netlink Monitor Subsystem](#41-netlink-monitor-subsystem) + - [4.1bis ip batch Subprocess Manager](#41bis-ip-batch-subprocess-manager) + - [4.1ter File Watcher Subsystem](#41ter-file-watcher-subsystem) + - [4.1quater Bridge Monitor Subsystem](#41quater-bridge-monitor-subsystem) + - [4.1quinquies IW Event Monitor Subsystem](#41quinquies-iw-event-monitor-subsystem) + - [4.1sexies Ethtool Netlink Monitor Subsystem](#41sexies-ethtool-netlink-monitor-subsystem) + - [4.1octies ZAPI Watcher Subsystem (Zebra Route Redistribution)](#41octies-zapi-watcher-subsystem-zebra-route-redistribution) + - [4.1novies D-Bus Monitor Subsystem](#41novies-d-bus-monitor-subsystem) + - [4.1decies LLDP Monitor Subsystem](#41decies-lldp-monitor-subsystem) + - [4.1undecies mDNS Monitor Subsystem](#41undecies-mdns-monitor-subsystem) + - [4.1septies Event-Triggered Batch Re-read Pattern (All Netlink Events)](#41septies-event-triggered-batch-re-read-pattern-all-netlink-events) + - [4.2 In-Memory Data Tree](#42-in-memory-data-tree) + - [4.3 IPC Protocol Specification](#43-ipc-protocol-specification) + - [4.4 Supplementary Collectors](#44-supplementary-collectors) + - [4.5 statd Integration](#45-statd-integration) + - [4.6 yangerctl CLI](#46-yangerctl-cli) + - [4.7 Design Decisions](#47-design-decisions) + - [4.8 Monitoring & Observability](#48-monitoring--observability) + - [Health Endpoint](#health-endpoint) + - [Metrics Tracked](#metrics-tracked) + - [Log Levels](#log-levels) + - [4.9 Security Considerations](#49-security-considerations) + - [Socket Permissions](#socket-permissions) + - [Linux Capabilities](#linux-capabilities) + - [Trust Boundary](#trust-boundary) +- [5. 
Data Source Matrix](#5-data-source-matrix) + - [5.1 ietf-interfaces](#51-ietf-interfaces) + - [5.2 ietf-routing](#52-ietf-routing) + - [5.3 ietf-hardware](#53-ietf-hardware) + - [5.4 ietf-system](#54-ietf-system) + - [5.5 ietf-ntp](#55-ietf-ntp) + - [5.6 ieee802-dot1ab-lldp](#56-ieee802-dot1ab-lldp) + - [5.7 infix-containers](#57-infix-containers) + - [5.8 infix-dhcp-server](#58-infix-dhcp-server) + - [5.9 infix-firewall](#59-infix-firewall) + - [5.9bis infix-services](#59bis-infix-services) + - [5.10 Summary Table](#510-summary-table) + - [5.11 Module-by-Module Mapping](#511-module-by-module-mapping) +- [6. Project Structure](#6-project-structure) + - [6.1 Go Project Layout](#61-go-project-layout) + - [6.2 Package Descriptions](#62-package-descriptions) + - [6.3 Key Dependencies](#63-key-dependencies) + - [6.4 Buildroot Integration](#64-buildroot-integration) +- [7. Deployment & Operations](#7-deployment--operations) + - [7.1 Finit Service File](#71-finit-service-file) + - [7.2 Socket Permissions](#72-socket-permissions) + - [7.3 Environment Variables](#73-environment-variables) + - [7.4 Startup Sequence](#74-startup-sequence) + - [7.5 Local Development](#75-local-development) + - [7.6 Buildroot Package](#76-buildroot-package) + - [7.7 Cross-Compilation](#77-cross-compilation) +- [8. Testing Strategy](#8-testing-strategy) + - [8.1 Unit Tests](#81-unit-tests) + - [8.2 Integration Tests](#82-integration-tests) + - [8.3 Regression Tests](#83-regression-tests) + - [8.4 Race Detector Policy](#84-race-detector-policy) + - [8.5 Testability Contracts (Interface Boundaries)](#85-testability-contracts-interface-boundaries) + - [8.6 Verification Loop (Definition of Done)](#86-verification-loop-definition-of-done) +- [9. Migration Plan](#9-migration-plan) + - [9.1 Module Inventory](#91-module-inventory) +- [10. 
Risk Assessment](#10-risk-assessment) + - [10.1 Detailed Risks](#101-detailed-risks) + - [10.2 Risk Summary](#102-risk-summary) +- [Appendices](#appendices) + - [A.1 Netlink Group Reference](#a1-netlink-group-reference) + - [A.2 YANG Module Registry](#a2-yang-module-registry) + - [A.3 Glossary](#a3-glossary) +- [Troubleshooting Guide](#troubleshooting-guide) + - [IPC Connection Issues](#ipc-connection-issues) + - [Stale Data in the Tree](#stale-data-in-the-tree) + - [Performance Bottlenecks](#performance-bottlenecks) +- [Detailed IPC Examples](#detailed-ipc-examples) + - [Example 1: Full Interface List Query](#example-1-full-interface-list-query) + - [Example 2: Routing Table Query](#example-2-routing-table-query) + +## Revision History + +| Date | Revision | Description | Author | +|------|----------|-------------|--------| +| 2026-02-24 | 0.1 | Initial draft from implementation proposal. | Assistant | +| 2026-02-24 | 0.2 | Added reactive file watcher and bridge monitor subsystems; converted ~8 data sources to REACTIVE. | Assistant | +| 2026-02-24 | 0.3 | Added iw event monitor subsystem for reactive 802.11 wireless monitoring; converted WiFi from POLLING to REACTIVE. | Assistant | +| 2026-02-24 | 0.4 | Moved `last-change` from NOT COLLECTED to REACTIVE; added oper-status tracking in link event handler with `time.Now()` timestamp. | Assistant | +| 2026-02-24 | 0.5 | Added ethtool netlink monitor subsystem for reactive speed/duplex/auto-negotiation via `ETHNL_MCGRP_MONITOR` genetlink multicast; converted 3 ethtool leaves from POLLING 30s to REACTIVE; ethtool collector becomes hybrid (reactive settings + polling statistics). 
| Assistant | +| 2026-02-24 | 0.6 | RTM_NEWLINK full interface re-read: on any link event, the event dispatcher now writes a full set of queries (`link show dev`, `-s link show dev`, `addr show dev`) to ip batch for atomic interface state; also triggers `ethmonitor.RefreshInterface()` for ethtool re-query since `ETHNL_MCGRP_MONITOR` does NOT fire on link up/down. Cross-subsystem coordination between link monitor and ethmonitor. | Assistant | +| 2026-02-24 | 0.7 | Kernel 6.18 cleanup: stripped all fallback/degradation hedging for ethtool netlink. Infix targets Linux 6.18 exclusively; ethtool netlink is unconditionally available. Removed all `kernel < 5.6`, `graceful degradation`, and `polling fallback` references across 23 locations. | Assistant | +| 2026-02-24 | 0.8 | Event-triggered batch re-read for ALL netlink event types: address, route, and neighbor events (both RTM_NEW* and RTM_DEL*) now use the same event-as-trigger pattern as link events. Each event triggers a full re-read of the affected state via ip batch; event content is not parsed for data. Delete events produce a re-read that omits the removed entity. Updated data source matrix, design decisions, appendix A.1, module-by-module mapping, and project structure throughout. | Assistant | +| 2026-02-24 | 0.9 | Added section 2.5: YANG-Model JSON Output Compatibility. Formal requirement that yangerd must produce JSON output structurally identical to the current Python yanger scripts (RFC 7951 module-qualified keys, list-as-array, augmentation prefixes, counter-as-string, presence-as-null). Includes top-level JSON structure table for all 14 models, concrete JSON examples for every module, 14 structural rules, field transformation reference, and validation strategy. 
| Assistant | +| 2026-02-24 | 0.10 | Corrected routing data source attribution: route table re-reads now use `vtysh` (FRRouting) instead of `ip batch`, because vtysh is the authoritative source for the complete routing table including all protocol routes (kernel, connected, static, OSPF, RIP) with enriched metadata (source protocol, distance, metric, active/installed flags). Updated data source matrix (section 5.2), route event handler code, initial state dump, batch query examples, event-trigger tables, module-by-module mapping, appendix A.1, glossary, and all related prose throughout the document. Added dedicated Route Table Collector section (5b) for the reactive vtysh-based RIB collection. | Assistant | +| 2026-02-24 | 0.11 | Build-time feature flags and binary-present assumption. WiFi (`YANGERD_ENABLE_WIFI`), containers (`YANGERD_ENABLE_CONTAINERS`), and GPS (`YANGERD_ENABLE_GPS`) are now opt-in build features controlled by runtime environment variables in `/etc/default/yangerd`, written by the Buildroot recipe based on `BR2_PACKAGE_*` selections. When a feature is disabled, its collectors and monitors are not started — no runtime binary detection is performed. All tool binaries (`iw`, `iproute2`, `bridge`, `vtysh`, `nft`, `chronyc`, `dmidecode`, etc.) are guaranteed present on target when their feature is enabled; removed all "if binary absent" hedging. Updated env vars table, startup sequence, Buildroot recipe, `internal/config/` description, collector failure behaviors, data source matrix, module-by-module mapping, migration table, project tree, appendix model table, risk assessment, and design rationale throughout. | Assistant | +| 2026-02-25 | 0.12 | Replaced `ip monitor -json` and `bridge monitor -json` subprocess-based event monitoring with native Go netlink subscriptions via `vishvananda/netlink`. 
iproute2 investigation confirmed that `ip monitor -json` and `bridge monitor -json` never produce JSON output (the `-json` flag is parsed globally but the JSON writer `_jw` is never allocated in `do_ipmonitor()` or `bridge/monitor.c`). Events are now received as typed Go structs (`LinkUpdate`, `AddrUpdate`, `RouteUpdate`, `NeighUpdate`) on dedicated channels. Bridge FDB events arrive via `NeighSubscribeWithOptions`; bridge VLAN via `LinkSubscribeWithOptions`; bridge MDB via raw netlink `RTNLGRP_MDB` subscription. Event-as-trigger pattern preserved: all events trigger full re-reads via `ip batch`, `bridge batch`, or `vtysh`. Subprocess count drops from FIVE to THREE (`ip batch`, `bridge batch`, `iw event`). Updated architecture diagrams, component table, `EventMonitor` code (now `NLMonitor`), bridge monitor code, design rationale, data source matrix, module-by-module mapping, project structure, risk assessment, and glossary throughout. | Assistant | +| 2026-02-25 | 0.13 | Replaced vtysh-based route table collection with a streaming ZAPI watcher (`internal/zapiwatcher/`) that connects directly to FRR zebra's zserv unix socket (`/var/run/frr/zserv.api`), subscribes to route redistribution notifications via ZAPI v6, and receives both the initial RIB dump and incremental route add/delete updates. This captures routes in zebra's RIB that are not present in the Linux kernel FIB (unresolvable nexthop, lost admin-distance election, ECMP overflow, table-map filtered). Automatic reconnection with exponential backoff handles zebra restarts; stale routes are cleared atomically on reconnect via full replacement. vtysh is retained for OSPF/RIP/BFD protocol-specific collectors only. Uses `github.com/osrg/gobgp/v4/pkg/zebra` for ZAPI message framing. 
Updated architecture diagrams (section 3), NLMonitor (section 4.1), event-triggered batch re-read (section 4.1septies), added new section 4.1octies (ZAPI Watcher Subsystem), design decisions (section 4.7), data source matrix (section 5), module-by-module mapping (section 5.11), project structure (section 6), deployment startup sequence (section 7), migration plan (section 9), risk assessment (section 10, including new Risk 11), and appendix A.1 throughout. | Assistant | +| 2026-02-25 | 0.14 | Removed hwmon/thermal sensor files from inotify-based fswatcher -- sysfs pseudo-files do not emit inotify events (kernel generates values on `read()`, never calls `fsnotify_modify()`). Hardware sensors are now collected exclusively by `collector/hardware.go` via polling at 10-second intervals. Updated fswatcher watched paths table (removed 4 hwmon/thermal entries), glob expansion paragraph, inotify limitations section, hardware collector interval (30s->10s), data source matrix (REACTIVE->POLLING for sensors), summary table counts, module-by-module mapping strategy, fswatcher package description, and fsnotify dependency description throughout. | Assistant | +| 2026-02-25 | 0.15 | Review-driven fixes. (1) Socket ownership corrected from `root:yangerd` to `root:statd`. (2) OSPF/RIP/BFD collector intervals normalized to 10s. (3) Hardware collector interval corrected from 30s to 10s (missed in 0.14). (4) Grammar fix: "every 1 seconds" to "every second". (5) IPC protocol version field added: 1-byte version header before 4-byte length in framing (`[ver:1][length:4][JSON body]`); updated framing diagram, architecture intro, yangerd.c defines/read/write code, test descriptions, and glossary. (6) yangerd.c partial read bug fixed: replaced single `read()` with accumulating loop for short reads on Unix sockets. 
(7) Per-model locking redesign: replaced single `sync.RWMutex` with per-model `modelEntry` structs each containing their own `sync.RWMutex`; writers for different YANG modules never block each other; added `GetMulti()` for multi-module IPC concatenation; updated solution summary, architecture diagram, ethmonitor/ZAPI watcher concurrency sections, core Tree type, design rationale, design decision, project tree, package descriptions, startup sequence, test descriptions, and race policy throughout. | Assistant | +| 2026-02-26 | 0.16 | Removed all remaining netlink route subscription references (routes are sourced exclusively from the ZAPI watcher). Removed `RTNLGRP_IPV4_ROUTE` and `RTNLGRP_IPV6_ROUTE` rows from appendix A.1 table. Updated appendix A.1 intro text, RTNLGRP glossary entry (six groups → four groups), NLMonitor architecture, event-triggered re-read section, design decisions, project tree, package descriptions, and dependency table throughout. Added VRF out-of-scope declaration to Section 2.3. | Assistant | +| 2026-03-02 | 0.17 | Removed all yanger.py coexistence, fallback, and phased-migration references. yangerd ships all 13 modules as a single delivery and completely replaces the Python yanger scripts -- no fallback path, no rollback to Python, no phased rollout. Rewrote Section 9 Migration Plan (single delivery), Risk 4 (503 = retry, not Python fallback), risk summary table, Appendix A.2 (removed Phase column), glossary `sr_oper_get_subscribe` entry, validation strategy (golden-file based, not Python comparison), regression tests (YANG-schema validation, not live Python comparison), and removed fallback integration test. Removed Coexistence Strategy and Rollback subsections. | Assistant | +| 2026-03-02 | 0.18 | Bridge data collection is now fully reactive via netlink events as triggers + `bridge -json -batch -` for state re-reads. Removed all bridge polling references. 
STP port state is now sourced from netlink `RTM_NEWLINK` events carrying `IFLA_BRPORT_STATE` in `IFLA_PROTINFO` (not inotify on `/sys/class/net/
/brport/state`). Updated Section 4.1.2 (bridge event channels with STP, event-as-trigger pattern), Section 4.4.3 intro (bridge excluded from polling collectors), design rationale (inotify: removed STP; bridge batch: confirmed STP), summary table (fswatcher: removed brport state; REACTIVE row: added bridge event triggers), migration section `bridge.py` (removed fswatcher/inotify for STP), summary table bridge row (removed fswatcher reference), fswatcher package description (removed brport/state), nlmonitor package description (added STP events), migration module table bridge row (removed `/sys/class/net` reference), Risk 7 (removed bridge ports from inotify exhaustion), Risk 8 (added STP to reactive data list), glossary inotify (removed STP), glossary bridge netlink events (added STP), and appendix A.1 RTNLGRP_MDB notes throughout. | Assistant | +| 2026-03-04 | 0.19 | D-Bus reactive monitoring: dnsmasq DHCP and firewall data collection moved from polling to reactive via D-Bus signal subscriptions. Added D-Bus Monitor Subsystem (Section 4.1novies) using `godbus/dbus/v5` `AddMatchSignal()` for dnsmasq (`DHCPLeaseAdded/Deleted/Updated`) and firewalld (`Reloaded`, `NameOwnerChanged`) signals. dnsmasq lease file watching moved from fswatcher inotify to D-Bus signal triggers (re-read lease file + `GetMetrics()` on each signal). Firewall data moved from 30-second polling of `nft list ruleset -j` to firewalld D-Bus signal triggers. 
Updated architecture diagram, data flow diagrams (added 3.2.8 D-Bus Monitor Reactive Path), component responsibilities, File Watcher Subsystem (removed DHCP leases from watched paths), collector specifications (#10 DHCP, #11 Firewall), design rationale (added D-Bus Monitor rationale; updated inotify rationale), data source matrix (DHCP/firewall rows now REACTIVE), summary table (moved leaf counts from POLLING to REACTIVE), module-by-module migration (DHCP/firewall strategies), project structure (added `dbusmonitor/`), package descriptions (added dbusmonitor, updated fswatcher/collector), dependency table (updated godbus/fsnotify), startup sequence, risk assessment (updated Risk 3, added Risk 12 for D-Bus service unavailability), and glossary (added D-Bus monitor entry, updated inotify/reactive entries) throughout. | Assistant | +| 2026-03-04 | 0.20 | NTP data collection optimized: replaced `exec chronyc` subprocess spawning with native Go cmdmon protocol via `github.com/facebook/time/ntp/chrony` (Apache-2.0). Investigation confirmed chrony has no D-Bus interface, no event-driven socket protocol, and no subscribe mechanism -- the cmdmon UDS protocol (`/var/run/chrony/chronyd.sock`) is strictly request-response. Polling remains the only supported monitoring approach. Updated NTP collector (#8), data source matrix (NTP rows), module-by-module mapping (ietf_ntp.py), migration table, project tree, dependency table, summary table description, appendix model table, field transformation reference, and polling glossary entry throughout. | Assistant | +| 2026-03-04 | 0.21 | Architectural review fixes. (1) IPBatch/BridgeBatch error handling: documented pipe EOF detection, immediate error return, restart coordination with ErrBatchDead sentinel and canary-query validation. (2) errgroup lifecycle: clarified that all Run() methods swallow errors internally, only returning on ctx.Done() -- errgroup is purely a goroutine join point, not a failure propagation mechanism. 
(3) ZAPI disconnect behavior: route subtree is cleared immediately on disconnect (not served stale). (4) Netlink resubscription: full-scope re-read of all entities for the affected event type after any subscription error. (5) IPC server: explicitly documented that tree serves last-known-good state during subprocess restart windows. (6) Health endpoint: defined response schema with per-subsystem state/restart-count/PID and per-model last-updated timestamps; added `updated time.Time` to modelEntry struct. (7) OSPF/RIP/BFD polling intervals corrected from 5s to 10s in data source matrix and collector specifications. (8) Socket group corrected from `yangerd` to `statd`. (9) NLMonitor terminology standardized. (10) Ethmonitor: no fallback on genetlink failure, must use target kernel. (11) Feature flags renamed from "build-time" to "runtime feature flags." (12) D-Bus error paths: log+serve-stale for parse errors, explicit timeouts for D-Bus calls and nft. (13) External command timeouts: all exec.Command uses exec.CommandContext with per-command timeouts. (14) GetMulti eventual consistency documented as explicit design choice. (15) Text parser test fixtures for iw/vtysh. (16) Fswatcher path-to-YANG-leaf mapping. (17) GetMulti lock ordering safety comment. (18) Added YANGERD_POLL_INTERVAL_NTP env var (default 60s). | Assistant | +| 2026-03-04 | 0.22 | Second review pass: fixed 6 copy-paste regressions (duplicated modelEntry/Set/GetMulti/health schema, misplaced consistency note, missing package header), 8 consistency issues (timeout policy, failure philosophy exceptions, BridgeBatch ErrBatchDead, D-Bus code timeouts, yangerctl health output, dead/alive mapping, socket group, NTP env var), and 9 architectural additions (startup readiness protocol, graceful shutdown, memory bounds, security model with Finit snippet, IPC method mapping, config reload policy, signal handling, iw parser robustness, Phase-2 container namespace design). 
| Assistant | +| 2026-03-04 | 0.23 | Firewall data source corrected: replaced all `nft list ruleset -j` references with firewalld D-Bus method calls, matching the Python `infix_firewall.py` implementation. `refreshFirewall()` now takes `conn *dbus.Conn` and queries firewalld directly (`getDefaultZone()`, `getActiveZones()`, `getZoneSettings2()`, `getPolicies()`, `getPolicySettings()`, `listServices()`, `getServiceSettings2()`, `getLogDenied()`, `queryPanicMode()`). Updated data source matrix (nftables YANG paths replaced with firewalld zone/policy/service paths), signal subscription table, differences table, collector #11 spec, design rationale, external command timeouts, migration section (reversed: D-Bus is kept, not replaced), summary migration table, project tree (`transformNftRuleset()` renamed to `buildFirewallTree()`), dbusmonitor package description, appendix model table, and glossary D-Bus Monitor entry throughout. | Assistant | +| 2026-03-05 | 0.24 | Added testability architecture: Section 8.5 defines Go interface contracts for all 9 external dependencies (netlink, ip batch, bridge batch, D-Bus, ZAPI, ethtool, chrony, command execution, file I/O), with interface definitions, production/mock implementation table, and import restriction rule. Section 8.6 defines the verification loop (definition of done): 4-step build/vet/test/golden-file workflow executable on a developer workstation with no target hardware, golden-file capture process from running Python yanger, YANG schema validation via yanglint in CI, and 8-point per-module completion checklist. 
| Assistant | +| 2026-03-27 | 0.25 | Review-driven corrections: LLDP converted from polling to reactive (`lldpcli -f json0 watch`); added `infix-services:mdns` module (migrated from statd/avahi.c via avahi D-Bus); added LLDPMonitor and mDNS Monitor subsystem sections; reconciled health endpoint schema (4.3.5 vs 4.8); fixed module counts; added container lifecycle reactive recommendation; added polling justification notes; fixed `routing-state` deprecation, typos, and formatting throughout. | Assistant | +| 2026-03-30 | 0.26 | Hostname and timezone collection moved from polling to reactive inotify via fswatcher. Hostname: moved from SystemCollector `os.Hostname()` at 300s to inotify on `/etc/hostname` via `Watch()`. Timezone: moved from SystemCollector `os.Readlink("/etc/localtime")` at 300s to inotify on `/etc/localtime` via new `WatchSymlink()` method. `WatchSymlink()` watches the parent directory instead of the file itself because Go's `fsnotify` follows symlinks — `watcher.Add("/etc/localtime")` would watch the target file, not the symlink entry, so symlink replacements via `ln -sf` would go undetected. The handler is registered under the full file path so that directory-level `Create`/`Rename` events on `/etc/localtime` match the handler lookup. `readTimezone()` handles both named timezones (strip zoneinfo prefix) and Etc/GMT±N offsets (with POSIX sign inversion). SystemCollector now uses `tree.Merge()` for `ietf-system:system` (was `tree.Set()`) so fswatcher hostname/timezone updates are not clobbered by subsequent poll cycles. Added `WatchSymlink` subsection to fswatcher documentation, `/etc/localtime` to watched paths table. Updated fswatcher intro, system collector specification, data source matrix, module-by-module mapping, and summary table throughout. | Assistant | +| 2026-03-30 | 0.27 | On-demand providers for live system state. 
Uptime, current-datetime, boot-datetime, memory usage, load average, and filesystem usage moved from polling (SystemCollector at 60s/300s) to on-demand computation at IPC request time. Added `OnDemandFunc` type and `RegisterProvider()` to `Tree` — when a provider is registered for a tree key, `Get()`/`GetMulti()` calls the provider function and shallow-merges the result with cached data, returning fresh values without mutating the cache. `internal/collector/live.go` implements `LiveSystemState()` which reads `/proc/uptime`, `/proc/meminfo`, `/proc/loadavg`, and calls `syscall.Statfs()` on `/`, `/var`, `/cfg` at the moment of each IPC request. SystemCollector no longer collects clock, memory, load average, or filesystem data — it retains only users, platform, software (RAUC/initctl), DNS, and services. Updated Tree type documentation (providers map, RegisterProvider, modified Get/GetMulti), SystemCollector specification (#7), data source matrix (clock/resource-usage rows now ON-DEMAND), summary table (moved leaf counts from POLLING to ON-DEMAND), module-by-module mapping (ietf_system.py strategy), project structure (added live.go), package descriptions (updated collector, tree), and summary migration table throughout. | Assistant | +| 2026-03-30 | 0.28 | SystemCollector decomposition complete. Users moved from polling (SystemCollector at 300s) to reactive fswatcher: `Watch("/etc/shadow")` for password/account changes, `WatchDir("/var/run/sshd/")` for SSH authorized key changes. `WatchDir()` is a new fswatcher method that watches a directory and fires the handler for any file create/write/remove event within it. Platform data (os-release, uname) moved from polling to boot-once: `BootPlatform()` in `internal/collector/boot.go` runs once at startup, result merged into tree and never re-read. Software data (RAUC slot status, installation status) moved from polling to boot-once: `BootSoftware()` in `internal/collector/boot.go` runs once at startup. 
Boot order moved from polling to reactive fswatcher: `Watch("/mnt/aux/grub/grubenv")` and `Watch("/mnt/aux/uboot.env")` trigger `ReadBootOrder()` which re-runs `fw_printenv`/`grub-editenv` and patches the software subtree. SystemCollector stripped to DNS resolver and Finit services only — all other data now handled by fswatcher reactive handlers, boot-once initialization, or on-demand providers. Updated collector specification #7, data source matrix (users/platform/software/boot-order rows), module-by-module mapping (ietf_system.py strategy), and summary migration table throughout. | Assistant | +| 2026-03-30 | 0.29 | NTP collector now populates the `infix-system:ntp` augmentation under `ietf-system:system-state` in addition to `ietf-ntp:ntp`. The Infix YANG model (`infix-system.yang`) augments `/sys:system-state` with `container ntp` → `list source` (keyed by address) containing `state` (enum: selected, candidate, outlier, unusable, falseticker, unstable), `mode` (enum: server, peer, local-clock), `stratum`, and `poll`. `addSources()` parses the same `chronyc -c sources` output as `addAssociations()` (shared to avoid double subprocess invocation), maps chronyc indicators (`*→selected`, `+→candidate`, `-→outlier`, `?→unusable`, `x→falseticker`, `~→unstable`) and mode indicators (`^→server`, `=→peer`), skips reference clocks (mode `#`) and invalid stratum, and merges the result into `ietf-system:system-state` via `tree.Merge()`. This satisfies the integration test `test/case/system/ntp_client/test.py` which reads from `/ietf-system:system-state/infix-system:ntp/sources/source` and checks for `state == "selected"`. | Assistant | + +--- + +## 1. Introduction + +### 1.1 Purpose +This document specifies the design for `yangerd`, a high-performance Go daemon that manages operational data for the Infix network OS. It serves as the authoritative technical reference for implementation, deployment, and testing. 
+ +### 1.2 Problem Statement +`statd` is the operational data daemon for Infix. On every NETCONF or RESTCONF poll that touches an operational subtree, `statd` invokes `ly_add_yanger_data()`, which calls `fsystemv()` to fork and exec the `yanger` Python script. Each invocation starts a fresh CPython interpreter, imports the relevant module (one of 14 total YANG modules in the target design; 13 current migration modules in legacy Python/C paths), runs the collection logic, prints JSON to stdout, and exits. + +The interpreter start-up cost alone is approximately 200 milliseconds per invocation. With 13 `sr_oper_get_subscribe()` callbacks registered in `subscribe_to_all()`, a worst-case full-tree poll triggers 13 sequential forks, for a cumulative delay of roughly 2.6 seconds before sysrepo can return data to the requestor. + +Beyond latency, the architecture has two structural weaknesses: +1. **No state preservation:** Every fork re-reads the same kernel interfaces, re-parses the same `ip` command output, and re-queries the same D-Bus services, even when nothing has changed. +2. **Memory churn:** Each Python process allocates its own heap and module cache, producing high memory churn under repeated polling. + +### 1.3 Solution Summary +`yangerd` (Architecture Option C -- IPC Indirection) is a pure Go daemon with no CGo dependency. It monitors Linux netlink events natively via `vishvananda/netlink` subscriptions (`LinkSubscribeWithOptions`, `AddrSubscribeWithOptions`, `NeighSubscribeWithOptions`), receiving typed Go structs on dedicated channels. Each event triggers a full re-read of the affected state: link, address, and neighbor data are re-queried through a persistent `ip -json -force -batch -` subprocess, bridge state through `bridge -json -batch -`. Route data is sourced from a streaming ZAPI connection to FRR zebra's zserv socket (not from netlink events or vtysh). 
Supplementary collectors handle data not exposed via netlink (ethtool genetlink, iw event, D-Bus, /proc/sys). All collected data is maintained in an in-memory YANG JSON tree with per-model `sync.RWMutex` locking -- each YANG module key has its own read-write mutex, so writers for different modules never block each other and readers only contend with writers of the same module. `statd` queries this tree over a Unix domain socket (`/run/yangerd.sock`) using a lightweight JSON/length-prefixed framing protocol, replacing the fork/exec path with a socket read. On multi-module IPC requests, per-model read locks are acquired individually, data is read and concatenated into the response. + +### 1.4 Relationship to Infix Components +- **statd:** The primary consumer. It translates sysrepo operational data requests into `yangerd` IPC queries. +- **sysrepo/libyang:** `yangerd` produces JSON fragments that `statd` parses into libyang trees for sysrepo. +- **confd:** Operates in parallel. `confd` handles the configuration (write) path, while `yangerd` handles the operational (read) path. +- **netopeer2/rousette:** External management endpoints that eventually receive data collected by `yangerd`. + +--- + +## 2. Requirements & Constraints + +### 2.1 Functional Requirements +- **Real-time Monitoring:** Must subscribe to netlink events for link, address, and neighbor changes. Route data is sourced from a streaming ZAPI connection to FRR zebra. +- **Comprehensive Collection:** Must implement collectors/monitors for all 14 supported YANG modules (13 migrated modules + 1 new module: `infix-services:mdns` migrated from `statd/avahi.c`). +- **In-Memory Cache:** Maintain a synchronized, pre-serialized JSON tree of all operational state. +- **IPC Server:** Provide a Unix socket server for concurrent client queries. +- **Health Reporting:** Expose internal monitor and collector status. +- **CLI Tool:** Provide a `yangerctl` utility for manual inspection and debugging. 
+ +### 2.2 Non-Functional Requirements +- **Sub-millisecond query latency:** `statd` callbacks receive a JSON response from an in-memory read — no process spawning, no disk I/O on the hot path. +- **Reactive link state:** netlink events update the in-memory tree within microseconds of the kernel event, eliminating staleness. +- **Elimination of Python startup overhead:** the 200 milliseconds per-invocation interpreter cost is removed entirely; current per-subtree fork chains are replaced by in-memory IPC reads. +- **Single consolidated daemon:** `yangerd` replaces 25+ Python collector scripts with typed Go collector functions, simplifying deployment, logging, and error handling. +- **Pure Go cross-compilation:** No CGo dependency for easy cross-builds across ARM, AArch64, RISC-V, and x86_64. + +### 2.3 Explicit Scope Boundaries (What This Is NOT) + +This section defines explicit scope boundaries for yangerd. Its purpose is to prevent future scope creep, clarify integration responsibility, and help contributors quickly determine whether a proposed change belongs in yangerd, statd, confd, or elsewhere. + +#### Not a sysrepo plugin + +yangerd has no sysrepo dependency and registers no `sr_*` callbacks. It does not link against `libsysrepo.so`, does not open a sysrepo connection, and has no knowledge of sysrepo session handles, subscription IDs, or event types. The sysrepo integration layer lives entirely in `statd.c`: it is statd that calls `sr_oper_get_subscribe()`, receives the sysrepo callback, queries yangerd over the Unix socket, and calls `lyd_parse_data_mem()` to parse the result into a libyang tree. + +Adding sysrepo to yangerd would defeat the zero-C-dependency constraint (sysrepo is a C library with no Go bindings), reintroduce link-time complexity against `libyang` and `libsysrepo`, and blur the separation of concerns that makes yangerd testable in isolation. The IPC boundary between yangerd and statd is intentional and permanent. 
+ +#### Not a NETCONF or RESTCONF server + +yangerd does not speak NETCONF XML, RESTCONF JSON+XML, gRPC, YANG push, or any IETF management protocol. It has a private, non-standard IPC protocol (1-byte version + 4-byte big-endian length + JSON payload over a Unix domain socket) whose sole consumer is statd. It cannot be queried directly by a NETCONF client, a RESTCONF client, or a browser. + +The management protocol endpoints in Infix remain `netopeer2` (NETCONF) and `rousette` (RESTCONF). yangerd is not a replacement for, competitor to, or extension of either. It is a data collection and aggregation daemon that feeds statd, which feeds sysrepo, which feeds the management protocol layer. + +#### Not CGo + +yangerd contains zero C code. There are no `import "C"` directives, no `#cgo LDFLAGS` or `#cgo CFLAGS` pragmas, no `.c` source files, and no calls to `C.*` functions. This is a hard, non-negotiable constraint. + +The reason is Buildroot cross-compilation. CGo requires a matching C cross-compiler toolchain (sysroot, headers, and libraries) for each target architecture (arm, aarch64, riscv64, x86_64). Managing four CGo toolchains in Buildroot is brittle and error-prone. Pure-Go cross-compilation requires only `GOARCH` and `GOOS` environment variables — no sysroot, no linker flags, no host-target library matching. + +Any future requirement that would necessitate calling a C library (e.g., direct access to a vendor-specific kernel module via an ioctl not wrapped by any Go package) must be implemented as a separate standalone binary that yangerd invokes as a subprocess, maintaining the CGo boundary outside yangerd itself. + +#### Not a replacement for confd + +yangerd collects and serves operational (read-only, runtime) data. It never writes to the sysrepo running datastore, never handles a NETCONF `<edit-config>` or `<copy-config>` RPC, never processes a RESTCONF PATCH, PUT, or POST, and never modifies the system configuration in any way. 
+ +Configuration management — translating NETCONF/RESTCONF configuration changes into Linux network configuration (via `ip`, `bridge`, `nft`, and other tools) — remains entirely within `confd`. There is no proposed overlap or merge between confd and yangerd. They are complementary daemons with non-overlapping responsibilities: confd handles the write path, yangerd handles the read path. + +#### Not a YANG validator + +yangerd does not parse YANG module files, does not load `.yang` schemas via libyang, and does not validate that JSON values conform to YANG type constraints (ranges, patterns, enumerations, must-expressions, etc.). It stores and retrieves opaque `json.RawMessage` blobs keyed by YANG path string. The blobs are produced by yangerd's own collector functions and are assumed to be structurally valid. + +YANG validation — ensuring that the JSON returned by yangerd is well-typed, range-checked, and list-keyed correctly — is performed by libyang inside statd when `lyd_parse_data_mem()` is called on the JSON blob. If a collector produces malformed JSON or a value outside a YANG type's range, libyang will reject it and statd will log the error. yangerd is deliberately schema-agnostic to avoid introducing a libyang dependency. + +#### Not a push or streaming daemon + +yangerd does not emit spontaneous outbound messages. It does not implement YANG push (RFC 8641), does not maintain persistent subscriptions, and does not send SSE, WebSocket, or gRPC stream frames. Its communication model is strictly pull-on-demand: statd connects, sends a request, receives a response, and disconnects. (Or, if the connection is kept alive, sends the next request on the same connection — but there is no server-initiated message.) + +Reactive netlink events update yangerd's internal tree continuously, but these updates are internal state changes only — they do not trigger any outbound notification to statd or to any other consumer. 
Consumers see the updates only when they issue the next pull request. This simplicity is intentional: it avoids the complexity of managing subscriber lists, flow control, and partial-failure handling in a push model. + +#### Not responsible for container namespace data (Phase 1) + +Collecting operational data from inside a container namespace (e.g., the interface list or routing table as seen from within a podman container) requires opening a netlink socket in the specific network namespace of that container. This involves calling `netlink.NewHandleAt(ns)` with a namespace file descriptor, which in turn requires reading `/proc/<pid>/ns/net` for the container's PID — a non-trivial and error-prone operation that differs between rootful and rootless podman. + +This complexity is explicitly deferred to Phase 2. In Phase 1, yangerd's netlink monitors operate exclusively in the host (init) network namespace and report the host's view of all interfaces, routes, and neighbours. Container-internal interfaces that appear in the host namespace (veth pairs) are included; interfaces visible only from inside the container are not. + +#### Not VRF-aware + +yangerd operates exclusively in the default VRF. It does not subscribe to non-default VRF route tables, does not open netlink sockets in non-default VRF contexts, and does not distinguish routes by VRF ID in its in-memory tree. The ZAPI watcher connects to zebra using `zebra.VRFDefault` and subscribes to route redistribution for the default VRF only. The ZAPI v6 wire format includes a VRF ID field in every message header; yangerd processes only messages for VRF 0 (default) and ignores messages with non-zero VRF IDs. Multi-VRF support is explicitly out of scope for both Phase 1 and Phase 2. 
+ +### 2.4 Hard Constraints +- **No CGo.** +- **No direct sysrepo access.** +- **No YANG validation in yangerd.** + +### 2.5 YANG-Model JSON Output Compatibility + +yangerd MUST produce JSON output that is structurally identical to the current Python yanger scripts. The output is consumed by `statd`, which passes it to `lyd_parse_data_mem()` in libyang for validation against the installed YANG schemas. Any deviation in key names, nesting structure, module prefixes, or value encoding will cause libyang to reject the data. + +This is a hard implementation constraint, not a best-effort goal. The Go collectors must transform `iproute2`, `ethtool`, `iw`, `vtysh`, D-Bus, and filesystem data into the exact same YANG-model JSON structure that the Python yanger scripts produce today. The canonical format specification is the Python source code in `src/statd/python/yanger/`. + +#### 2.5.1 Top-Level JSON Structure Per Module + +Each yanger module returns a JSON object with one or more YANG-module-prefixed top-level keys (RFC 7951 module-qualified names). yangerd must produce the same top-level keys for each module path. 
+ +| YANG Module | yanger Python Module | Top-Level JSON Key(s) | +|-------------|---------------------|-----------------------| +| `ietf-interfaces` | `ietf_interfaces` | `"ietf-interfaces:interfaces"` | +| `ietf-routing` | `ietf_routing` | `"ietf-routing:routing"` | +| `ietf-hardware` | `ietf_hardware` | `"ietf-hardware:hardware"` | +| `ietf-system` | `ietf_system` | `"ietf-system:system"`, `"ietf-system:system-state"` | +| `ietf-ntp` | `ietf_ntp` | `"ietf-ntp:ntp"` (nested inside out dict via `insert()`) | +| `ieee802-dot1ab-lldp` | `infix_lldp` | `"ieee802-dot1ab-lldp:lldp"` | +| `infix-containers` | `infix_containers` | `"infix-containers:containers"` | +| `infix-dhcp-server` | `infix_dhcp_server` | `"infix-dhcp-server:dhcp-server"` | +| `infix-firewall` | `infix_firewall` | `"infix-firewall:firewall"` | +| `infix-services` | `(new — migrated from statd/avahi.c)` | `"infix-services:mdns"` | +| `ietf-ospf` | `ietf_ospf` | `"ietf-routing:routing"` (with nested `control-plane-protocols`) | +| `ietf-rip` | `ietf_rip` | `"ietf-routing:routing"` (with nested `control-plane-protocols`) | +| `ietf-bfd-ip-sh` | `ietf_bfd_ip_sh` | `"ietf-routing:routing"` (with nested `control-plane-protocols`) | + + +#### 2.5.2 Concrete JSON Output Examples + +The following examples show the exact JSON structures that yangerd must produce for each module. These are derived directly from the Python source code. 
+ +**ietf-interfaces** (`ietf_interfaces/__init__.py`, `link.py`): +```json +{ + "ietf-interfaces:interfaces": { + "interface": [ + { + "type": "infix-if-type:ethernet", + "name": "eth0", + "if-index": 2, + "admin-status": "up", + "oper-status": "up", + "phys-address": "02:00:00:00:00:01", + "statistics": { + "in-octets": "123456789012", + "out-octets": "987654321098" + }, + "ietf-ip:ipv4": { + "mtu": 1500, + "address": [ + { + "ip": "192.168.1.1", + "prefix-length": 24, + "origin": "static" + } + ] + }, + "ietf-ip:ipv6": { + "mtu": 1500, + "address": [ + { + "ip": "fe80::1", + "prefix-length": 64, + "origin": "link-layer" + } + ] + }, + "ieee802-ethernet-interface:ethernet": { + "auto-negotiation": { + "enable": true + }, + "speed": "1.0", + "duplex": "full", + "statistics": { + "frame": { + "out-frames": "12345", + "out-multicast-frames": "100", + "out-broadcast-frames": "50", + "in-frames": "67890", + "in-multicast-frames": "200", + "in-broadcast-frames": "75", + "in-total-frames": "68000", + "in-error-fcs-frames": "0", + "in-error-undersize-frames": "0", + "in-error-oversize-frames": "0", + "infix-ethernet-interface:out-good-octets": "9876543", + "infix-ethernet-interface:in-good-octets": "12345678" + } + } + } + } + ] + } +} +``` + +**ietf-interfaces with bridge augmentation** (`bridge.py`): +```json +{ + "name": "br0", + "type": "infix-if-type:bridge", + "infix-interfaces:bridge": { + "vlans": { + "proto": "ieee802-dot1q-types:c-vlan", + "vlan": [ + { + "vid": 1, + "untagged": ["br0", "eth0"], + "tagged": ["eth1"], + "multicast": { + "snooping": true, + "querier": "auto" + }, + "multicast-filters": { + "multicast-filter": [ + { + "group": "239.1.1.1", + "ports": [ + { + "port": "eth0", + "state": "permanent" + } + ] + } + ] + } + } + ] + }, + "stp": { + "force-protocol": "rstp", + "hello-time": 2, + "forward-delay": 15, + "max-age": 20, + "transmit-hold-count": 6, + "max-hops": 20, + "cist": { + "priority": 32768, + "bridge-id": { + "priority": 32768, + 
"system-id": 0, + "address": "02:00:00:00:00:01" + }, + "root-id": { + "priority": 32768, + "system-id": 0, + "address": "02:00:00:00:00:01" + }, + "root-port": "eth0", + "topology-change": { + "count": 1, + "in-progress": false, + "port": "eth0", + "time": "2026-02-24T11:00:00+00:00" + } + } + } + } +} +``` + +**ietf-interfaces with WireGuard augmentation** (`wireguard.py`): +```json +{ + "name": "wg0", + "type": "infix-if-type:wireguard", + "infix-interfaces:wireguard": { + "peer-status": { + "peer": [ + { + "public-key": "aGVsbG8gd29ybGQ=", + "connection-status": "up", + "latest-handshake": "2026-02-24T12:00:00+00:00", + "endpoint-address": "192.168.1.1", + "endpoint-port": 51820, + "transfer": { + "tx-bytes": "123456", + "rx-bytes": "654321" + } + } + ] + } + } +} +``` + +**ietf-interfaces with WiFi augmentation** (`wifi.py`): + +WiFi output depends on the interface mode. In AP mode: +```json +{ + "name": "wlan0", + "type": "infix-if-type:wifi", + "infix-interfaces:wifi": { + "access-point": { + "ssid": "MyNetwork", + "stations": { + "station": [ + { + "mac": "02:00:00:00:00:05", + "signal": -45, + "rx_bitrate": 400.0, + "tx_bitrate": 866.7, + "connected_time": 3600, + "inactive_time": 100 + } + ] + } + } + } +} +``` + +In station (client) mode: +```json +{ + "name": "wlan0", + "type": "infix-if-type:wifi", + "infix-interfaces:wifi": { + "station": { + "ssid": "MyNetwork", + "signal-strength": -45, + "rx-speed": 400, + "tx-speed": 866, + "scan-results": [ + { + "bssid": "02:00:00:00:00:01", + "ssid": "MyNetwork", + "signal-strength": -42, + "encryption": ["WPA2-Personal"], + "channel": 36 + } + ] + } + } +} +``` + +**ietf-interfaces with VLAN augmentation** (`vlan.py`): +```json +{ + "name": "eth0.10", + "type": "infix-if-type:vlan", + "infix-interfaces:vlan": { + "tag-type": "ieee802-dot1q-types:c-vlan", + "id": 10, + "lower-layer-if": "eth0" + } +} +``` + +**ietf-interfaces with LAG augmentation** (`lag.py`): + +LACP mode: +```json +{ + "name": "bond0", + "type": 
"infix-if-type:lag", + "infix-interfaces:lag": { + "mode": "lacp", + "lacp": { + "mode": "active", + "rate": "fast", + "hash": "layer3-4", + "aggregator-id": 1, + "actor-key": 13, + "partner-key": 13, + "partner-mac": "02:00:00:00:00:03", + "system-priority": 65535 + }, + "link-monitor": { + "debounce": { + "up": 0, + "down": 0 + } + } + } +} +``` + +Static (non-LACP) mode: +```json +{ + "name": "bond0", + "type": "infix-if-type:lag", + "infix-interfaces:lag": { + "mode": "static", + "static": { + "mode": "balance-xor", + "hash": "layer3+4" + }, + "link-monitor": { + "debounce": { + "up": 0, + "down": 0 + } + } + } +} +``` + +**ietf-interfaces with LAG member augmentation** (`lag.py:lower()`): +```json +{ + "name": "eth0", + "infix-interfaces:lag-port": { + "lag": "bond0", + "state": "active", + "link-failures": 0, + "lacp": { + "aggregator-id": 1, + "actor-state": "AD", + "partner-state": "AD" + } + } +} +``` + +**ietf-interfaces with tunnel augmentation** (`tun.py`): +```json +{ + "name": "gre0", + "type": "infix-if-type:gre", + "infix-interfaces:gre": { + "local": "10.0.0.1", + "remote": "10.0.0.2" + } +} +``` + +**ietf-interfaces with veth augmentation** (`veth.py`): +```json +{ + "name": "veth0", + "type": "infix-if-type:veth", + "infix-interfaces:veth": { + "peer": "veth1" + } +} +``` + +**ietf-interfaces with container-network augmentation** (`container.py`): + +Container network interfaces include `interface_common()` fields plus +container-specific data. 
The `description` holds the kernel-internal +interface name while `name` is the user-facing container network name: +```json +{ + "type": "iana-if-type:ethernetCsmacd", + "name": "cni0", + "if-index": 42, + "admin-status": "up", + "oper-status": "up", + "phys-address": "02:00:00:00:00:01", + "description": "real-kernel-name", + "statistics": { + "in-octets": "1234", + "out-octets": "5678" + }, + "infix-interfaces:container-network": { + "containers": ["mycontainer"] + } +} +``` + +**ietf-routing** (`ietf_routing.py`): + +The routing module produces two ribs named `ipv4` and `ipv6`. The +`interfaces` list is populated with interfaces that have forwarding +enabled. Route fields use fully qualified YANG names: +```json +{ + "ietf-routing:routing": { + "interfaces": { + "interface": ["eth0", "eth1"] + }, + "ribs": { + "rib": [ + { + "name": "ipv4", + "address-family": "ipv4", + "routes": { + "route": [ + { + "ietf-ipv4-unicast-routing:destination-prefix": "192.168.1.0/24", + "source-protocol": "infix-routing:kernel", + "route-preference": 100, + "active": [null], + "last-updated": "2026-02-24T12:00:00+00:00", + "next-hop": { + "next-hop-list": { + "next-hop": [ + { + "ietf-ipv4-unicast-routing:address": "10.0.0.1", + "infix-routing:installed": [null] + } + ] + } + } + }, + { + "ietf-ipv4-unicast-routing:destination-prefix": "10.0.0.0/8", + "source-protocol": "direct", + "route-preference": 0, + "active": [null], + "last-updated": "2026-02-24T00:00:00+00:00", + "next-hop": { + "outgoing-interface": "eth0" + } + } + ] + } + }, + { + "name": "ipv6", + "address-family": "ipv6" + } + ] + } + } +} +``` + +**ietf-hardware** (`ietf_hardware.py`): + +The hardware module builds a component list from multiple sources: +mainboard, VPD, USB ports, hwmon sensors, thermal zones, WiFi radios, +and GPS receivers. 
Sensor data uses `value-type` + `value-scale` +instead of combined types: +```json +{ + "ietf-hardware:hardware": { + "component": [ + { + "name": "mainboard", + "class": "iana-hardware:chassis", + "mfg-name": "Kernelkit", + "serial-num": "ABC123", + "state": { + "admin-state": "unknown", + "oper-state": "enabled" + } + }, + { + "name": "cpu", + "class": "iana-hardware:sensor", + "sensor-data": { + "value": 42000, + "value-type": "celsius", + "value-scale": "milli", + "value-precision": 0, + "value-timestamp": "2026-02-24T12:00:00+00:00", + "oper-status": "ok" + } + }, + { + "name": "sfp0", + "class": "iana-hardware:module" + }, + { + "name": "sfp0-temperature", + "class": "iana-hardware:sensor", + "parent": "sfp0", + "sensor-data": { + "value": 35500, + "value-type": "celsius", + "value-scale": "milli", + "value-precision": 0, + "value-timestamp": "2026-02-24T12:00:00+00:00", + "oper-status": "ok" + } + }, + { + "name": "sfp0-voltage", + "class": "iana-hardware:sensor", + "parent": "sfp0", + "description": "Vcc", + "sensor-data": { + "value": 3300, + "value-type": "volts-DC", + "value-scale": "milli", + "value-precision": 0, + "value-timestamp": "2026-02-24T12:00:00+00:00", + "oper-status": "ok" + } + }, + { + "name": "usb1", + "class": "infix-hardware:usb", + "state": { + "admin-state": "unlocked", + "oper-state": "enabled" + } + }, + { + "name": "radio0", + "class": "infix-hardware:wifi", + "description": "WiFi Radio radio0", + "infix-hardware:wifi-radio": { + "bands": [{"band": "1", "name": "2.4 GHz", "ht-capable": true}], + "driver": "mt7915e", + "max-interfaces": {"ap": 4}, + "supported-channels": [1, 6, 11], + "num-virtual-interfaces": 1 + } + }, + { + "name": "gps0", + "class": "infix-hardware:gps", + "description": "GPS/GNSS Receiver", + "infix-hardware:gps-receiver": { + "device": "/dev/gps0", + "driver": "u-blox", + "activated": true, + "fix-mode": "3d", + "latitude": "57.708870", + "longitude": "11.974560", + "altitude": "45.2", + "satellites-visible": 
12, + "satellites-used": 8, + "pps-available": true + } + } + ] + } +} +``` + +**ietf-system** (`ietf_system.py`): + +The system module splits output between `ietf-system:system` (config- +visible state) and `ietf-system:system-state` (operational). DNS +resolver includes both static and DHCP-learned servers with origin +tracking. NTP source state is under `infix-system:ntp`. Resource +usage includes filesystem utilization: +```json +{ + "ietf-system:system": { + "hostname": "infix", + "authentication": { + "user": [ + { + "name": "admin", + "password": "$6$...", + "infix-system:shell": "infix-system:clish", + "authorized-key": [ + { + "name": "admin-key-0", + "algorithm": "ssh-ed25519", + "key-data": "AAAA..." + } + ] + } + ] + }, + "clock": { + "timezone-name": "Europe/Stockholm" + } + }, + "ietf-system:system-state": { + "platform": { + "os-name": "Infix", + "os-version": "25.02.0", + "os-release": "20260224", + "machine": "x86_64" + }, + "clock": { + "current-datetime": "2026-02-24T12:00:00+00:00", + "boot-datetime": "2026-02-24T00:00:00+00:00" + }, + "infix-system:software": { + "compatible": "infix-x86_64", + "booted": "rootfs.0", + "slot": [], + "installer": {} + }, + "infix-system:ntp": { + "sources": { + "source": [ + { + "address": "192.168.1.1", + "mode": "server", + "state": "selected", + "stratum": 2, + "poll": 6 + } + ] + } + }, + "infix-system:services": { + "service": [ + { + "pid": 1234, + "name": "syslogd", + "status": "running", + "description": "System log daemon", + "statistics": { + "memory-usage": "4096", + "uptime": "86400", + "restart-count": 0 + } + } + ] + }, + "infix-system:dns-resolver": { + "options": { + "timeout": 5, + "attempts": 2 + }, + "server": [ + { + "address": "8.8.8.8", + "origin": "static" + }, + { + "address": "192.168.1.1", + "origin": "dhcp", + "interface": "eth0" + } + ], + "search": ["example.com"] + }, + "infix-system:resource-usage": { + "memory": { + "total": "4096000", + "free": "2048000", + "available": "3072000" 
+ }, + "load-average": { + "load-1min": "0.15", + "load-5min": "0.10", + "load-15min": "0.05" + }, + "filesystem": [ + { + "mount-point": "/", + "size": "2097152", + "used": "524288", + "available": "1572864" + }, + { + "mount-point": "/var", + "size": "1048576", + "used": "262144", + "available": "786432" + }, + { + "mount-point": "/cfg", + "size": "65536", + "used": "4096", + "available": "61440" + } + ] + } + } +} +``` + +**ietf-ntp** (`ietf_ntp.py`): +```json +{ + "ietf-ntp:ntp": { + "associations": { + "association": [ + { + "address": "192.168.1.1", + "local-mode": "ietf-ntp:client", + "isconfigured": true, + "stratum": 2, + "prefer": true, + "reach": 255, + "poll": 6, + "now": 12, + "offset": "0.123", + "delay": "1.456", + "dispersion": "0.089" + } + ] + }, + "clock-state": { + "system-status": { + "clock-state": "ietf-ntp:synchronized", + "clock-stratum": 2, + "clock-refid": "GPS ", + "nominal-freq": "1000000000.0000", + "actual-freq": "1000000000.0012", + "clock-precision": -20, + "clock-offset": "0.001", + "root-delay": "1.234", + "root-dispersion": "0.567", + "reference-time": "2026-02-25T10:30:15.12Z", + "sync-state": "ietf-ntp:clock-synchronized", + "infix-ntp:last-offset": "0.000001234", + "infix-ntp:rms-offset": "0.000002345", + "infix-ntp:residual-freq": "0.012", + "infix-ntp:skew": "0.034", + "infix-ntp:update-interval": "64.0" + } + }, + "refclock-master": { + "master-stratum": 2 + }, + "port": 123, + "ntp-statistics": { + "packet-received": 1000, + "packet-dropped": 5, + "packet-sent": 950, + "packet-sent-fail": 0 + } + } +} +``` + +**ieee802-dot1ab-lldp** (`infix_lldp.py`): +```json +{ + "ieee802-dot1ab-lldp:lldp": { + "port": [ + { + "name": "eth0", + "dest-mac-address": "01:80:C2:00:00:0E", + "remote-systems-data": [ + { + "time-mark": 3600, + "remote-index": 1, + "chassis-id-subtype": "mac-address", + "chassis-id": "02:00:00:00:00:01", + "port-id-subtype": "interface-name", + "port-id": "eth0" + } + ] + } + ] + } +} +``` + 
+**infix-containers** (`infix_containers.py`): +```json +{ + "infix-containers:containers": { + "container": [ + { + "name": "mycontainer", + "id": "abc123def456", + "image": "docker.io/library/alpine:latest", + "image-id": "sha256:abc123", + "running": true, + "status": "Up 2 hours", + "command": "/bin/sh", + "network": { + "interface": [{"name": "podnet"}], + "publish": ["0.0.0.0:8080:80/tcp"] + }, + "resource-limit": { + "memory": "524288", + "cpu": 1000 + }, + "resource-usage": { + "memory": "32768", + "cpu": "2.50", + "block-io": { + "read": "1024", + "write": "512" + }, + "net-io": { + "received": "2048", + "sent": "1024" + }, + "pids": 5 + } + } + ] + } +} +``` + +**infix-dhcp-server** (`infix_dhcp_server.py`): +```json +{ + "infix-dhcp-server:dhcp-server": { + "statistics": { + "out-offers": 95, + "out-acks": 88, + "out-naks": 2, + "in-declines": 0, + "in-discovers": 100, + "in-requests": 90, + "in-releases": 10, + "in-informs": 3 + }, + "leases": { + "lease": [ + { + "expires": "2026-02-25T12:00:00+00:00", + "address": "192.168.1.100", + "phys-address": "02:00:00:00:00:01", + "hostname": "client1", + "client-id": "01:02:00:00:00:00:01" + } + ] + } + } +} +``` + +**infix-firewall** (`infix_firewall.py`): +```json +{ + "infix-firewall:firewall": { + "default": "public", + "logging": "off", + "lockdown": false, + "zone": [ + { + "name": "public", + "short": "Public", + "immutable": false, + "description": "For use in public areas", + "interface": ["eth0"], + "network": [], + "action": "reject", + "service": ["ssh", "dhcpv6-client"], + "port-forward": [ + { + "lower": 443, + "proto": "tcp", + "to": { + "addr": "192.168.2.10", + "port": 443 + } + } + ] + } + ], + "policy": [ + { + "name": "allow-host-ipv6", + "action": "accept", + "priority": -15000, + "ingress": ["HOST"], + "egress": ["ANY"] + } + ], + "service": [ + { + "name": "ssh", + "description": "Secure Shell", + "port": [ + { + "lower": 22, + "proto": "tcp" + } + ] + } + ] + } +} +``` + +**ietf-ospf** 
(`ietf_ospf.py`): +```json +{ + "ietf-routing:routing": { + "control-plane-protocols": { + "control-plane-protocol": [ + { + "type": "infix-routing:ospfv2", + "name": "default", + "ietf-ospf:ospf": { + "ietf-ospf:router-id": "10.0.0.1", + "ietf-ospf:address-family": "ipv4", + "ietf-ospf:areas": { + "ietf-ospf:area": [ + { + "ietf-ospf:area-id": "0.0.0.0", + "ietf-ospf:interfaces": { + "ietf-ospf:interface": [ + { + "name": "eth0", + "state": "dr", + "enabled": true, + "passive": false, + "interface-type": "broadcast", + "ietf-ospf:neighbors": { + "ietf-ospf:neighbor": [] + } + } + ] + } + } + ] + }, + "ietf-ospf:local-rib": { + "ietf-ospf:route": [ + { + "prefix": "192.168.1.0/24", + "route-type": "intra-area", + "metric": 10, + "next-hops": { + "next-hop": [ + { + "next-hop": "10.0.0.2" + } + ] + } + } + ] + } + } + } + ] + } + } +} +``` + +**ietf-rip** (`ietf_rip.py`): +```json +{ + "ietf-routing:routing": { + "control-plane-protocols": { + "ietf-routing:control-plane-protocol": [ + { + "type": "infix-routing:ripv2", + "name": "default", + "ietf-rip:rip": { + "distance": 120, + "default-metric": 1, + "timers": { + "update-interval": 30, + "invalid-interval": 180, + "flush-interval": 240 + }, + "interfaces": { + "interface": [ + { + "interface": "eth0", + "oper-status": "up", + "send-version": "2", + "receive-version": "2" + } + ] + }, + "ipv4": { + "routes": { + "route": [ + { + "ipv4-prefix": "192.168.50.0/24", + "metric": 2, + "route-type": "rip", + "next-hop": "192.168.50.2", + "interface": "eth0" + } + ] + }, + "neighbors": { + "neighbor": [ + { + "ipv4-address": "192.168.50.2", + "bad-packets-rcvd": 0, + "bad-routes-rcvd": 0 + } + ] + } + }, + "num-of-routes": 1 + } + } + ] + } + } +} +``` + +**ietf-bfd-ip-sh** (`ietf_bfd_ip_sh.py`): +```json +{ + "ietf-routing:routing": { + "control-plane-protocols": { + "control-plane-protocol": [ + { + "type": "infix-routing:bfdv1", + "name": "bfd", + "ietf-bfd:bfd": { + "ietf-bfd-ip-sh:ip-sh": { + "sessions": { + 
"session": [ + { + "interface": "eth0", + "dest-addr": "10.0.0.2", + "local-discriminator": 1, + "remote-discriminator": 2, + "session-running": { + "local-state": "up", + "remote-state": "up", + "local-diagnostic": "none", + "detection-mode": "async-without-echo", + "negotiated-rx-interval": 300000, + "negotiated-tx-interval": 300000, + "detection-time": 900000 + }, + "path-type": "ietf-bfd-types:path-ip-sh", + "ip-encapsulation": true + } + ] + } + } + } + } + ] + } + } +} +``` + +#### 2.5.3 Structural Rules + +The following rules govern the JSON output format. These MUST be followed by all Go collectors. + +1. **Module-prefixed top-level keys (RFC 7951).** Every top-level key in the returned JSON object uses the YANG module name as a prefix, separated by a colon: `"ietf-interfaces:interfaces"`, `"ietf-routing:routing"`, `"infix-firewall:firewall"`. This is mandated by RFC 7951 section 4 for module-qualified names. + +2. **Module-prefixed augmentation keys.** When a YANG augmentation from a different module adds nodes to a container, the augmented nodes use the augmenting module's prefix: `"ietf-ip:ipv4"`, `"infix-interfaces:bridge"`, `"ieee802-ethernet-interface:ethernet"`, `"infix-system:shell"`, `"infix-routing:area-id"`, `"ietf-ospf:ospf"`. Nodes from the same module as their parent do NOT carry a prefix (e.g., `"name"`, `"type"`, `"oper-status"` inside `ietf-interfaces`). + +3. **YANG lists are JSON arrays.** Every YANG `list` is encoded as a JSON key whose value is an array of objects. The key name is the YANG list name: `"interface": [...]`, `"component": [...]`, `"lease": [...]`, `"route": [...]`, `"association": [...]`, `"session": [...]`. + +4. **YANG leaf-lists are JSON arrays of scalars.** YANG `leaf-list` nodes are encoded as arrays of strings or numbers: `"boot-order": ["rootfs.0", "rootfs.1"]`, `"containers": ["mycontainer"]`. + +5. 
**`empty`-type leaves as `[null]`.** YANG leaves of type `empty` (leaves whose mere existence carries semantic meaning) are encoded as `[null]` per RFC 7951 section 6.9. Example from `wifi.py`: `"active": [null]`. Presence containers, by contrast, are encoded as ordinary JSON objects (`{}` when they have no child nodes). + +6. **Large counters as strings.** Any YANG type that can exceed 32-bit range (`uint64`, `counter64`, `yang:gauge64`, `yang:zero-based-counter64`) MUST be encoded as a JSON string, not a number. JavaScript/JSON numbers lose precision beyond 2^53. Examples: `"in-octets": "123456789012"`, `"tx-bytes": "123456"`, `"memory-usage": "4096"`, `"size": "12345678"`. + +7. **Decimal values with fixed fraction digits.** YANG `decimal64` types and certain formatted numeric strings must use a fixed number of fraction digits matching the YANG type definition. Examples from NTP: `"offset": "0.123"` (3 fraction digits), `"nominal-freq": "1000000000.0000"` (4 fraction digits), `"infix-ntp:last-offset": "0.000000001"` (9 fraction digits). CPU percentage: `"cpu": "2.50"` (2 fraction digits). + +8. **Boolean values are JSON booleans.** YANG `boolean` leaves are encoded as JSON `true` or `false`, not strings: `"running": true`, `"masquerade": false`, `"immutable": true`, `"passive": false`. + +9. **Integer values are JSON numbers.** YANG integer types (`int8`, `int16`, `int32`, `uint8`, `uint16`, `uint32`) are encoded as JSON numbers: `"if-index": 2`, `"vid": 1`, `"stratum": 2`, `"priority": 32767`. Exception: `uint64` and `counter64` are strings (rule 6). + +10. **YANG identity references use module-prefixed strings.** YANG `identityref` values include the defining module's prefix: `"ietf-ntp:client"`, `"infix-routing:ospfv2"`, `"infix-if-type:ethernet"`, `"ietf-bfd-types:path-ip-sh"`, `"ieee802-dot1q-types:c-vlan"`, `"infix-system:bash"`. + +11. **YANG enumeration values are lowercase strings.** YANG `enumeration` values are encoded as their enum string: `"oper-status": "up"`, `"duplex": "full"`, `"state": "active"`, `"action": "reject"`. + +12. 
**Empty or absent containers are omitted.** If a collector has no data for an optional container, it omits the key entirely rather than including an empty object. Exception: containers that serve as structural anchors (e.g., `"ietf-ospf:neighbors": {}`) may be included empty when required by the YANG schema for list parent nodes. + +13. **Timestamps use YANG `date-and-time` format.** All timestamps follow RFC 3339 / ISO 8601 with timezone offset using colon separator: `"2026-02-24T12:00:00+00:00"`. The Python `YangDate` class in `common.py` formats this as `strftime("%Y-%m-%dT%H:%M:%S%z")` with a colon inserted in the timezone offset. + +14. **The `insert()` helper pattern.** The Python code uses `common.insert(obj, *path_and_value)` to build nested structures. This is equivalent to creating nested dicts along a path. Go collectors should build the equivalent nested `map[string]interface{}` structure directly. + +#### 2.5.4 Field Transformation Reference + +The following table documents key transformations from Linux data sources to YANG JSON keys. Go collectors must replicate these transformations exactly. 
+ +| Linux Source | Linux Field | YANG JSON Key | Transform | Example | +|-------------|-------------|---------------|-----------|---------| +| `ip -j link show` | `ifname` | `"name"` | Direct copy | `"eth0"` | +| `ip -j link show` | `link_type` + `info_kind` | `"type"` | `iplink2yang_type()` map | `"infix-if-type:ethernet"` | +| `ip -j link show` | `operstate` | `"oper-status"` | Lowercase | `"up"`, `"down"` | +| `ip -j link show` | `ifindex` | `"if-index"` | Direct copy (int) | `2` | +| `ip -j link show` | `address` | `"phys-address"` | Direct copy | `"02:00:00:00:00:01"` | +| `ip -j link show` | `mtu` | `"mtu"` | Direct copy (int) | `1500` | +| `ip -s -j link show` | `stats64.rx.bytes` | `"in-octets"` | `str()` (string) | `"123456789012"` | +| `ip -s -j link show` | `stats64.tx.bytes` | `"out-octets"` | `str()` (string) | `"987654321098"` | +| `ip -s -j link show` | `stats64.rx.packets` | `"in-unicast-pkts"` | `str()` (string) | `"12345"` | +| `ip -s -j link show` | `stats64.tx.packets` | `"out-unicast-pkts"` | `str()` (string) | `"67890"` | +| `ip -j addr show` | `local` | `"ip"` (in address list) | Direct copy | `"192.168.1.1"` | +| `ip -j addr show` | `prefixlen` | `"prefix-length"` | Direct copy (int) | `24` | +| `ethtool --json ` | `speed` | `"speed"` | Mbps string | `"1000"` | +| `ethtool --json ` | `duplex` | `"duplex"` | Lowercase | `"full"` | +| `ethtool --json -S ` | group counters | `"statistics"."frame".*` | `str()` for uint64 | `"12345"` | +| `wg show dump` | `rx_bytes` | `"rx-bytes"` | `str()` (string) | `"654321"` | +| `wg show dump` | `tx_bytes` | `"tx-bytes"` | `str()` (string) | `"123456"` | +| `wg show dump` | `latest_handshake` | `"latest-handshake"` | RFC 3339 timestamp | `"2026-02-24T12:00:00+00:00"` | +| `/proc/meminfo` | `MemTotal` | `"total"` | KiB as string | `"4096000"` | +| `/proc/loadavg` | fields 0-2 | `"load-1min"` etc. 
| Direct copy (string) | `"0.15"` | +| chrony cmdmon protocol (sources) | fields 2-9 | `"address"`, `"stratum"`, etc. | Typed Go struct | see `ietf_ntp.py` | +| `vtysh -c 'show ip ospf ...'` | JSON fields | `"ietf-ospf:*"` | Module-prefixed keys | see `ietf_ospf.py` | +| `lldpcli show neighbors -f json` | `interface.*` | `"port"` list | Restructure per-port | see `infix_lldp.py` | +| firewalld D-Bus | zone/policy/service | `"infix-firewall:firewall"` | D-Bus to YANG map | see `infix_firewall.py` | +| `podman ps/inspect/stats` | container fields | `"infix-containers:containers"` | Restructure + cgroup parse | see `infix_containers.py` | + +#### 2.5.5 Validation Strategy + +To ensure yangerd's JSON output is correct and schema-compliant: + +1. **Golden-file tests.** For each YANG module, golden reference files capture the expected JSON structure from a known-good system state (`golden/<module>.json`). The Go integration tests compare yangerd's output against these golden files using a structural JSON diff (ignoring value differences for volatile fields like timestamps and counters, but requiring identical key structure and nesting). Golden files are committed to the repository and updated when YANG models change or output format is intentionally modified. + +2. **libyang validation.** The Go integration tests pass yangerd's output through `lyd_parse_data_mem()` (via a small C test harness or by running `yanglint`) to verify that libyang accepts the JSON. This is the ultimate acceptance criterion -- if libyang rejects the output, it is wrong regardless of what the golden file says. + +3. **Replay-based testing.** yangerd supports a replay mode (reading captured `ip`, `ethtool`, `iw`, etc. output from files) to enable deterministic testing. This allows byte-exact comparison of output for the same input data across test runs. + +4. **Per-module smoke tests.** Each Go collector function has unit tests that verify the JSON key structure for representative inputs. 
These tests assert the presence of module-prefixed keys, correct list-vs-object encoding, and string-vs-number encoding for counter types. +--- + +## 3. Architecture Overview + +### 3.1 Component Diagram + +``` + ┌─────────────────────────────────────────────────────────────────┐ + │ yangerd (Go) │ + │ │ + netlink subs ────────►│ NLMonitor (event dispatcher) in-memory YANG tree │ + (vishvananda/netlink) │ │ ┌──────────────────┐ │ + LinkUpdate ch │ ▼ │ │ │ + AddrUpdate ch │ ip -json -batch - ◄────────► │ per-model │ │ + NeighUpdate ch │ (persistent query │ RWMutex-guarded │ │ + │ subprocess) │ subtrees │ │ + │ │ │ │ + bridge netlink ──────►│ bridge event dispatcher │ /ietf-interfaces │ │ + (NeighSub + LinkSub │ │ │ /ietf-routing │ │ + + raw RTNLGRP_MDB) │ ▼ │ /ietf-hardware │ │ + │ bridge -json -batch - ◄────► │ /ietf-system │ │ + │ (persistent query │ ... │ │ + │ subprocess) │ │ │ + │ └────────┬─────────┘ │ + iw event -t ─────────►│ wifi event dispatcher │ │ + (802.11 station, │ │ │ │ + auth, scan, │ ▼ │ │ + channel events) │ iw dev info/station ────────────►│ │ + │ (text parse + re-query) │ │ + │ │ │ + D-Bus Monitor ────────►│ ┌──────────────┐ │ │ + (godbus/dbus/v5) │ │ dbusmonitor │──────────────────────►│ │ + dnsmasq signals │ │ (reactive │ │ │ + (DHCPLease*) │ │ D-Bus sigs) │ │ │ + firewalld signals │ └──────────────┘ │ │ + (Reloaded, │ │ │ + NameOwnerChanged) │ │ │ + │ │ │ + File Watcher ────────────▶│ ┌──────────────┐ │ │ + (fsnotify/inotify) │ │ fswatcher │──────────────────────▶│ │ + /proc/sys forwarding │ │ (reactive │ │ │ + │ │ file I/O) │ │ │ + │ │ │ + ethtool genetlink ───►│ ┌──────────────┐ │ │ + ETHNL_MCGRP_MONITOR │ │ ethmonitor │──────────────────────►│ │ + (speed, duplex, │ │ (reactive │ │ │ + autoneg NTFs) │ │ genetlink) │ │ │ + │ └──────────────┘ │ │ + │ │ │ + ZAPI (zserv) ────────►│ ┌──────────────┐ │ │ + /var/run/frr/ │ │ zapiwatcher │──────────────────────►│ │ + zserv.api │ │ (streaming │ │ │ + (REDISTRIBUTE_ │ │ ZAPI v6) │ │ │ + 
ROUTE_ADD/DEL) │ └──────────────┘ │ │ + │ │ │ + Supplementary ───────►│ ┌──────────────┐ │ │ + ethtool stats poll │ │ ethtool coll │──────────────────────►│ │ + vtysh CLI (OSPF/RIP) │ │ vtysh coll │──────────────────────►│ │ + wgctrl WireGuard │ │ wgctrl coll │──────────────────────►│ │ + /proc/sys polling │ └──────────────┘ │ │ + │ IPC server ◄───────────────┘ │ + │ /run/yangerd.sock │ + │ SOCK_STREAM, ver(1) + 4-byte BE length + JSON │ + └─────────────────────────────────────────────────────────────────┘ + ▲ + │ yangerd_query(path) + │ Unix socket read/write + ┌─────────────────┴───────────────────────────────────┐ + │ statd (C daemon) │ + │ │ + │ 13 x sr_oper_get_subscribe() │ + │ on callback: │ + │ ly_add_yangerd_data() │ + │ -> yangerd_query(path) -- primary path │ + │ │ + │ lyd_parse_data_mem(ctx, buf) │ + └──────────────────────────────────┬──────────────────┘ + │ + ┌────────────────▼────────────────┐ + │ sysrepo / libyang │ + │ operational datastore │ + │ NETCONF / RESTCONF consumers │ + └─────────────────────────────────┘ +``` + +### 3.2 Data Flow Diagrams + +#### 3.2.1 Netlink Reactive Path (Event to Tree) +``` +Kernel NLMonitor event dispatcher ip batch ethmonitor Tree + | | | | | | + |--RTM_NEWLINK----->| | | | | + | |---LinkUpdate-->| | | | + | | | | | | + | | | === Full Interface Re-read (3 queries) ===| + | | |---link show--->| | | + | | |<--JSON resp----| | | + | | |--s link show-->| | | + | | |<--JSON resp----| | | + | | |---addr show--->| | | + | | |<--JSON resp----| | | + | | | | | | + | | |---tree.Set(link+stats+addr)-------------->| + | | | | | | + | | | === Cross-subsystem ethtool re-query === | + | | |---RefreshInterface()------->| | + | | | | etClient.LinkInfo() | + | | | | etClient.LinkMode() | + | | | |---tree.Set(ethernet)------>| + | | | | | | + +Note: Netlink event types link, addr, and neigh use the event as a trigger to re-read +full current state via ip batch. 
Route data is sourced exclusively from the ZAPI +watcher's streaming connection to zebra (see Section 4.1octies) -- yangerd does not +subscribe to netlink route groups. For link events specifically, the re-read is a +3-query set plus ethtool cross-trigger. +``` + +#### 3.2.2 Statd Query Path (Request to Response) +``` +statd (sysrepo cb) yangerd IPC Server In-Memory Tree + | | | + |---Length + JSON Req-->| | + | |---tree.Get(path)---->| + | | (Read Lock) | + | |<-------JSON Blob-----| + |<--Length + JSON Resp--| | +``` + + +#### 3.2.4 File Watcher Reactive Path (File Change to Tree) +``` +Kernel fsnotify fswatcher os.ReadFile Tree + | | | | | + |--inotify event--->| | | | + | |---IN_MODIFY--->| | | + | | |--debounce 200ms | + | | |---read file-->| | + | | | |---file data->| + | | |---tree.Set()---------------->| + | | | | | +``` + +Note: The fswatcher monitors procfs forwarding flags (`/proc/sys/net/ipv4/conf/*/forwarding`), `/etc/hostname`, and `/etc/localtime` (via `WatchSymlink`) using inotify. DHCP lease file watching has been moved to the D-Bus Monitor Subsystem (Section 3.2.8), which reacts to dnsmasq D-Bus signals instead of inotify file events. 
+ +#### 3.2.5 Bridge Monitor Reactive Path (Bridge Event to Tree) +``` + +Kernel bridge netlink bridge dispatcher bridge batch Tree + | | | | | + |--RTNL bridge evt->| | | | + | |---NL event---->| | | + | | |---query cmd-->| | + | | | |---JSON resp->| + | | |---tree.Set()---------------->| + | | | | | +``` + +#### 3.2.6 IW Event Reactive Path (802.11 Event to Tree) +``` +Kernel iw event -t wifi dispatcher exec iw dev Tree + | | | | | + |--nl80211 event--->| | | | + | |---text line--->| | | + | | |---parse line | | + | | |---exec iw---->| | + | | | |---text out-->| + | | |---parse + tree.Set()-------->| + | | | | | +``` + +Note: Unlike the core netlink subscriptions (which receive typed Go structs via `vishvananda/netlink` channels) and bridge netlink events, `iw event` produces human-readable text, not structured data. The wifi dispatcher must parse each text line to extract the event type and interface name, then run `iw dev info` / `iw dev station dump` and parse their text output into structured JSON for tree storage. This adds a text-parsing layer absent from the netlink reactive paths. 
+ +#### 3.2.7 Ethtool Netlink Reactive Path (Settings Change to Tree) +``` +Kernel genetlink ethmonitor ethtool.Client Tree + | | | | | + |--ETHTOOL_MSG_*_NTF>| | | | + | |---NTF message->| | | + | | |---parse cmd | | + | | |---LinkInfo()--->| | + | | | |---JSON resp->| + | | |---tree.Set()---------------->| + | | | | | +``` + +#### 3.2.8 D-Bus Monitor Reactive Path (D-Bus Signal to Tree) +``` +D-Bus Daemon godbus/dbus/v5 dbusmonitor re-read Tree + | | | | | + |--D-Bus signal---->| | | | + | |---signal msg-->| | | + | | |---dispatch | | + | | |---re-read---->| | + | | | |---data------>| + | | |---tree.Set()---------------->| + | | | | | +``` + +Note: The D-Bus Monitor subscribes to signals from dnsmasq (`uk.org.thekelleys.dnsmasq`, signals `DHCPLeaseAdded`, `DHCPLeaseDeleted`, `DHCPLeaseUpdated`) and firewalld (`org.fedoraproject.FirewallD1`, signal `Reloaded`; plus `org.freedesktop.DBus.NameOwnerChanged` for service restart detection). On dnsmasq signals, the monitor re-reads `/var/lib/misc/dnsmasq.leases` and calls `GetMetrics()` via D-Bus method call. On firewalld signals, the monitor re-reads firewall state via firewalld D-Bus method calls (`getDefaultZone()`, `getActiveZones()`, `getZoneSettings2()`, `getPolicies()`, `getPolicySettings()`, `listServices()`, `getServiceSettings2()`, `getLogDenied()`, `queryPanicMode()`). This follows the same event-as-trigger pattern used by the netlink and bridge subsystems. + +Note: Unlike the ip/bridge/iw subsystems, the ethtool netlink monitor is NOT a subprocess. It is a native Go genetlink socket subscription using `mdlayher/genetlink`. The `EthMonitor` goroutine joins the `"monitor"` multicast group of the `"ethtool"` genetlink family and receives `_NTF` notification messages directly. On receiving `ETHTOOL_MSG_LINKMODES_NTF` or `ETHTOOL_MSG_LINKINFO_NTF`, it re-queries the affected interface via `ethtool.Client.LinkInfo()` and `ethtool.Client.LinkMode()` to obtain updated speed, duplex, and auto-negotiation state. 
Statistics (counters) have no `_NTF` message type and remain polling-based via the ethtool collector. Importantly, `ETHNL_MCGRP_MONITOR` does **NOT** fire on link up/down events — only on explicit settings renegotiation. To close this gap, the link event handler calls `ethmonitor.RefreshInterface()` on every RTM_NEWLINK, triggering an ethtool re-query for the affected interface. + +### 3.3 Component Responsibilities + +| Component | Responsibility | +|-----------|----------------| +| **NLMonitor** (netlink event subscriptions) | Native Go netlink subscriptions via `vishvananda/netlink` (`LinkSubscribeWithOptions`, `AddrSubscribeWithOptions`, `NeighSubscribeWithOptions`) for kernel events; persistent `ip -json -force -batch -` subprocess for state queries; event dispatch and tree updates. Link, addr, and neigh events trigger full re-reads of the affected state via ip batch. Route events are **not** handled by NLMonitor -- route data is sourced exclusively from the ZAPI watcher (Section 4.1octies). For link events: 3-query full interface re-read via ip batch + `ethmonitor.RefreshInterface()`. For addr/neigh: single-query re-read of the affected subtree via ip batch. | +| **File Watcher Subsystem** | Watches selected procfs paths (IP forwarding flags) via Linux inotify; triggers re-read and tree update on file change; falls back to polling for pseudo-filesystem paths that do not support inotify. Note: sysfs sensor files (`/sys/class/hwmon`, `/sys/class/thermal`) do NOT emit inotify events and are handled by the hardware polling collector instead. STP bridge port state is handled reactively via netlink events (Bridge Monitor Subsystem), not via fswatcher. DHCP lease file watching is handled by the D-Bus Monitor Subsystem, not via fswatcher. | +| **Bridge Netlink / Bridge Batch Subsystem** | Netlink events trigger full bridge state re-reads via the persistent `bridge -json -batch -` subprocess. 
FDB entries arrive via `NeighSubscribeWithOptions` (entries with `NDA_MASTER` flag); VLAN and STP port state changes arrive via `LinkSubscribeWithOptions`; MDB events arrive via raw netlink subscription to `RTNLGRP_MDB` (group 26). Each event triggers the appropriate `bridge -json -batch -` query (`fdb show`, `vlan show`, `mdb show`) to re-read full state. STP root/topology data is re-queried whenever a port state change event arrives. | +| **IW Event Monitor Subsystem** | Persistent `iw event -t` subprocess for 802.11 wireless events (station associations, disconnections, channel switches, regulatory changes); triggers re-query of WiFi state via short-lived `iw dev info` and `iw dev station dump` commands. Enabled only when `YANGERD_ENABLE_WIFI=true` (set by Buildroot when WiFi support is included in the build). When enabled, the `iw` binary is guaranteed present on the target. | +| **Ethtool Netlink Monitor Subsystem** | Native Go genetlink subscription to the `"ethtool"` family's `"monitor"` multicast group (`ETHNL_MCGRP_MONITOR`); receives `ETHTOOL_MSG_LINKMODES_NTF` and `ETHTOOL_MSG_LINKINFO_NTF` notifications for speed, duplex, and auto-negotiation changes; re-queries via `ethtool.Client.LinkInfo()` + `ethtool.Client.LinkMode()`. Also exposes `RefreshInterface()` for cross-subsystem use by the link event handler (RTM_NEWLINK), since `ETHNL_MCGRP_MONITOR` does NOT fire on link up/down. Not a subprocess — runs as a goroutine with a genetlink socket. | +| **ZAPI Watcher Subsystem** | Persistent streaming connection to FRRouting's zebra daemon via the zserv Unix socket (`/var/run/frr/zserv.api`), using ZAPI protocol v6. Subscribes to route redistribution for kernel, connected, static, OSPF, and RIP route types. Receives incremental `REDISTRIBUTE_ROUTE_ADD` and `REDISTRIBUTE_ROUTE_DEL` messages and updates the in-memory tree. Handles zebra restarts with automatic reconnection and exponential backoff. 
**Sole source** for route table data -- replaces `vtysh` for route collection. See Section 4.1octies. | +| **D-Bus Monitor Subsystem** | Subscribes to D-Bus signals from dnsmasq and firewalld via `godbus/dbus/v5`. dnsmasq signals (`DHCPLeaseAdded`, `DHCPLeaseDeleted`, `DHCPLeaseUpdated`) trigger re-read of the lease file (`/var/lib/misc/dnsmasq.leases`) and a `GetMetrics()` D-Bus method call for DHCP packet counters. firewalld signals (`Reloaded`, plus `NameOwnerChanged` for restart detection) trigger re-read of firewall state via firewalld D-Bus method calls (`getDefaultZone()`, `getActiveZones()`, `getZoneSettings2()`, `getPolicies()`, `getPolicySettings()`, `listServices()`, `getServiceSettings2()`, `getLogDenied()`, `queryPanicMode()`). Follows the event-as-trigger pattern: D-Bus signals provide the notification, actual data is re-read from the canonical source (firewalld D-Bus API). See Section 4.1novies. | +| **LLDP Monitor Subsystem** | Persistent `lldpcli -f json0 watch` subprocess for reactive LLDP neighbor discovery. Receives pretty-printed JSON objects (blank-line delimited) with root keys `lldp-added`, `lldp-updated`, `lldp-deleted` — each carrying the full neighbor payload. Parses the stream using brace-depth counting, extracts neighbor data, and replaces the `ieee802-dot1ab-lldp:lldp` subtree in the in-memory tree. Uses lldpd's own CLI rather than reimplementing LLDP — lldpd is the system's LLDP authority. See Section 4.1decies. | +| **mDNS Monitor Subsystem** | Subscribes to Avahi's D-Bus API (`org.freedesktop.Avahi`) for reactive mDNS/DNS-SD neighbor discovery. Uses the existing `godbus/dbus/v5` dependency to interact with Avahi's `ServiceTypeBrowser`, `ServiceBrowser`, and `ServiceResolver` objects. Updates `/infix-services:mdns/neighbors` on add/update/remove signals. Uses Avahi rather than a standalone mDNS library — Avahi is already running on the system and is the canonical mDNS authority. See Section 4.1undecies. 
| +| **Collectors** | Poll external sources (vtysh for OSPF/RIP/BFD protocol data, chrony cmdmon for NTP, `podman` CLI for container state, sysfs for hardware sensors) at configured intervals. Container collection invokes `podman ps`, `podman inspect`, and `podman stats` for runtime data; lifecycle events (start/stop/create/remove) are candidates for reactive monitoring via `podman events --format json` in Phase 2. Route table collection is **not** performed by collectors -- it is handled by the ZAPI watcher. DHCP and firewall data are handled reactively by the D-Bus Monitor Subsystem, not by polling collectors. | +| **In-Memory Tree** | Thread-safe storage of pre-serialized YANG JSON subtrees. | +| **IPC Server** | Handle Unix socket requests from `statd` and `yangerctl`. | +| **statd (C)** | Bridge sysrepo to `yangerd`. | +| **yangerctl** | CLI interface for monitoring and debugging `yangerd`. | + +--- + +## 4. Detailed Design + +### 4.1 Netlink Monitor Subsystem + +`yangerd` manages THREE persistent subprocesses plus two native netlink subsystems: +1. `ip -json -batch -` -- data queries (yangerd writes commands to stdin, reads JSON arrays from stdout) +2. `bridge -json -batch -` -- bridge state queries (see [4.1quater](#41quater-bridge-monitor-subsystem)) +3. `iw event -t` -- 802.11 wireless event notification (see [4.1quinquies](#41quinquies-iw-event-monitor-subsystem)) +4. **Netlink event subscriptions** -- native Go netlink subscriptions via `vishvananda/netlink` (`LinkSubscribeWithOptions`, `AddrSubscribeWithOptions`, `NeighSubscribeWithOptions`) for link, address, and neighbor events. Bridge FDB events arrive via `NeighSubscribeWithOptions` (FDB entries are neighbor-like with `NDA_MASTER`); bridge VLAN changes via `LinkSubscribeWithOptions`; bridge MDB via raw netlink subscription to `RTNLGRP_MDB`. These are not subprocesses -- they are goroutines reading from typed Go channels. 
yangerd does NOT subscribe to netlink route groups (`RTNLGRP_IPV4_ROUTE`, `RTNLGRP_IPV6_ROUTE`) -- route data is sourced exclusively from the ZAPI watcher (Section 4.1octies). +5. **Ethtool genetlink monitor** -- native Go genetlink socket subscription to `ETHNL_MCGRP_MONITOR` for ethtool settings change notifications (see [4.1sexies](#41sexies-ethtool-netlink-monitor-subsystem)). This is not a subprocess -- it is a goroutine that opens a genetlink socket, joins the `"monitor"` multicast group, and calls `Receive()` in a loop. + +When a netlink event arrives on any subscription channel (e.g., `LinkUpdate` for a state change on eth0), the NLMonitor goroutine: +1. Extracts the affected entity from the typed Go struct (interface name from `update.Link.Attrs().Name`, address from `update.LinkAddress`, etc.) +2. **For link events (RTM_NEWLINK/RTM_DELLINK)**: writes a **full interface re-read** set of commands to `ip batch` stdin -- `link show dev eth0\n`, `-s link show dev eth0\n` (with stats), and `addr show dev eth0\n` -- to obtain the entire interface state atomically. This ensures all interface data in the tree is coherent at a single point in time. Additionally, the dispatcher calls `ethmonitor.RefreshInterface("eth0")` to re-query ethtool settings (speed/duplex/autoneg), because the ethtool genetlink monitor (`ETHNL_MCGRP_MONITOR`) does NOT emit notifications on link up/down events. On RTM_DELLINK, the re-read returns empty/error, causing the interface subtree to be removed from the tree. +3. **For address events (RTM_NEWADDR/RTM_DELADDR)**: writes `addr show dev <ifname>` to `ip batch` stdin and replaces the entire address subtree for that interface. Both add and remove events trigger the same re-read -- the result after a delete simply omits the removed address. +4. **For neighbor events (RTM_NEWNEIGH/RTM_DELNEIGH)**: writes `neigh show dev <ifname>` to `ip batch` stdin and replaces the neighbor subtree for that interface. +5. 
Reads the JSON array response(s) from `ip batch` stdout (one `[...]` per command, one per line) +6. Calls `tree.Set()` with the parsed JSON to replace the affected subtree + +#### 4.1.2 Netlink Subscription Event Channels +- **Link events**: `netlink.LinkSubscribeWithOptions(linkCh, ctx.Done(), LinkSubscribeOptions{ErrorCallback: errCb})` -- receives `LinkUpdate` structs containing `Link.Attrs().Name`, `Link.Attrs().OperState`, etc. +- **Address events**: `netlink.AddrSubscribeWithOptions(addrCh, ctx.Done(), AddrSubscribeOptions{ErrorCallback: errCb})` -- receives `AddrUpdate` structs containing `LinkAddress`, `LinkIndex`, `NewAddr` bool +- **Neighbor events**: `netlink.NeighSubscribeWithOptions(neighCh, ctx.Done(), NeighSubscribeOptions{ErrorCallback: errCb})` -- receives `NeighUpdate` structs containing `Neigh`, event `Type` (RTM_NEWNEIGH/RTM_DELNEIGH) +- **Bridge FDB events**: arrive on the neighbor channel (`NeighSubscribeWithOptions`) -- FDB entries have `NDA_MASTER` flag and are distinguishable from ARP/NDP neighbors. Used as trigger only; full FDB state re-read via `bridge -json -batch -`. +- **Bridge VLAN events**: arrive on the link channel (`LinkSubscribeWithOptions`) -- VLAN attributes are carried on link update messages. Used as trigger only; full VLAN state re-read via `bridge -json -batch -`. +- **Bridge MDB events**: require raw netlink socket subscribed to `RTNLGRP_MDB` (multicast group 26) -- `vishvananda/netlink` does not expose a dedicated MDB subscription API. Used as trigger only; full MDB state re-read via `bridge -json -batch -`. +- **Bridge STP events**: STP port state changes arrive as `RTM_NEWLINK` events carrying `IFLA_BRPORT_STATE` in `IFLA_PROTINFO` on the link channel. The link event handler detects bridge port events and triggers a bridge batch re-query for STP state. STP root/topology data is not proactively notified by the kernel, so it is re-read from the bridge device via batch whenever a port state change is detected. 
+- Events include: link up/down, address add/remove, neighbor add/remove, bridge FDB/VLAN/MDB/STP changes +- **Event-as-trigger pattern**: All netlink events -- link, addr, neigh, and bridge (FDB, VLAN, MDB, STP) -- use the event only to identify WHAT entity changed, then re-read the FULL current state via `ip batch` or `bridge batch`. Route data is sourced exclusively from the ZAPI watcher's streaming connection to zebra (Section 4.1octies). The event content itself is not parsed for data. This applies equally to RTM_NEW* and RTM_DEL* events; a delete event triggers a re-read that returns the state without the deleted entity. +- **Full interface re-read on RTM_NEWLINK/RTM_DELLINK**: When a link event arrives on `linkCh`, the NLMonitor writes a full set of three queries to ip batch (`link show dev <ifname>`, `-s link show dev <ifname>`, `addr show dev <ifname>`) and updates the entire YANG subtree for that interface. This ensures consistency -- all interface data (flags, MTU, operstate, statistics, addresses) is captured at a single coherent point in time. On RTM_DELLINK, the re-read returns empty/error, causing the interface subtree to be removed. +- **Full address re-read on RTM_NEWADDR/RTM_DELADDR**: When an address event arrives on `addrCh`, the NLMonitor writes `addr show dev <ifname>` to ip batch and replaces the entire address subtree for that interface. Both add and remove produce the same re-read; after a remove, the result simply omits the deleted address. +- **Full neighbor re-read on RTM_NEWNEIGH/RTM_DELNEIGH**: When a neighbor event arrives on `neighCh`, the NLMonitor writes `neigh show dev <ifname>` to ip batch and replaces the neighbor subtree for that interface. +- **Cross-subsystem ethtool trigger**: RTM_NEWLINK link events also trigger `ethmonitor.RefreshInterface()` to re-query ethtool data (speed, duplex, auto-negotiation). This is necessary because `ETHNL_MCGRP_MONITOR` does NOT fire notifications when a link goes up/down -- only when settings are explicitly renegotiated. 
+ +#### 4.1.3 ip batch Query Engine +- Started as: `ip -json -batch -` (reads commands from stdin, `-` means stdin) +- Flag order matters: `-json` MUST come before `-batch` +- Each command written to stdin produces one JSON array `[...]` on stdout +- Use `-force` flag (`ip -json -force -batch -`) so errors don't abort the batch process — it continues past failed commands +- Error output goes to stderr (e.g., "Device does not exist"); JSON goes to stdout — clean separation +- Example commands written to stdin: `link show dev eth0`, `-s link show dev eth0` (with stats), `addr show dev eth0`, `neigh show` +- **Full interface re-read set** (written atomically on RTM_NEWLINK for interface eth0): + ``` + link show dev eth0 + -s link show dev eth0 + addr show dev eth0 + ``` + This produces three JSON array responses on stdout (one per line). The first gives link state (flags, MTU, operstate, qdisc, etc.), the second adds hardware counters (rx/tx bytes/packets/errors/dropped), and the third gives all IPv4/IPv6 addresses. Together they provide the complete interface snapshot needed to update the entire YANG subtree. +- **Address re-read** (written on RTM_NEWADDR or RTM_DELADDR for interface eth0): `addr show dev eth0` — single query, single JSON response replacing the full address subtree. +- **Route data**: Route data is NOT queried via `ip batch`. Route data is sourced exclusively from the ZAPI watcher's streaming connection to zebra's zserv socket (see Section 4.1octies). yangerd does not subscribe to netlink route groups. +- **Neighbor re-read** (written on RTM_NEWNEIGH or RTM_DELNEIGH for interface eth0): `neigh show dev eth0` — single query, single JSON response replacing the neighbor subtree. 
+- For bridge data: a separate `bridge -json -batch -` subprocess (see [4.1quater](#41quater-bridge-monitor-subsystem)) + +#### 4.1.4 Initial State Dump +- On startup, after the netlink subscriptions have been established but before any event is dispatched (subscribe-first-then-list, see Section 4.1.5), `yangerd` populates the tree from three sources: + - **ip batch** (link, address, neighbor data): + - `link show` (all links) + - `-s link show` (all links with stats) + - `addr show` (all addresses) + - `neigh show` (all neighbors) + - **ZAPI watcher** (routing table — zebra is the authoritative source for all route types): + - Streaming connection to `/var/run/frr/zserv.api` via ZAPI v6 + - `ZEBRA_REDISTRIBUTE_ADD` per route type triggers full RIB dump from zebra + - Receives `REDISTRIBUTE_ROUTE_ADD` / `REDISTRIBUTE_ROUTE_DEL` messages incrementally + - **fswatcher** (procfs forwarding flags, hostname, timezone): + - After glob expansion and inotify watch setup, calls `InitialRead()` to read the current value of every watched file (see Section 4.1ter) + - `/proc/sys/net/ipv4/conf/*/forwarding` for all existing interfaces + - Completes sub-millisecond (procfs reads are kernel-generated) +- This populates the tree before any events are processed + +#### 4.1.5 Subprocess and Socket Lifecycle +- The `ip batch` and `bridge batch` subprocesses are started in `yangerd`'s `main()` and run for the daemon's lifetime +- Netlink subscription channels are created and subscriptions established before the initial state dump (subscribe-first-then-list pattern, following Antrea's approach) +- If a netlink subscription channel closes (indicating kernel-side error), `yangerd` re-establishes all subscriptions (following OVN-Kubernetes' re-subscribe-on-close pattern) +- If either batch subprocess exits unexpectedly, `yangerd` restarts it with exponential backoff (100ms to 30s) +- On daemon shutdown (SIGTERM/SIGINT), the context is cancelled, which closes `ctx.Done()` and thereby terminates all netlink subscriptions, and stdin pipes are closed for batch subprocesses +- The shared `ErrorCallback` logs warnings and triggers context cancellation, 
following the Cilium/Docker production pattern + +#### 4.1.6 Concurrency Model +- One goroutine runs the NLMonitor select loop, reading from three netlink subscription channels (`linkCh`, `addrCh`, `neighCh`) plus bridge MDB raw netlink channel +- One goroutine reads `ip batch` stdout (response reader) +- One goroutine writes to `ip batch` stdin (query writer, serialized via channel) +- The query writer and response reader coordinate via a request/response queue (channel of pending queries) + +### 4.1bis ip batch Subprocess Manager + +`yangerd` uses a dedicated manager to interact with the persistent `ip batch` process. This manager handles the stdin/stdout pipes and ensures that queries are serialized and paired with their responses. + +#### IPBatch Manager Implementation + +The following Go code demonstrates the core logic for the `IPBatch` manager: + +```go +type IPBatch struct { + cmd *exec.Cmd + stdin io.WriteCloser + stdout *bufio.Scanner + stderr io.ReadCloser + mu sync.Mutex // serializes queries + log *slog.Logger +} + +func NewIPBatch(ctx context.Context, log *slog.Logger) (*IPBatch, error) { + cmd := exec.CommandContext(ctx, "ip", "-json", "-force", "-batch", "-") + stdin, err := cmd.StdinPipe() + if err != nil { + return nil, fmt.Errorf("stdin pipe: %w", err) + } + stdout, err := cmd.StdoutPipe() + if err != nil { + return nil, fmt.Errorf("stdout pipe: %w", err) + } + stderr, err := cmd.StderrPipe() + if err != nil { + return nil, fmt.Errorf("stderr pipe: %w", err) + } + if err := cmd.Start(); err != nil { + return nil, fmt.Errorf("start ip batch: %w", err) + } + b := &IPBatch{ + cmd: cmd, + stdin: stdin, + stdout: bufio.NewScanner(stdout), + stderr: stderr, + log: log, + } + go b.drainStderr() + return b, nil +} + +// Query sends a command to the ip batch process and returns the JSON response. +// Commands are newline-terminated (e.g., "link show dev eth0\n"). +// Each command produces exactly one line of JSON array output on stdout. 
+func (b *IPBatch) Query(command string) (json.RawMessage, error) {
+	// The mutex keeps one request/response exchange on the pipes at a time,
+	// so the line read below always belongs to the command written here.
+	b.mu.Lock()
+	defer b.mu.Unlock()
+
+	if _, err := fmt.Fprintf(b.stdin, "%s\n", command); err != nil {
+		return nil, fmt.Errorf("write command: %w", err)
+	}
+	if !b.stdout.Scan() {
+		// A non-nil Err() is a read failure -- including bufio.ErrTooLong when
+		// a response line exceeds the scanner's token limit (64 KiB unless
+		// Scanner.Buffer() was called). A nil Err() means plain EOF, i.e. the
+		// subprocess closed its stdout.
+		if err := b.stdout.Err(); err != nil {
+			return nil, fmt.Errorf("read response: %w", err)
+		}
+		return nil, fmt.Errorf("ip batch process exited")
+	}
+	// Scanner.Bytes() returns a view into the scanner's internal buffer, which
+	// the next Scan() overwrites. Callers keep the result (it is written into
+	// the tree), so return a private copy rather than the aliased slice.
+	return append(json.RawMessage(nil), b.stdout.Bytes()...), nil
+}
+
+// drainStderr forwards every stderr line of the subprocess to the logger at
+// warning level until the pipe is closed.
+func (b *IPBatch) drainStderr() {
+	scanner := bufio.NewScanner(b.stderr)
+	for scanner.Scan() {
+		b.log.Warn("ip batch stderr", "line", scanner.Text())
+	}
+}
+```
+#### IPBatch Error Handling and Restart
+
+The `IPBatch` manager detects subprocess death via pipe EOF: when the `ip batch` process exits, `b.stdout.Scan()` returns `false` and `fmt.Fprintf(b.stdin, ...)` returns a broken-pipe error. Either condition causes `Query()` to return an error immediately to the caller. There is no per-query timeout — pipe EOF detection is instantaneous.
+
+**Restart coordination:**
+
+1. On subprocess death, the `IPBatch` manager transitions to a `dead` state. All subsequent `Query()` calls return `ErrBatchDead` immediately without acquiring the mutex.
+2. A dedicated restart goroutine respawns the subprocess with exponential backoff (100ms initial, 30s max, factor 2x).
+3. After a successful restart, a canary query (`link show lo`) validates the new process. Only on canary success does the manager transition back to `alive`, allowing `Query()` calls to proceed.
+4. During the restart window, callers (monitor goroutines) receive `ErrBatchDead` and simply skip the current event. The next netlink event will retry the query against the restored subprocess. No event data is lost — the event-as-trigger pattern means the next event triggers a full re-read that captures all accumulated state changes. 
Note: `ErrBatchDead` is a transient sentinel error that must be handled by the monitor's `select` loop, not propagated as a fatal daemon error. + +**Terminology mapping**: The `dead`/`alive` states used internally by `IPBatch` and `BridgeBatch` map to the health API states as follows: `alive` → `"running"`, `dead` (during restart with backoff) → `"restarting"`, `dead` (after max retries exhausted) → `"failed"`. See Section 4.3.5 for the health response schema. + + +The `BridgeBatch` manager follows the identical error handling and restart protocol, using `vlan show` as its canary query. + +Specifically, on `bridge -json -batch -` subprocess death, `BridgeBatch.Query()` returns `ErrBatchDead` immediately to all callers. A restart goroutine respawns the subprocess with the same exponential backoff parameters (100ms initial, 30s max, factor 2x). After a successful restart, a `vlan show` canary query validates the new process before transitioning back to the alive state. During the restart window, bridge event handlers that receive `ErrBatchDead` skip the current re-query; the next netlink event triggers a full re-read against the restored subprocess. + + +#### NLMonitor Event Loop + +The NLMonitor subscribes to netlink events via `vishvananda/netlink` channels and triggers full-state queries via the `IPBatch` manager: + +```go +// NLMonitor tracks per-interface oper-status for last-change timestamps. +// The lastOperStatus map records the most recent operstate string per interface; +// when a LinkUpdate arrives with a different oper-status, time.Now() is +// recorded as the interface's last-change timestamp. +// +// On ANY LinkUpdate, the monitor performs a full interface re-read: +// three ip batch queries (link show, -s link show, addr show) to capture the +// complete interface state atomically, plus an ethtool re-query via +// ethmonitor.RefreshInterface() since ETHNL_MCGRP_MONITOR does NOT fire on +// link up/down events. 
+type NLMonitor struct { + batch *IPBatch + brBatch *BridgeBatch + tree *tree.Tree + ethMon *ethmonitor.EthMonitor // for cross-subsystem ethtool re-query + log *slog.Logger + lastOperStatus map[string]string // iface -> "UP"/"DOWN"/"DORMANT"/... +} + +func (m *NLMonitor) Run(ctx context.Context) error { + ctx, cancel := context.WithCancel(ctx) + defer cancel() + + // Shared error callback (Cilium/Docker production pattern). + // Any netlink socket error triggers context cancellation, which + // closes all subscription channels and allows the supervisor to + // re-establish subscriptions. + errorCallback := func(err error) { + m.log.Warn("netlink subscription error, restarting", "err", err) + cancel() + } + + // Subscribe to all three netlink event types. + // Subscribe BEFORE initial dump (Antrea subscribe-first-then-list pattern) + // to ensure no events are missed between dump and subscription. + linkCh := make(chan netlink.LinkUpdate) + if err := netlink.LinkSubscribeWithOptions(linkCh, ctx.Done(), netlink.LinkSubscribeOptions{ + ErrorCallback: errorCallback, + }); err != nil { + return fmt.Errorf("link subscribe: %w", err) + } + + addrCh := make(chan netlink.AddrUpdate) + if err := netlink.AddrSubscribeWithOptions(addrCh, ctx.Done(), netlink.AddrSubscribeOptions{ + ErrorCallback: errorCallback, + }); err != nil { + return fmt.Errorf("addr subscribe: %w", err) + } + + neighCh := make(chan netlink.NeighUpdate) + if err := netlink.NeighSubscribeWithOptions(neighCh, ctx.Done(), netlink.NeighSubscribeOptions{ + ErrorCallback: errorCallback, + }); err != nil { + return fmt.Errorf("neigh subscribe: %w", err) + } + + // Initial state dump AFTER subscribe (subscribe-first-then-list pattern). + // Any events that arrive during the dump are queued in the channels + // and will be processed once we enter the select loop. + m.initialDump(ctx) + + // Main event loop: select across all subscription channels. 
+ for { + select { + case u, ok := <-linkCh: + if !ok { + // Channel closed — netlink socket error (OVN-K re-subscribe pattern). + return fmt.Errorf("link subscription channel closed") + } + iface := u.Link.Attrs().Name + if iface == "" { + continue + } + + // === FULL INTERFACE RE-READ === + // On ANY LinkUpdate, re-read the ENTIRE interface to ensure + // all data in the tree is coherent at a single point in time. + // Three queries: link state, link stats, addresses. + ifPath := fmt.Sprintf("/ietf-interfaces:interfaces/interface[name='%s']", iface) + + linkData, err := m.batch.Query(fmt.Sprintf("link show dev %s", iface)) + if err != nil { + m.log.Warn("batch link query failed", "iface", iface, "err", err) + continue + } + m.tree.Set(ifPath, linkData) + + statsData, err := m.batch.Query(fmt.Sprintf("-s link show dev %s", iface)) + if err != nil { + m.log.Warn("batch stats query failed", "iface", iface, "err", err) + // Non-fatal: link data already written, stats are supplementary + } else { + m.tree.Set(ifPath+"/statistics", statsData) + } + + addrData, err := m.batch.Query(fmt.Sprintf("addr show dev %s", iface)) + if err != nil { + m.log.Warn("batch addr query failed", "iface", iface, "err", err) + } else { + m.tree.Set(ifPath+"/addresses", addrData) + } + + // === CROSS-SUBSYSTEM ETHTOOL RE-QUERY === + // ETHNL_MCGRP_MONITOR does NOT fire on link up/down -- only on + // explicit settings renegotiation. When a link goes up, the kernel + // negotiates speed/duplex/autoneg but the ethtool genetlink monitor + // is silent. We must explicitly re-query ethtool here. + if m.ethMon != nil { + m.ethMon.RefreshInterface(iface) + } + + // Track oper-status transitions for last-change (RFC 7223 sec 2.2). + // Since we receive every LinkUpdate, recording time.Now() at + // the moment of oper-status change gives last-change for free. 
+ newStatus := extractOperStatus(linkData) + if oldStatus, ok := m.lastOperStatus[iface]; !ok || oldStatus != newStatus { + m.lastOperStatus[iface] = newStatus + ts := time.Now().Format(time.RFC3339) + m.tree.Set( + ifPath+"/last-change", + json.RawMessage(fmt.Sprintf("%q", ts)), + ) + m.log.Info("oper-status changed", "iface", iface, + "old", oldStatus, "new", newStatus, "last-change", ts) + } + + case u, ok := <-addrCh: + if !ok { + return fmt.Errorf("addr subscription channel closed") + } + // === FULL ADDRESS RE-READ === + // On ANY AddrUpdate (new or removed), re-read all addresses + // for this interface. The event is just a trigger -- we don't parse its + // content. After a delete, the re-read result simply omits the removed address. + link, err := netlink.LinkByIndex(u.LinkIndex) + if err != nil { + m.log.Warn("resolve link index", "index", u.LinkIndex, "err", err) + continue + } + iface := link.Attrs().Name + ifPath := fmt.Sprintf("/ietf-interfaces:interfaces/interface[name='%s']", iface) + addrData, err := m.batch.Query(fmt.Sprintf("addr show dev %s", iface)) + if err != nil { + m.log.Warn("batch addr query failed", "iface", iface, "err", err) + continue + } + m.tree.Set(ifPath+"/addresses", addrData) + + case u, ok := <-neighCh: + if !ok { + return fmt.Errorf("neigh subscription channel closed") + } + // === FULL NEIGHBOR RE-READ === + // On ANY NeighUpdate (new or deleted), re-read all neighbors + // for this interface. 
+ iface := "" + if link, err := netlink.LinkByIndex(u.LinkIndex); err == nil { + iface = link.Attrs().Name + } + if iface == "" { + continue + } + ifPath := fmt.Sprintf("/ietf-interfaces:interfaces/interface[name='%s']", iface) + neighData, err := m.batch.Query(fmt.Sprintf("neigh show dev %s", iface)) + if err != nil { + m.log.Warn("batch neigh query failed", "iface", iface, "err", err) + continue + } + m.tree.Set(ifPath+"/neighbors", neighData) + + case <-ctx.Done(): + return ctx.Err() + } + } +} + +// extractOperStatus pulls the operstate string from ip -json link output. +// Returns "UP", "DOWN", "DORMANT", "LOWERLAYERDOWN", etc. +func extractOperStatus(data json.RawMessage) string { + var links []struct { + OperState string `json:"operstate"` + } + if err := json.Unmarshal(data, &links); err != nil || len(links) == 0 { + return "" + } + return links[0].OperState +} +``` + + +### 4.1ter File Watcher Subsystem + +The File Watcher Subsystem provides reactive monitoring of filesystem-based data sources, replacing traditional polling for files that support inotify. By leveraging the Linux `inotify` mechanism, `yangerd` can detect and process updates to IP forwarding flags, the system hostname, and the timezone symlink immediately upon their modification, significantly reducing latency and CPU wake-ups for data that changes infrequently. For regular files (forwarding flags, hostname), the standard `Watch()` method adds an inotify watch directly on the file. For symlinks (`/etc/localtime`), the `WatchSymlink()` method watches the parent directory instead, because Go's `fsnotify` follows symlinks — a direct watch on `/etc/localtime` would monitor the target file (e.g., `/usr/share/zoneinfo/Europe/Amsterdam`), not the symlink entry itself, so symlink replacements via `ln -sf` would go undetected. 
Note: sysfs pseudo-files (hwmon sensors, thermal zones) do not support inotify and are handled by the polling-based hardware collector instead -- see the note after the Watched Paths table below. STP bridge port state is not watched via inotify either; it is handled reactively via netlink events (see Section 4.1quater). DHCP lease updates and firewall configuration changes are handled reactively via D-Bus signals (see Section 4.1novies). + +#### FSWatcher Implementation + +The following Go code defines the `FSWatcher` type and its core event loop in `internal/fswatcher/fswatcher.go`: + +```go +type FSWatcher struct { + watcher *fsnotify.Watcher + tree *tree.Tree + handlers map[string]WatchHandler // path -> handler + debounce map[string]*time.Timer // path -> debounce timer + mu sync.Mutex + log *slog.Logger +} + +type WatchHandler struct { + TreeKey string // YANG tree key to update + ReadFunc func(path string) (json.RawMessage, error) // read and transform + Debounce time.Duration // coalescing window +} + +func New(tree *tree.Tree, log *slog.Logger) (*FSWatcher, error) { + w, err := fsnotify.NewWatcher() + if err != nil { + return nil, fmt.Errorf("fsnotify: %w", err) + } + return &FSWatcher{ + watcher: w, + tree: tree, + handlers: make(map[string]WatchHandler), + debounce: make(map[string]*time.Timer), + log: log, + }, nil +} + +func (fw *FSWatcher) Watch(path string, handler WatchHandler) error { + fw.mu.Lock() + fw.handlers[path] = handler + fw.mu.Unlock() + return fw.watcher.Add(path) +} + +func (fw *FSWatcher) WatchSymlink(path string, handler WatchHandler) error { + dir := filepath.Dir(path) + fw.mu.Lock() + fw.handlers[path] = handler + fw.mu.Unlock() + return fw.watcher.Add(dir) +} + +func (fw *FSWatcher) Run(ctx context.Context) error { + for { + select { + case <-ctx.Done(): + return ctx.Err() + case event, ok := <-fw.watcher.Events: + if !ok { + return fmt.Errorf("watcher closed") + } + if event.Has(fsnotify.Write) || event.Has(fsnotify.Create) { + 
fw.handleEvent(event.Name) + } + if event.Has(fsnotify.Remove) { + // inotify sends IN_IGNORED after IN_DELETE; re-add watch + fw.rewatch(event.Name) + } + case err, ok := <-fw.watcher.Errors: + if !ok { + return fmt.Errorf("watcher error channel closed") + } + fw.log.Warn("fsnotify error", "err", err) + } + } +} +``` + +#### Watched Paths + +| Watched Path Pattern | Handler | Tree Key | Debounce | Notes | +|-----|------|------|------|------| +| `/proc/sys/net/ipv4/conf/*/forwarding` | readForwardingState | `ietf-routing:routing` | 200ms | May not support inotify on some procfs paths; falls back to polling | +| `/etc/hostname` | readHostname | `ietf-system:system` | 200ms | Real file, reliable inotify; `UseMerge: true` to coexist with SystemCollector | +| `/etc/localtime` | readTimezone | `ietf-system:system` | 200ms | Symlink to zoneinfo; uses `WatchSymlink()` (watches parent dir `/etc/`) because `fsnotify` follows symlinks — direct `Watch()` would track the target file, missing symlink replacements. `UseMerge: true`. Handles named zones and Etc/GMT±N with POSIX sign inversion. | + +**Note**: sysfs pseudo-files under `/sys/class/hwmon/` and `/sys/class/thermal/` do **not** emit inotify events. The kernel does not call `fsnotify_modify()` when hardware sensor values change — these files generate their values on `read()`, not on write. Additionally, sensor values (temperature, fan speed, voltage) fluctuate continuously, which would produce event storms even if inotify worked. Hardware sensor data is therefore collected by the polling-based hardware collector (`collector/hardware.go`) at a 10-second interval, not by the fswatcher. See Section 5, collector #6. + +#### WatchSymlink — Symlink-Aware File Watching + +Go's `fsnotify` library follows symlinks transparently: calling `watcher.Add("/etc/localtime")` places an inotify watch on the **target** file (e.g., `/usr/share/zoneinfo/Europe/Amsterdam`), not on the symlink entry in `/etc/`. 
When the system timezone changes, the symlink is replaced atomically via `ln -sf /usr/share/zoneinfo/NewZone /etc/localtime` (which translates to `unlink` + `symlink` at the syscall level). Since the inotify watch is on the old target, no event fires — the watch silently becomes stale. + +`WatchSymlink()` solves this by watching the **parent directory** (`/etc/`) instead of the symlink itself. Directory-level inotify receives `Create` and `Rename` events whenever any entry in the directory changes, including symlink replacements. The handler is registered in `fw.handlers` under the full file path (`/etc/localtime`), so when a directory event fires with `event.Name == "/etc/localtime"`, the existing `handleEvent()` lookup finds the correct handler. + +**Side effect**: Watching `/etc/` means the fswatcher also receives events for unrelated files in that directory (e.g., writes to `/etc/resolv.conf`). These events are harmlessly discarded by the `handleEvent()` handler lookup — no handler is registered for those paths, so `fw.handlers[event.Name]` returns the zero value and no action is taken. The `/etc/hostname` watch, being a direct file watch via `Watch()`, also receives a duplicate event from the directory watch when hostname changes; debouncing coalesces these into a single tree update. + +#### Debouncing Strategy + +To prevent excessive tree updates during rapid filesystem writes (e.g., multiple interface forwarding state changes during reconfiguration), the file watcher implements a per-path debouncing mechanism. When a file modification event is received, `FSWatcher` starts or resets a timer for that specific path. Only after the timer expires (the "coalescing window") is the file read and the tree updated. This ensures that only the final state is committed to the in-memory tree during bulk write operations. + +#### inotify Limitations and Fallback + +While inotify is highly efficient, it has certain kernel-level limitations that `yangerd` must handle. 
The `/proc/sys/fs/inotify/max_user_watches` limit (default 65536) can be exhausted on systems with many interfaces. Additionally, some pseudo-filesystems do not support inotify events at all: `sysfs` files under `/sys/class/hwmon/` and `/sys/class/thermal/` are generated dynamically by the kernel on `read()` — the kernel never calls `fsnotify_modify()` when hardware sensor values change, so inotify watches on these paths would never fire. For this reason, hardware sensor data is collected by the polling-based `collector/hardware.go` (see Section 5, item 6), not by the fswatcher. If a watch on a supported path cannot be established, `yangerd` logs a warning and the affected path falls back to the polling collector at its configured interval. + +#### Glob Expansion at Startup + +Some watched paths contain wildcards that must be resolved at startup. For example, procfs forwarding flags (`/proc/sys/net/ipv4/conf/*/forwarding`) use shell-style globs. These patterns are expanded using `filepath.Glob` during daemon initialization. For each matching path discovered, an individual inotify watch is added to the `FSWatcher` instance. If new interfaces appear at runtime, `yangerd` must be notified to re-scan and add new watches. + +#### Initial Read at Startup + +After all watches are established (including glob-expanded paths), the fswatcher performs a synchronous initial read of every watched file before entering the `Run()` event loop. For each path registered in `fw.handlers`, it calls the handler's `ReadFunc` to read the current value and populates the tree immediately. This ensures that forwarding flags, hostname, and timezone are present in the tree from daemon start, rather than remaining empty until the first inotify event fires — which may never happen if the forwarding state, hostname, or timezone does not change after boot. + +```go +// InitialRead reads the current value of every watched file and populates +// the tree. 
Called once after all Watch() calls and glob expansion, before +// Run(). Errors on individual files are logged but do not prevent startup. +func (fw *FSWatcher) InitialRead() { + fw.mu.Lock() + defer fw.mu.Unlock() + for path, handler := range fw.handlers { + data, err := handler.ReadFunc(path) + if err != nil { + fw.log.Warn("initial read failed", "path", path, "err", err) + continue + } + fw.tree.Set(handler.TreeKey, data) + fw.log.Debug("initial read", "path", path, "key", handler.TreeKey) + } +} +``` + +The initial read is fast (sub-millisecond per file — procfs reads are kernel-generated values) and completes synchronously before the daemon signals readiness. + +#### Concurrency Model + +The `FSWatcher` runs as a single goroutine executing the `Run()` event loop. All incoming inotify events are processed sequentially within this loop. When a debounce timer expires, it calls `handleEvent` in its own goroutine via `time.AfterFunc`, which then posts the event back to the main event loop or acquires the necessary locks to perform the read and update the tree. This ensures thread-safe access to the internal `handlers` and `debounce` maps. + + +### 4.1quater Bridge Monitor Subsystem + +The Bridge Monitor Subsystem provides fully reactive updates for the Forwarding Database (FDB), VLAN membership, Multicast Database (MDB), and Spanning Tree Protocol (STP) states. All bridge data follows the same event-as-trigger pattern used for link/addr/neigh: netlink events identify WHAT changed, then full state re-reads via the persistent `bridge -json -batch -` subprocess provide the authoritative data. No bridge netlink attributes are parsed directly — `iproute2`'s `bridge` tool handles all attribute parsing, ensuring complete coverage of kernel-exposed bridge data including attributes not yet wrapped by Go netlink libraries. 
+ +#### BridgeBatch Subprocess Manager + +The `BridgeBatch` manager interacts with a persistent `bridge -json -batch -` subprocess, ensuring serialized queries and response pairing. Its structure is identical to the `IPBatch` manager but utilizes the `bridge` binary for all operations. + +```go +type BridgeBatch struct { + cmd *exec.Cmd + stdin io.WriteCloser + stdout *bufio.Scanner + stderr io.ReadCloser + mu sync.Mutex + log *slog.Logger +} + +func NewBridgeBatch(ctx context.Context, log *slog.Logger) (*BridgeBatch, error) { + cmd := exec.CommandContext(ctx, "bridge", "-json", "-batch", "-") + stdin, err := cmd.StdinPipe() + if err != nil { + return nil, fmt.Errorf("bridge batch stdin pipe: %w", err) + } + stdout, err := cmd.StdoutPipe() + if err != nil { + return nil, fmt.Errorf("bridge batch stdout pipe: %w", err) + } + stderr, err := cmd.StderrPipe() + if err != nil { + return nil, fmt.Errorf("bridge batch stderr pipe: %w", err) + } + if err := cmd.Start(); err != nil { + return nil, fmt.Errorf("start bridge batch: %w", err) + } + b := &BridgeBatch{ + cmd: cmd, + stdin: stdin, + stdout: bufio.NewScanner(stdout), + stderr: stderr, + log: log, + } + go b.drainStderr() + return b, nil +} +``` + +#### Bridge Netlink Event Handling + +The bridge event monitor receives events from multiple netlink channels and uses each event solely as a trigger for a full state re-read via `bridge -json -batch -`. Bridge FDB entries arrive via `NeighSubscribeWithOptions` (FDB entries are neighbor-like with `NDA_MASTER` flag), bridge VLAN and STP port state changes arrive via `LinkSubscribeWithOptions` (as `RTM_NEWLINK` events on bridge port interfaces), and bridge MDB events arrive via a raw netlink socket subscribed to `RTNLGRP_MDB` (multicast group 26). The event content is not parsed for data — only the affected bridge name is extracted to scope the re-query: + +```go +// Bridge event handling is integrated into the NLMonitor's select loop. 
+// Bridge events are used as triggers only -- the event content is not parsed +// for data. Full state is always re-read via bridge -json -batch -. +// +// In the LinkUpdate handler (linkCh): +// - If the link is a bridge port (has MasterIndex), trigger bridge vlan +// and STP state re-read via bridge batch +// - Regular link processing continues as normal +// +// In the NeighUpdate handler (neighCh): +// - If neigh has NDA_MASTER flag (bridge FDB entry), trigger FDB re-read +// via bridge batch +// - Otherwise, regular neighbor re-read via ip batch +// +// Bridge MDB events require a dedicated raw netlink socket: +func (m *NLMonitor) subscribeBridgeMDB(ctx context.Context) (<-chan struct{}, error) { + // Raw netlink socket for RTNLGRP_MDB (group 26) + sock, err := nl.Subscribe(syscall.NETLINK_ROUTE, 26) // RTNLGRP_MDB + if err != nil { + return nil, fmt.Errorf("subscribe RTNLGRP_MDB: %w", err) + } + + mdbCh := make(chan struct{}, 1) + go func() { + defer sock.Close() + for { + msgs, _, err := sock.Receive() + if err != nil { + if ctx.Err() != nil { + return + } + m.log.Warn("MDB netlink receive error", "err", err) + continue + } + if len(msgs) > 0 { + select { + case mdbCh <- struct{}{}: + default: // coalesce if unread + } + } + } + }() + return mdbCh, nil +} + +// handleBridgeFDB is called when a NeighUpdate has NDA_MASTER flag. +func (m *NLMonitor) handleBridgeFDB(u netlink.NeighUpdate) { + bridge := "" + if link, err := netlink.LinkByIndex(u.MasterIndex); err == nil { + bridge = link.Attrs().Name + } + if bridge == "" { + return + } + data, err := m.brBatch.Query(fmt.Sprintf("fdb show br %s", bridge)) + if err != nil { + m.log.Warn("bridge fdb query failed", "bridge", bridge, "err", err) + return + } + m.tree.Set( + fmt.Sprintf("/ietf-interfaces:interfaces/interface[name='%s']/bridge:bridge/fdb", bridge), + data, + ) +} + +// handleBridgeMDB is called when an MDB event arrives. 
+func (m *NLMonitor) handleBridgeMDB() { + data, err := m.brBatch.Query("mdb show") + if err != nil { + m.log.Warn("bridge mdb query failed", "err", err) + return + } + m.tree.Set("/ieee802-dot1q-bridge:bridges/bridge/mdb", data) +} +``` + +#### Initial State Dump + +On startup, the bridge subsystem populates the tree by issuing the following commands to the `BridgeBatch` process: +- `vlan show` +- `fdb show` +- `mdb show` + +#### Subprocess Lifecycle + +The bridge batch subprocess is managed with the same exponential backoff restart pattern as the ip batch subsystem. A canary query (`vlan show`) is performed upon restart to verify the health of the bridge batch process. Bridge netlink subscriptions are established alongside the main NLMonitor subscriptions and share the same error callback and context cancellation pattern. + +#### Concurrency Model + +Bridge FDB and VLAN events are handled within the NLMonitor's main select loop (they arrive on the `neighCh` and `linkCh` channels respectively). Bridge MDB events arrive on a dedicated raw netlink channel and are also included in the NLMonitor select loop. The `BridgeBatch` manager serializes bridge state queries via a mutex, mirroring the `IPBatch` design. + +### 4.1quinquies IW Event Monitor Subsystem + +#### Overview + +The IW Event Monitor subsystem provides reactive 802.11 wireless monitoring by running a persistent `iw event -t` subprocess. Unlike the NLMonitor (which receives typed Go structs from `vishvananda/netlink` channels) and the bridge netlink subscriptions, the `iw event` command produces human-readable text lines that require custom parsing. Additionally, `iw` has no batch query mode -- re-queries spawn individual short-lived `exec.Command("iw", ...)` subprocesses. 
This is acceptable because WiFi events occur at a much lower rate than netlink link/addr/neigh events (typically single-digit events per minute during normal operation, compared to hundreds of netlink events per second during convergence). + +The subsystem is governed by the `YANGERD_ENABLE_WIFI` feature flag: when WiFi support is included in the Infix build, the Buildroot recipe sets `YANGERD_ENABLE_WIFI=true` in `/etc/default/yangerd`, and the `iw` binary is guaranteed present on the target. When WiFi is not included in the build, the flag is set to `false` and the IW Event Monitor is not started at all. + +#### IW Event Output Format + +The `iw event -t` command produces timestamped, human-readable text lines on stdout. Each line follows one of several formats: + +``` +1708984743.123456: wlan0 (phy #0): new station aa:bb:cc:dd:ee:ff +1708984743.456789: wlan0 (phy #0): del station aa:bb:cc:dd:ee:ff +1708984800.111222: wlan0 (phy #0): connected to aa:bb:cc:dd:ee:ff +1708984800.333444: wlan0 (phy #0): disconnected +1708984900.555666: wlan0 (phy #0): scan started +1708984901.777888: wlan0 (phy #0): scan aborted +1708984950.999000: wlan0 (phy #0): ch_switch_started_notify freq 5180 width 80 MHz +1708985000.111222: phy #0: reg_change +``` + +Key differences from NLMonitor (netlink subscription) output: +- **Text, not JSON**: Each line must be parsed with string splitting and regular expressions rather than `json.Unmarshal()` +- **No batch mode**: There is no `iw -batch -` equivalent; re-queries use individual short-lived `exec.Command` invocations +- **Timestamp format**: Floating-point Unix epoch seconds (e.g., `1708984743.123456`), not ISO 8601 +- **Variable structure**: Different event types have different numbers of fields after the interface identifier, and phy-scoped events such as `reg_change` carry only a `phy #N` identifier with no interface name + +#### Key Event Types + +| Event | Meaning | Action | +|-------|---------|--------| +| `new station` | A wireless client associated (AP mode) | Re-query `iw dev <iface> station dump` | +| `del station` | A wireless 
client disassociated (AP mode) | Re-query station list; remove from tree | +| `connected` | This station connected to an AP (station mode) | Re-query `iw dev info` and `iw dev link` | +| `disconnected` | This station disconnected from AP (station mode) | Clear link info from tree | +| `auth` | Authentication event | Logged; no tree update (transient) | +| `scan started` | Background scan initiated | Logged for observability | +| `scan aborted` | Scan was aborted | Logged for observability | +| `ch_switch_started_notify` | Channel switch in progress | Re-query `iw dev info` for new frequency | +| `reg_change` | Regulatory domain changed | Re-query all wireless interfaces | + +#### IWMonitor Go Struct + +```go +// IWMonitor manages the persistent `iw event -t` subprocess. +// Started only when YANGERD_ENABLE_WIFI=true (WiFi included in build). +type IWMonitor struct { + cmd *exec.Cmd // persistent iw event -t subprocess + stdout *bufio.Scanner // line scanner for subprocess stdout + tree *tree.Tree // shared in-memory data tree + log *slog.Logger // structured logger + ctx context.Context // lifecycle context + cancel context.CancelFunc +} + +// IWEvent represents a single parsed line from `iw event -t`. +type IWEvent struct { + Timestamp float64 // Unix epoch seconds (e.g., 1708984743.123456) + Interface string // Wireless interface name (e.g., "wlan0") + Phy string // Physical device identifier (e.g., "phy #0") + Type string // Event type (e.g., "new station", "disconnected") + Addr string // MAC address (if applicable, empty otherwise) +} +``` + +#### Event Parser + +```go +// parseIWEvent parses a single line from `iw event -t` output. +// Returns an IWEvent and true on success, or zero-value and false +// if the line does not match any known event format. 
+func parseIWEvent(line string) (IWEvent, bool) { + // Format: "<timestamp>: <iface> (<phy>): <event> [<addr>]" + // or, for phy-scoped events: "<timestamp>: <phy>: <event>" + // Example: "1708984743.123456: wlan0 (phy #0): new station aa:bb:cc:dd:ee:ff" + // Example: "1708985000.111222: phy #0: reg_change" + parts := strings.SplitN(line, ": ", 3) + if len(parts) < 3 { + return IWEvent{}, false + } + + ts, err := strconv.ParseFloat(parts[0], 64) + if err != nil { + return IWEvent{}, false + } + + // Parse the "wlan0 (phy #0)" portion. Phy-scoped events such as + // reg_change carry only "phy #0"; Interface is left empty for them. + ifacePhy := parts[1] + var iface, phy string + if parenIdx := strings.Index(ifacePhy, " ("); parenIdx >= 0 { + iface = ifacePhy[:parenIdx] + phy = strings.Trim(ifacePhy[parenIdx+2:], ")") + } else if strings.HasPrefix(ifacePhy, "phy #") { + phy = ifacePhy + } else { + return IWEvent{}, false + } + + // Remaining is event type + optional address + eventStr := parts[2] + ev := IWEvent{Timestamp: ts, Interface: iface, Phy: phy} + + switch { + case strings.HasPrefix(eventStr, "new station "): + ev.Type = "new station" + ev.Addr = strings.TrimPrefix(eventStr, "new station ") + case strings.HasPrefix(eventStr, "del station "): + ev.Type = "del station" + ev.Addr = strings.TrimPrefix(eventStr, "del station ") + case strings.HasPrefix(eventStr, "connected to "): + ev.Type = "connected" + ev.Addr = strings.TrimPrefix(eventStr, "connected to ") + case eventStr == "disconnected": + ev.Type = "disconnected" + case strings.HasPrefix(eventStr, "ch_switch_started_notify"): + ev.Type = "ch_switch_started_notify" + case eventStr == "scan started": + ev.Type = "scan started" + case eventStr == "scan aborted": + ev.Type = "scan aborted" + case strings.HasPrefix(eventStr, "reg_change"): + ev.Type = "reg_change" + case strings.HasPrefix(eventStr, "auth"): + ev.Type = "auth" + default: + ev.Type = eventStr // preserve unknown events for logging + } + + return ev, true +} +``` + +#### Event Handler and Re-Query + +```go +// handleEvent processes a parsed IW event by re-querying the appropriate +// iw subcommands and updating the in-memory tree.
+func (m *IWMonitor) handleEvent(ev IWEvent) { + switch ev.Type { + case "new station", "del station": + m.refreshStationList(ev.Interface) + case "connected", "ch_switch_started_notify": + m.refreshInterfaceInfo(ev.Interface) + case "disconnected": + m.clearLinkInfo(ev.Interface) + case "reg_change": + m.refreshAllInterfaces() + default: + m.log.Debug("unhandled iw event", "type", ev.Type, "iface", ev.Interface) + } +} + +// refreshStationList runs `iw dev <iface> station dump` and updates +// the tree with the current list of associated stations. +// The query is bounded by a 5-second timeout; on failure a warning is +// logged and the existing tree entry is left untouched. +func (m *IWMonitor) refreshStationList(iface string) { + ctx, cancel := context.WithTimeout(m.ctx, 5*time.Second) + defer cancel() + out, err := exec.CommandContext(ctx, "iw", "dev", iface, "station", "dump").Output() + if err != nil { + m.log.Warn("iw station dump failed", "iface", iface, "err", err) + return + } + stations := parseStationDump(string(out)) + m.tree.Set("wifi/"+iface+"/stations", stations) +} + +// refreshInterfaceInfo runs `iw dev <iface> info` to update SSID, +// frequency, channel width, and TX power in the tree. +// The query is bounded by a 5-second timeout; on failure a warning is +// logged and the existing tree entry is left untouched. +func (m *IWMonitor) refreshInterfaceInfo(iface string) { + ctx, cancel := context.WithTimeout(m.ctx, 5*time.Second) + defer cancel() + out, err := exec.CommandContext(ctx, "iw", "dev", iface, "info").Output() + if err != nil { + m.log.Warn("iw dev info failed", "iface", iface, "err", err) + return + } + info := parseIWInfo(string(out)) + m.tree.Set("wifi/"+iface+"/info", info) +} +``` + +#### Differences from IP/Bridge Monitor Subsystems + +| Aspect | NLMonitor (netlink subscriptions) | iw event | +|--------|---------------------------|----------| +| Output format | Typed Go structs (`LinkUpdate`, `AddrUpdate`, etc.)
on channels | Human-readable text lines | +| Batch query mode | `ip -json -batch -` / `bridge -json -batch -` (persistent stdin/stdout) | None -- each query spawns a short-lived `exec.Command` | +| Event rate | High (100s/sec during convergence) | Low (single-digit/min typical) | +| Parser implementation | Direct struct field access (`u.Link.Attrs().Name`) | `strings.SplitN()` + `strconv.ParseFloat()` + switch | +| Event source | Native Go netlink channels (`vishvananda/netlink`) | `iw event -t` subprocess (stdout) | +| Absence handling | Netlink always available (kernel 6.18) | Governed by `YANGERD_ENABLE_WIFI` feature flag; when enabled, `iw` is guaranteed present | + +#### Subprocess Lifecycle + +The `iw event -t` subprocess is started during yangerd initialization when `YANGERD_ENABLE_WIFI=true`. If the subprocess exits unexpectedly, it is restarted with the same exponential backoff pattern used by the NLMonitor and bridge batch subsystems (initial delay 100ms, max delay 30s, backoff factor 2x). Upon restart, a full re-query of all known wireless interfaces is performed to synchronize the in-memory tree with the current kernel state. + +Unlike the ip and bridge subsystems, there is no canary query mechanism because `iw event` has no query/response mode—it only emits events. Health monitoring is instead based on subprocess liveness: if the process exits or its stdout is closed, the monitor goroutine detects this via `scanner.Err()` and initiates the restart sequence. + +#### Concurrency Model + +The IW Event Monitor uses a single goroutine that reads lines from `iw event -t` stdout via a `bufio.Scanner`. For each parsed event, re-queries are executed synchronously within the same goroutine because WiFi event rates are low enough that sequential processing does not introduce meaningful latency. This avoids the complexity of a separate query goroutine and its associated synchronization. 
If future deployments reveal that re-query latency becomes problematic (e.g., on systems with dozens of wireless interfaces), the design can be extended to dispatch re-queries to a bounded worker pool without changing the event parsing goroutine. + +### 4.1sexies Ethtool Netlink Monitor Subsystem + +#### Overview + +The Linux kernel's ethtool subsystem exposes a genetlink family named `"ethtool"`. This family includes a multicast group named `"monitor"` (`ETHNL_MCGRP_MONITOR`) that delivers notification messages whenever ethtool-managed settings change on any network interface. Infix targets Linux kernel 6.18, where ethtool netlink is unconditionally available. By subscribing to this multicast group, yangerd receives immediate notification of speed, duplex, link mode, and auto-negotiation changes without polling. + +Unlike the ip, bridge, and iw subsystems, the ethtool netlink monitor is **not a subprocess**. It is a native Go goroutine that opens a genetlink socket using `mdlayher/genetlink`, joins the monitor multicast group, and calls `conn.Receive()` in a loop. This avoids the overhead of managing an external process, parsing its output, and supervising its lifecycle. 
+ +#### Notification Types + +The kernel's `ethnl_default_notify_ops[]` array (defined in `net/ethtool/netlink.c`) registers the following notification message types: + +| Notification Message | Trigger | Relevant YANG Leaves | +|---------------------|---------|---------------------| +| `ETHTOOL_MSG_LINKINFO_NTF` | Link info change (PHY type, transceiver) | speed, duplex | +| `ETHTOOL_MSG_LINKMODES_NTF` | Link modes change (advertised/supported speeds, autoneg) | speed, duplex, auto-negotiation | +| `ETHTOOL_MSG_FEATURES_NTF` | Offload feature toggle | (not mapped to YANG leaves in Phase 1) | +| `ETHTOOL_MSG_WOL_NTF` | Wake-on-LAN setting change | (not mapped) | +| `ETHTOOL_MSG_RINGS_NTF` | Ring buffer size change | (not mapped) | +| `ETHTOOL_MSG_CHANNELS_NTF` | Channel count change | (not mapped) | +| `ETHTOOL_MSG_COALESCE_NTF` | Interrupt coalescing change | (not mapped) | +| `ETHTOOL_MSG_PAUSE_NTF` | Pause frame setting change | (not mapped) | +| `ETHTOOL_MSG_EEE_NTF` | Energy-Efficient Ethernet change | (not mapped) | +| `ETHTOOL_MSG_FEC_NTF` | Forward Error Correction change | (not mapped) | +| `ETHTOOL_MSG_MODULE_NTF` | Transceiver module event | (not mapped) | +| `ETHTOOL_MSG_PLCA_NTF` | Physical Layer Collision Avoidance change | (not mapped) | +| `ETHTOOL_MSG_MM_NTF` | MAC Merge (802.3br) change | (not mapped) | + +Of these, yangerd acts on `ETHTOOL_MSG_LINKINFO_NTF` and `ETHTOOL_MSG_LINKMODES_NTF` — these are the only notifications that affect YANG-modeled operational leaves (speed, duplex, auto-negotiation). All other notification types are logged at DEBUG level and discarded. + +**Important**: Statistics and counters (e.g., `ethtool -S` output) have **no corresponding `_NTF` message type**. They must remain polling-based via the ethtool collector at a 30-second interval. 
+ +#### What Does NOT Have Notifications + +The following ethtool data categories are explicitly **not** covered by the genetlink monitor and remain polling-based: + +- **Per-interface statistics** (`ethtool -S `, `ETHTOOL_MSG_STATS_GET`): No `ETHTOOL_MSG_STATS_NTF` exists. Counters are monotonically increasing values that change on every packet; event-based notification would be impractical. +- **String sets** (`ethtool -i `, `ETHTOOL_MSG_STRSET_GET`): Driver name, firmware version — effectively static, queried once at startup. + +#### Hybrid Model + +The ethtool data acquisition becomes a **hybrid** of reactive and polling: + +| Data Category | Method | Interval/Trigger | Go Package | +|--------------|--------|-----------------|-----------| +| Speed, duplex, auto-negotiation | REACTIVE (genetlink monitor) | On `_NTF` notification | `internal/ethmonitor/` | +| Extended statistics (counters) | POLLING | 30 seconds | `internal/collector/ethtool.go` | +| Advertised/supported link modes | REACTIVE (genetlink monitor) | On `ETHTOOL_MSG_LINKMODES_NTF` | `internal/ethmonitor/` | + +Both the reactive ethmonitor and the polling ethtool collector write to the same tree paths under `ietf-interfaces:interfaces` (specifically the `infix-ethernet-interface:ethernet` augment subtrees). The per-model `sync.RWMutex` for the `ietf-interfaces:interfaces` key ensures that concurrent writes from the monitor goroutine and the collector goroutine are serialized. + +#### Core Types + +```go +// internal/ethmonitor/ethmonitor.go + +package ethmonitor + +import ( + "context" + "log/slog" + + "github.com/mdlayher/ethtool" + "github.com/mdlayher/genetlink" + "github.com/kernelkit/infix/src/yangerd/internal/tree" +) + +// EthMonitor subscribes to the ethtool genetlink monitor multicast +// group and updates the in-memory tree on settings change notifications. 
+type EthMonitor struct { + conn *genetlink.Conn + family genetlink.Family + groupID uint32 // monitor multicast group ID + tree *tree.Tree + etClient *ethtool.Client // for re-queries on notification + log *slog.Logger +} + +// NTF command constants: values from the kernel-to-user message enum in +// include/uapi/linux/ethtool_netlink_generated.h (stable uAPI numbering: +// LINKINFO_GET_REPLY=2, LINKINFO_NTF=3, LINKMODES_GET_REPLY=4, LINKMODES_NTF=5). +const ( + ETHTOOL_MSG_LINKINFO_NTF = 3 + ETHTOOL_MSG_LINKMODES_NTF = 5 +) +``` + +#### Subscription and Event Loop + +```go +// New creates an EthMonitor by dialing the genetlink socket, +// resolving the "ethtool" family, and finding the "monitor" multicast group. +func New(t *tree.Tree, log *slog.Logger) (*EthMonitor, error) { + conn, err := genetlink.Dial(nil) + if err != nil { + return nil, fmt.Errorf("genetlink dial: %w", err) + } + + family, err := conn.GetFamily("ethtool") + if err != nil { + conn.Close() + return nil, fmt.Errorf("get ethtool family: %w", err) + } + + var groupID uint32 + for _, g := range family.Groups { + if g.Name == "monitor" { + groupID = g.ID + break + } + } + if groupID == 0 { + conn.Close() + return nil, fmt.Errorf("ethtool monitor multicast group not found") + } + + if err := conn.JoinGroup(groupID); err != nil { + conn.Close() + return nil, fmt.Errorf("join monitor group: %w", err) + } + + etClient, err := ethtool.New() + if err != nil { + conn.Close() + return nil, fmt.Errorf("ethtool client: %w", err) + } + + return &EthMonitor{ + conn: conn, + family: family, + groupID: groupID, + tree: t, + etClient: etClient, + log: log, + }, nil +} + +// Run listens for ethtool notifications and updates the tree. +// It blocks until ctx is cancelled.
+func (m *EthMonitor) Run(ctx context.Context) error { + defer m.conn.Close() + defer m.etClient.Close() + + // Receive() blocks with no deadline, so close the socket as soon as + // ctx is cancelled: the pending Receive() then fails and the loop + // below returns ctx.Err(). + stop := context.AfterFunc(ctx, func() { m.conn.Close() }) + defer stop() + + for { + select { + case <-ctx.Done(): + return ctx.Err() + default: + } + + msgs, _, err := m.conn.Receive() + if err != nil { + if ctx.Err() != nil { + return ctx.Err() + } + m.log.Warn("ethmonitor receive error", "err", err) + continue + } + + for _, msg := range msgs { + m.handleNotification(msg) + } + } +} +``` + +#### Notification Handler + +```go +func (m *EthMonitor) handleNotification(msg genetlink.Message) { + switch msg.Header.Command { + case ETHTOOL_MSG_LINKINFO_NTF, ETHTOOL_MSG_LINKMODES_NTF: + ifname := extractIfname(msg.Data) + if ifname == "" { + m.log.Debug("ethmonitor: NTF without ifname", "cmd", msg.Header.Command) + return + } + m.refreshEthernetSettings(ifname) + default: + m.log.Debug("ethmonitor: ignored NTF", "cmd", msg.Header.Command) + } +} + +// refreshEthernetSettings re-queries speed, duplex, and auto-negotiation +// for the given interface and updates the tree.
+func (m *EthMonitor) refreshEthernetSettings(ifname string) { + iface, err := net.InterfaceByName(ifname) + if err != nil { + m.log.Warn("ethmonitor: interface lookup failed", "iface", ifname, "err", err) + return + } + + linkInfo, err := m.etClient.LinkInfo(ethtool.Interface{Index: iface.Index}) + if err != nil { + m.log.Warn("ethmonitor: LinkInfo failed", "iface", ifname, "err", err) + return + } + + linkMode, err := m.etClient.LinkMode(ethtool.Interface{Index: iface.Index}) + if err != nil { + m.log.Warn("ethmonitor: LinkMode failed", "iface", ifname, "err", err) + return + } + + data := map[string]interface{}{ + "speed": linkMode.SpeedMegabits, + "duplex": duplexString(linkInfo.Duplex), + "auto-negotiation": autonegString(linkMode.AutoNegotiation), + } + jsonData, _ := json.Marshal(data) + m.tree.Set("ietf-interfaces:interfaces/interface["+ifname+"]/ethernet", json.RawMessage(jsonData)) +} +``` + +#### Public RefreshInterface API (Cross-Subsystem) + +The `EthMonitor` exposes a public `RefreshInterface()` method that the link event handler (`monitor/link.go`) calls on every RTM_NEWLINK event. This is necessary because the ethtool genetlink monitor (`ETHNL_MCGRP_MONITOR`) does **NOT** emit notifications when a link goes up or down — only when settings are explicitly renegotiated (e.g., by `ethtool -s`). When the kernel brings a link up, it negotiates speed, duplex, and auto-negotiation with the link partner, but this negotiation result is invisible to the ethtool monitor. + +`RefreshInterface()` is a thin public wrapper around the private `refreshEthernetSettings()`: + +```go +// RefreshInterface is called by the link event handler (monitor/link.go) +// whenever an RTM_NEWLINK event arrives. Since ETHNL_MCGRP_MONITOR does NOT +// emit notifications on link up/down (only on explicit settings changes), +// this method ensures that speed/duplex/autoneg are re-queried after every +// link state change. 
+func (m *EthMonitor) RefreshInterface(ifname string) { + m.refreshEthernetSettings(ifname) +} +``` + +This cross-subsystem coordination ensures that ethtool data is always current after link events: + +| Trigger | Source | Ethtool Action | +|---------|--------|----------------| +| `ETHTOOL_MSG_LINKINFO_NTF` | ethtool genetlink monitor (settings change) | `refreshEthernetSettings()` (internal) | +| `ETHTOOL_MSG_LINKMODES_NTF` | ethtool genetlink monitor (mode change) | `refreshEthernetSettings()` (internal) | +| `RTM_NEWLINK` (any) | link event handler (`monitor/link.go`) | `RefreshInterface()` (public, cross-subsystem) | + +Without `RefreshInterface()`, after a link-up event the tree would show stale speed/duplex/autoneg values until the next 30-second polling cycle (if ethmonitor failed) or indefinitely (if ethmonitor was active but the kernel never sent an explicit ethtool NTF). + +#### Differences from Other Reactive Subsystems + +| Aspect | NLMonitor (netlink subscriptions) | iw event | ethtool genetlink | +|--------|-------------------|----------|-------------------| +| Implementation | Native Go netlink channels (`vishvananda/netlink`) | External subprocess | Native Go genetlink socket | +| Output format | Typed Go structs (`LinkUpdate`, `AddrUpdate`, etc.)
| Human-readable text | Binary genetlink messages | +| Process management | Goroutine with channel re-subscribe on close | Persistent subprocess with restart | Goroutine -- no process to manage | +| Batch query mode | Yes (`ip -batch -` / `bridge -batch -`) for re-reads | None (short-lived exec) | No -- re-queries via `ethtool.Client` | +| Failure mode | Channel close -> re-subscribe (OVN-K pattern) | Subprocess crash -> restart | Socket error -> reconnect | +| Event rate | High (100s/sec during convergence) | Low (single-digit/min) | Very low (link negotiation events only) | +| Absence handling | Netlink always available (kernel 6.18) | Governed by `YANGERD_ENABLE_WIFI` flag | Always active (ethtool netlink unconditionally available on kernel 6.18) | + +#### Lifecycle + +The `EthMonitor` is created during yangerd initialization by calling `ethmonitor.New()`. Since Infix targets Linux kernel 6.18, the `"ethtool"` genetlink family and its `"monitor"` multicast group are unconditionally available. If the subscription fails for any unexpected reason (e.g., permission denied, kernel module not loaded), the error is treated as fatal and logged at ERROR — this indicates a misconfigured system, not a kernel capability gap. + +On clean shutdown (context cancellation), the genetlink connection is closed via `defer m.conn.Close()`, which causes `conn.Receive()` to return an error and the goroutine to exit. + +#### Concurrency Model + +The `EthMonitor` uses a single goroutine that calls `conn.Receive()` in a loop. Each notification triggers a synchronous re-query via `ethtool.Client.LinkInfo()` and `ethtool.Client.LinkMode()`. This sequential model is appropriate because ethtool settings change notifications are extremely infrequent — they occur only during physical link negotiation events (cable plug/unplug, speed forced by operator, autoneg toggled). Even on a system with hundreds of interfaces, link negotiation storms are rare and short-lived. 
+ +The tree write from the ethmonitor goroutine and the tree write from the ethtool polling collector are serialized by the per-model `sync.RWMutex` for their shared key (`ietf-interfaces:interfaces`). No additional synchronization is needed between these two components -- they write to the same tree paths but at different times (reactive on notification vs. periodic on 30-second tick). + +### 4.1octies ZAPI Watcher Subsystem (Zebra Route Redistribution) + +#### Overview + +The ZAPI (Zebra API) watcher replaces the previous `vtysh`-based route table collection with a persistent, streaming connection to FRRouting's zebra daemon. Instead of forking `vtysh -c 'show ip route json'` on every netlink route event, yangerd opens a Unix domain socket to zebra's zserv API, subscribes to route redistribution notifications, and receives incremental route add/delete messages as they occur. + +This design is motivated by a fundamental limitation of the Linux kernel FIB: **routes may exist in zebra's RIB that are not installed in the kernel**. These include: + +- Routes with unresolvable next-hops (`"installed": false` in FRR) +- Routes that lost the administrative distance election (`"selected": false`) +- ECMP paths exceeding the kernel's maximum next-hop count +- Routes filtered by FRR's `table-map` policy + +The `ip route` command (and netlink `RTM_NEWROUTE` events) only reflect routes that zebra has successfully installed in the kernel FIB. To expose the complete routing state through the YANG operational datastore, yangerd must query zebra directly. + +#### ZAPI Protocol + +FRRouting uses the Zebra Serv (zserv) protocol for inter-daemon communication. All FRR daemons (bgpd, ospfd, ripd, staticd) use this same protocol to exchange routes with zebra. The protocol version is ZSERV_VERSION 6, which has been stable across FRR 8.x, 9.x, and 10.x (including the target FRR 10.5.1). 
+ +``` +ZAPI v6 Header (10 bytes): + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Length (2) | Marker 0xFE | Version (6) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | VRF ID (4) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Command (2) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +Socket path: /var/run/frr/zserv.api (Unix domain socket) +``` + +The connection flow for a route redistribution client is: + +``` +yangerd (ZAPI client) zebra + | | + |--- ZEBRA_HELLO (daemon type=0) --------->| Register as client + | | + |--- ZEBRA_ROUTER_ID_ADD ----------------->| Request router-id updates + | | + |--- ZEBRA_REDISTRIBUTE_ADD (kernel) ----->| Subscribe: type 1 + |--- ZEBRA_REDISTRIBUTE_ADD (connected) -->| Subscribe: type 2 + |--- ZEBRA_REDISTRIBUTE_ADD (static) ----->| Subscribe: type 3 + |--- ZEBRA_REDISTRIBUTE_ADD (rip) -------->| Subscribe: type 4 + |--- ZEBRA_REDISTRIBUTE_ADD (ospf) ------->| Subscribe: type 6 + | | + |<-- ZEBRA_REDISTRIBUTE_ROUTE_ADD ---------| Full dump of existing + |<-- ZEBRA_REDISTRIBUTE_ROUTE_ADD ---------| routes matching the + |<-- ZEBRA_REDISTRIBUTE_ROUTE_ADD ---------| subscribed types + |<-- ... | + | | + | (incremental updates from here on) | + |<-- ZEBRA_REDISTRIBUTE_ROUTE_ADD ---------| New route installed + |<-- ZEBRA_REDISTRIBUTE_ROUTE_DEL ---------| Route withdrawn + | | +``` + +After the initial dump, zebra sends incremental `REDISTRIBUTE_ROUTE_ADD` and `REDISTRIBUTE_ROUTE_DEL` messages whenever a route matching a subscribed type is added, modified, or withdrawn. Each message includes the full route body: prefix, prefix length, address family, route type, distance, metric, tag, next-hop list (with interface index and gateway address), and flags indicating whether the route is selected and installed in the kernel. 
+ +#### Go Implementation + +The ZAPI watcher uses the `github.com/osrg/gobgp/v4/pkg/zebra` package, which implements ZAPI protocol versions 2 through 6. This library is production-tested in Cilium and kube-vip. It provides `NewClient()` for connection setup, `SendRedistribute()` for subscription, and a `Receive()` channel for incoming messages. + +```go +package zapiwatcher + +import ( + "context" + "log/slog" + "math" + "net" + "time" + + "github.com/osrg/gobgp/v4/pkg/zebra" + "github.com/kernelkit/infix/src/yangerd/internal/tree" +) + +const ( + zapiSocketPath = "/var/run/frr/zserv.api" + zapiVersion = 6 + zapiSoftware = "frr10.5" + + // Reconnection parameters + reconnectInitial = 100 * time.Millisecond + reconnectMax = 30 * time.Second + reconnectFactor = 2.0 +) + +// Route types to subscribe for redistribution. +var subscribeTypes = []zebra.RouteType{ + zebra.RouteKernel, // type 1: kernel routes (from ip route add) + zebra.RouteConnect, // type 2: connected (interface subnets) + zebra.RouteStatic, // type 3: static routes (from staticd) + zebra.RouteRIP, // type 4: RIP-learned routes + zebra.RouteOSPF, // type 6: OSPF-learned routes +} + +// ZAPIWatcher maintains a persistent connection to zebra's zserv +// socket and updates the in-memory tree with route redistribution +// messages. It handles zebra restarts with automatic reconnection. +type ZAPIWatcher struct { + tree *tree.Tree + log *slog.Logger +} + +func New(t *tree.Tree, log *slog.Logger) *ZAPIWatcher { + return &ZAPIWatcher{tree: t, log: log} +} +``` + +#### Connection and Subscription + +```go +// connect establishes a ZAPI session and subscribes to route +// redistribution for all configured route types. 
+func (w *ZAPIWatcher) connect(ctx context.Context) (*zebra.Client, error) { + conn, err := net.Dial("unix", zapiSocketPath) + if err != nil { + return nil, fmt.Errorf("dial zserv: %w", err) + } + + cli, err := zebra.NewClient(conn, zebra.MaxSoftware(zapiSoftware), + zebra.Version(uint8(zapiVersion))) + if err != nil { + conn.Close() + return nil, fmt.Errorf("zapi handshake: %w", err) + } + + // Send HELLO to register as a redistribution client. + if err := cli.SendHello(); err != nil { + cli.Close() + return nil, fmt.Errorf("zapi hello: %w", err) + } + + // Request router-id updates (needed for some route attributes). + if err := cli.SendRouterIDAdd(); err != nil { + cli.Close() + return nil, fmt.Errorf("zapi router-id: %w", err) + } + + // Subscribe to redistribution for each route type. + for _, rt := range subscribeTypes { + if err := cli.SendRedistribute(rt, zebra.VRFDefault); err != nil { + cli.Close() + return nil, fmt.Errorf("zapi redistribute %v: %w", rt, err) + } + } + + w.log.Info("zapi: connected to zebra", "socket", zapiSocketPath, + "version", zapiVersion, "types", len(subscribeTypes)) + return cli, nil +} +``` + +#### Main Run Loop with Reconnection + +The gobgp zebra client's `Receive()` channel delivers incoming ZAPI messages. When zebra restarts (the zserv socket is deleted and recreated), the channel closes with an EOF. The watcher detects this and reconnects with exponential backoff. + +```go +// Run starts the ZAPI watcher. It blocks until ctx is cancelled. +// On disconnect, it reconnects with exponential backoff. 
+func (w *ZAPIWatcher) Run(ctx context.Context) error { + delay := reconnectInitial + + for { + cli, err := w.connect(ctx) + if err != nil { + w.log.Warn("zapi: connect failed, retrying", + "error", err, "delay", delay) + select { + case <-ctx.Done(): + return ctx.Err() + case <-time.After(delay): + } + delay = time.Duration(math.Min( + float64(delay)*reconnectFactor, + float64(reconnectMax))) + continue + } + + // Reset backoff on successful connection. + delay = reconnectInitial + + // Process messages until disconnect or context cancellation. + w.processMessages(ctx, cli) + + // processMessages also returns when ctx is cancelled; in that + // case stop here instead of clearing routes and reconnecting + // during shutdown. + if ctx.Err() != nil { + return ctx.Err() + } + + // Otherwise the connection was lost. + // Clear stale routes before reconnecting. + w.clearAllRoutes() + w.log.Warn("zapi: disconnected from zebra, reconnecting") + } +} +``` + +#### Message Processing + +```go +func (w *ZAPIWatcher) processMessages(ctx context.Context, cli *zebra.Client) { + for { + select { + case <-ctx.Done(): + cli.Close() + return + case msg, ok := <-cli.Receive(): + if !ok { + // Channel closed -- zebra disconnected. + return + } + w.handleMessage(msg) + } + } +} + +func (w *ZAPIWatcher) handleMessage(msg *zebra.Message) { + switch body := msg.Body.(type) { + case *zebra.IPRouteBody: + switch msg.Header.Command { + case zebra.RedistributeRouteAdd: + w.addRoute(body) + case zebra.RedistributeRouteDel: + w.deleteRoute(body) + } + case *zebra.RouterIDUpdateBody: + w.log.Debug("zapi: router-id update", "id", body.Prefix) + default: + // Ignore unhandled message types. + } +} +``` + +#### Route Tree Updates + +Route messages are transformed into YANG-compatible structures and written to the in-memory tree.
The `IPRouteBody` from gobgp's zebra package contains: + +| Field | Description | YANG mapping | +|-------|-------------|--------------| +| `Prefix` | Route prefix (net.IPNet) | `destination-prefix` | +| `Type` | Route type (kernel/connected/static/ospf/rip) | `source-protocol` | +| `Distance` | Administrative distance | `route-preference` (when supported) | +| `Metric` | Route metric | `metric` | +| `Nexthops` | Next-hop list (gateway + interface index) | `next-hop-list/next-hop` | +| `Flags` | Selected, installed, etc. | `active` leaf | + +```go +func (w *ZAPIWatcher) addRoute(body *zebra.IPRouteBody) { + rib := ribName(body.Prefix) // "ipv4-master" or "ipv6-master" + key := routeKey(body) // prefix + protocol composite key + + entry := transformRoute(body) // -> YANG-compatible JSON structure + w.tree.SetRoute(rib, key, entry) + + w.log.Debug("zapi: route add", "prefix", body.Prefix, + "type", body.Type, "nexthops", len(body.Nexthops), + "installed", body.IsInstalled()) +} + +func (w *ZAPIWatcher) deleteRoute(body *zebra.IPRouteBody) { + rib := ribName(body.Prefix) + key := routeKey(body) + + w.tree.DeleteRoute(rib, key) + + w.log.Debug("zapi: route del", "prefix", body.Prefix, + "type", body.Type) +} +``` + +#### Stale Route Cleanup on Reconnect + +When the connection to zebra is lost (zebra restart, socket error), all routes in the tree sourced from ZAPI become potentially stale. The watcher clears all ZAPI-sourced routes from the tree before reconnecting. Upon successful reconnection, zebra performs a full dump of all routes matching the subscribed types, which repopulates the tree with current data. + +```go +func (w *ZAPIWatcher) clearAllRoutes() { + w.tree.ClearRIB("ipv4-master") + w.tree.ClearRIB("ipv6-master") + w.log.Info("zapi: cleared stale routes from tree") +} +``` + +This full-replacement strategy is simpler and more reliable than mark-and-sweep. 
Since zebra's post-connection dump is complete (it sends every route matching the subscribed types), the tree converges to the correct state within seconds of reconnection. The brief window where routes are absent from the tree is acceptable because: + +1. RESTCONF/NETCONF clients querying during reconnection get an empty (but valid) routing table rather than stale data. +2. The reconnection window is short (typically under 1 second for a local Unix socket). +3. Zebra restarts are infrequent operational events, not steady-state behavior. + +#### Differences from Other Reactive Subsystems + +| Aspect | NLMonitor (netlink) | iw event | ethmonitor (genetlink) | ZAPI watcher | +|--------|-------------------|----------|----------------------|--------------| +| Implementation | Native Go netlink channels (`vishvananda/netlink`) | External subprocess | Native Go genetlink socket | Native Go Unix socket (`osrg/gobgp/v4/pkg/zebra`) | +| Output format | Typed Go structs (`LinkUpdate`, etc.) | Human-readable text | Binary genetlink messages | Typed Go structs (`IPRouteBody`, etc.) 
| +| Process management | Goroutine with channel re-subscribe on close | Persistent subprocess with restart | Goroutine -- no process to manage | Goroutine with reconnection and re-subscription | +| Failure mode | Channel close -> re-subscribe | Subprocess crash -> restart | Socket error -> reconnect | EOF -> clear routes -> reconnect with backoff | +| Event rate | High (100s/sec during convergence) | Low (single-digit/min) | Very low (link negotiation only) | Moderate (proportional to route churn) | +| Absence handling | Netlink always available (kernel 6.18) | Governed by `YANGERD_ENABLE_WIFI` flag | Always active (kernel 6.18) | Requires FRR zebra running; reconnects if absent | +| Data exclusivity | Supplements ip batch re-reads | Supplements iw queries | Supplements ethtool polling | **Sole source** for route table data | + +The key distinction from other subsystems is that the ZAPI watcher is the **sole source** for route table data. The NLMonitor, iw event monitor, and ethmonitor all supplement batch/polling collectors that perform the same queries. The ZAPI watcher fully replaces `vtysh` for route collection -- there is no parallel polling or batch query for routes. + +#### Lifecycle + +The `ZAPIWatcher` is created during yangerd initialization by calling `zapiwatcher.New()`. Its `Run()` method is started as a goroutine that blocks until context cancellation. If zebra is not yet running at startup (e.g., yangerd starts before FRR), the watcher's reconnection loop handles this transparently -- it retries with exponential backoff until zebra becomes available. + +On clean shutdown (context cancellation), the `processMessages` loop detects `ctx.Done()`, closes the zebra client, and the `Run()` goroutine returns. + +#### Concurrency Model + +The ZAPI watcher uses a single goroutine that reads from the `cli.Receive()` channel. 
Route messages are processed synchronously within this goroutine: each `REDISTRIBUTE_ROUTE_ADD` or `REDISTRIBUTE_ROUTE_DEL` triggers an immediate tree write. This sequential model is appropriate because: + +1. Route redistribution messages arrive at moderate rates (tens per second during convergence, single-digit per minute steady-state). +2. Tree writes are fast (in-memory map update under the per-model `sync.RWMutex` for `ietf-routing:routing`). +3. Sequential processing preserves route ordering semantics -- a delete followed by an add for the same prefix is applied in the correct order. + +The tree write from the ZAPI watcher goroutine and the tree reads from RESTCONF/NETCONF handlers are serialized by the per-model `sync.RWMutex` for the `ietf-routing:routing` key. No additional synchronization is needed. + +### 4.1novies D-Bus Monitor Subsystem + +The D-Bus Monitor Subsystem provides reactive monitoring of dnsmasq DHCP lease events and firewalld configuration reloads via D-Bus signal subscriptions. Instead of polling the DHCP lease file or periodically querying firewall state, `yangerd` subscribes to D-Bus signals emitted by these services and reacts immediately when state changes occur. This follows the same event-as-trigger pattern used by the netlink and bridge subsystems: the D-Bus signal is the notification mechanism, but the actual data is re-read from the canonical source (lease file and D-Bus method call for DHCP; firewalld D-Bus method calls for firewall). + +#### Why D-Bus Instead of inotify/Polling + +The previous design used `fswatcher` (inotify) for the dnsmasq lease file and polling for firewall state. D-Bus is superior for both cases: + +- **dnsmasq**: While inotify on `/var/lib/misc/dnsmasq.leases` works, dnsmasq explicitly provides D-Bus signals (`DHCPLeaseAdded`, `DHCPLeaseDeleted`, `DHCPLeaseUpdated`) designed for exactly this purpose. 
Using D-Bus signals rather than watching the file avoids race conditions where inotify fires before dnsmasq has finished writing the file, and provides semantic information (which lease changed) rather than just "file modified." +- **firewalld**: Firewall state is managed by firewalld and accessed via its D-Bus API. Because firewalld provides no file-based state representation, the only alternative to D-Bus signals would be periodic polling. D-Bus signals (`Reloaded`, plus `NameOwnerChanged` for restart detection) provide instant notification with zero steady-state CPU cost. On each signal, yangerd re-reads the full firewall state via firewalld D-Bus method calls (`getDefaultZone()`, `getActiveZones()`, `getZoneSettings2()`, `getPolicies()`, `getPolicySettings()`, `listServices()`, `getServiceSettings2()`, `getLogDenied()`, `queryPanicMode()`). + +#### DBusMonitor Implementation + +The following Go code defines the `DBusMonitor` type and its core event loop in `internal/dbusmonitor/dbusmonitor.go`: + +```go +package dbusmonitor + +import ( + "context" + "encoding/json" + "fmt" + "log/slog" + "math" + "os" + "os/exec" + "strings" + "time" + + "github.com/godbus/dbus/v5" + "github.com/kernelkit/infix/src/yangerd/internal/tree" +) + +const ( + // dnsmasq D-Bus constants + dnsmasqBusName = "uk.org.thekelleys.dnsmasq" + dnsmasqInterface = "uk.org.thekelleys.dnsmasq" + dnsmasqPath = "/uk/org/thekelleys/dnsmasq" + + // firewalld D-Bus constants + firewalldBusName = "org.fedoraproject.FirewallD1" + firewalldInterface = "org.fedoraproject.FirewallD1" + firewalldPath = "/org/fedoraproject/FirewallD1" + + // D-Bus standard interface for service lifecycle + dbusInterface = "org.freedesktop.DBus" + dbusPath = "/org/freedesktop/DBus" + + // Data sources + dnsmasqLeaseFile = "/var/lib/misc/dnsmasq.leases" + + // Tree keys + dhcpTreeKey = "infix-dhcp-server:dhcp-server" + firewallTreeKey = "infix-firewall:firewall" + + // Reconnection parameters + reconnectInitial = 100 * time.Millisecond +
reconnectMax = 30 * time.Second + reconnectFactor = 2.0 +) + +// DBusMonitor subscribes to D-Bus signals from dnsmasq and firewalld, +// using each signal as a trigger to re-read data from canonical sources. +type DBusMonitor struct { + tree *tree.Tree + log *slog.Logger +} + +func New(t *tree.Tree, log *slog.Logger) *DBusMonitor { + return &DBusMonitor{tree: t, log: log} +} +``` + +#### Signal Subscription + +The monitor subscribes to three categories of D-Bus signals using `AddMatchSignal()` match rules: + +| Signal | Interface | Source | Trigger Action | +|--------|-----------|--------|----------------| +| `DHCPLeaseAdded` | `uk.org.thekelleys.dnsmasq` | dnsmasq | Re-read lease file + `GetMetrics()` | +| `DHCPLeaseDeleted` | `uk.org.thekelleys.dnsmasq` | dnsmasq | Re-read lease file + `GetMetrics()` | +| `DHCPLeaseUpdated` | `uk.org.thekelleys.dnsmasq` | dnsmasq | Re-read lease file + `GetMetrics()` | +| `Reloaded` | `org.fedoraproject.FirewallD1` | firewalld | Re-read firewall state via firewalld D-Bus method calls | +| `NameOwnerChanged` | `org.freedesktop.DBus` | D-Bus daemon | Detect service restart; trigger full re-read | + +```go +func (m *DBusMonitor) subscribe(conn *dbus.Conn) error { + // Subscribe to dnsmasq DHCP lease signals. + if err := conn.AddMatchSignal( + dbus.WithMatchInterface(dnsmasqInterface), + dbus.WithMatchMember("DHCPLeaseAdded"), + ); err != nil { + return fmt.Errorf("dbus: match DHCPLeaseAdded: %w", err) + } + if err := conn.AddMatchSignal( + dbus.WithMatchInterface(dnsmasqInterface), + dbus.WithMatchMember("DHCPLeaseDeleted"), + ); err != nil { + return fmt.Errorf("dbus: match DHCPLeaseDeleted: %w", err) + } + if err := conn.AddMatchSignal( + dbus.WithMatchInterface(dnsmasqInterface), + dbus.WithMatchMember("DHCPLeaseUpdated"), + ); err != nil { + return fmt.Errorf("dbus: match DHCPLeaseUpdated: %w", err) + } + + // Subscribe to firewalld reload signal. 
+ if err := conn.AddMatchSignal( + dbus.WithMatchInterface(firewalldInterface), + dbus.WithMatchMember("Reloaded"), + ); err != nil { + return fmt.Errorf("dbus: match Reloaded: %w", err) + } + + // Subscribe to NameOwnerChanged for dnsmasq and firewalld restart detection. + if err := conn.AddMatchSignal( + dbus.WithMatchInterface(dbusInterface), + dbus.WithMatchMember("NameOwnerChanged"), + dbus.WithMatchArg(0, dnsmasqBusName), + ); err != nil { + return fmt.Errorf("dbus: match dnsmasq NameOwnerChanged: %w", err) + } + if err := conn.AddMatchSignal( + dbus.WithMatchInterface(dbusInterface), + dbus.WithMatchMember("NameOwnerChanged"), + dbus.WithMatchArg(0, firewalldBusName), + ); err != nil { + return fmt.Errorf("dbus: match firewalld NameOwnerChanged: %w", err) + } + + return nil +} +``` + +#### Main Run Loop with Reconnection + +The D-Bus monitor follows the same reconnection pattern as the ZAPI watcher (Section 4.1octies): exponential backoff from 100ms to 30s with a 2x factor. When the D-Bus connection drops, the monitor reconnects and re-subscribes to all signals. + +```go +// Run starts the D-Bus monitor. It blocks until ctx is cancelled. +// On disconnect, it reconnects with exponential backoff. +func (m *DBusMonitor) Run(ctx context.Context) error { + delay := reconnectInitial + + for { + conn, err := dbus.ConnectSystemBus() + if err != nil { + m.log.Warn("dbus: connect failed, retrying", + "error", err, "delay", delay) + select { + case <-ctx.Done(): + return ctx.Err() + case <-time.After(delay): + } + delay = time.Duration(math.Min( + float64(delay)*reconnectFactor, + float64(reconnectMax))) + continue + } + + // Reset backoff on successful connection. 
+ delay = reconnectInitial + + if err := m.subscribe(conn); err != nil { + m.log.Warn("dbus: subscribe failed", "error", err) + conn.Close() + continue + } + + m.log.Info("dbus: connected and subscribed", + "signals", "dnsmasq(3)+firewalld(1)+nameowner(2)") + + // Perform initial data load for both services. + m.refreshDHCP(conn) + m.refreshFirewall(conn) + + // Process signals until disconnect. + m.processSignals(ctx, conn) + + conn.Close() + m.log.Warn("dbus: disconnected, reconnecting") + } +} +``` + +#### Signal Processing + +Incoming D-Bus signals are dispatched based on interface and member name. The `NameOwnerChanged` signal carries three string arguments: the bus name, the old owner, and the new owner. When the new owner is empty, the service has stopped; when the old owner is empty, the service has started. + +```go +func (m *DBusMonitor) processSignals(ctx context.Context, conn *dbus.Conn) { + ch := make(chan *dbus.Signal, 32) + conn.Signal(ch) + defer conn.RemoveSignal(ch) + + for { + select { + case <-ctx.Done(): + return + case sig, ok := <-ch: + if !ok { + return // D-Bus connection lost + } + m.handleSignal(conn, sig) + } + } +} + +func (m *DBusMonitor) handleSignal(conn *dbus.Conn, sig *dbus.Signal) { + switch sig.Name { + case dnsmasqInterface + ".DHCPLeaseAdded", + dnsmasqInterface + ".DHCPLeaseDeleted", + dnsmasqInterface + ".DHCPLeaseUpdated": + m.log.Debug("dbus: dnsmasq lease event", "signal", sig.Name) + m.refreshDHCP(conn) + + case firewalldInterface + ".Reloaded": + m.log.Debug("dbus: firewalld reloaded") + m.refreshFirewall(conn) + + case dbusInterface + ".NameOwnerChanged": + if len(sig.Body) < 3 { + return + } + name, _ := sig.Body[0].(string) + oldOwner, _ := sig.Body[1].(string) + newOwner, _ := sig.Body[2].(string) + + switch name { + case dnsmasqBusName: + if oldOwner == "" && newOwner != "" { + m.log.Info("dbus: dnsmasq started") + m.refreshDHCP(conn) + } else if oldOwner != "" && newOwner == "" { + m.log.Info("dbus: dnsmasq 
stopped") + m.tree.Set(dhcpTreeKey, json.RawMessage(`{}`)) + } + case firewalldBusName: + if oldOwner == "" && newOwner != "" { + m.log.Info("dbus: firewalld started") + m.refreshFirewall(conn) + } else if oldOwner != "" && newOwner == "" { + m.log.Info("dbus: firewalld stopped") + m.tree.Set(firewallTreeKey, json.RawMessage(`{}`)) + } + } + } +} +``` + +#### Data Refresh Functions + +Each refresh function re-reads data from the canonical source. For DHCP, this involves two operations: parsing the lease file and querying dnsmasq metrics via a D-Bus method call. For the firewall, this queries firewalld via D-Bus method calls to retrieve zones, policies, services, and global settings. + +```go +func (m *DBusMonitor) refreshDHCP(conn *dbus.Conn) { + // 1. Re-read the lease file. + leaseData, err := os.ReadFile(dnsmasqLeaseFile) + if err != nil { + m.log.Warn("dbus: read lease file", "error", err) + return + } + leases := parseDnsmasqLeases(string(leaseData)) + + // 2. Query dnsmasq DHCP metrics via D-Bus method call. + obj := conn.Object(dnsmasqBusName, dnsmasqPath) + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) + defer cancel() + var metrics map[string]uint64 + if err := obj.CallWithContext(ctx, dnsmasqInterface+".GetMetrics", 0).Store(&metrics); err != nil { + m.log.Warn("dbus: GetMetrics call failed", "error", err) + // Continue with lease data only; metrics are supplementary. + } + + // 3. Combine leases and metrics into YANG-compatible JSON. + result := buildDHCPTree(leases, metrics) + m.tree.Set(dhcpTreeKey, result) + m.log.Debug("dbus: DHCP tree updated", "leases", len(leases)) +} + +func (m *DBusMonitor) refreshFirewall(conn *dbus.Conn) { + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + obj := conn.Object(firewalldBusName, firewalldPath) + + // 1. Query global firewall state. 
+ var defaultZone string + if err := obj.CallWithContext(ctx, firewalldInterface+".getDefaultZone", 0).Store(&defaultZone); err != nil { + m.log.Warn("dbus: firewalld getDefaultZone", "error", err) + return + } + var logDenied string + obj.CallWithContext(ctx, firewalldInterface+".getLogDenied", 0).Store(&logDenied) + var panicMode bool + obj.CallWithContext(ctx, firewalldInterface+".queryPanicMode", 0).Store(&panicMode) + + // 2. Query active zones and per-zone settings. + zoneObj := conn.Object(firewalldBusName, firewalldPath) + var activeZones map[string]interface{} + zoneObj.CallWithContext(ctx, firewalldInterface+".zone.getActiveZones", 0).Store(&activeZones) + zoneSettings := make(map[string]interface{}) + for name := range activeZones { + var settings interface{} + zoneObj.CallWithContext(ctx, firewalldInterface+".zone.getZoneSettings2", 0, name).Store(&settings) + zoneSettings[name] = settings + } + + // 3. Query policies. + var policies []string + obj.CallWithContext(ctx, firewalldInterface+".policy.getPolicies", 0).Store(&policies) + policySettings := make(map[string]interface{}) + for _, name := range policies { + var settings interface{} + obj.CallWithContext(ctx, firewalldInterface+".policy.getPolicySettings", 0, name).Store(&settings) + policySettings[name] = settings + } + + // 4. Query services. + var services []string + obj.CallWithContext(ctx, firewalldInterface+".listServices", 0).Store(&services) + serviceSettings := make(map[string]interface{}) + for _, name := range services { + var settings interface{} + obj.CallWithContext(ctx, firewalldInterface+".getServiceSettings2", 0, name).Store(&settings) + serviceSettings[name] = settings + } + + // 5. Build YANG-compatible JSON tree from all firewalld data. 
+ result := buildFirewallTree(defaultZone, logDenied, panicMode, + zoneSettings, policySettings, serviceSettings) + m.tree.Set(firewallTreeKey, result) + m.log.Debug("dbus: firewall tree updated", "zones", len(zoneSettings), + "policies", len(policySettings), "services", len(serviceSettings)) +} +``` + +#### NameOwnerChanged Handling + +The `NameOwnerChanged` signal from the D-Bus daemon provides service lifecycle detection without polling. When dnsmasq or firewalld restarts, the D-Bus daemon emits this signal with the bus name, old owner (empty if service just appeared), and new owner (empty if service just disappeared). This allows yangerd to: + +- **Service start**: Perform a full data refresh immediately, ensuring the tree is populated even if signals were missed during the restart window. +- **Service stop**: Clear the relevant tree key, presenting an empty (but valid) subtree to RESTCONF/NETCONF clients rather than stale data. + +This is analogous to the ZAPI watcher's `clearAllRoutes()` on zebra disconnect (Section 4.1octies): the tree reflects the actual service state, not cached data from a previous service instance. + +#### Differences from Other Reactive Subsystems + +| Aspect | NLMonitor (netlink) | ZAPI watcher | D-Bus Monitor | +|--------|-------------------|--------------|---------------| +| Implementation | Native Go netlink channels (`vishvananda/netlink`) | Native Go Unix socket (`osrg/gobgp/v4/pkg/zebra`) | Native Go D-Bus (`godbus/dbus/v5`) | +| Event source | Kernel multicast groups | Zebra redistribution messages | Userspace service signals | +| Signal semantics | Low-level (RTM_NEWLINK, etc.) 
| Protocol-level (route add/del) | Application-level (lease added, config reloaded) | +| Data re-read | `ip -json -batch -` subprocess | Direct from ZAPI message body | Lease file + D-Bus method call (DHCP); firewalld D-Bus method calls (firewall) | +| Failure mode | Channel close -> re-subscribe | EOF -> clear routes -> reconnect | Connection lost -> reconnect with backoff | +| Service absence | Always available (kernel) | Requires FRR zebra | Requires dnsmasq/firewalld; tree cleared when absent | +| Data exclusivity | Supplements ip batch re-reads | Sole source for routes | Sole source for DHCP leases and firewall state | + +#### Concurrency Model + +The D-Bus monitor runs as a single goroutine executing the `Run()` event loop. All incoming D-Bus signals are processed sequentially within this loop. The `refreshDHCP()` and `refreshFirewall()` functions are called synchronously from the signal handler. Tree writes are serialized by the per-model `sync.RWMutex` for `infix-dhcp-server:dhcp-server` and `infix-firewall:firewall` respectively. No additional synchronization is needed. + +Signal processing is fast (file read + parse, or D-Bus method calls to firewalld), so sequential processing does not introduce meaningful latency. If multiple lease events arrive in rapid succession, each triggers a full re-read; this is acceptable because lease file parsing is inexpensive and the tree converges to the correct state after the final event. + +### 4.1decies LLDP Monitor Subsystem + +The LLDP monitor provides **reactive** LLDP neighbor updates by running a persistent `lldpcli -f json0 watch` subprocess. This replaces periodic `lldpctl -f json` polling. The monitor follows the same lifecycle pattern as `IWMonitor`: long-lived subprocess, stdout parsing loop, exponential backoff restart, and event-triggered tree replacement. 
+ +#### Command and Output Contract + +- Command: `lldpcli -f json0 watch` (**`-f json0` before `watch`**) +- Output framing: pretty-printed JSON objects separated by a blank line (`\n\n`) +- Event roots: `lldp-added`, `lldp-updated`, `lldp-deleted` +- Payload: each event object contains full neighbor data (not a delta patch) +- `json0` guarantees stable structure (arrays stay arrays) + +Unlike NDJSON, each event is multi-line JSON. Framing must therefore use blank-line delimiters or brace-depth counting; single-line splitting is incorrect. + +#### Framing Strategy + +`internal/lldpmonitor/monitor.go` reads stdout as a stream and accumulates bytes until an object boundary is detected: + +1. Preferred: split on `\n\n` (lldpcli watch object separator) +2. Defensive fallback: brace-depth counter for malformed/partial separators +3. Parse each complete object via `json.Unmarshal` +4. Dispatch by root key (`lldp-added` / `lldp-updated` / `lldp-deleted`) + +Each event triggers full in-memory LLDP subtree regeneration for `ieee802-dot1ab-lldp:lldp` from the watch payload, preserving list shape and RFC7951 key structure. + +#### Failure and Restart Behavior + +If `lldpd` is restarted or the subprocess exits, LLDPMonitor logs WARN, restarts `lldpcli -f json0 watch` with exponential backoff (100ms → 30s, factor 2x), and rebuilds state from subsequent watch events. During restart windows, the previous LLDP subtree remains served as last-known-good data. + +### 4.1undecies mDNS Monitor Subsystem + +The mDNS monitor provides **reactive** updates for `/infix-services:mdns/neighbors` using Avahi's D-Bus API (`org.freedesktop.Avahi`). This is a migration of `src/statd/avahi.c` behavior into pure Go. + +#### Why D-Bus (not libavahi-client) + +`yangerd` is pure Go (no CGo), so linking `libavahi-client` is not allowed. 
Avahi already exposes complete browsing/resolution via D-Bus signals and objects: + +- `ServiceTypeBrowser` +- `ServiceBrowser` +- `ServiceResolver` + +`internal/mdnsmonitor/` uses `godbus/dbus/v5` (already present for DBusMonitor) to subscribe to Avahi events and resolve service instances. + +#### Data Model Mapping + +The monitor writes: + +- Path: `/infix-services:mdns/neighbors` +- Keys: `neighbor/hostname`, nested `service/name` +- Leaves: `hostname`, `address` (leaf-list), `last-seen`, `service/name`, `service/type`, `service/port`, `service/txt` (leaf-list) + +On add/update/remove signals, the affected neighbor/service entries are rebuilt and the `infix-services:mdns` subtree is atomically replaced. + +#### Alternative Considered + +Pure Go mDNS libraries (`hashicorp/mdns`, `brutella/dnssd`) are possible, but Avahi D-Bus is preferred because Avahi is already running on target systems and is the canonical system mDNS authority. + +### 4.1septies Event-Triggered Batch Re-read Pattern (All Netlink Events) + +This section documents the unified pattern used by all netlink event handlers for **link, address, and neighbor** events: **every event (both add and remove) triggers a full re-read of the affected state via ip batch**. Events are received as typed Go structs on `vishvananda/netlink` channels (`LinkUpdate`, `AddrUpdate`, `NeighUpdate`). The event itself is used only as a trigger -- its content is not parsed for data. Route data is sourced exclusively from the ZAPI watcher's streaming connection to zebra (Section 4.1octies) and is not part of this pattern -- yangerd does not subscribe to netlink route groups. This design is driven by two observations: + +1. **Partial state updates lead to inconsistency.** If the event handler only queries the single attribute that changed (e.g., oper-status from an RTM_NEWLINK, or one address from an RTM_NEWADDR), other attributes of the same entity may be from a different point in time. 
By re-reading the full state for the affected scope, all data in the tree is coherent. + +2. **Delete events require re-reading, not surgical removal.** Parsing RTM_DEL* events to determine exactly which subtree entry to remove is complex and fragile. Instead, re-reading the full state after a delete naturally produces the correct result without the deleted entry. + +For link events specifically, there is a third driver: + +3. **The ethtool genetlink monitor (`ETHNL_MCGRP_MONITOR`) does NOT fire on link up/down.** When a physical link transitions (cable plugged/unplugged, carrier lost/restored), the kernel negotiates speed, duplex, and auto-negotiation with the link partner. However, this negotiation does not produce `ETHTOOL_MSG_LINKINFO_NTF` or `ETHTOOL_MSG_LINKMODES_NTF` messages. The link handler must explicitly re-query ethtool settings. + +When the NLMonitor's select loop receives a netlink event from any subscription channel, the re-read scope depends on the event type: + +``` +=== Link Event (RTM_NEWLINK / RTM_DELLINK) === +Step 1: Write three queries to ip batch stdin (full interface re-read) + link show dev <ifname> -> link state (flags, MTU, operstate, qdisc, ...)
+ -s link show dev <ifname> -> link state + hardware counters (rx/tx bytes/packets/errors) + addr show dev <ifname> -> all IPv4/IPv6 addresses on this interface +Step 2: Read three JSON array responses from ip batch stdout +Step 3: tree.Set("/ietf-interfaces:.../interface[name='<ifname>']", linkData) + tree.Set(".../<ifname>/statistics", statsData) + tree.Set(".../<ifname>/addresses", addrData) +Step 4: ethmonitor.RefreshInterface("<ifname>") -> re-query speed/duplex/autoneg +Step 5: If oper-status changed: record time.Now() as last-change + +=== Address Event (RTM_NEWADDR / RTM_DELADDR) === +Step 1: Write one query to ip batch stdin + addr show dev <ifname> -> all addresses on this interface +Step 2: Read one JSON array response +Step 3: tree.Set(".../<ifname>/addresses", addrData) + +=== Neighbor Event (RTM_NEWNEIGH / RTM_DELNEIGH) === +Step 1: Write one query to ip batch stdin + neigh show dev <ifname> -> all neighbors on this interface +Step 2: Read one JSON array response +Step 3: tree.Set(".../<ifname>/neighbors", neighData) +``` + +#### Why Full Re-read Instead of Targeted Queries + +The alternative — parsing the event content to extract the changed data and applying it surgically to the tree — has three problems: + +1. **Netlink events carry typed Go structs, not raw data.** The `vishvananda/netlink` channels deliver `LinkUpdate`, `AddrUpdate`, `NeighUpdate` structs. While these contain parsed netlink attributes, they do not reliably indicate which fields changed. RTM_NEWLINK fires for many reasons (oper-status, MTU, flags, master, alias). RTM_NEWADDR/RTM_DELADDR carry the affected address, but the full address set may have other concurrent changes (e.g., IPv6 DAD state transitions). A full re-read is more reliable than trying to reconstruct state from individual update structs. + +2. **Point-in-time consistency.** A full re-read ensures all data for the affected scope (interface, address set, neighbor table) is from a single coherent point in time. + +3.
**Simplicity.** Batch queries are cheap (microseconds over a local stdin/stdout pipe). The complexity of transforming each netlink update struct into a partial tree mutation and applying it surgically would be significantly higher and more fragile than a blanket re-read. Delete handling is especially simplified -- no need to construct the exact tree key from event attributes. +#### Event Rate and Debouncing + +On a typical Infix system, netlink events arrive at single-digit rates per second under normal operation. During convergence events (e.g., STP topology change, link aggregation failover), rates can spike to hundreds per second. Since events arrive on Go channels rather than subprocess stdout, channel buffer capacity provides implicit backpressure. For link/addr/neigh events, the batch re-read approach generates at most 3 ip batch queries per event (for link events; 1 for addr/neigh), which is well within the capacity of the persistent `ip -json -force -batch -` subprocess. + +If event storms are detected (e.g., the same interface generating multiple events of the same type within a 10ms window), per-entity debouncing is applied: only the last event in the window triggers a re-read. Debouncing is per-interface for link, addr, and neigh events. 
+ +#### Interaction with Other Subsystems + +| Event Type | ip batch Queries | Additional Triggers | Debounce Key | +|------------|-----------------|---------------------|--------------| +| RTM_NEWLINK / RTM_DELLINK | 3 (link + stats + addr) | `ethmonitor.RefreshInterface()` + last-change | per-interface | +| RTM_NEWADDR / RTM_DELADDR | 1 (addr show dev) | None | per-interface | +| RTM_NEWNEIGH / RTM_DELNEIGH | 1 (neigh show dev) | None | per-interface | + +Subsystems NOT affected by the NLMonitor's netlink channels: +- **bridge batch** -- bridge state queries use a separate `bridge -json -batch -` subprocess; bridge events arrive on the NLMonitor's existing channels (FDB via `neighCh`, VLAN via `linkCh`, MDB via raw netlink) +- **iw event** -- WiFi events use a separate `iw event` subprocess; unrelated to the NLMonitor's netlink channels +- **fswatcher** -- file events are independent of netlink +- **ethmonitor** -- has its own genetlink subscription; only cross-triggered by link events via `RefreshInterface()` +- **ZAPI watcher** -- route data is sourced from zebra's zserv socket via the ZAPI watcher subsystem (Section 4.1octies); independent of NLMonitor's netlink channels +### 4.2 In-Memory Data Tree + +#### 4.2.1 Design Rationale +- **Pre-serialized JSON:** Trading write-time CPU for zero-allocation, zero-copy reads. +- **Subtree Replacement:** Each update replaces only the affected module's JSON blob. +- **Per-Model RWMutex:** Each YANG module key has its own `sync.RWMutex`, so writers for different modules never block each other and readers only contend with writers of the same module. A top-level `sync.RWMutex` protects the models map structure itself (new key insertion). + +#### 4.2.2 Core Tree Type +```go +// internal/tree/tree.go + +// modelEntry holds a single YANG module's pre-serialized JSON blob +// and its own read-write mutex. 
+type modelEntry struct { + mu sync.RWMutex + data json.RawMessage + updated time.Time +} + +// OnDemandFunc is called on every Get()/GetMulti() for keys that have a +// registered provider. The returned JSON is shallow-merged with the cached +// data so that live fields (e.g. clock, resource-usage) override stale +// cached values while preserving fields that are only set by collectors. +type OnDemandFunc func() json.RawMessage + +// Tree holds the operational YANG data in per-module JSON blobs. +// Each module key has its own sync.RWMutex, so writers for different +// modules never block each other. +// All methods are safe for concurrent use. +type Tree struct { + mu sync.RWMutex + models map[string]*modelEntry + providers map[string]OnDemandFunc +} + +func New() *Tree { + return &Tree{ + models: make(map[string]*modelEntry), + providers: make(map[string]OnDemandFunc), + } +} + +// RegisterProvider installs an on-demand function for the given tree key. +// On every Get()/GetMulti() call for this key, the provider is invoked +// and its output is shallow-merged with the cached entry data. +func (t *Tree) RegisterProvider(key string, fn OnDemandFunc) { + t.mu.Lock() + t.providers[key] = fn + t.mu.Unlock() +} + +// Set replaces the entire subtree at the given YANG module key. +// Only the target module's write lock is held; other modules remain +// readable and writable. +func (t *Tree) Set(key string, v json.RawMessage) { + t.mu.RLock() + entry, ok := t.models[key] + t.mu.RUnlock() + if !ok { + t.mu.Lock() + entry, ok = t.models[key] + if !ok { + entry = &modelEntry{} + t.models[key] = entry + } + t.mu.Unlock() + } + entry.mu.Lock() + entry.data = v + entry.updated = time.Now() + entry.mu.Unlock() +} + +// Get returns the raw JSON for the given module key. +// Only the target module's read lock is held. 
+func (t *Tree) Get(key string) json.RawMessage { + t.mu.RLock() + entry, ok := t.models[key] + provider := t.providers[key] + t.mu.RUnlock() + if !ok { + return nil + } + entry.mu.RLock() + data := entry.data + entry.mu.RUnlock() + if provider == nil { + return data + } + return shallowMerge(data, provider()) +} + +// GetMulti returns the concatenated raw JSON for multiple module keys. +// Each module's read lock is acquired and released individually. +func (t *Tree) GetMulti(keys []string) []json.RawMessage { + result := make([]json.RawMessage, 0, len(keys)) + t.mu.RLock() + defer t.mu.RUnlock() + for _, key := range keys { + entry, ok := t.models[key] + if !ok { + continue + } + provider := t.providers[key] + entry.mu.RLock() + data := entry.data + entry.mu.RUnlock() + if provider != nil { + data = shallowMerge(data, provider()) + } + result = append(result, data) + } + return result +} +``` + +#### 4.2.3 Shallow Merge +The `shallowMerge` helper performs a single-level merge of two JSON objects. It is used to combine static cached data with live on-demand fields. + +```go +func shallowMerge(base, override json.RawMessage) json.RawMessage { + if len(override) == 0 || string(override) == "null" { + return base + } + if len(base) == 0 || string(base) == "null" { + return override + } + var baseMap, overrideMap map[string]json.RawMessage + if err := json.Unmarshal(base, &baseMap); err != nil { + return base + } + if err := json.Unmarshal(override, &overrideMap); err != nil { + return base + } + for k, v := range overrideMap { + baseMap[k] = v + } + merged, _ := json.Marshal(baseMap) + return merged +} +``` + +#### 4.2.4 Update Strategy +Each monitor maintains its own in-memory Go struct and re-serializes the entire module subtree to JSON on each update to ensure consistency.
+ + +```go +func (m *LinkMonitor) updateTree(link netlink.Link) { + m.mu.Lock() + m.ifaces[link.Attrs().Name] = linkToInterface(link) + raw, _ := json.Marshal(m.buildInterfacesTree()) + m.mu.Unlock() + m.tree.Set("ietf-interfaces:interfaces", raw) +} +``` + +#### 4.2.5 Memory Bounds + +The in-memory tree has no hard size cap by default — in typical deployments, the total tree size is under 1 MiB. However, to guard against pathological cases (e.g., an extremely large routing table or a runaway collector producing oversized JSON), the following safeguards apply: + +- **Per-model size limit**: Each `tree.Set()` call checks the size of the incoming `json.RawMessage`. If it exceeds `YANGERD_MAX_MODEL_BYTES` (default: 16 MiB), the update is rejected, the previous value is retained, and a warning is logged. This prevents a single collector from consuming unbounded memory. +- **Total tree size monitoring**: The health endpoint reports `size_bytes` per model and the aggregate total. Operators can monitor this via `yangerctl health` or automated checks. +- **No backpressure to kernel**: Netlink events are never dropped intentionally by yangerd (the kernel drops on ENOBUFS). Tree writes are fast (mutex + pointer swap), so memory pressure does not create backpressure in the event pipeline. + + +### 4.3 IPC Protocol Specification + +#### 4.3.1 Transport +`AF_UNIX SOCK_STREAM` at `/run/yangerd.sock`. Permissions `0660`, owned by `root:yangerd`. + +#### 4.3.2 Framing +1-byte protocol version + 4-byte big-endian length header + JSON body. The version field enables future protocol changes without ambiguity. Version `1` is the initial release. + +``` ++--------+--------+--------+--------+--------+------- ... -------+ +| ver(1) | length (uint32 big-endian, bytes) | JSON body | ++--------+--------+--------+--------+--------+------- ...
-------+ +``` + +#### 4.3.3 Request Schema +```json +{ + "method": "get", + "path": "/ietf-interfaces:interfaces", + "filter": {"name": "eth0"} +} +``` + +| Field | Type | Required | Description | +|-------|------|----------|-------------| +| `method` | string | yes | `"get"` or `"health"` | +| `path` | string | yes (get) | YANG module-qualified path | +| `filter` | object | no | Key-value map selecting a single list entry | + +**Subcommand-to-IPC mapping**: The `yangerctl` CLI subcommands map to IPC requests as follows: +- `yangerctl get <path>` → `{"method": "get", "path": "<path>"}` +- `yangerctl health` → `{"method": "health"}` +- `yangerctl dump` → `{"method": "get", "path": "/"}` (root path returns all models) +- `yangerctl watch <path>` → Client-side polling loop: repeated `{"method": "get", "path": "<path>"}` requests at 1-second intervals with client-side diff. There is no server-side subscription or push mechanism. + +#### 4.3.4 Response Schema +**Success:** +```json +{"status": "ok", "data": { "module:node": { ...
} }} +``` + +**Error:** +```json +{"status": "error", "code": 404, "message": "..."} +``` + +#### 4.3.5 Health Response Schema + +The `health` method returns per-subsystem status and per-model freshness data: + +```json +{ + "status": "ok", + "subsystems": { + "nlmonitor": {"state": "running", "restarts": 0}, + "ipbatch": {"state": "running", "pid": 1234, "restarts": 0}, + "bridgebatch": {"state": "restarting", "pid": null, "restarts": 2, "backoff_ms": 400}, + "zapiwatcher": {"state": "running", "restarts": 0}, + "ethmonitor": {"state": "running"}, + "fswatcher": {"state": "running", "watches": 12}, + "dbusmonitor": {"state": "running"}, + "iwmonitor": {"state": "disabled"} + }, + "models": { + "ietf-interfaces:interfaces": {"last_updated": "2026-03-04T12:34:56Z", "size_bytes": 8192}, + "ietf-routing:routing": {"last_updated": "2026-03-04T12:34:55Z", "size_bytes": 2048}, + "ietf-hardware:hardware": {"last_updated": "2026-03-04T12:34:50Z", "size_bytes": 1024} + } +} +``` + +| Field | Type | Description | +|-------|------|-------------| +| `subsystems` | object | Per-subsystem status. Keys match internal package names. | +| `subsystems.*.state` | string | `"running"`, `"restarting"`, `"failed"`, or `"disabled"` | +| `subsystems.*.restarts` | int | Cumulative restart count since daemon start | +| `subsystems.*.pid` | int/null | PID of managed subprocess (ip batch, bridge batch, iw event); null during restart | +| `subsystems.*.backoff_ms` | int | Current backoff delay in milliseconds (only present during restart) | +| `subsystems.*.watches` | int | Number of active inotify watches (fswatcher only) | +| `models` | object | Per-model tree metadata. Keys are YANG module-qualified names. 
| +| `models.*.last_updated` | string | ISO 8601 timestamp of the last `tree.Set()` call for this model | +| `models.*.size_bytes` | int | Size in bytes of the stored `json.RawMessage` | + + +### 4.4 Supplementary Collectors + +#### 4.4.1 Interface +```go +type Collector interface { + Name() string + Interval() time.Duration + Collect(ctx context.Context, tree *tree.Tree) error +} +``` + +#### 4.4.2 Failure Philosophy +- Never panic. +- Log at WARN. +- Retain stale data. + +**Exceptions — intentional clearing**: Two subsystems intentionally clear their tree keys instead of retaining stale data: +- **ZAPI watcher** (routes): On zebra disconnect, the route subtree is cleared immediately. Stale routes from a previous zebra session could direct traffic to non-existent next-hops; serving no routes is safer than serving wrong routes. Routes are repopulated atomically on reconnect via a full RIB dump. +- **D-Bus monitor** (DHCP, firewall): When dnsmasq or firewalld stops (detected via `NameOwnerChanged`), the corresponding tree key is set to `{}`. A stopped service has no active leases or rules; retaining data from the previous instance would misrepresent the system state. + +In all other failure modes (collector timeouts, parse errors, subprocess restarts), stale data is retained. + +#### 4.4.3 Detailed Collector Specifications + +The following collectors handle operational data not exposed via Linux netlink multicast groups and not handled by the bridge reactive subsystem (Section 4.4.3 item 1). Each collector runs in its own goroutine on a fixed polling interval. + +##### 1. Bridge Data (Reactive via Netlink + `bridge -json -batch -`) + +Bridge data collection is **fully reactive** — there is no polling collector for bridge state. All bridge data updates are driven by kernel netlink events that trigger re-queries via the persistent `bridge -json -batch -` subprocess. This follows the same event-as-trigger pattern used for link, address, and neighbor data. 
+ +**FDB (Forwarding Database)**: FDB entries arrive as `NeighUpdate` events on the neighbor channel (entries with `NDA_MASTER` flag are bridge FDB, not ARP/NDP). Each event triggers `fdb show br ` via bridge batch to re-read the full FDB for the affected bridge. + +**VLAN membership**: VLAN changes arrive as `LinkUpdate` events on the link channel (bridge VLAN attributes on link update messages). Each event triggers `vlan show` via bridge batch. + +**MDB (Multicast Database)**: MDB events (`RTM_NEWMDB`, `RTM_DELMDB`) arrive via a raw netlink socket subscribed to `RTNLGRP_MDB` (group 26). Each event triggers `mdb show` via bridge batch. + +**STP port state**: STP port state changes arrive as `RTM_NEWLINK` events carrying `IFLA_BRPORT_STATE` in `IFLA_PROTINFO`. The link event handler detects bridge port events and triggers a bridge batch re-query. STP root and topology-change data are not proactively notified by the kernel (`br_root_selection()` does not call `br_ifinfo_notify`), so these are re-read from the bridge device via batch whenever a port state change event is received. + +**Data source**: `bridge -json -batch -` (persistent subprocess) — commands written to stdin include `fdb show br `, `vlan show`, `mdb show`, and per-bridge STP state queries. +**Failure behavior**: Log warning; retain stale bridge data in tree (except on persistent `bridge` subprocess crash, where data is cleared after 3 restart attempts). +**Writes to**: `ietf-interfaces:interfaces`. +##### 2. WiFi Collector (`internal/collector/wifi.go`) — Feature-Gated +**Collects**: SSID, BSSID, channel, frequency (MHz), bitrate (Mbps), signal strength (dBm), RX/TX speed, scan results, and a list of associated stations with per-station TX/RX statistics. 
+**Sources**: +- `exec iw dev info` — interface-level parameters (SSID, channel, frequency, interface mode AP/station) +- `exec iw dev link` (via `iw.py link `) — station-mode link info including **signal strength in dBm**, connected SSID, RX/TX speed. This is the **only reliable source** for WiFi signal strength on modern cfg80211/nl80211 drivers; `/proc/net/wireless` is empty on these drivers. +- `exec iw dev station dump` — per-station statistics (AP mode: connected clients; station mode: single entry with detailed stats) +- `exec wpa_cli -i scan_result` — available network scan results from wpa_supplicant (station mode only) +**Interval**: 10 seconds for polling path; reactive re-queries on `iw event` triggers (`connected`, `disconnected`, `new station`, `ch_switch_started_notify`). +**Failure behavior**: Log warning; write an empty station list. Common failure causes: interface is down, or the interface is not a wireless interface. Virtual interfaces return `ENODEV` from `iw`; these are silently skipped. (Note: On `iw event` monitor disconnection, the WiFi subtree is NOT cleared — stale link data is retained). +**Writes to**: `ietf-interfaces:interfaces`. +**Feature gate**: `YANGERD_ENABLE_WIFI=true`. When WiFi support is not included in the Infix build, this collector and the IW Event Monitor are not started. When enabled, `iw` and `wpa_cli` are guaranteed present on the target. + +##### 3. Ethtool Collector (`internal/collector/ethtool.go`) — Hybrid Reactive/Polling +**Collects**: Link speed (Mbps), duplex mode (`half`/`full`), auto-negotiation state (`enabled`/`disabled`), advertised link modes, and extended per-group hardware statistics (eth-mac, rmon counters). +**Sources**: A hybrid of two mechanisms: +- **Reactive (settings)**: The `internal/ethmonitor/` package subscribes to the kernel's `ETHNL_MCGRP_MONITOR` genetlink multicast group. 
When the kernel emits `ETHTOOL_MSG_LINKINFO_NTF` or `ETHTOOL_MSG_LINKMODES_NTF` notifications (e.g., after link renegotiation), the ethmonitor re-queries speed, duplex, and auto-negotiation via `ethtool.Client.LinkInfo()` and `ethtool.Client.LinkMode()` and writes the updated values to the tree immediately. +- **Polling (statistics)**: Hardware counters (FramesTransmittedOK, FrameCheckSequenceErrors, OctetsReceivedOK, etc.) have no kernel notification mechanism — there is no `ETHTOOL_MSG_STATS_NTF`. These are polled every 30 seconds via `ethtool.Client.Stats()`. +**Interval**: Polling at 30 seconds for statistics only. Speed, duplex, and auto-negotiation are updated reactively via ethmonitor (no polling needed). +**Failure behavior**: Virtual interfaces, tunnel interfaces, and loopback return `ENOTSUP` from the ethtool generic netlink family. These are silently skipped — no warning is logged for `ENOTSUP`. Unexpected errors (permission denied, kernel bug) are logged at WARN. +**Writes to**: `ietf-interfaces:interfaces` — `infix-ethernet-interface` augment subtrees under each physical Ethernet interface. + +##### 4. WireGuard Collector (`internal/collector/wireguard.go`) +**Collects**: Per-peer statistics for all WireGuard interfaces: public key, endpoint IP:port, allowed IPs, time of latest handshake, received bytes, and transmitted bytes. +**Sources**: `golang.zx2c4.com/wireguard/wgctrl` — reads via WireGuard generic netlink (`WG_CMD_GET_DEVICE`) without requiring the `wg` CLI tool. +**Interval**: 30 seconds. +**Failure behavior**: If the WireGuard kernel module is not loaded, `wgctrl.New()` returns an error at daemon startup and the collector is disabled. If the module is loaded but a specific interface has been deleted between polls, log at WARN and skip that interface. +**Writes to**: `ietf-interfaces:interfaces`. + +##### 5. 
Route Table Collector (`internal/zapiwatcher/`) -- Reactive via ZAPI Streaming +**Collects**: Complete IPv4 and IPv6 routing tables (RIBs) from FRRouting, including all route types: kernel, connected, static, OSPF-learned, RIP-learned. Each route includes destination prefix, source protocol, administrative distance, metric, next-hops (with outgoing interface and gateway address), and active/installed flags. Includes routes in zebra's RIB that are NOT installed in the Linux kernel FIB (unresolvable next-hops, routes that lost admin-distance election, ECMP overflow, table-map filtered). +**Sources**: +- ZAPI v6 streaming connection to zebra via `/var/run/frr/zserv.api` Unix domain socket +- `REDISTRIBUTE_ROUTE_ADD` and `REDISTRIBUTE_ROUTE_DEL` messages from zebra for subscribed route types (kernel, connected, static, OSPF, RIP) +**Trigger**: Streaming -- no trigger needed. The ZAPI watcher receives incremental route updates as they occur in zebra's RIB. Upon initial connection, zebra sends a full dump of all routes matching the subscribed redistribution types. This replaces the previous `vtysh`-based approach where netlink route events (RTM_NEWROUTE/RTM_DELROUTE) were used as triggers for `vtysh` re-reads. See Section 4.1octies for the full ZAPI watcher design. +**Initial startup**: The ZAPI watcher connects to zebra and subscribes to redistribution. Zebra responds with a full dump of all matching routes, populating the tree before the NLMonitor's select loop begins processing events. +**Failure behavior**: If zebra is not running (socket absent), the watcher retries with exponential backoff (100ms initial, 30s max). Routes are cleared from the tree immediately upon ZAPI disconnection to prevent serving stale routing data. On reconnect, the full RIB dump repopulates the subtree. 
+**Writes to**: `ietf-routing:routing/ribs` (shared tree — routes only; ARP/NDP neighbors under `ietf-routing:routing` are written by the NLMonitor's neighbor handler, and forwarding flags are written by the fswatcher). + +##### 5b. FRR Protocol Collectors (`internal/collector/ospf.go`, `rip.go`, `bfd.go`) +**Collects**: OSPF neighbor state/adjacency, RIP full route table with metrics, BFD session state/peer address. +**Sources**: +- `exec vtysh -c 'show ip ospf json'` and `vtysh -c 'show ip ospf neighbor json'` +- `exec vtysh -c 'show ip rip json'` +- `exec vtysh -c 'show bfd peers json'` +**Interval**: 10 seconds for all three. Protocol state machines can transition quickly (OSPF adjacency flap, BFD session down); 10 seconds balances responsiveness with `vtysh` execution overhead. +**Failure behavior**: If FRRouting is not running, write empty structures for the relevant subtrees. Log at ERROR on first failure; suppress to DEBUG for subsequent identical failures. (Note: Unlike the ZAPI watcher, protocol-specific state is cleared immediately when `vtysh` returns an error). +**Writes to**: `ietf-routing:routing/control-plane-protocols/control-plane-protocol` (OSPF under `.../ietf-ospf:ospf`, RIP under `.../ietf-rip:rip`, BFD under `.../ietf-bfd:bfd/ietf-bfd-ip-sh:...`). + +##### 6. Hardware Collector (`internal/collector/hardware.go`) +**Collects**: Temperature readings, fan speeds, voltage rail readings from kernel hwmon drivers; chassis inventory (manufacturer, model, serial number) from DMI. +**Sources**: +- `/sys/class/hwmon/hwmon*/temp*_input`, `fan*_input`, `in*_input`, `temp*_fault` +- `exec dmidecode -t system` — chassis manufacturer, product name, serial number +**Intervals**: 10 seconds for sensor readings; 300 seconds for DMI inventory. +**Failure behavior**: If a hwmon path does not exist, the path is silently skipped. +**Writes to**: `ietf-hardware:hardware`. + +##### 7. 
System Collector (`internal/collector/system.go`) +**Collects**: DNS resolver configuration and Finit service list only. +**Sources**: +- `/etc/resolv.conf.head`, `exec /sbin/resolvconf -l` (DNS nameservers and search domains) +- `exec initctl -j` (Finit service list: PID, identity, status, description, memory, uptime, restart-count) +**Interval**: 300 seconds. DNS and services are slow-changing. +**Note**: All other ietf-system data is handled outside SystemCollector: +- **Hostname, timezone**: reactive fswatcher (Section 4.1ter) +- **Users**: reactive fswatcher on `/etc/shadow` + `WatchDir("/var/run/sshd/")` for SSH keys +- **Platform** (os-release, uname): boot-once via `BootPlatform()` in `internal/collector/boot.go` +- **Software** (RAUC slots, installation status): boot-once via `BootSoftware()` in `internal/collector/boot.go` +- **Boot order**: reactive fswatcher on `/mnt/aux/grub/grubenv` and `/mnt/aux/uboot.env`, triggering `ReadBootOrder()` in `internal/collector/boot.go` +- **Clock, memory, load average, filesystem**: on-demand via `LiveSystemState()` in `internal/collector/live.go` +**Failure behavior**: Individual source failures are logged at WARN; the collector writes whatever fields could be collected successfully. +**Writes to**: `ietf-system:system-state` (via Merge — DNS and services only; all other system data provided by fswatcher handlers, boot-once initialization, or on-demand providers). + +##### 8. NTP Collector (`internal/collector/ntp.go`) +**Collects**: Synchronization status, reference server address, clock offset (seconds), stratum, and RMS jitter from chrony. 
+**Sources**: +- chrony cmdmon protocol v6 over Unix socket (`/var/run/chrony/chronyd.sock`) -- tracking request (synchronization state, stratum, refid, offset, root delay/dispersion, frequency, leap status) +- chrony cmdmon protocol v6 over Unix socket -- sources request (configured NTP source list with mode, state, address, stratum, poll interval, reachability) + +Uses `github.com/facebook/time/ntp/chrony` to speak the cmdmon protocol natively in Go, eliminating `exec chronyc` subprocess spawning. The protocol is strictly request-response (no subscription/push mode exists); polling is the only supported monitoring approach. +**Interval**: 60 seconds (configurable via `YANGERD_POLL_INTERVAL_NTP`). +**Failure behavior**: If chrony is not running (Unix socket absent or connection refused), write `synchronized: false` with an empty source list and log at WARN. +**Writes to**: `ietf-ntp:ntp` (RFC 9249 associations, clock state, server status, server statistics) and `ietf-system:system-state` (Infix `infix-system:ntp/sources/source` augmentation with per-source address, mode, state, stratum, poll via `tree.Merge()`). + +The `infix-system:ntp` augmentation (defined in `infix-system.yang`) extends `ietf-system:system-state` with a simplified NTP sources list. Each source carries `state` (enum: selected, candidate, outlier, unusable, falseticker, unstable) and `mode` (enum: server, peer, local-clock), whose values correspond to the indicators printed by `chronyc sources` (`*→selected`, `+→candidate`, `-→outlier`, `?→unusable`, `x→falseticker`, `~→unstable`; `^→server`, `=→peer`). Reference clocks (mode `#`) and invalid stratum are skipped. `addSources()` reuses the cmdmon sources response already fetched for `addAssociations()`, so chronyd is queried only once per poll. + +##### 9. LLDP Monitor (`internal/lldpmonitor/`) — Reactive Subprocess +**Collects**: Per-port LLDP neighbor information: chassis ID, port ID, TTL, system name, system capabilities, and management addresses. 
+**Sources**: Persistent `exec lldpcli -f json0 watch` subprocess. Output consists of pretty-printed JSON objects separated by blank lines, rooted at `lldp-added`, `lldp-updated`, or `lldp-deleted`. +**Trigger**: Event-driven by `lldpd` watch output (no fixed polling interval). +**Framing**: Blank-line split (`\n\n`) with brace-depth fallback; **not** NDJSON line parsing. +**Failure behavior**: If `lldpd`/`lldpcli` is unavailable, monitor restarts with exponential backoff and serves last-known-good LLDP subtree until events resume. +**Writes to**: `ieee802-dot1ab-lldp:lldp`. + +##### 10. DHCP Collector (`internal/collector/dhcp.go`) — Removed (D-Bus Reactive) +**Status**: This collector has been removed. DHCP lease data is now collected reactively by the D-Bus Monitor Subsystem (Section 4.1novies). +**Previously**: Polled `/var/lib/misc/dnsmasq.leases` at 30-second intervals. +**Now**: The D-Bus Monitor subscribes to dnsmasq signals (`DHCPLeaseAdded`, `DHCPLeaseDeleted`, `DHCPLeaseUpdated`). On each signal, it re-reads the lease file and calls `GetMetrics()` via D-Bus method call. +**Writes to**: `infix-dhcp-server:dhcp-server` (via D-Bus Monitor, not collector loop). + +##### 11. Firewall Collector (`internal/collector/firewall.go`) — Removed (D-Bus Reactive) +**Status**: This collector has been removed. Firewall data is now collected reactively by the D-Bus Monitor Subsystem (Section 4.1novies). +**Previously**: Polled `exec nft list ruleset -j` at 30-second intervals. +**Now**: The D-Bus Monitor subscribes to firewalld signals (`Reloaded`, plus `NameOwnerChanged` for restart detection). On each signal, it re-reads the full firewall state via firewalld D-Bus method calls (`getDefaultZone()`, `getActiveZones()`, `getZoneSettings2()`, `getPolicies()`, `getPolicySettings()`, `listServices()`, `getServiceSettings2()`, `getLogDenied()`, `queryPanicMode()`). +**Writes to**: `infix-firewall:firewall` (via D-Bus Monitor, not collector loop). + +##### 12. 
Container Collector (`internal/collector/containers.go`) — Phase 2, Feature-Gated +**Collects**: Running container names, image references, state, and creation timestamps. +**Sources**: `exec podman ps --format json`, `exec podman inspect --format json`. +**Interval**: 10 seconds. +**Failure behavior**: Log at WARN. Container-internal interface statistics require more complex namespace traversal, deferred to Phase 2. +**Writes to**: `infix-containers:containers`. +**Feature gate**: `YANGERD_ENABLE_CONTAINERS=true`. When container support is not included in the Infix build, the Buildroot recipe sets this to `false` and the container collector is not started. When enabled, `podman` is guaranteed present on the target. + +**Phase-2 reactive recommendation**: container lifecycle state (`create`, `start`, `stop`, `die`, `remove`) can be made reactive via a persistent `podman events --format json` subscription. This would eliminate lifecycle polling lag and keep polling only for runtime metrics (CPU/memory), which still require periodic sampling. + +**Phase-2 container namespace design**: Collecting per-container network interface statistics requires entering each container's network namespace to read `/sys/class/net/*/statistics/` or query netlink. The planned approach uses `netns.Set()` from `vishvananda/netns` to switch the network namespace of the OS thread the calling goroutine is running on, perform the queries, and switch back. Because Go goroutines can migrate between OS threads, the goroutine must be locked to its OS thread via `runtime.LockOSThread()` before the namespace switch. Each container's statistics are collected in a dedicated goroutine to prevent namespace leaks from affecting other collectors. Container namespace enumeration uses `podman inspect --format '{{.State.Pid}}'` to obtain the container's PID, from which `/proc/<pid>/ns/net` provides the network namespace file descriptor. 
+### 4.5 statd Integration + +The statd integration consists of two parts: (a) a new `yangerd.c` / `yangerd.h` helper +file implementing the IPC client, and (b) a new `ly_add_yangerd_data()` function in +`statd.c` that replaces `ly_add_yanger_data()` and obtains its data through the helper. +There is no fallback to the existing `fsystemv()` path: when yangerd is unavailable the +callback fails with `SR_ERR_INTERNAL`. + +### Current Code Path (statd.c) + +The function being replaced is `ly_add_yanger_data()` (lines 76–120 of `statd.c` at the +time of writing). It allocates a `memfd`, wraps it in a `FILE *` stream, calls +`fsystemv(yanger_args, NULL, stream, NULL)` to fork-and-exec the yanger Python interpreter +with stdout redirected to the memfd, rewinds with `lseek()`, and parses the result with +`lyd_parse_data_fd()`: + +```c +/* Current implementation (abbreviated) */ +static int ly_add_yanger_data(const struct ly_ctx *ctx, struct lyd_node **parent, + char *yanger_args[]) +{ + FILE *stream; + int err, fd; + + fd = memfd_create("yanger_tmpfile", MFD_CLOEXEC | MFD_NOEXEC_SEAL); + stream = fdopen(fd, "w+"); + err = fsystemv(yanger_args, NULL, stream, NULL); /* fork + exec yanger */ + fflush(stream); + lseek(fd, 0, SEEK_SET); + err = lyd_parse_data_fd(ctx, fd, LYD_JSON, LYD_PARSE_ONLY, 0, parent); + fclose(stream); + return err; +} +``` + +### New yangerd.c Helper + +A new file `src/statd/yangerd.c` (with corresponding `yangerd.h`) implements the IPC +client. It follows the same style as `gpsd.c`: a module-static fd, a `connect` function, +and a `query` function. Unlike `gpsd.c` (which uses non-blocking I/O and `ev_io`), +`yangerd.c` uses blocking I/O with a `SO_RCVTIMEO` timeout because statd calls it +synchronously from within a sysrepo callback. + +```c +/* SPDX-License-Identifier: BSD-3-Clause */ + +/* + * yangerd.c - yangerd IPC client for statd. + * + * Maintains a persistent AF_UNIX SOCK_STREAM connection to /run/yangerd.sock. + * yangerd_query() returns a malloc'd JSON string on success (caller must free), + * or NULL if yangerd is unavailable -- there is no fallback; callers report SR_ERR_INTERNAL. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "yangerd.h" + +#define YANGERD_SOCK_PATH "/run/yangerd.sock" +#define YANGERD_TIMEOUT_MS 50 +#define YANGERD_MAX_RESP (4 * 1024 * 1024) +#define YANGERD_VERSION 1 + +static int yangerd_fd = -1; /* persistent connection fd */ + +static int yangerd_connect(void) +{ + struct timeval tv = { .tv_sec = 0, .tv_usec = YANGERD_TIMEOUT_MS * 1000 }; + struct sockaddr_un addr = { + .sun_family = AF_UNIX, + .sun_path = YANGERD_SOCK_PATH, + }; + int fd; + + fd = socket(AF_UNIX, SOCK_STREAM, 0); + if (fd < 0) + return -1; + + if (connect(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) { + close(fd); + return -1; + } + + /* Enforce read timeout so a stalled yangerd doesn't block statd */ + if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)) < 0) { + close(fd); + return -1; + } + + yangerd_fd = fd; + DEBUG("yangerd: connected"); + return 0; +} + +/* + * yangerd_query - query yangerd for operational data at @path. + * + * Returns malloc'd JSON string (RFC 7951 fragment) on success; caller must + * free(). Returns NULL if yangerd is unavailable, timed out, or returned an + * error status -- the caller should return SR_ERR_INTERNAL. 
+ */ +char *yangerd_query(const char *path) +{ + uint32_t len; + uint8_t ver; + char req[512], *resp; + ssize_t n, total; + + if (yangerd_fd < 0 && yangerd_connect() < 0) + return NULL; + + snprintf(req, sizeof(req), + "{\"method\":\"get\",\"path\":\"%s\"}", path); + + ver = YANGERD_VERSION; + len = htonl((uint32_t)strlen(req)); + if (write(yangerd_fd, &ver, 1) != 1 || + write(yangerd_fd, &len, 4) != 4 || + write(yangerd_fd, req, strlen(req)) != (ssize_t)strlen(req)) { + DEBUG("yangerd: write failed: %s", strerror(errno)); + close(yangerd_fd); + yangerd_fd = -1; + return NULL; + } + + if (read(yangerd_fd, &ver, 1) != 1 || ver != YANGERD_VERSION) { + DEBUG("yangerd: read version failed or mismatch (got %u)", ver); + close(yangerd_fd); + yangerd_fd = -1; + return NULL; + } + + if (read(yangerd_fd, &len, 4) != 4) { + DEBUG("yangerd: read header failed: %s", strerror(errno)); + close(yangerd_fd); + yangerd_fd = -1; + return NULL; + } + len = ntohl(len); + if (len == 0 || len > YANGERD_MAX_RESP) { + ERROR("yangerd: bad response length %u", len); + close(yangerd_fd); + yangerd_fd = -1; + return NULL; + } + + resp = malloc(len + 1); + if (!resp) + return NULL; + + /* Read body in a loop to handle partial reads on Unix sockets */ + total = 0; + while (total < (ssize_t)len) { + n = read(yangerd_fd, resp + total, len - total); + if (n <= 0) { + DEBUG("yangerd: read body failed (got %zd/%u): %s", + total, len, strerror(errno)); + free(resp); + close(yangerd_fd); + yangerd_fd = -1; + return NULL; + } + total += n; + } + resp[len] = '\0'; + return resp; +} + +void yangerd_close(void) +{ + if (yangerd_fd >= 0) { + close(yangerd_fd); + yangerd_fd = -1; + } +} +``` + +### Modified ly_add_yangerd_data() in statd.c + +`ly_add_yanger_data()` is replaced by `ly_add_yangerd_data()`, which queries yangerd +over the IPC socket. 
The Python yanger interpreter and `fsystemv()` fork path are removed +entirely -- yangerd is the sole source of operational data: + +```c +/* + * ly_add_yangerd_data - query operational data from yangerd. + * + * Queries yangerd over /run/yangerd.sock. On success, the JSON response body + * is passed to lyd_parse_data_mem() to integrate the data into the libyang tree. + * If yangerd is unavailable (not running, timed out, error response), returns + * SR_ERR_INTERNAL -- there is no fallback path. + * + * The @path argument is the YANG module-qualified path yangerd was subscribed + * to, e.g. "/ietf-interfaces:interfaces". + */ +static int ly_add_yangerd_data(const struct ly_ctx *ctx, struct lyd_node **parent, + const char *path) +{ + char *json; + int err; + + json = yangerd_query(path); + if (!json) { + ERROR("yangerd: query failed for %s", path); + return SR_ERR_INTERNAL; + } + + err = lyd_parse_data_mem(ctx, json, LYD_JSON, LYD_PARSE_ONLY, 0, parent); + if (err) + ERROR("yangerd: lyd_parse_data_mem failed (%d)", err); + + free(json); + return err; +} +``` + +Each callback in `statd.c` that previously called `ly_add_yanger_data(ctx, parent, yanger_args)` +is updated to call `ly_add_yangerd_data(ctx, parent, XPATH_BASE)`, passing the +relevant `XPATH_*_BASE` constant as the `path` argument. The `yanger_args` parameter is +removed entirely. For `sr_iface_cb()`, which may +pass a per-interface filter, the path remains +`XPATH_IFACE_BASE` (`"/ietf-interfaces:interfaces"`) -- filter support in yangerd is +handled server-side by the optional `filter` JSON field in the request. + +### Build Integration + +Two source files are added to `src/statd/Makefile.am`: + +```makefile +statd_SOURCES = statd.c yangerd.c yangerd.h gpsd.c gpsd.h shared.h journal.c journal.h +``` + +No new library dependencies are introduced. `yangerd.c` uses only POSIX headers present +in every Buildroot toolchain: ``, ``, ``, and +``. The `SO_RCVTIMEO` socket option is POSIX.1-2008. 
+ +### Connection Lifecycle + +statd opens one persistent connection to yangerd on first use. `yangerd_fd` is a +module-static `int` initialised to `-1`. `yangerd_query()` checks `yangerd_fd < 0` and +calls `yangerd_connect()` if needed. On any I/O error (`EPIPE`, `ECONNRESET`, short read, +timeout), the fd is closed and `yangerd_fd` is reset to `-1`. The next call will +reconnect. Reconnect failure returns `NULL` immediately — `ly_add_yangerd_data()` +returns `SR_ERR_INTERNAL` without retrying, ensuring a single failed `connect()` does not +add more than one syscall's overhead to the sysrepo callback latency. + +`yangerd_close()` is called from `main()` during statd shutdown (after `unsub_to_all()` +and before `sr_disconnect()`) to close the socket cleanly. + + +### 4.6 yangerctl CLI + +`yangerctl` is a statically-linked Go CLI tool (`cmd/yangerctl/main.go`) that connects to the yangerd Unix socket and provides human-readable access to the in-memory YANG tree. It is built from the same Go module as `yangerd` and installed to `/usr/bin/yangerctl` on the Infix target via the same Buildroot package. Because it has no CGo dependency and is statically linked, it can be copied directly to a target device for debugging without any shared library prerequisites. + +`yangerctl` is intended for two use cases: interactive debug sessions on production devices (inspecting live operational state without a NETCONF client) and CI test assertions (scripted queries with `jq` to verify that yangerd is populating the correct YANG subtrees). + +### Connection + +`yangerctl` connects to `/run/yangerd.sock` by default. The socket path can be overridden with `--socket ` for local testing against a non-system yangerd instance. There is no authentication — access control is enforced entirely by Unix socket file permissions (`srw-rw---- root:yangerd`). 
+ +### Subcommands + +``` +yangerctl get Query a YANG subtree from the in-memory tree +yangerctl health Show daemon health status and per-collector state +yangerctl dump Dump the entire in-memory tree as JSON +yangerctl watch Poll a path every second and print diffs (debug) +``` + +#### `yangerctl get ` + +Queries a single YANG subtree. The path must be a module-qualified XPath prefix in the form `/module-name:top-level-node`. An optional `--filter key=value` argument restricts the output to a single list entry. + +```bash +# Query all interfaces +$ yangerctl get /ietf-interfaces:interfaces +{ + "ietf-interfaces:interfaces": { + "interface": [ + { "name": "eth0", "oper-status": "up", "statistics": { "in-octets": 1234567 } }, + { "name": "eth1", "oper-status": "down" } + ] + } +} + +# Query a specific interface by key filter +$ yangerctl get /ietf-interfaces:interfaces --filter name=eth0 +{ + "ietf-interfaces:interfaces": { + "interface": [ + { "name": "eth0", "oper-status": "up", "phys-address": "52:54:00:ab:cd:ef", + "statistics": { "in-octets": 1234567, "out-octets": 987654 } } + ] + } +} + +# Query routing state +$ yangerctl get /ietf-routing:routing +{ + "ietf-routing:routing": { + "ribs": { + "rib": [ + { "name": "ipv4-master", "routes": { "route": [ ... ] } } + ] + } + } +} +``` + +#### `yangerctl health` + +Displays the daemon's overall health, per-subsystem status (with restart counts and PIDs), and per-model freshness data (last-updated timestamps and sizes). The output matches the canonical health response schema (Section 4.3.5). 
+ +```bash +$ yangerctl health +{ + "status": "ok", + "subsystems": { + "nlmonitor": {"state": "running", "restarts": 0}, + "ipbatch": {"state": "running", "pid": 1234, "restarts": 0}, + "bridgebatch": {"state": "running", "pid": 1235, "restarts": 0}, + "zapiwatcher": {"state": "running", "restarts": 0}, + "ethmonitor": {"state": "running"}, + "fswatcher": {"state": "running", "watches": 8}, + "dbusmonitor": {"state": "running"}, + "iwmonitor": {"state": "disabled"} + }, + "models": { + "ietf-interfaces:interfaces": {"last_updated": "2026-03-04T12:34:56Z", "size_bytes": 8192}, + "ietf-routing:routing": {"last_updated": "2026-03-04T12:34:55Z", "size_bytes": 2048}, + "ietf-hardware:hardware": {"last_updated": "2026-03-04T12:34:50Z", "size_bytes": 1024}, + "ietf-system:system-state": {"last_updated": "2026-03-04T12:34:48Z", "size_bytes": 512}, + "ietf-ntp:ntp": {"last_updated": "2026-03-04T12:34:45Z", "size_bytes": 256} + } +} +``` + +A collector that has never succeeded (e.g., FRRouting not yet running) is shown as `error` with the failure message: + +```bash + ospf: error: exec: "vtysh": executable file not found in $PATH +``` + +#### `yangerctl dump` + +Dumps the entire in-memory tree as a single JSON object to stdout. Useful for piping into `jq` for CI assertions or saving a snapshot of daemon state for offline analysis. + +```bash +# Dump all tree data and extract interface names with jq +$ yangerctl dump | jq '."ietf-interfaces:interfaces".interface[].name' +"eth0" +"eth1" +"lo" + +# Verify OSPF has at least one neighbor in state Full +$ yangerctl dump | jq '."ietf-routing:routing" | .. | objects | select(."ospf-neighbor-state"? == "Full") | ."neighbor-id"' +"192.168.1.2" + +# Save a diagnostic snapshot +$ yangerctl dump > /tmp/yangerd-snapshot-$(date +%s).json +``` + +#### `yangerctl watch ` + +Polls the specified YANG path every second and prints a diff whenever the returned JSON changes. 
Intended for interactive debugging of reactive updates — for example, observing that a link state change propagates into the tree within milliseconds of the kernel event. + +```bash +# Watch for changes to the routing table +$ yangerctl watch /ietf-routing:routing +[1s] no change +[2s] no change +[3s] changed: + - "oper-status": "up" + + "oper-status": "down" +[4s] no change + +# Watch WireGuard peer handshake timestamps +$ yangerctl watch /ietf-interfaces:interfaces --filter name=wg0 +[1s] no change +[30s] changed: + - "latest-handshake": "2026-02-23T10:00:00Z" + + "latest-handshake": "2026-02-23T10:00:30Z" +``` + +Press `Ctrl-C` to exit; `yangerctl watch` catches `SIGINT` and exits cleanly with exit code 0. + +### Global Flags + +| Flag | Default | Description | +|------|---------|-------------| +| `--socket ` | `/run/yangerd.sock` | Unix socket path for yangerd connection | +| `--timeout ` | `5s` | Per-request connection and read timeout | +| `--json` | false | Force JSON output even for commands that default to human-readable text (e.g., `health`, `watch`) | + +### Exit Codes + +| Code | Meaning | +|------|---------| +| 0 | Success | +| 1 | Connection error (socket not present, connection refused, timeout) | +| 2 | Path not found in the in-memory tree | +| 3 | Daemon is starting up — returned when yangerd responds with HTTP 503 equivalent (tree not yet populated) | + +### Build and Installation + +`yangerctl` is built alongside `yangerd` in the same Go module: + +```bash +# Host build +go build -o yangerctl ./cmd/yangerctl + +# Cross-compile for AArch64 Infix target (static, no CGo) +CGO_ENABLED=0 GOOS=linux GOARCH=arm64 go build -ldflags='-extldflags -static' -o yangerctl ./cmd/yangerctl +``` + +Being statically linked with no CGo dependency, the resulting binary can be copied directly to a target device via `scp` for one-off debug sessions without requiring package installation: + +```bash +scp yangerctl admin@192.168.1.1:/tmp/ +ssh admin@192.168.1.1 
/tmp/yangerctl health +``` + +### 4.7 Design Decisions + +#### AF_UNIX vs TCP +Using Unix domain sockets for inter-process communication provides the most efficient and secure transport for local daemon interactions. Unlike TCP sockets, AF_UNIX avoids the overhead of the network stack, including checksum calculation, sequence numbering, and acknowledgement packets. This choice ensures that data exchange between statd and yangerd occurs with near-zero latency while also permitting the use of standard filesystem permissions to restrict access to the statd user group. By leveraging a stream-oriented socket, we maintain the ability to handle large JSON payloads that might otherwise exceed the size limits of datagram-based alternatives. + +#### Pre-serialization +Storing operational data as pre-serialized JSON blobs in the in-memory tree is a deliberate trade-off that prioritizes read performance over write efficiency. Since the operational data is read far more frequently than it is updated, especially under heavy monitoring from multiple NETCONF or RESTCONF clients, removing the serialization cost from the request path significantly reduces overall response latency. Each update to the tree involves a single serialization of the affected module, whereas every query becomes a simple memory lookup followed by a socket write. This architecture ensures that yangerd remains responsive even when the number of concurrent management sessions increases. + +#### Per-Model RWMutex +The in-memory data tree uses per-model read-write mutexes rather than a single global lock. Each YANG module key (`ietf-interfaces:interfaces`, `ietf-routing:routing`, etc.) has its own `sync.RWMutex` inside a `modelEntry` struct, while a separate top-level `sync.RWMutex` protects the models map structure itself (new key insertion only). 
This design ensures that writers for different YANG modules never block each other -- for example, a netlink link event updating `ietf-interfaces:interfaces` does not block a ZAPI route update to `ietf-routing:routing`. Readers only contend with writers of the same module. On multi-module IPC requests, per-model read locks are acquired individually, and each module's data is read and concatenated into the response. Per-model write locks are held only very briefly (updating a single map entry to a new JSON blob), preserving the low-contention characteristics of the original design while eliminating cross-module blocking entirely. + +#### No CGo +The strict requirement to avoid CGo is driven by the necessity of maintaining a stable and reproducible cross-compilation environment within the Buildroot build system. Using pure Go allows the daemon to be compiled for ARM, AArch64, RISC-V, and x86_64 architectures using only the standard Go toolchain and environment variables, without needing a matching C cross-compiler and target sysroot for each architecture. This significantly reduces the complexity of the CI/CD pipeline and eliminates a common source of binary incompatibility and linking errors in embedded Linux environments. Furthermore, a pure Go binary is easier to audit for memory safety and simplifies the deployment process by producing a single, statically linked executable. +#### ip -batch -json for Data Queries vs vishvananda/netlink for Event Monitoring + +yangerd uses a split approach: `vishvananda/netlink` for **event monitoring** and `iproute2` batch mode for **data queries**. Each tool is chosen for what it does best. + +**Why vishvananda/netlink for events:** + +1. **`ip monitor -json` does not produce JSON.** Investigation of the iproute2 source code (`ip/ipmonitor.c`) confirmed that `do_ipmonitor()` never calls `new_json_obj()`. The `-json` flag is parsed globally, but the JSON writer (`_jw`) is never allocated for the monitor subcommand.
Likewise, `bridge/monitor.c` has zero JSON references. This was confirmed by Ubuntu bug #2116779 (2025-07-12). Parsing raw text output from `ip monitor` would be fragile and under-specified. + +2. **Typed Go structs eliminate text parsing.** The `vishvananda/netlink` library delivers events as typed Go structs (`LinkUpdate`, `AddrUpdate`, `NeighUpdate`) on dedicated channels. Since yangerd uses events only as triggers (not for data extraction), the library's attribute coverage is sufficient -- we only need the interface name (from `update.Link.Attrs().Name`) or address family to route the event to the correct re-read handler. + +3. **Production-proven in Docker, Cilium, Calico, OVN-Kubernetes, Antrea.** All major Go-based container networking projects use `vishvananda/netlink` for netlink event subscriptions. The library's `ErrorCallback` + context cancellation pattern is battle-tested at scale. + +4. **Fewer subprocesses.** Replacing `ip monitor -json` and `bridge monitor -json` subprocesses with native Go channels reduces the subprocess count from FIVE to THREE (`ip batch`, `bridge batch`, `iw event`). This simplifies process management, reduces file descriptor usage, and eliminates two text-parsing codepaths. + +**Why ip -batch -json for data queries (NOT vishvananda/netlink):** + +1. **The problem**: `vishvananda/netlink` handles common attributes well, but the Linux kernel continuously adds new netlink attributes for features like XDP, tc flower offloads, bridge VLAN filtering extensions, and other advanced networking features. The Go library lags behind kernel development, meaning `yangerd` would be unable to report on features that `iproute2` already supports. + +2. **The solution**: For state queries, `yangerd` delegates all netlink attribute parsing to `iproute2`'s `ip` command running in persistent batch mode (`ip -json -force -batch -`). 
`iproute2` is always present on the target system, always compiled against the running kernel's headers, and handles every netlink attribute the kernel exposes -- including obscure ones that no Go library wraps. + +3. **How it works**: `yangerd` maintains a persistent `ip -json -force -batch -` subprocess. Commands are written to stdin one per line; each produces a JSON array on stdout. The `-force` flag ensures the process continues past errors. The `-json` flag must precede `-batch` in the argument list. + +4. **Benefits**: (a) No dependency on Go netlink library feature parity with kernel for DATA. (b) `iproute2` handles all TLV parsing including vendor-specific and newly-added attributes. (c) No fork/exec overhead per query -- the batch process is persistent. (d) JSON output is directly usable as YANG operational data with minimal transformation. + +5. **Trade-offs**: (a) Runtime dependency on `iproute2` (always present on Infix). (b) One extra process per `iproute2` tool family (`ip`, `bridge`). (c) Parsing `iproute2` JSON output instead of typed Go structs requires JSON unmarshalling. (d) Query latency includes IPC to subprocess (negligible for batch mode -- sub-millisecond). (e) `vishvananda/netlink` is an additional Go dependency for events, but this is a well-maintained library with minimal transitive dependencies. + +#### inotify/fsnotify for File Watching + +Using inotify (via Go's `fsnotify` library) eliminates fixed polling intervals for data sources based on real filesystem entries that change infrequently, such as procfs forwarding flags. This reactive approach reduces CPU wake-ups and provides near-instant detection of changes. However, this choice introduces a dependency on kernel-level inotify limits (`/proc/sys/fs/inotify/max_user_watches`), and requires special handling for the `IN_IGNORED` event to re-establish watches when files are deleted and recreated (a common pattern for atomic file writes). 
Note that sysfs pseudo-files (`/sys/class/hwmon/*`, `/sys/class/thermal/*`) do not support inotify -- the kernel generates values on `read()` and never calls `fsnotify_modify()` -- so hardware sensors are collected via polling instead (see Section 5, collector #6). DHCP lease files and firewall state, which were previously candidates for inotify/polling, are now handled reactively via D-Bus signals (see Section 4.1novies). + +#### D-Bus Signal Subscriptions for Service-Managed Data + +Using D-Bus signal subscriptions for dnsmasq DHCP leases and firewalld configuration changes replaces both inotify-based file watching and periodic polling with a semantically richer event source. D-Bus signals are emitted by the application itself at the exact moment state changes, providing both timeliness and context that filesystem-level mechanisms cannot match. + +1. **Why not inotify for DHCP leases?** While inotify on `/var/lib/misc/dnsmasq.leases` functionally works, it has limitations. inotify fires on every `write()` syscall, which may arrive before dnsmasq has finished writing all lease data -- creating a race window where a partial file is read. dnsmasq's D-Bus signals (`DHCPLeaseAdded`, `DHCPLeaseDeleted`, `DHCPLeaseUpdated`) are emitted after the lease state is fully committed. Additionally, D-Bus signals carry semantic meaning (which lease changed) rather than just "file modified," enabling more targeted logging and diagnostics. + +2. **Why not polling for firewall state?** The firewall configuration is managed by firewalld. While nftables kernel tables hold the runtime state, firewalld's D-Bus API provides the authoritative, structured view of zones, policies, and services. Periodic polling would require either subprocess execution or repeated D-Bus calls on a fixed schedule, with two costs: (a) unnecessary IPC every 30 seconds regardless of whether anything changed, and (b) up to 30 seconds of stale data after a firewall reload. 
firewalld's `Reloaded` D-Bus signal provides instant notification with zero steady-state CPU cost. On each signal, yangerd re-reads the full firewall state via firewalld D-Bus method calls. + +3. **Why `godbus/dbus/v5`?** This is the standard Go D-Bus library, well-maintained and widely used. It provides `AddMatchSignal()` for signal subscription, `Signal()` for channel-based delivery, and `Object.CallWithContext()` for method invocations (used for `GetMetrics()` on dnsmasq and all firewalld data retrieval: `getDefaultZone()`, `getActiveZones()`, `getZoneSettings2()`, `getPolicies()`, `getPolicySettings()`, `listServices()`, `getServiceSettings2()`, `getLogDenied()`, `queryPanicMode()`). The API surface required by yangerd is minimal: connect, subscribe, receive signals, call methods. + +**External command timeouts**: All short-lived external commands (`exec.Command`) use `exec.CommandContext(ctx)` with an explicit per-command timeout to prevent indefinite blocking. Timeout values: `vtysh` commands (OSPF/RIP/BFD collectors): 5 seconds; `iw` queries (station list, interface info): 2 seconds; `dmidecode` (hardware collector): 5 seconds. D-Bus method calls (dnsmasq `GetMetrics()`: 2 seconds; firewalld data retrieval: 5 seconds) use `CallWithContext()` with context-based timeouts. If a command or D-Bus call exceeds its timeout, the context cancellation terminates the operation, the monitor logs a warning, and the affected tree key retains its previous value. + +4. **Trade-offs**: (a) Runtime dependency on the D-Bus system bus daemon (always present on Infix -- `dbus-daemon` is a core system component). (b) Service absence handling is more complex than file-based approaches: when dnsmasq or firewalld is not running, no signals arrive, and the `NameOwnerChanged` mechanism must be used for lifecycle detection. 
(c) D-Bus method calls (`GetMetrics()` for dnsmasq, firewalld zone/policy/service queries) have IPC overhead, though this is negligible for the call frequency involved (only on signal receipt, not periodic). (d) The `godbus/dbus/v5` library is an additional Go dependency, but it has minimal transitive dependencies and is already used by many system-level Go programs. + +#### bridge -json -batch - for Bridge Data + +A separate `bridge` batch subprocess is utilized for bridge-specific netlink queries instead of multiplexing through the existing `ip` batch subprocess. While both tools belong to the `iproute2` family, they utilize distinct command grammars and produce different JSON output structures. By maintaining a dedicated `bridge -json -batch -` process, `yangerd` avoids the complexity of a multiplexing layer while reusing the established subprocess management pattern (persistent stdin/stdout pipes, health monitoring, and exponential backoff). This ensures that VLAN, FDB, MDB, and STP data—which are not exposed via the `ip` command—are collected efficiently using the most authoritative tool available on the system. + +#### iw event for 802.11 Wireless Monitoring + +The `iw event -t` command from the `iw` tool provides reactive notification of 802.11 wireless events via the Linux kernel's nl80211 netlink family. Unlike the NLMonitor's `vishvananda/netlink` subscriptions (which receive typed Go structs), `iw event` produces human-readable text output rather than JSON, and `iw` has no batch query mode. Despite these differences, `iw event` is the only reliable mechanism for detecting wireless client associations, disconnections, channel switches, and regulatory domain changes without polling. + +1. **Why not nl80211 directly in Go?** While Go libraries for generic netlink exist (`mdlayher/genetlink`), the nl80211 family has an exceptionally complex attribute set (over 300 attributes, nested TLVs, vendor-specific extensions). 
The `iw` tool handles all nl80211 attribute parsing and version compatibility, just as `iproute2` handles RTNL parsing for the ip/bridge subsystems. Delegating to `iw` avoids duplicating a fragile and rapidly-evolving netlink parser. + +2. **Why not use a persistent subprocess for queries?** The `iw` tool has no `-batch -` mode. Each query requires a separate `exec.Command` invocation. This is acceptable because WiFi events are infrequent (typically single-digit events per minute), so the overhead of spawning short-lived processes for re-queries is negligible compared to the persistent `ip -json -batch -` subprocess that handles hundreds of queries per second during convergence events. + +3. **Why is the subsystem feature-gated?** Not all Infix deployments include wireless hardware. WiFi support is a build-time option in Buildroot. The `YANGERD_ENABLE_WIFI` environment variable (set by the Buildroot recipe in `/etc/default/yangerd`) controls whether the IW Event Monitor and WiFi collector are started. When WiFi is included in the build, the `iw` binary is guaranteed present on the target. + +4. **Trade-offs**: (a) When WiFi is enabled, `iw` is a runtime dependency (guaranteed present by the build system). (b) Text parsing is more fragile than JSON parsing—format changes in `iw` output could break the parser. (c) Short-lived subprocesses for re-queries have higher per-query overhead than batch mode, but the low event rate makes this negligible. (d) A single goroutine processes events sequentially, which is sufficient for typical WiFi event rates but could become a bottleneck on systems with many wireless interfaces. + +#### Ethtool Genetlink Monitor for Settings Changes + +The Linux kernel's ethtool netlink family exposes a `"monitor"` multicast group (`ETHNL_MCGRP_MONITOR`) that delivers notifications when Ethernet link settings change. Infix targets Linux kernel 6.18, where this facility is unconditionally available. 
This allows yangerd to receive `ETHTOOL_MSG_LINKINFO_NTF` and `ETHTOOL_MSG_LINKMODES_NTF` messages whenever speed, duplex, auto-negotiation, or other link parameters are renegotiated—without polling. + +1. **Why not poll for everything?** The original design polled ethtool data every 30 seconds. While acceptable for statistics (which change continuously), speed/duplex/auto-negotiation only change on link renegotiation events—typically seconds to minutes apart. Polling at 30 seconds means up to 30 seconds of stale data after a link renegotiation. The genetlink monitor reduces this to sub-second latency. + +2. **Why not use mdlayher/ethtool for monitoring?** The `mdlayher/ethtool` Go library provides typed access to ethtool genetlink queries (LinkInfo, LinkMode, Stats) but does not expose a Monitor or Subscribe API for multicast notifications. However, the lower-level `mdlayher/genetlink` library fully supports `Conn.JoinGroup()` and `Conn.Receive()`, enabling yangerd to subscribe to the ethtool monitor group natively in Go without any subprocess. + +3. **Why is this NOT a subprocess?** Unlike the `iw event` subsystem—which shells out to an external tool because `iw` handles complex nl80211 attribute parsing—the ethtool monitor notifications are simple genetlink messages with a command byte that identifies the notification type. The actual data retrieval is then done via the existing `mdlayher/ethtool` typed API. No complex TLV parsing is needed in the notification path, so a native Go genetlink socket is both simpler and more efficient than spawning an external process. The core netlink event monitoring (link, addr, neigh) is also native Go via `vishvananda/netlink`, making the ethtool genetlink monitor consistent with the overall architecture. + +4.
**Hybrid model**: The ethtool collector becomes a hybrid: reactive for settings (speed, duplex, auto-negotiation via `ETHNL_MCGRP_MONITOR` genetlink subscription) and polling for statistics (hardware counters via `ethtool.Client.Stats()` at 30-second intervals). Statistics have no `_NTF` message type—they must remain polling. + +5. **No fallback needed**: Infix targets Linux kernel 6.18, where ethtool netlink is unconditionally available. The ethmonitor is always active in production — there is no polling fallback for settings. If the genetlink subscription fails, it indicates a system misconfiguration, not a kernel capability gap. + +6. **Trade-offs**: (a) Dependency on `mdlayher/genetlink` in addition to `mdlayher/ethtool` (both are pure Go, no CGo). (b) The genetlink socket is an additional file descriptor per yangerd instance. (c) Only settings changes are reactive; statistics remain polling. + +7. **Ethtool NTF gap on link up/down**: The `ETHNL_MCGRP_MONITOR` multicast group does NOT deliver notifications when a link goes up or down. When a physical link transitions, the kernel negotiates speed/duplex/autoneg with the link partner, but this negotiation is invisible to the ethtool genetlink monitor. To close this gap, the link event handler (`monitor/link.go`) calls `ethmonitor.RefreshInterface()` on every RTM_NEWLINK event, explicitly re-querying ethtool data for the affected interface. This ensures sub-second convergence for ethtool data after link events, matching the latency of the genetlink monitor for explicit settings changes. + +8. **Parser version robustness**: At startup, yangerd logs the output of `iw --version` to record the exact `iw` version in use. The text parser handles unknown/unparsed event lines by logging them at DEBUG level and skipping them -- unrecognized lines do not cause errors or stop event processing. This provides forward compatibility with newer `iw` versions that may add new event types. 
Test fixtures in `testdata/` capture known-good outputs from `iw` 6.9 for regression testing. + +#### Event-Triggered Batch Re-read Pattern (All Netlink Events) + +All netlink events -- link, address, and neighbor, both add (`RTM_NEW*`) and remove (`RTM_DEL*`) -- use the same core pattern: the event content is used only as a **trigger** to identify which entity changed. The event payload itself is NOT parsed for data. Instead, the event dispatcher issues a full re-read of the affected state and replaces the corresponding subtree in the YANG tree atomically. For link, address, and neighbor events, re-reads go through `ip -json -force -batch -`. Route data is sourced exclusively from the ZAPI watcher's streaming connection to zebra (Section 4.1octies) and is NOT part of the netlink event-triggered re-read pattern. + +The per-event-type re-read queries are: + +| Event Type | Trigger | ip batch Re-read Queries | Subtree Replaced | Additional Actions | +|------------|---------|--------------------------|------------------|--------------------| +| **Link** (RTM_NEWLINK / RTM_DELLINK) | Interface name | `link show dev <ifname>`, `-s link show dev <ifname>`, `addr show dev <ifname>` (3 queries) | Entire interface subtree (flags, MTU, operstate, stats, addresses) | `ethmonitor.RefreshInterface()` + `last-change` timestamp | +| **Address** (RTM_NEWADDR / RTM_DELADDR) | Interface name | `addr show dev <ifname>` (1 query) | Address subtree for that interface | -- | +| **Neighbor** (RTM_NEWNEIGH / RTM_DELNEIGH) | Interface name | `neigh show dev <ifname>` (1 query) | Neighbor subtree for that interface | -- | + +**Why the same pattern for delete events?** After an `RTM_DEL*` event, the re-read query returns the current state, which simply omits the deleted entity. The tree update replaces the entire subtree with this result. No surgical removal logic is needed — the subtree replacement naturally drops the deleted entry. This eliminates an entire class of bugs related to partial tree surgery.
+ +**Design Rationale:** + +1. **Why re-read instead of parsing the event?** Netlink events carry only partial state. An RTM_NEWLINK does not include addresses; an RTM_NEWADDR does not include the full address list for the interface. Event payloads vary by kernel version and may omit fields. A full re-read via `ip batch` for link/addr/neigh captures all fields at a single coherent point in time, making the tree self-consistent and kernel-version-independent. + +2. **Why include addresses in the link re-read?** An RTM_NEWLINK event itself does not carry address information, but address behavior can change as a consequence of link state (e.g., IPv6 SLAAC addresses are added/removed on link up/down, DAD state may transition). By including `addr show dev` in the link re-read set, the tree always reflects the current address state even if the separate addr monitor event arrives slightly later. + +3. **Why trigger ethtool re-query on link events?** As noted in point 7 above, `ETHNL_MCGRP_MONITOR` does not fire on link up/down. The link event handler calls `ethmonitor.RefreshInterface()` to re-query speed/duplex/autoneg after every RTM_NEWLINK, ensuring ethtool data converges within milliseconds of the link event. + +4. **Per-entity debouncing**: During convergence storms (STP topology change, ARP storms), the same entity may generate tens of events per second. A per-entity debounce window (10ms) coalesces rapid events into a single full re-read, preventing redundant queries while still converging to the correct final state. Debouncing is keyed by interface name for link/addr/neigh events. + +5. **Trade-offs**: (a) Full re-reads for link/addr/neigh issue more ip batch queries per event compared to targeted field updates. This is well within ip batch throughput capacity (microseconds per query over a local pipe). (b) Per-entity debouncing adds a small latency (up to 10ms) in the storm case, but ensures efficiency. 
(c) The `ethmonitor.RefreshInterface()` call on link events adds two ethtool genetlink queries (LinkInfo + LinkMode), which is negligible. +### 4.8 Monitoring & Observability + +Monitoring the internal state of yangerd is critical for ensuring that data collection remains accurate and timely. The daemon exposes a health endpoint over the IPC socket that provides real-time status for all netlink monitors and supplementary collectors. + +#### Health Endpoint + +The `yangerctl health` command (and the underlying `{"method":"health"}` IPC request) returns the same schema defined in Section 4.3.5 (`subsystems` + `models`): + +```json +{ + "status": "ok", + "subsystems": { + "nlmonitor": { "state": "running", "restarts": 0 }, + "ipbatch": { "state": "running", "pid": 1321, "restarts": 0 }, + "bridgebatch": { "state": "running", "pid": 1322, "restarts": 0 }, + "zapiwatcher": { "state": "running", "restarts": 1 }, + "iwmonitor": { "state": "disabled" }, + "ethmonitor": { "state": "running" }, + "lldpmonitor": { "state": "running", "pid": 1323, "restarts": 0 }, + "mdnsmonitor": { "state": "running", "restarts": 0 }, + "dbusmonitor": { "state": "running" }, + "fswatcher": { "state": "running", "watches": 12 } + }, + "models": { + "ietf-routing:routing": { "last_updated": "2026-03-27T10:22:01Z", "size_bytes": 15012 }, + "ieee802-dot1ab-lldp:lldp": { "last_updated": "2026-03-27T10:22:00Z", "size_bytes": 4312 }, + "infix-services:mdns": { "last_updated": "2026-03-27T10:21:58Z", "size_bytes": 2088 } + } +} +``` + +#### Metrics Tracked + +For each subsystem: state (`running`/`restarting`/`failed`/`disabled`), restart counters, and PID for managed subprocesses. The route health entry is attributed to **`zapiwatcher`** (not NLMonitor), because routes are sourced from zebra via ZAPI. For each model: `last_updated` and `size_bytes`. + +#### Log Levels + +yangerd uses structured logging (Go `slog` package). The default log level is `info`. 
Setting `YANGERD_LOG_LEVEL=debug` enables per-event netlink message logging and per-request IPC tracing. The `warn` level logs ENOBUFS recoveries and collector timeouts. The `error` level logs collector failures and IPC protocol violations. + +### 4.9 Security Considerations + +Security is a primary concern given that yangerd handles sensitive network state and runs with elevated privileges on some platforms. + +#### Socket Permissions + +The Unix domain socket at `/run/yangerd.sock` is created with mode `0660` and owned by `root:yangerd`. Only processes running as root or in the `yangerd` group can connect. In practice, the only consumer is statd. The socket path is not configurable via the IPC protocol itself — it is set at daemon startup via the `YANGERD_SOCK` environment variable or the compile-time default. + +#### Linux Capabilities + +yangerd drops all capabilities at startup except those explicitly required: + +- `CAP_NET_ADMIN` — required to open netlink sockets and subscribe to multicast groups. +- `CAP_SYS_RAWIO` — required only when the `ietf-hardware` collector invokes `dmidecode` to read SMBIOS data from `/dev/mem`. This capability is granted via the Finit service file (`cap_sys_rawio+ep`) and is only needed on physical hardware; virtual machines can omit it. + +No other capabilities are retained. yangerd does not need `CAP_SYS_ADMIN`, `CAP_NET_RAW`, or any filesystem-related capabilities. + +#### Runtime User and Process Model + +yangerd runs as `root` but drops all capabilities except those listed above via `cap_set_proc()` at startup. Running as root is required because: +- Netlink multicast group subscriptions require `CAP_NET_ADMIN`, which must be in the process's effective set. +- The ZAPI watcher connects to `/var/run/frr/zserv.api`, which is owned by `root:frr` with mode `0660`. yangerd must be in the `frr` group. +- The IPC socket at `/run/yangerd.sock` is created with `root:yangerd` ownership. 
+ +The Finit service file grants the minimum required capabilities: + +``` +# /etc/finit.d/yangerd.conf +service [S12345] env:-/etc/default/yangerd \ + log:prio:daemon.notice,tag:yangerd \ + yangerd -- yangerd operational data daemon +``` + +Subprocesses (`ip batch`, `bridge batch`, `iw event`) inherit yangerd's reduced capability set. They do not require additional capabilities beyond what the parent provides. + +#### Trust Boundary + +The trust boundary is the Unix domain socket. yangerd trusts that any process connecting to the socket is authorized (enforced by filesystem permissions). It does not perform authentication or authorization on individual IPC requests. All IPC payloads are validated for size (maximum 64 KiB request, configurable) and structural correctness (must be valid JSON with a `command` field) before processing, preventing resource exhaustion and malformed-input attacks. + +The absence of CGo eliminates an entire class of memory-safety vulnerabilities (buffer overflows, use-after-free, format string attacks) that would be present if yangerd linked against C libraries. The Go runtime's garbage collector and bounds checking provide defense-in-depth for the data processing pipeline. + +## 5. Data Source Matrix + +Every operational YANG leaf collected by yangerd is listed below with its data source, the collection method (ip batch query via persistent `ip -json -force -batch -` subprocess, or other tool), whether collection is reactive (event-driven via `vishvananda/netlink` subscriptions) or polling-based, and any known gaps or caveats. 
+ +### ietf-interfaces — Interface Operational State + +| YANG Path | Source | Go Method | Reactive/Polling | Notes | +|-----------|--------|-----------|-----------------|-------| +| `.../interface/oper-status` | RTNLGRP_LINK (IFF_UP \| IFF_RUNNING) | `ip -json -batch` | REACTIVE | Kernel delivers on every link state change | +| `.../interface/phys-address` | RTNLGRP_LINK (IFLA_ADDRESS) | `ip -json -batch` | REACTIVE | MAC address from RTM_NEWLINK | +| `.../interface/if-index` | RTNLGRP_LINK (ifi_index) | `ip -json -batch` | REACTIVE | | +| `.../interface/statistics/in-octets` | RTNLGRP_LINK (stats64.rx_bytes) | `ip -json -batch` | REACTIVE | Full stats64 in `ip -json -s link show` output | +| `.../interface/statistics/out-octets` | RTNLGRP_LINK (stats64.tx_bytes) | `ip -json -batch` | REACTIVE | | +| `.../interface/statistics/in-unicast-pkts` | RTNLGRP_LINK (stats64.rx_packets) | `ip -json -batch` | REACTIVE | | +| `.../interface/statistics/out-unicast-pkts` | RTNLGRP_LINK (stats64.tx_packets) | `ip -json -batch` | REACTIVE | | +| `.../interface/statistics/in-errors` | RTNLGRP_LINK (stats64.rx_errors) | `ip -json -batch` | REACTIVE | | +| `.../interface/statistics/out-errors` | RTNLGRP_LINK (stats64.tx_errors) | `ip -json -batch` | REACTIVE | | +| `.../interface/statistics/in-discards` | RTNLGRP_LINK (stats64.rx_dropped) | `ip -json -batch` | REACTIVE | | +| `.../interface/statistics/out-discards` | RTNLGRP_LINK (stats64.tx_dropped) | `ip -json -batch` | REACTIVE | | +| `.../ietf-ip:ipv4/address/ip` | RTNLGRP_IPV4_IFADDR | `ip -json -batch` | REACTIVE | Full address re-read via `addr show dev <ifname>` on any RTM_NEWADDR or RTM_DELADDR; event is trigger only, not parsed for data | +| `.../ietf-ip:ipv4/address/prefix-length` | RTNLGRP_IPV4_IFADDR | `ip -json -batch` | REACTIVE | Full address re-read on any addr event (add or remove) | +| `.../ietf-ip:ipv6/address/ip` | RTNLGRP_IPV6_IFADDR | `ip -json -batch` | REACTIVE | Full address re-read via `addr show dev <ifname>` on any
RTM_NEWADDR or RTM_DELADDR | +| `.../ietf-ip:ipv6/address/prefix-length` | RTNLGRP_IPV6_IFADDR | `ip -json -batch` | REACTIVE | Full address re-read on any addr event (add or remove) | +| `.../ietf-ip:ipv6/address/status` | RTNLGRP_IPV6_IFADDR | `ip -json -batch` | REACTIVE | Full address re-read on any addr event; `ip -json addr show` includes preferred/deprecated status in JSON output | +| `.../ietf-ip:ipv4/neighbor/ip` | RTNLGRP_NEIGH (AF_INET) | `ip -json -batch` | REACTIVE | Full neighbor re-read via `neigh show dev ` on any RTM_NEWNEIGH or RTM_DELNEIGH; event is trigger only | +| `.../ietf-ip:ipv4/neighbor/link-layer-address` | RTNLGRP_NEIGH (AF_INET) | `ip -json -batch` | REACTIVE | Full neighbor re-read on any neigh event (add or remove); NDA_LLADDR attribute | +| `.../ietf-ip:ipv4/neighbor/origin` | RTNLGRP_NEIGH (NUD flags) | `ip -json -batch` | REACTIVE | Full neighbor re-read on any neigh event; dynamic/static from state field in JSON | +| `.../ietf-ip:ipv6/neighbor/ip` | RTNLGRP_NEIGH (AF_INET6) | `ip -json -batch` | REACTIVE | Full neighbor re-read via `neigh show dev ` on any RTM_NEWNEIGH or RTM_DELNEIGH; NDP table | +| `.../ietf-ip:ipv6/neighbor/link-layer-address` | RTNLGRP_NEIGH (AF_INET6) | `ip -json -batch` | REACTIVE | Full neighbor re-read on any neigh event (add or remove) | +| `.../infix-ethernet-interface:ethernet/speed` | ETHNL_MCGRP_MONITOR (ETHTOOL_MSG_LINKMODES_NTF) + RTM_NEWLINK | mdlayher/ethtool + mdlayher/genetlink | REACTIVE (ethtool genetlink monitor) | Reactive via ethmonitor subscription; also re-queried on every RTM_NEWLINK (link up/down) via `ethmonitor.RefreshInterface()` since ETHNL_MCGRP_MONITOR does not fire on link state changes | +| `.../infix-ethernet-interface:ethernet/duplex` | ETHNL_MCGRP_MONITOR (ETHTOOL_MSG_LINKMODES_NTF) + RTM_NEWLINK | mdlayher/ethtool + mdlayher/genetlink | REACTIVE (ethtool genetlink monitor) | Reactive via ethmonitor subscription; also re-queried on RTM_NEWLINK via 
`ethmonitor.RefreshInterface()` | +| `.../infix-ethernet-interface:ethernet/auto-negotiation` | ETHNL_MCGRP_MONITOR (ETHTOOL_MSG_LINKMODES_NTF) + RTM_NEWLINK | mdlayher/ethtool + mdlayher/genetlink | REACTIVE (ethtool genetlink monitor) | Reactive via ethmonitor subscription; also re-queried on RTM_NEWLINK via `ethmonitor.RefreshInterface()` | +| `.../infix-interfaces:bridge/stp-state` | RTM_NEWLINK (IFLA_BRPORT_STATE) | `bridge -json -batch` | REACTIVE (netlink) | STP port state change arrives via LinkUpdate on bridge port; triggers bridge batch re-query. 0=disabled,1=listening,2=learning,3=forwarding,4=blocking | +| `.../infix-interfaces:bridge/vlan` | RTM_NEWLINK (VLAN attributes on bridge port) | `bridge -json -batch` | REACTIVE (netlink) | VLAN changes arrive via LinkUpdate; trigger `vlan show` via bridge batch | +| `.../infix-interfaces:wifi/ssid` | `iw event -t` + `iw dev info` | iwmonitor + exec.Command | REACTIVE (iw event) | Re-queried on `connected`, `ch_switch_started_notify` events | +| `.../infix-interfaces:wifi/frequency` | `iw event -t` + `iw dev info` | iwmonitor + exec.Command | REACTIVE (iw event) | Re-queried on `connected`, `ch_switch_started_notify` events | +| `.../infix-interfaces:wifi/bitrate` | `iw event -t` + `iw dev station dump` | iwmonitor + exec.Command | REACTIVE (iw event) | Re-queried on `new station`, `connected` events | +| `.../infix-interfaces:wifi/signal-strength` | `iw event -t` + `iw dev link` (via `iw.py link`) | iwmonitor + exec.Command | REACTIVE (iw event) | Signal strength in dBm; available in station mode only (not AP mode). Re-queried on `connected`, `disconnected`, `signal` events. Source is `iw dev link`, NOT `/proc/net/wireless` (which is empty on modern cfg80211/nl80211 drivers). | +| `.../infix-interfaces:wifi/station/scan-results` | `wpa_cli -i scan_result` | exec.Command | POLLING 10 seconds | Available scan results from wpa_supplicant; returns BSSID, frequency, signal, flags, SSID per network. 
Only populated in station mode. | +| `.../infix-interfaces:wireguard/peer/endpoint` | wgctrl (generic netlink WG_CMD_GET_DEVICE) | wgctrl.Client | POLLING 30 seconds | WireGuard kernel module required | +| `.../infix-interfaces:wireguard/peer/rx-bytes` | wgctrl | wgctrl.Client | POLLING 30 seconds | | +| `.../infix-interfaces:wireguard/peer/tx-bytes` | wgctrl | wgctrl.Client | POLLING 30 seconds | | +| `.../interface/last-change` | RTNLGRP_LINK (RTM_NEWLINK with oper-status change) | `time.Now()` at event receipt | REACTIVE | Timestamp recorded when link event handler detects oper-status transition | + +### ietf-routing — Routing State + +| YANG Path | Source | Go Method | Reactive/Polling | Notes | +|-----------|--------|-----------|-----------------|-------| +| `.../routing/ribs/rib[name='ipv4-master']/routes/route` | ZAPI `REDISTRIBUTE_ROUTE_ADD` (streaming) | `zapiwatcher.ZAPIWatcher` (gobgp/v4/pkg/zebra) | REACTIVE (ZAPI watcher) | Route data sourced from zebra's zserv socket via ZAPI v6 redistribution subscription. The ZAPI watcher receives incremental route add/delete messages covering ALL route types (kernel, connected, static, OSPF, RIP) with FRR-enriched metadata (protocol, distance, metric, next-hops, active/installed flags) -- including routes in zebra's RIB not installed in the kernel FIB. Replaces previous `vtysh`-based collection. See Section 4.1octies. | +| `.../routing/ribs/rib[name='ipv6-master']/routes/route` | ZAPI `REDISTRIBUTE_ROUTE_ADD` (streaming) | `zapiwatcher.ZAPIWatcher` (gobgp/v4/pkg/zebra) | REACTIVE (ZAPI watcher) | Same as IPv4 but for IPv6 routes. ZAPI subscription covers both address families. See Section 4.1octies. 
| +| `.../control-plane-protocols/ospf/neighbors` | exec `vtysh -c 'show ip ospf neighbor json'` | exec.Command | POLLING 10 seconds | FRRouting must be running | +| `.../control-plane-protocols/ospf/areas/interfaces` | exec `vtysh -c 'show ip ospf interface json'` | exec.Command | POLLING 10 seconds | FRR exposes this state via request/response CLI only; no streaming API | +| `.../control-plane-protocols/rip/routes` | exec `vtysh -c 'show ip rip json'` | exec.Command | POLLING 10 seconds | FRR exposes this state via request/response CLI only; no streaming API | +| `.../control-plane-protocols/bfd/sessions` | exec `vtysh -c 'show bfd peers json'` | exec.Command | POLLING 10 seconds | FRR exposes this state via request/response CLI only; no streaming API | + +### ietf-hardware — Hardware Components + +| YANG Path | Source | Go Method | Reactive/Polling | Notes | +|-----------|--------|-----------|-----------------|-------| +| `.../hardware/component[class='sensor']/sensor-data/value` | /sys/class/hwmon/*/temp*_input, fan*_input, in*_input | collector/hardware.go (`os.ReadFile`) | POLLING 10 seconds | Millidegree Celsius, RPM, millivolt raw values. sysfs pseudo-files do not emit inotify events; polling is the only correct method. 
| +| `.../hardware/component[class='sensor']/sensor-data/oper-status` | /sys/class/hwmon/*/temp*_fault | collector/hardware.go (`os.ReadFile`) | POLLING 10 seconds | Fault flag read alongside sensor values | +| `.../hardware/component[class='chassis']/mfg-name` | exec `dmidecode -s system-manufacturer` | exec.Command | POLLING 300 seconds | Rarely changes; cached after first read | +| `.../hardware/component[class='chassis']/model-name` | exec `dmidecode -s system-product-name` | exec.Command | POLLING 300 seconds | | +| `.../hardware/component[class='chassis']/serial-num` | exec `dmidecode -s system-serial-number` | exec.Command | POLLING 300 seconds | | + +### ietf-system — System State + +| YANG Path | Source | Go Method | Reactive/Polling | Notes | +|-----------|--------|-----------|-----------------|-------| +| `.../system/hostname` | /etc/hostname | fswatcher inotify (`readHostname`) | REACTIVE (fswatcher) | `UseMerge: true` into `ietf-system:system`; real file, reliable inotify | +| `.../system/clock/timezone-name` | /etc/localtime (symlink target) | fswatcher `WatchSymlink` (`readTimezone`) | REACTIVE (fswatcher) | `UseMerge: true`; `WatchSymlink()` watches `/etc/` dir; strips zoneinfo prefix from symlink target | +| `.../system/clock/timezone-utc-offset` | /etc/localtime (symlink target) | fswatcher `WatchSymlink` (`readTimezone`) | REACTIVE (fswatcher) | Only populated for Etc/GMT±N zones; POSIX sign inversion applied | +| `.../system-state/platform/os-name` | `/etc/os-release` | `BootPlatform()` in `internal/collector/boot.go` | BOOT-ONCE | Read once at startup; merged into tree and never re-read | +| `.../system-state/platform/os-release` | `exec uname -r` | `BootPlatform()` in `internal/collector/boot.go` | BOOT-ONCE | | +| `.../system-state/platform/machine` | `exec uname -m` | `BootPlatform()` in `internal/collector/boot.go` | BOOT-ONCE | | +| `.../system-state/clock/current-datetime` | `time.Now()` | `LiveSystemState()` (on-demand provider) | 
ON-DEMAND | Computed at IPC request time; never cached/stale | +| `.../system-state/clock/boot-datetime` | `/proc/uptime` + `time.Now()` | `LiveSystemState()` (on-demand provider) | ON-DEMAND | Computed at IPC request time | +| `.../system-state/infix-system:resource-usage/memory/total` | `/proc/meminfo` MemTotal | `LiveSystemState()` (on-demand provider) | ON-DEMAND | Read from /proc/meminfo at IPC request time | +| `.../system-state/infix-system:resource-usage/memory/free` | `/proc/meminfo` MemFree | `LiveSystemState()` (on-demand provider) | ON-DEMAND | | +| `.../system-state/infix-system:resource-usage/memory/available` | `/proc/meminfo` MemAvailable | `LiveSystemState()` (on-demand provider) | ON-DEMAND | | +| `.../system-state/infix-system:resource-usage/load-average/load-1min` | `/proc/loadavg` field 1 | `LiveSystemState()` (on-demand provider) | ON-DEMAND | | +| `.../system-state/infix-system:resource-usage/load-average/load-5min` | `/proc/loadavg` field 2 | `LiveSystemState()` (on-demand provider) | ON-DEMAND | | +| `.../system-state/infix-system:resource-usage/load-average/load-15min` | `/proc/loadavg` field 3 | `LiveSystemState()` (on-demand provider) | ON-DEMAND | | +| `.../system-state/infix-system:resource-usage/filesystem[name='/']/...` | `syscall.Statfs("/")` | `LiveSystemState()` (on-demand provider) | ON-DEMAND | Size, used, available in kB | +| `.../system-state/infix-system:resource-usage/filesystem[name='/var']/...` | `syscall.Statfs("/var")` | `LiveSystemState()` (on-demand provider) | ON-DEMAND | | +| `.../system-state/infix-system:resource-usage/filesystem[name='/cfg']/...` | `syscall.Statfs("/cfg")` | `LiveSystemState()` (on-demand provider) | ON-DEMAND | | +| `.../system/users/user/name` | `/etc/passwd`, `/etc/shadow` | fswatcher `Watch("/etc/shadow")` + `readUsers()` | REACTIVE (fswatcher) | Triggered on `/etc/shadow` changes; reads both passwd and shadow | +| `.../system/users/user/ssh-key` | `/var/run/sshd/.keys` | fswatcher 
`WatchDir("/var/run/sshd/")` + `readUsers()` | REACTIVE (fswatcher) | `WatchDir()` fires on any file create/write/remove in the directory | +| `.../system-state/infix-system:software/...` | `exec rauc status --detailed --output-format=json`, `exec rauc-installation-status` | `BootSoftware()` in `internal/collector/boot.go` | BOOT-ONCE | RAUC slot status, installation progress; read once at startup | +| `.../system-state/infix-system:software/boot-order` | `exec fw_printenv BOOT_ORDER` / `exec grub-editenv list` | fswatcher `Watch("/mnt/aux/grub/grubenv")` + `Watch("/mnt/aux/uboot.env")` → `ReadBootOrder()` | REACTIVE (fswatcher) | File change triggers re-run of boot order commands only (not full RAUC) | + +### ietf-ntp — NTP State + +| YANG Path | Source | Go Method | Reactive/Polling | Notes | +|-----------|--------|-----------|-----------------|-------| +| `.../ntp-state/association/address` | chrony cmdmon protocol (sources request) | `github.com/facebook/time/ntp/chrony` | POLLING 60 seconds | Unix socket `/var/run/chrony/chronyd.sock` | +| `.../ntp-state/association/stratum` | chrony cmdmon protocol (sources request) | `github.com/facebook/time/ntp/chrony` | POLLING 60 seconds | | +| `.../ntp-state/association/offset` | chrony cmdmon protocol (tracking request) | `github.com/facebook/time/ntp/chrony` | POLLING 60 seconds | | +| `.../ntp-state/association/synchronized` | chrony cmdmon protocol (tracking request) | `github.com/facebook/time/ntp/chrony` | POLLING 60 seconds | | + +### ieee802-dot1ab-lldp — LLDP Neighbors + +| YANG Path | Source | Go Method | Reactive/Polling | Notes | +|-----------|--------|-----------|-----------------|-------| +| `.../lldp/ports/port/neighbors/neighbor/chassis-id` | `lldpcli -f json0 watch` (`lldp-added`/`lldp-updated`/`lldp-deleted`) | persistent subprocess monitor (`internal/lldpmonitor/`) | REACTIVE | Blank-line-delimited pretty JSON objects; parsed by framing-aware stream parser | +| 
`.../lldp/ports/port/neighbors/neighbor/port-id` | `lldpcli -f json0 watch` | persistent subprocess monitor (`internal/lldpmonitor/`) | REACTIVE | `json0` structural stability (arrays always arrays) | +| `.../lldp/ports/port/neighbors/neighbor/ttl` | `lldpcli -f json0 watch` | persistent subprocess monitor (`internal/lldpmonitor/`) | REACTIVE | Full neighbor payload in each event | +| `.../lldp/ports/port/neighbors/neighbor/system-name` | `lldpcli -f json0 watch` | persistent subprocess monitor (`internal/lldpmonitor/`) | REACTIVE | | +| `.../lldp/ports/port/neighbors/neighbor/system-capabilities` | `lldpcli -f json0 watch` | persistent subprocess monitor (`internal/lldpmonitor/`) | REACTIVE | | + +### infix-containers — Container State (Feature-Gated) + +**Feature gate**: This data source is only collected when `YANGERD_ENABLE_CONTAINERS=true`. When container support is not included in the Infix build, the container collector is not started and these paths are absent from the tree. + +| YANG Path | Source | Go Method | Reactive/Polling | Notes | +|-----------|--------|-----------|-----------------|-------| +| `.../containers/container/name` | exec `podman ps --format json` | exec.Command | POLLING 10 seconds | **Phase 2**: container namespace handling deferred | +| `.../containers/container/state` | exec `podman ps --format json` | exec.Command | POLLING 10 seconds | Phase 2 | +| `.../containers/container/image` | exec `podman ps --format json` | exec.Command | POLLING 10 seconds | Phase 2 | + +### infix-dhcp-server — DHCP Leases + +| YANG Path | Source | Go Method | Reactive/Polling | Notes | +|-----------|--------|-----------|-----------------|-------| +| `.../dhcp-server/leases/lease/ip-address` | /var/lib/misc/dnsmasq.leases | D-Bus Monitor `refreshDHCP()` | REACTIVE (D-Bus) | dnsmasq `DHCPLeaseAdded`/`Deleted`/`Updated` signals | +| `.../dhcp-server/leases/lease/hw-address` | /var/lib/misc/dnsmasq.leases | D-Bus Monitor `refreshDHCP()` | REACTIVE (D-Bus) | | 
+| `.../dhcp-server/leases/lease/hostname` | /var/lib/misc/dnsmasq.leases | D-Bus Monitor `refreshDHCP()` | REACTIVE (D-Bus) | | +| `.../dhcp-server/leases/lease/expire` | /var/lib/misc/dnsmasq.leases | D-Bus Monitor `refreshDHCP()` | REACTIVE (D-Bus) | UNIX timestamp in lease file | + +### infix-firewall — Firewall State + +| YANG Path | Source | Go Method | Reactive/Polling | Notes | +|-----------|--------|-----------|-----------------|-------| +| `.../firewall/default-zone` | firewalld D-Bus `getDefaultZone()` | D-Bus Monitor `refreshFirewall()` | REACTIVE (D-Bus) | firewalld `Reloaded` signal + `NameOwnerChanged` | +| `.../firewall/log-denied` | firewalld D-Bus `getLogDenied()` | D-Bus Monitor `refreshFirewall()` | REACTIVE (D-Bus) | | +| `.../firewall/lockdown` | firewalld D-Bus `queryPanicMode()` | D-Bus Monitor `refreshFirewall()` | REACTIVE (D-Bus) | | +| `.../firewall/zones/zone` | firewalld D-Bus `getActiveZones()` + `getZoneSettings2()` | D-Bus Monitor `refreshFirewall()` | REACTIVE (D-Bus) | Per-zone: interfaces, sources, services, forwards, rich rules | +| `.../firewall/policies/policy` | firewalld D-Bus `getPolicies()` + `getPolicySettings()` | D-Bus Monitor `refreshFirewall()` | REACTIVE (D-Bus) | Per-policy: ingress/egress zones, action, priority, rich rules | +| `.../firewall/services/service` | firewalld D-Bus `listServices()` + `getServiceSettings2()` | D-Bus Monitor `refreshFirewall()` | REACTIVE (D-Bus) | Per-service: port/protocol definitions | + +### 5.9bis infix-services — mDNS Neighbors + +| YANG Path | Source | Go Method | Reactive/Polling | Notes | +|-----------|--------|-----------|-----------------|-------| +| `/infix-services:mdns/neighbors/neighbor/hostname` | Avahi D-Bus `ServiceBrowser`/`ServiceResolver` signals | `internal/mdnsmonitor/` via `godbus/dbus/v5` | REACTIVE (D-Bus) | Keyed by hostname | +| `/infix-services:mdns/neighbors/neighbor/address` | Avahi D-Bus resolver results | `internal/mdnsmonitor/` | REACTIVE (D-Bus) | 
Leaf-list of resolved addresses | +| `/infix-services:mdns/neighbors/neighbor/last-seen` | Event timestamp at signal handling | `time.Now()` in mDNS monitor | REACTIVE (D-Bus) | Updated on add/update events | +| `/infix-services:mdns/neighbors/neighbor/service/name` | Avahi service instance metadata | `internal/mdnsmonitor/` | REACTIVE (D-Bus) | Service list key | +| `/infix-services:mdns/neighbors/neighbor/service/type` | Avahi service type | `internal/mdnsmonitor/` | REACTIVE (D-Bus) | e.g. `_ssh._tcp` | +| `/infix-services:mdns/neighbors/neighbor/service/port` | Avahi resolver payload | `internal/mdnsmonitor/` | REACTIVE (D-Bus) | | +| `/infix-services:mdns/neighbors/neighbor/service/txt` | Avahi TXT records | `internal/mdnsmonitor/` | REACTIVE (D-Bus) | Leaf-list | + +### Summary + +| Category | Leaf Count | Strategy | +|----------|-----------|----------| +| REACTIVE (Monitor/Watcher) | 59 | `vishvananda/netlink` subscriptions (link, addr, neigh channels + bridge FDB/VLAN/MDB/STP events as triggers for `bridge -json -batch -` re-reads), ZAPI watcher (streaming route redistribution from zebra via zserv socket), D-Bus Monitor (dnsmasq DHCP lease signals + firewalld reload signals), LLDP monitor (`lldpcli -f json0 watch`), mDNS monitor (Avahi D-Bus), `iw event -t`, ethtool genetlink monitor (`ETHNL_MCGRP_MONITOR`), and `fswatcher` (inotify for procfs forwarding flags, `/etc/hostname`, and `/etc/localtime` via `WatchSymlink`). | +| ON-DEMAND (live provider) | 11 | `LiveSystemState()` provider on `ietf-system:system-state`: clock (current-datetime, boot-datetime), memory (total, free, available), load average (1/5/15 min), filesystem usage (/, /var, /cfg). Computed fresh at every IPC request — zero staleness. 
| +| POLLING 10 seconds | 6 | FRRouting (OSPF/RIP/BFD) via `vtysh` JSON queries | +| POLLING 10 seconds | 6 | Hardware sensors (hwmon temperature, fan, voltage, fault — sysfs files do not support inotify), container state (Phase 2, feature-gated: `YANGERD_ENABLE_CONTAINERS`), WiFi scan results via `wpa_cli` (feature-gated: `YANGERD_ENABLE_WIFI`) | +| POLLING 30 seconds | 3 | Ethtool statistics (counters only -- speed/duplex/autoneg now reactive), WireGuard peer data | +| POLLING 60 seconds | 4 | NTP state (chrony cmdmon protocol) | +| POLLING 300 seconds | 9 | Hardware inventory (DMI chassis data), OS platform info, users, software status (RAUC, initctl, boot order), DNS resolvers | + +### 5.10bis Polling Justification Notes + +The remaining polling sources are intentionally polling because no reliable subscription/event interface exists for the required data: + +- **WireGuard**: no kernel event stream for peer stats (`last-handshake`, `rx/tx bytes`); these values are available via `WG_CMD_GET_DEVICE` snapshots only. +- **FRR OSPF/RIP/BFD protocol state**: protocol internals are exposed through `vtysh show ...` request/response commands; FRR does not provide a stable streaming API for these views. +- **NTP (chrony)**: cmdmon protocol is strictly request/response (confirmed in revision 0.20); no subscribe mechanism. +- **Hardware sensors**: sysfs pseudo-files do not emit inotify modify events (confirmed in revision 0.14). +- **Ethtool statistics counters**: `ETHNL_MCGRP_MONITOR` emits setting-change notifications, not counter-change notifications. + +**Startup note**: All netlink-reactive data paths perform an initial full dump on daemon startup, using the subscribe-first-then-list pattern (subscriptions established BEFORE dump, following Antrea's approach). Link, address, and neighbor data is populated by writing bulk query commands (`ip -s -d -j link show`, `ip -j addr show`, `ip -j neigh show`) to the persistent `ip -json -force -batch -` subprocess. 
Route data is populated by the ZAPI watcher, which connects to zebra's zserv socket and receives a full dump of all routes matching the subscribed redistribution types (kernel, connected, static, OSPF, RIP) upon initial connection -- see Section 4.1octies. This replaces the previous `vtysh`-based initial route dump. OSPF, RIP, and BFD protocol-specific data is still collected via `vtysh` polling (unchanged). The initial dump populates the tree before the NLMonitor's select loop begins processing incremental netlink events. Without it, the tree would appear empty until the first kernel event fires for each interface. + +## Module-by-Module Mapping + +For each existing Python yanger script, this section documents the external commands and data sources it uses, and how those will be reimplemented as Go collector functions in yangerd. Each subsection covers what the Python code does, every external process it spawns (or file it reads), and the equivalent Go approach. + +--- + +### ietf_interfaces — `python/yanger/ietf_interfaces/` → `internal/collector/interfaces.go` + `internal/monitor/` + +**Python approach**: The package entry point (`__init__.py`) calls `link.interfaces()` and `container.interfaces()` to build the full `ietf-interfaces:interfaces` list. `link.py` delegates per-interface type handling to `ip.py`, `ethernet.py`, `bridge.py`, `wifi.py`, `wireguard.py`, `vlan.py`, and `lag.py`. Interface and address lists are pre-fetched by `common.py` and cached for the duration of the invocation.
+ +**External commands invoked**: + +| Command | Invoked in | Purpose | +|---------|-----------|---------| +| `ip -s -d -j link show [dev <ifname>]` | `common.py:iplinks()` | JSON dump of all link attributes, stats64, linkinfo (type, slave data), flags, operstate | +| `ip -j addr show [dev <ifname>]` | `common.py:ipaddrs()` | JSON dump of all interface addresses with family, prefix, protocol origin | +| `ip -j netns exec <ns> ip -s -d -j link show` | `common.py:iplinks(netns=...)` | Same as above but inside a container network namespace | +| `ip -j netns exec <ns> ip -j addr show` | `common.py:ipaddrs(netns=...)` | Address list inside a container network namespace | +| `ip -j netns list` | `container.py:ip_netns_list()` | Enumerate all named network namespaces (for container interfaces) | +| `ls /sys/class/net/<ifname>/wireless/` | `link.py:iplink2yang_type()` | Detect whether an `ether` link is a WiFi interface | + +**Reads** `/proc/sys/net/ipv6/conf/<ifname>/mtu` (ip.py) for IPv6 MTU. + +**Go replacement**: +- `ip -s -d -j link show` -> `ip -json -force -batch -` query (write `link show -s -d` to stdin); `vishvananda/netlink` `LinkSubscribeWithOptions` for events; stats64, operstate, flags, linkinfo all in JSON output +- `ip -j addr show` -> `ip -json -force -batch -` query (write `addr show` to stdin); `vishvananda/netlink` `AddrSubscribeWithOptions` for events +- `/proc/sys/net/ipv6/conf/<ifname>/mtu` → `os.ReadFile()` directly; no process spawn needed +- Wireless detection → `os.Stat("/sys/class/net/<ifname>/wireless")` (dir exists check) +- Container namespace traversal → deferred to Phase 2 (requires setns syscall or `ip netns exec` via `exec.Command`) + +--- + +### ietf_interfaces/ethernet.py → `internal/collector/ethernet.go` + +**Python approach**: Uses `ethtool --json` (twice per interface) to obtain speed/duplex/auto-negotiation and extended per-group statistics (eth-mac, rmon counters). Both calls emit JSON; the script maps counter names to YANG leaf names.
+ +**External commands invoked**: + +| Command | Purpose | +|---------|---------| +| `ethtool --json ` | Speed (Mbps), duplex, auto-negotiation enable flag | +| `ethtool --json -S --all-groups` | Per-group hardware counters: `FramesTransmittedOK`, `FrameCheckSequenceErrors`, `OctetsReceivedOK`, etc. | + +**Go replacement**: +- Both `ethtool` calls → `mdlayher/ethtool` Go library (uses generic netlink ETHTOOL_GENL family; no subprocess needed) +- Speed/duplex/autoneg → **REACTIVE** via `internal/ethmonitor/` package: subscribes to `ETHNL_MCGRP_MONITOR` genetlink multicast group; on `ETHTOOL_MSG_LINKINFO_NTF` or `ETHTOOL_MSG_LINKMODES_NTF`, re-queries via `ethtool.Client.LinkInfo()` + `ethtool.Client.LinkMode()` and writes updated values to tree immediately. Additionally, `ethmonitor.RefreshInterface()` is called by the link event handler on every RTM_NEWLINK event, because `ETHNL_MCGRP_MONITOR` does NOT fire on link up/down — only on explicit settings renegotiation. This cross-subsystem trigger ensures speed/duplex/autoneg converge within milliseconds of link state changes. +- Extended stats → `ethtool.Client.Stats()` keyed by counter name string (no kernel notification available for statistics — remains **POLLING** at 30-second interval) +- Strategy: **HYBRID** — reactive for settings (speed, duplex, autoneg) via ethmonitor (unconditionally active on kernel 6.18), polling for statistics (counters have no kernel notification). + +--- + +### ietf_interfaces/bridge.py → `internal/collector/bridge.go` + +**Python approach**: Queries bridge VLAN tables, STP state from `mstpctl`, and multicast group membership from `mctl`. STP data is fetched per-bridge and per-port using `mstpctl showtree`, `showbridge`, and `showportdetail`. Multicast data is fetched with `mctl show igmp json` and `bridge mdb show -j dev
<bridge>`. VLAN global state is fetched with `bridge vlan global show dev <bridge>
` and `bridge vlan show -j`. + +**External commands invoked**: + +| Command | Purpose | +|---------|---------| +| `bridge -j vlan show` | VLAN membership table for all ports (PVID, tagged/untagged flags) | +| `bridge -j vlan global show dev <bridge>` | Per-VLAN global bridge settings (vlan list for VID population) | +| `bridge -j mdb show dev
<bridge> [vid <vid>]` | Multicast group database (MDB) entries per bridge and VLAN | +| `mstpctl -f json showbridge <bridge>` | STP bridge state: force-protocol, hello-time, forward-delay, max-age, tx-hold-count | +| `mstpctl -f json showtree <bridge> <mstid>` | STP tree state: priority, bridge-id, root-port, topology-change | +| `mstpctl -f json showportdetail <bridge> <port>` | Per-port STP state: edge, external-path-cost, BPDU statistics | +| `mstpctl -f json showtreeport <bridge> <port> <mstid>` | Per-port per-tree STP state: port-id, role, designated bridge/port | +| `mctl -p show igmp json` | IGMP/MLD querier state per bridge/VLAN (mode: off/proxy/auto, query-interval) | + +**Go replacement**: +- `bridge vlan show -j`, `bridge mdb show -j`, `bridge fdb show br <bridge>
` -> persistent `bridge -json -batch -` query (write `vlan show`, `mdb show`, or `fdb show` to stdin); bridge events arrive via `vishvananda/netlink` channels (FDB via `NeighSubscribeWithOptions`, VLAN via `LinkSubscribeWithOptions`, MDB via raw netlink `RTNLGRP_MDB`, STP via `LinkSubscribeWithOptions` detecting `IFLA_BRPORT_STATE` in `IFLA_PROTINFO`). All events are triggers only -- full state is re-read via bridge batch. +- `mstpctl` and `mctl` calls → `exec.Command` with JSON parsing +- Strategy: **REACTIVE** (netlink event triggers + `bridge -json -batch -` re-reads); initial full state populates tree on startup via bridge batch queries + +--- + +### ietf_interfaces/wifi.py → `internal/collector/wifi.go` + +**Python approach**: Delegates to an on-device helper script `/usr/libexec/infix/iw.py`. For AP mode, gets interface info and connected station list. For station mode, gets link info and scan results from `wpa_cli`. Mode detection is done by calling `iw.py info ` and reading the `iftype` field. 
+ +**External commands invoked**: + +| Command | Purpose | +|---------|---------| +| `/usr/libexec/infix/iw.py info <ifname>` | Interface mode (iftype), SSID, channel, TX power — wraps `iw dev <ifname> info` | +| `/usr/libexec/infix/iw.py station <ifname>` | Connected station list (AP mode) — wraps `iw dev <ifname> station dump` | +| `/usr/libexec/infix/iw.py link <ifname>` | Link info for station mode: SSID, signal strength, RX/TX speed | +| `wpa_cli -i <ifname> scan_result` | Network scan results (BSSID, SSID, RSSI, encryption flags) | + +**Go replacement**: +- All `iw.py` calls → `exec.Command("iw", "dev", ifname, "info")`, `exec.Command("iw", "dev", ifname, "station", "dump")` with custom text parsing +- `wpa_cli scan_result` → `exec.Command("wpa_cli", "-i", ifname, "scan_result")` with text parsing +- Primary method: REACTIVE via `iw event -t` — WiFi events (station association/disassociation, connection, channel switch) trigger re-queries via short-lived `exec.Command("iw", ...)` subprocesses +- `wpa_cli` queries remain polling-based (no event interface available) + +--- + +### ietf_interfaces/wireguard.py → `internal/collector/wireguard.go` + +**Python approach**: Runs `wg show <ifname> dump` and parses the tab-delimited output. The first line is the interface (skipped); subsequent lines are peers with public key, endpoint, allowed IPs, last handshake timestamp, RX/TX bytes, and persistent keepalive.
+ +**External commands invoked**: + +| Command | Purpose | +|---------|---------| +| `wg show <ifname> dump` | Peer list with endpoint, handshake time, RX/TX bytes, allowed IPs | + +**Go replacement**: +- `wg show <ifname> dump` → `golang.zx2c4.com/wireguard/wgctrl` library (`wgctrl.Client.Device(name)`) +- Returns `wgtypes.Device` with `Peers []wgtypes.Peer` including `Endpoint`, `LastHandshakeTime`, `ReceiveBytes`, `TransmitBytes`, `AllowedIPs` — no subprocess needed +- Poll interval: 30 seconds + +--- + +### ietf_interfaces/vlan.py → part of `internal/collector/interfaces.go` + +**Python approach**: Pure data transformation — maps the `linkinfo.info_data.protocol` string (`802.1Q`, `802.1ad`) and `id` field from the `ip link show -j` JSON into YANG identity values. No external commands are invoked directly; all data comes from the already-fetched `iplinks()` result. + +**External commands invoked**: None (data comes from `ip -s -d -j link show` in `common.py:iplinks()`). + +**Go replacement**: +- VLAN linkinfo data is present in the `ip -json link show` output (linkinfo.info_data object contains protocol and id fields) +- The `linkinfo.info_data.protocol` (802.1Q/802.1ad) and `id` fields map directly to YANG `tag-type` and `id` +- No separate collector needed; extracted from the ip batch link query response inline in the interfaces handler + +--- + +### ietf_interfaces/container.py → `internal/collector/container_ifaces.go` (Phase 2, Feature-Gated) + +**Feature gate**: `YANGERD_ENABLE_CONTAINERS=true`. This collector is only active when container support is included in the Infix build. + +**Python approach**: Lists all named network namespaces via `ip -j netns list`, then for each namespace runs `ip -s -d -j link show` and `ip -j addr show` inside the namespace to find container interfaces (identified by `ifalias`). Cross-references with `podman ps` output to map interface names to container names.
+ +**External commands invoked**: + +| Command | Purpose | +|---------|---------| +| `ip -j netns list` | Enumerate Linux network namespaces (one per running container) | +| `ip netns exec <ns> ip -s -d -j link show` | Interface list and stats inside the container namespace | +| `ip netns exec <ns> ip -j addr show` | Address list inside the container namespace | +| `podman ps -a --format=json` | Running containers with network/Names for cross-referencing | + +**Go replacement (Phase 2)**: +- `ip netns list` → `os.ReadDir("/run/netns")` or `exec.Command("ip", "-j", "netns", "list")` +- Per-namespace link/addr queries → `exec.Command("ip", "netns", "exec", nsName, "ip", "-json", "-s", "-d", "link", "show")` (cannot use the shared ip batch subprocess across namespaces) +- `podman ps` → `exec.Command("podman", "ps", "-a", "--format=json")` +- Deferred to Phase 2 due to namespace traversal complexity + +--- + +### ietf_routing.py -> `internal/zapiwatcher/` + `internal/collector/routing.go` + +**Python approach**: Fetches IPv4 and IPv6 route tables from FRRouting via `vtysh`, then lists all interfaces with IPv4/IPv6 forwarding enabled via `sysctl`. The `vtysh` JSON output includes route prefix, protocol, distance, metric, next hops, and active/installed flags. + +**External commands invoked**: + +| Command | Purpose | +|---------|---------| +| `vtysh -c 'show ip route json'` | Full IPv4 RIB from FRRouting (kernel, connected, static, OSPF, RIP routes) | +| `vtysh -c 'show ipv6 route json'` | Full IPv6 RIB from FRRouting | +| `ip -j link show` | Interface list for forwarding-enabled interface enumeration | +| `sysctl -n net.ipv4.conf.<ifname>.forwarding` | IPv4 forwarding state per interface | +| `sysctl -n net.ipv6.conf.<ifname>.force_forwarding` | IPv6 forwarding state per interface | + +**Go replacement**: +- IPv4/IPv6 RIB -> `internal/zapiwatcher/` -- streaming ZAPI connection to zebra's zserv socket (Section 4.1octies). Replaces the previous netlink-triggered `vtysh` re-read approach.
The ZAPI watcher subscribes to route redistribution for kernel, connected, static, OSPF, and RIP route types and receives incremental `REDISTRIBUTE_ROUTE_ADD` / `REDISTRIBUTE_ROUTE_DEL` messages. Upon connection, zebra sends a full dump of matching routes. The watcher reconnects automatically on zebra restart with exponential backoff. It also captures routes in zebra's RIB that are not installed in the kernel FIB. +- `sysctl` forwarding checks -> `os.ReadFile("/proc/sys/net/ipv4/conf/<ifname>/forwarding")` and equivalent IPv6 path +- OSPF/RIP/BFD protocol-specific data continues to use dedicated `vtysh` commands via `exec.Command` in separate collectors (`ospf.go`, `rip.go`, `bfd.go`), poll 10 seconds + +--- + +### ietf_ospf.py → `internal/collector/ospf.go` + +**Python approach**: Queries two data sources from FRRouting. OSPF area/interface/neighbor state is fetched via an on-device helper `/usr/libexec/statd/ospf-status` (which wraps FRRouting `vtysh` calls into a structured JSON format). OSPF routes are fetched directly via `vtysh -c 'show ip ospf route json'`. + +**External commands invoked**: + +| Command | Purpose | +|---------|---------| +| `/usr/libexec/statd/ospf-status` | Structured OSPF JSON: router-id, areas, interfaces, neighbors, timers, DR/BDR | +| `vtysh -c 'show ip ospf route json'` | OSPF local RIB: prefixes, route types (intra/inter/external), next hops, metrics | + +**Go replacement**: +- `ospf-status` helper → `exec.Command("vtysh", "-c", "show ip ospf json")` + `exec.Command("vtysh", "-c", "show ip ospf neighbor detail json")` + `exec.Command("vtysh", "-c", "show ip ospf interface json")`, parse and merge +- `vtysh show ip ospf route json` → `exec.Command("vtysh", "-c", "show ip ospf route json")` +- All `vtysh` calls go through `exec.Command`; FRRouting must be running (graceful skip if unavailable) +- Poll interval: 10 seconds + +--- + +### ietf_rip.py → `internal/collector/rip.go` + +**Python approach**: Combines two FRRouting queries.
RIP status (timers, distance, default metric, interface table, neighbor table) is fetched as raw text via `vtysh -c 'show ip rip status'` and parsed with regular expressions. RIP-learned routes are fetched as JSON via `vtysh -c 'show ip route rip json'`. + +**External commands invoked**: + +| Command | Purpose | +|---------|---------| +| `vtysh -c 'show ip rip status'` | RIP global state: update/invalid/flush intervals, distance, default-metric, interface versions, neighbor last-update | +| `vtysh -c 'show ip route rip json'` | RIP-learned routes: prefix, metric, next-hop IP and interface | + +**Go replacement**: +- `vtysh show ip rip status` → `exec.Command("vtysh", "-c", "show ip rip status")`, text output parsed with regexp (same approach; no JSON alternative in FRR for this command) +- `vtysh show ip route rip json` → `exec.Command("vtysh", "-c", "show ip route rip json")`, JSON unmarshal +- Poll interval: 10 seconds; graceful skip if FRRouting not running + +--- + +### ietf_bfd_ip_sh.py → `internal/collector/bfd.go` + +**Python approach**: Fetches all BFD peer state from FRRouting via a single `vtysh` command. Filters to single-hop sessions only. Extracts discriminators, session state, timing intervals (in milliseconds, converted to microseconds for YANG), and derives detection time from multiplier × receive-interval. 
+ +**External commands invoked**: + +| Command | Purpose | +|---------|---------| +| `vtysh -c 'show bfd peers json'` | BFD peer list: peer IP, interface, local/remote discriminator, status, intervals, detect-multiplier | + +**Go replacement**: +- `vtysh show bfd peers json` → `exec.Command("vtysh", "-c", "show bfd peers json")`, JSON unmarshal +- Poll interval: 10 seconds; graceful skip if FRRouting not running + +--- + +### ietf_hardware.py → `internal/collector/hardware.go` + +**Python approach**: Assembles hardware component list from five sub-sources: (1) `/run/system.json` (board VPD data written by confd at boot), (2) USB port authorization from `/sys/bus/usb/devices/*/authorized_default`, (3) hwmon sensor files under `/sys/class/hwmon/hwmon*/temp*_input`, `fan*_input`, `in*_input`, `curr*_input`, `power*_input`, (4) thermal zones under `/sys/class/thermal/thermal_zone*/temp`, and (5) WiFi radio PHY info via the `iw.py` helper (only when `YANGERD_ENABLE_WIFI=true`) and GPS receiver status from `/run/gps-status.json` (only when `YANGERD_ENABLE_GPS=true`). 
+ +**External commands invoked**: + +| Command / File | Purpose | +|---------------|---------| +| `/run/system.json` (file read) | Board VPD (vendor, product, serial, MAC, USB port list) — written by confd at startup | +| `ls /sys/class/hwmon` | Enumerate hwmon entries | +| `/sys/class/hwmon/hwmon*/name` (file read) | Device name for sensor component naming | +| `/sys/class/hwmon/hwmon*/{temp,fan,in,curr,power}*_input` (file reads) | Raw sensor values (millidegrees, RPM, millivolts, milliamps, microwatts) | +| `/sys/class/hwmon/hwmon*/{temp,fan,in,curr,power}*_label` (file reads) | Human-readable sensor label | +| `ls /sys/class/thermal` | Enumerate thermal zones | +| `/sys/class/thermal/thermal_zone*/type` (file read) | Thermal zone type name | +| `/sys/class/thermal/thermal_zone*/temp` (file read) | Temperature in millidegrees Celsius | +| `/usr/libexec/infix/iw.py list` | List all WiFi PHY names (only when `YANGERD_ENABLE_WIFI=true`) | +| `/usr/libexec/infix/iw.py dev` | Map PHY numbers to virtual interface names (only when `YANGERD_ENABLE_WIFI=true`) | +| `/usr/libexec/infix/iw.py info <phy>` | Per-PHY capabilities: bands, driver, manufacturer, interface combinations (only when `YANGERD_ENABLE_WIFI=true`) | +| `/usr/libexec/infix/iw.py survey <ifname>` | Per-channel survey data (frequency, noise, active/busy/receive/transmit time) (only when `YANGERD_ENABLE_WIFI=true`) | +| `readlink -f /dev/gps` | Resolve GPS device symlinks to actual device paths (only when `YANGERD_ENABLE_GPS=true`) | +| `/run/gps-status.json` (file read) | Cached GPS/GNSS operational state (driver, fix mode, lat/lon/alt, satellite counts) (only when `YANGERD_ENABLE_GPS=true`) | +| `/sys/bus/usb/devices/*/authorized_default` (file reads) | USB port lock state (1=unlocked, 0=locked) | + +**Go replacement**: +- `/run/system.json` → `os.ReadFile()` + JSON unmarshal +- hwmon sensor files (`*_input`, `*_fault`) → `collector/hardware.go` polling every 10 seconds (sysfs pseudo-files do not emit inotify events; the kernel generates 
values on `read()`, never calling `fsnotify_modify()`) +- thermal zone files (`temp`) → `collector/hardware.go` polling every 10 seconds (same sysfs limitation) +- `iw.py` calls → `exec.Command("iw", "list")`, `exec.Command("iw", "dev")`, `exec.Command("iw", "phy", phyName, "info")`, `exec.Command("iw", "dev", ifname, "survey", "dump")` with custom text or JSON parsing (skipped when `YANGERD_ENABLE_WIFI=false`) +- GPS status → `os.ReadFile("/run/gps-status.json")` + JSON unmarshal (skipped when `YANGERD_ENABLE_GPS=false`) +- USB port state → `os.ReadFile(authorizedDefaultPath)` +- Strategy: **POLLING** 10 seconds for sensors (sysfs inotify impossibility); **POLLING** 300 seconds for static inventory (VPD, chassis data) + +--- + +### ietf_system.py → `internal/collector/system.go` + +**Python approach**: Assembles `ietf-system:system` and `ietf-system:system-state` from multiple sub-collectors: hostname, users from `/etc/passwd` and `/etc/shadow` via `getent`, SSH authorized keys, timezone from `realpath /etc/localtime`, NTP sources via `chronyc`, DNS from `/etc/resolv.conf.head` and `resolvconf -l`, RAUC slot status via `rauc status`, init service list via `initctl`, boot-order from `fw_printenv`/`grub-editenv`, and resource usage from `/proc/meminfo`, `/proc/loadavg`, and `df`. 
+ +**External commands invoked**: + +| Command / File | Purpose | +|---------------|---------| +| `hostname` | System hostname | +| `getent passwd` | User list with UID, shell path | +| `getent shadow` | Password hashes for users with 1000 ≤ UID < 10000 | +| `/var/run/sshd/.keys` (file read) | SSH authorized keys per user | +| `realpath /etc/localtime` | Resolve timezone symlink to zone name | +| `chronyc -c sources` | NTP source list: mode, state, address, stratum, poll, reach, offset | +| `/etc/resolv.conf.head` (file read) | Static DNS nameservers and search domains | +| `/sbin/resolvconf -l` | DHCP-assigned DNS nameservers | +| `rauc status --detailed --output-format=json` | RAUC software slot status: compatible, variant, booted slot, installed/activated timestamps | +| `rauc-installation-status` | In-progress upgrade: operation type, progress percentage, message | +| `initctl -j` | Finit service list: PID, identity, status, description, memory, uptime, restart-count | +| `fw_printenv BOOT_ORDER` | U-Boot boot order (preferred slot ordering) | +| `grub-editenv /mnt/aux/grub/grubenv list` | GRUB boot order (x86 targets) | +| `/etc/os-release` (file read) | OS name, version ID, build ID, architecture | +| `/proc/meminfo` (file read) | MemTotal, MemFree, MemAvailable | +| `/proc/loadavg` (file read) | 1-min, 5-min, 15-min load averages | +| `df -k ` | Filesystem size/used/available for `/`, `/var`, `/cfg` | +| `/proc/uptime` (file read) | System uptime in seconds (for boot-datetime calculation) | + +- `hostname` → fswatcher inotify on `/etc/hostname` (reactive, not polled by SystemCollector) +- `/etc/passwd`, `/etc/shadow` → fswatcher `Watch("/etc/shadow")` triggers `readUsers()` which reads both files via `os.ReadFile()` + line parsing (reactive, not polled by SystemCollector) +- `/var/run/sshd/.keys` → fswatcher `WatchDir("/var/run/sshd/")` triggers same `readUsers()` handler (reactive) +- `realpath /etc/localtime` → fswatcher `WatchSymlink` on `/etc/localtime` 
(reactive, not polled by SystemCollector); `readTimezone()` resolves symlink, strips zoneinfo prefix, handles Etc/GMT±N offset inversion +- NTP data → handled by `internal/collector/ntp.go` via `github.com/facebook/time/ntp/chrony` cmdmon protocol (not by system.go) +- `/etc/resolv.conf.head` → `os.ReadFile()` (polled by SystemCollector at 300s) +- `resolvconf -l` → `exec.Command("/sbin/resolvconf", "-l")` (polled by SystemCollector at 300s) +- `/etc/os-release`, `uname -r`, `uname -m` → `BootPlatform()` in `internal/collector/boot.go` (boot-once, not polled) +- `rauc status`, `rauc-installation-status` → `BootSoftware()` in `internal/collector/boot.go` (boot-once, not polled) +- `initctl -j` → `exec.Command("initctl", "-j")` (polled by SystemCollector at 300s) +- `fw_printenv` / `grub-editenv` → fswatcher on `/mnt/aux/grub/grubenv` and `/mnt/aux/uboot.env` triggers `ReadBootOrder()` in `internal/collector/boot.go` (reactive, not polled) +- `/proc/meminfo`, `/proc/loadavg`, `/proc/uptime` → `LiveSystemState()` on-demand provider (computed at IPC request time, not polled) +- `/proc/sys/net/ipv4/conf/*/forwarding` → `fswatcher` inotify (reactive updates) +- `df -k` → `syscall.Statfs()` per mount point in `LiveSystemState()` on-demand provider +- Strategy: **REACTIVE** for IP forwarding, hostname, timezone, users, SSH keys, and boot order (fswatcher inotify); **ON-DEMAND** for clock (current-datetime, boot-datetime), memory, load average, and filesystem usage (LiveSystemState provider); **BOOT-ONCE** for platform (os-release, uname) and software (RAUC slots, installation status); **POLLING** 300 seconds for DNS and services only + +--- +- All `chronyc -c` calls -> native Go cmdmon protocol via `github.com/facebook/time/ntp/chrony` over Unix socket `/var/run/chrony/chronyd.sock`. This eliminates all subprocess spawning for NTP data collection. 
The library speaks chrony's undocumented cmdmon protocol v6 natively, providing typed Go structs for tracking, sources, and sourcestats responses. +- Poll interval: configured via `YANGERD_POLL_INTERVAL_NTP` (default 60 seconds) + +### ietf_ntp.py → `internal/collector/ntp.go` + +**Python approach**: Calls `chronyc` four times per collection cycle: once for `sources` (NTP association list), once for `sourcestats` (offset/std-dev per source), once for `tracking` (clock state, stratum, refid, root delay/dispersion), and once for `serverstats` (NTP server packet statistics). Additionally queries `ss -ulnp` to determine the listening UDP port. + +**External commands invoked**: + +| Command | Purpose | +|---------|---------| +| `chronyc -c sources` | NTP source list: mode, state, address, stratum, poll, reach, lastRx, offset, error | +| `chronyc -c sourcestats` | Per-source statistics: estimated offset and standard deviation | +| `chronyc -c tracking` | Global clock state: stratum, refid, system offset, root delay, root dispersion, frequency, leap status | +| `chronyc -c serverstats` | Server statistics: packets received/dropped/sent/failed | +| `ss -ulnp` | UDP listening sockets — identify chronyd's NTP port | + +**Go replacement**: +- All `chronyc -c` calls -> native Go cmdmon protocol via `github.com/facebook/time/ntp/chrony` over Unix socket `/var/run/chrony/chronyd.sock`. This eliminates all subprocess spawning for NTP data collection. The library speaks chrony's undocumented cmdmon protocol v6 natively, providing typed Go structs for tracking, sources, and sourcestats responses. +- `ss -ulnp` -> parse `/proc/net/udp` directly for port 123 +- Poll interval: 60 seconds by default (configurable via `YANGERD_POLL_INTERVAL_NTP`) +- Investigation confirmed chrony has no D-Bus interface, no event-driven socket protocol, and no subscribe/push mechanism. The cmdmon protocol is strictly request-response (client sends `CMD_Request`, daemon sends `CMD_Reply` -- no server-initiated messages, no subscription opcodes). 
Polling is the only supported monitoring approach, per upstream chrony design. + +--- + +### infix_lldp.py → `internal/lldpmonitor/` + +**Python approach**: Queries lldpd for its LLDP neighbor database in JSON format via `lldpcli`. Parses per-interface chassis-id and port-id (with subtype mapping), constructs `remote-systems-data` entries grouped by local port name. + +**External commands invoked**: + +| Command | Purpose | +|---------|---------| +| `lldpcli show neighbors -f json` | LLDP neighbor table snapshot: per-interface chassis-id, port-id, age (for time-mark), rid | + +**Go replacement**: +- Snapshot polling is replaced with a persistent subprocess monitor: `lldpcli -f json0 watch` +- Stream parser handles pretty JSON objects delimited by blank lines (`\n\n`) and dispatches `lldp-added`, `lldp-updated`, `lldp-deleted` events +- `json0` output is required for structural stability (arrays always arrays) +- Strategy: **REACTIVE** via `internal/lldpmonitor/` (persistent subprocess + framing-aware parser) + +--- + +### infix_containers.py → `internal/collector/containers.go` (Phase 2, Feature-Gated) + +**Feature gate**: `YANGERD_ENABLE_CONTAINERS=true`. This collector is only active when container support is included in the Infix build. When `YANGERD_ENABLE_CONTAINERS=false`, the container collector is not started and no container data appears in the tree. + +**Python approach**: Lists all containers (including stopped) via `podman ps -a --format=json`. For each container, runs `podman inspect ` for network settings and cgroup path, reads cgroup resource limit files directly (`/sys/fs/cgroup/memory.max`, `cpu.max`), and runs `podman stats --no-stream --format json ` for live CPU/memory/IO/PID usage. 
+ +**External commands invoked**: + +| Command / File | Purpose | +|---------------|---------| +| `podman ps -a --format=json` | Full container list: name, ID, image, state, status, ports, networks | +| `podman inspect <name>` | NetworkSettings (host mode detection), CgroupPath | +| `/sys/fs/cgroup/memory.max` (file read) | Container memory limit in bytes | +| `/sys/fs/cgroup/cpu.max` (file read) | Container CPU quota and period (for millicores calculation) | +| `podman stats --no-stream --format json --no-reset <name>` | Live resource usage: memory, CPU%, block I/O, network I/O, PIDs | + +**Go replacement (Phase 2)**: +- `podman ps -a --format=json` → `exec.Command("podman", "ps", "-a", "--format=json")` +- `podman inspect <name>` → `exec.Command("podman", "inspect", name)` +- cgroup file reads → `os.ReadFile(cgroupBasePath + "/memory.max")` etc. +- `podman stats` → `exec.Command("podman", "stats", "--no-stream", "--format", "json", "--no-reset", name)` +- Poll interval: 10 seconds + +--- + +### infix_dhcp_server.py → `internal/collector/dhcp.go` + +**Python approach**: Two data sources. (1) Reads the dnsmasq lease file directly (`/var/lib/misc/dnsmasq.leases`) — a whitespace-delimited flat file with expiry timestamp, MAC, IP, hostname, and client-id per line. (2) Queries dnsmasq DHCP statistics via D-Bus (`uk.org.thekelleys.dnsmasq` interface `GetMetrics()`) for offer/ack/nak/decline/discover/request/release/inform counters. 
+ +**External commands invoked**: + +| Command / Source | Purpose | +|-----------------|---------| +| `/var/lib/misc/dnsmasq.leases` (file read) | DHCP lease table: expiry epoch, MAC, IP, hostname, client-id | +| D-Bus `uk.org.thekelleys.dnsmasq` `GetMetrics()` | DHCP packet counters: offers, acks, naks, declines, discovers, requests, releases, informs | + +**Go replacement**: +- Lease file (`/var/lib/misc/dnsmasq.leases`) → re-read by the D-Bus Monitor's `refreshDHCP()` handler, triggered by dnsmasq D-Bus signals (`DHCPLeaseAdded`, `DHCPLeaseDeleted`, `DHCPLeaseUpdated`) +- D-Bus metrics → `godbus/dbus/v5` package: `bus.Object("uk.org.thekelleys.dnsmasq", ...).Call("GetMetrics", 0)`, called as part of the same `refreshDHCP()` handler +- Strategy: **REACTIVE** (D-Bus signals for lease events; metrics queried on each lease change) + +--- + +### infix_firewall.py → `internal/collector/firewall.go` + +**Python approach**: Queries firewalld entirely via D-Bus (no subprocess calls). Connects to `org.fedoraproject.FirewallD1` on the system bus and calls methods on the zone, policy, and service interfaces to enumerate active zones, policies, and services. Zone settings include interfaces, sources, services, port-forwards, and rich rules for ICMP filters. Policy settings include ingress/egress zones, action, priority, masquerade, and rich rules. 
+ +**External commands invoked**: + +| Source | Method | Purpose | +|--------|--------|---------| +| D-Bus `org.fedoraproject.FirewallD1` | `getDefaultZone()`, `getLogDenied()`, `queryPanicMode()` | Global firewall state | +| D-Bus `org.fedoraproject.FirewallD1.zone` | `getActiveZones()`, `getZoneSettings2()` | Active zone list and per-zone settings (interfaces, sources, services, port-forwards) | +| D-Bus `org.fedoraproject.FirewallD1.policy` | `getPolicies()`, `getPolicySettings()` | Policy list and per-policy settings (ingress/egress zones, action, priority, rich-rules) | +| D-Bus `org.fedoraproject.FirewallD1` | `listServices()`, `getServiceSettings2()` | Service definitions with port/protocol | + +**Go replacement**: +- Data source preserved from Python: firewalld D-Bus method calls (`getDefaultZone()`, `getActiveZones()`, `getZoneSettings2()`, `getPolicies()`, `getPolicySettings()`, `listServices()`, `getServiceSettings2()`, `getLogDenied()`, `queryPanicMode()`) +- The Go implementation uses `godbus/dbus/v5` `Object.CallWithContext()` to invoke the same firewalld D-Bus methods that the Python code uses via `dbus.Interface` +- No subprocess execution (`nft`, `iptables`, etc.) 
is involved -- all data flows through the firewalld D-Bus API +- Trigger: firewalld D-Bus signals (`Reloaded` + `NameOwnerChanged` for restart detection) via D-Bus Monitor (Section 4.1novies) +- Strategy: **REACTIVE** (D-Bus signal trigger + D-Bus method call data retrieval) + +--- + +### Summary Table + +| Python Module | Go File | Primary Method | Phase | +|--------------|---------|----------------|-------| +| `ietf_interfaces/__init__.py` + `link.py` | `internal/monitor/link.go` + `internal/monitor/addr.go` | `vishvananda/netlink` LinkUpdate + AddrUpdate channels (event trigger) + full re-read via ip batch on every event (link: 3 queries + ethmonitor; addr: 1 query) | 1 | +| `ietf_interfaces/ip.py` | `internal/monitor/addr.go` | `vishvananda/netlink` AddrUpdate channel (event trigger) + full address re-read via `ip -json -batch` on every RTM_NEWADDR/RTM_DELADDR + `os.ReadFile` /proc/sys | 1 | +| `ietf_interfaces/ethernet.py` | `internal/collector/ethernet.go` + `internal/ethmonitor/` | `mdlayher/ethtool` + `mdlayher/genetlink` (reactive settings via ETHNL_MCGRP_MONITOR; polling stats) | 1 | +| `ietf_interfaces/bridge.py` | `internal/collector/bridge.go` | Bridge netlink event triggers (FDB via NeighUpdate, VLAN via LinkUpdate, STP via LinkUpdate with IFLA_BRPORT_STATE, MDB via raw RTNLGRP_MDB) + `bridge -json -batch -` re-reads | 1 | +| `ietf_interfaces/wifi.py` | `internal/collector/wifi.go` + `internal/iwmonitor/` | `iw event -t` (reactive) + `exec.Command` iw + wpa_cli | 2 (feature-gated: `YANGERD_ENABLE_WIFI`) | +| `ietf_interfaces/wireguard.py` | `internal/collector/wireguard.go` | `golang.zx2c4.com/wireguard/wgctrl` | 1 | +| `ietf_interfaces/vlan.py` | `internal/monitor/link.go` (inline) | `ip -json -batch` link query (linkinfo.info_data fields) | 1 | +| `ietf_interfaces/container.py` | `internal/collector/container_ifaces.go` | `exec.Command` ip-netns (cannot share ip batch across namespaces) | 2 (feature-gated: `YANGERD_ENABLE_CONTAINERS`) | +| 
`ietf_routing.py` | `internal/zapiwatcher/` + `internal/collector/routing.go` | ZAPI v6 streaming connection to zebra (`osrg/gobgp/v4/pkg/zebra`) for route redistribution; replaces `vtysh` for route table collection. See Section 4.1octies. | 1 | +| `ietf_ospf.py` | `internal/collector/ospf.go` | `exec.Command` vtysh | 2 | +| `ietf_rip.py` | `internal/collector/rip.go` | `exec.Command` vtysh | 2 | +| `ietf_bfd_ip_sh.py` | `internal/collector/bfd.go` | `exec.Command` vtysh | 2 | +| `ietf_hardware.py` | `internal/collector/hardware.go` | `os.ReadFile` sysfs (sensors, polling 10s) + `exec.Command` dmidecode (inventory) | 2 | +| `ietf_system.py` | `internal/collector/system.go` (DNS+services polling) + `internal/collector/live.go` (on-demand clock/memory/load/fs) + `internal/collector/boot.go` (boot-once platform/software, reactive boot-order) + fswatcher reactive handlers in `main.go` (hostname, timezone, users, SSH keys, boot order) | `os.ReadFile` /etc/* + `exec.Command` initctl/resolvconf + on-demand `LiveSystemState()` + boot-once `BootPlatform()`/`BootSoftware()` + fswatcher `Watch`/`WatchSymlink`/`WatchDir` | 2 | +| `ietf_ntp.py` | `internal/collector/ntp.go` | `github.com/facebook/time/ntp/chrony` cmdmon protocol over Unix socket | 2 | +| `infix_lldp.py` | `internal/lldpmonitor/` | Persistent `lldpcli -f json0 watch` subprocess + stream parser (`lldp-added`/`updated`/`deleted`) | 2 | +| `(new — from statd/avahi.c)` | `internal/mdnsmonitor/` | Avahi D-Bus `ServiceTypeBrowser`/`ServiceBrowser`/`ServiceResolver` signals | 1 | +| `infix_containers.py` | `internal/collector/containers.go` | `exec.Command` podman + `os.ReadFile` cgroup | 2 (feature-gated: `YANGERD_ENABLE_CONTAINERS`) | +| `infix_dhcp_server.py` | `internal/dbusmonitor/dbusmonitor.go` | D-Bus Monitor: dnsmasq signals (`DHCPLeaseAdded`/`Deleted`/`Updated`) → `refreshDHCP()` (lease file re-read + `GetMetrics()`) | 1 | +| `infix_firewall.py` | `internal/dbusmonitor/dbusmonitor.go` | D-Bus Monitor: 
firewalld signals (`Reloaded` + `NameOwnerChanged`) → `refreshFirewall()` (firewalld D-Bus method calls: zones, policies, services, global state) | 1 | + +## 6. Project Structure + +The yangerd Go module lives at `src/yangerd/` inside the Infix repository, following the existing Infix pattern where each daemon is a self-contained subdirectory under `src/`. + +``` +src/yangerd/ +├── cmd/ +│ ├── yangerd/ +│ │ └── main.go # daemon entry point: flag parsing, signal handling, errgroup +│ └── yangerctl/ +│ └── main.go # CLI tool: subcommands get/health/dump/watch +├── internal/ +│ ├── tree/ +│ │ └── tree.go # Tree type: per-model sync.RWMutex + map[string]*modelEntry + on-demand providers +│ ├── monitor/ +│ │ ├── link.go # RTNLGRP_LINK goroutine: full interface re-read (3 ip batch queries) + ethmonitor.RefreshInterface() + last-change +│ │ ├── addr.go # RTNLGRP_*IFADDR goroutine: full address re-read via ip batch on any addr event +│ │ └── neigh.go # RTNLGRP_NEIGH goroutine: full neighbor re-read via ip batch on any neigh event +│ ├── collector/ +│ │ ├── collector.go # Collector interface + RunAll() loop +│ │ ├── bridge.go # Bridge STP/VLAN/FDB: exec bridge + /sys/class/net +│ │ ├── wifi.go # WiFi state: exec iw dev +│ │ ├── wireguard.go # WireGuard peers: wgctrl +│ │ ├── ethtool.go # Ethernet speed/duplex/autoneg: mdlayher/ethtool +│ │ ├── ospf.go # OSPF state: exec vtysh -c 'show ip ospf ...' 
+│ │ ├── rip.go # RIP state: exec vtysh +│ │ ├── bfd.go # BFD sessions: exec vtysh +│ │ ├── hardware.go # Hardware sensors + inventory: /sys/class/hwmon + dmidecode +│ │ ├── system.go # System state: /etc/os-release, uname, users, software status, DNS +│ │ ├── live.go # On-demand system state: LiveSystemState() reads /proc/uptime, /proc/meminfo, /proc/loadavg, syscall.Statfs at IPC request time +│ │ ├── ntp.go # NTP sync status: chrony cmdmon protocol via facebook/time +│ │ ├── lldp.go # LLDP transform helpers (fed by LLDP monitor events) +│ │ ├── containers.go # Container state: exec podman ps (Phase 2, feature-gated: YANGERD_ENABLE_CONTAINERS) +│ │ ├── dhcp.go # DHCP lease parsing: parseDnsmasqLeases() + buildDHCPTree() (called by D-Bus Monitor) +│ │ └── firewall.go # Firewall state: buildFirewallTree() from firewalld D-Bus data (called by D-Bus Monitor) +│ ├── ipc/ +│ │ ├── server.go # Unix socket listener + connection handler goroutines +│ │ ├── client.go # Client dial/query helper (used by yangerctl) +│ │ └── protocol.go # Request/Response types + marshal/unmarshal +│ ├── ipbatch/ +│ │ └── batch.go # IPBatch subprocess manager: persistent ip -json -force -batch - +│ ├── fswatcher/ +│ │ └── fswatcher.go # inotify/fsnotify goroutine: watches procfs forwarding flags, /etc/hostname, /etc/localtime (WatchSymlink) +│ ├── nlmonitor/ +│ │ └── nlmonitor.go # NLMonitor: vishvananda/netlink subscriptions (link, addr, neigh, and bridge FDB/VLAN/MDB). Route data comes from ZAPI watcher -- no netlink route group subscription. 
+│ ├── iwmonitor/ +│ │ ├── monitor.go # iw event -t subprocess manager + event parser +│ │ └── query.go # Short-lived iw re-query helpers (info, station dump) +│ ├── lldpmonitor/ +│ │ └── monitor.go # lldpcli -f json0 watch subprocess manager + framed JSON parser +│ ├── ethmonitor/ +│ │ └── ethmonitor.go # Ethtool genetlink monitor: ETHNL_MCGRP_MONITOR subscription +│ ├── zapiwatcher/ +│ │ └── zapiwatcher.go # ZAPI watcher: connects to zebra zserv socket, subscribes to route redistribution, maintains route tree with reconnection +│ ├── dbusmonitor/ +│ │ └── dbusmonitor.go # D-Bus Monitor: dnsmasq lease signals + firewalld reload signals → reactive data refresh +│ ├── mdnsmonitor/ +│ │ └── mdnsmonitor.go # Avahi D-Bus monitor: reactive mDNS service browse/resolve updates +│ ├── scheduler/ +│ │ └── scheduler.go # Runs collectors via time.NewTicker at configured intervals +│ └── config/ +│ └── config.go # Config struct: socket path, polling intervals, log level +├── go.mod # module github.com/kernelkit/infix/src/yangerd +├── go.sum +└── Makefile # cross-compilation targets for Buildroot integration +``` + +### Package Descriptions + +**`cmd/yangerd/`** — The main daemon entry point. Initializes the in-memory tree, configuration, and all monitor/collector subsystems. Orchestrates the startup sequence (Section 4.2.2) and runs them under a single `errgroup.Group` used purely as a goroutine join point for clean shutdown. All `Run()` methods follow a strict error-swallowing contract: internal failures (subprocess crashes, netlink subscription errors, collector timeouts) are logged and retried internally within each goroutine. A `Run()` method only returns when `ctx.Done()` fires (i.e., on SIGTERM/SIGINT). This ensures that a single collector failure never propagates to cancel unrelated monitors or the IPC server. + +**`cmd/yangerctl/`** — A developer and operator CLI tool. 
Subcommands include `get ` (query yangerd and print JSON), `health` (display monitor status and tree size), `dump` (print the full in-memory tree), and `watch ` (poll and print changes). Connects to `/run/yangerd.sock` using the same IPC protocol as statd. + +**`internal/tree/`** -- The `Tree` type: a `map[string]*modelEntry` where each `modelEntry` holds its own `sync.RWMutex`, its `updated` timestamp, and a `json.RawMessage`. A top-level `sync.RWMutex` protects the map structure and a `providers` map for on-demand data functions. Provides `Set(key, raw)`, `Get(key) json.RawMessage`, `GetMulti(keys) []json.RawMessage`, `RegisterProvider(key, fn)`, and `LastUpdated(key)` accessor. When a provider is registered for a key, Get()/GetMulti() invokes the provider and shallow-merges the result with cached data — live fields override stale cached values without mutating the cache. This package has no external dependencies -- it only imports `sync` and `encoding/json` from the standard library. + +**`internal/monitor/`** — Event dispatcher goroutines that consume native Go netlink channel events via `vishvananda/netlink` subscriptions (LinkUpdate, AddrUpdate, NeighUpdate) and trigger state re-queries. For link, address, and neighbor events, re-queries go through the `ip -json -force -batch -` subprocess. Route data is sourced exclusively from the ZAPI watcher (see `internal/zapiwatcher/`) -- yangerd does not subscribe to netlink route groups. Each monitor follows the `Run(ctx context.Context) error` signature and calls `tree.Set()` after parsing the JSON response. + +**`internal/ipbatch/`** — Manages the persistent `ip -json -force -batch -` subprocess for state queries. `batch.go` implements the `IPBatch` type that maintains a persistent `ip -json -force -batch -` process, writing query commands to stdin and reading JSON array responses from stdout. Includes health monitoring, automatic restart with exponential backoff, and canary-query validation after restarts. 
This package handles data acquisition only — event monitoring is handled by the `internal/nlmonitor/` package via native Go netlink subscriptions. + +**`internal/collector/`** — Polling-based supplementary collectors for data not exposed via netlink. Additionally, `live.go` provides `LiveSystemState()`, an exported function registered as an on-demand provider for `ietf-system:system-state`. It reads `/proc/uptime`, `/proc/meminfo`, `/proc/loadavg`, and calls `syscall.Statfs()` on each IPC request, producing fresh JSON that is shallow-merged with the cached system-state data by the Tree's provider mechanism. Each file implements the `Collector` interface: + +```go +type Collector interface { + Name() string + Interval() time.Duration + Collect(ctx context.Context, tree *tree.Tree) error +} +``` + +`collector.go` provides `RunAll(ctx, collectors, tree)`, which launches one goroutine per collector and ticks it on its configured interval. Failed `Collect()` calls are logged and retried on the next tick — a single collector failure does not affect other collectors or the IPC server. + +**`internal/fswatcher/`** — Implements reactive monitoring for filesystem-based data sources that support inotify. It runs a single event loop that subscribes to inotify events via the `fsnotify` library. Paths like `/proc/sys/net/ipv4/conf/*/forwarding` (IP forwarding flags) and `/etc/hostname` are added at startup via `Watch()` (with glob expansion for wildcards). Symlink paths like `/etc/localtime` use `WatchSymlink()`, which watches the parent directory instead of the file — necessary because `fsnotify` follows symlinks, so a direct watch would monitor the target file rather than the symlink entry, missing symlink replacements. Modification events trigger a debounced re-read of the affected file, updating the tree immediately. Handles the `IN_IGNORED` event by automatically re-adding watches after file deletion/recreation. 
Note: sysfs pseudo-files under `/sys/class/hwmon/` and `/sys/class/thermal/` are NOT watched here — they do not emit inotify events (the kernel generates values on `read()`) and are instead collected by `collector/hardware.go` via polling. Bridge STP state is NOT watched here — it is handled reactively via netlink events and `bridge -json -batch -` re-reads. DHCP leases and firewall state are NOT watched here — they are handled reactively via D-Bus signals in `internal/dbusmonitor/`. + +**`internal/nlmonitor/`** — Implements native Go netlink event subscriptions via `vishvananda/netlink`. The `NLMonitor` struct subscribes to LinkUpdate, AddrUpdate, and NeighUpdate channels, plus bridge-specific events (FDB entries via NeighUpdate with NDA_MASTER flag, VLAN changes via LinkUpdate, STP port state changes via LinkUpdate with IFLA_BRPORT_STATE in IFLA_PROTINFO, MDB entries via raw RTNLGRP_MDB subscription). All bridge events are used as triggers only — full state is re-read via the `bridge -json -batch -` subprocess. yangerd does NOT subscribe to netlink route groups (RTNLGRP_IPV4_ROUTE, RTNLGRP_IPV6_ROUTE) — route data is sourced exclusively from the ZAPI watcher. A single `select` loop dispatches events to the appropriate monitor goroutines. Uses context cancellation for clean shutdown and a shared error callback for automatic re-subscription on netlink errors. +After any subscription error, the error callback performs a full-scope re-read of ALL entities for the affected event type — not just the entity that was being processed when the error occurred. For example, a link subscription error triggers `ip -json -force -batch -` queries for every known interface (link show, addr show, neigh show) to resynchronize the entire tree. Events that occurred during the error/resubscribe window are inherently lost (netlink provides no replay), so only a full re-read guarantees consistency. 
+ + +**`internal/zapiwatcher/`** — Implements a streaming ZAPI client that connects to FRR zebra's zserv unix domain socket (`/var/run/frr/zserv.api`) and subscribes to route redistribution notifications. On startup, the watcher sends `ZEBRA_HELLO`, `ZEBRA_ROUTER_ID_ADD`, and `ZEBRA_REDISTRIBUTE_ADD` messages for each route type (kernel, connected, static, OSPF, RIP), which causes zebra to send a full RIB dump followed by incremental `REDISTRIBUTE_ROUTE_ADD` and `REDISTRIBUTE_ROUTE_DEL` notifications. Routes are parsed from `IPRouteBody` into the in-memory tree keyed by prefix and protocol. This captures routes that exist in zebra's RIB but not in the Linux kernel FIB (unresolvable nexthop, lost admin-distance election, ECMP overflow, table-map filtered). Reconnection is automatic with exponential backoff (100ms initial, 30s max, factor 2x); on reconnect, the full subscription handshake is replayed and the route subtree is replaced atomically to clear stale entries. Uses `github.com/osrg/gobgp/v4/pkg/zebra` for ZAPI v6 message framing. The watcher signals readiness via the same `sync.WaitGroup` mechanism used by the netlink monitors. +On disconnection, the watcher immediately clears the route subtree from the in-memory tree (`tree.Set("ietf-routing:routing", nil)`) rather than serving stale routes. This makes the data gap explicit: during the disconnection window, route queries return empty data. This matches the principle that stale routing data is worse than absent routing data — an operator seeing no routes knows something is wrong, whereas stale routes may silently black-hole traffic. On successful reconnect, the full RIB dump repopulates the subtree atomically. + + +**`internal/dbusmonitor/`** — Implements reactive monitoring for dnsmasq DHCP lease events and firewalld configuration reloads via D-Bus signal subscriptions. 
The `DBusMonitor` struct connects to the system D-Bus bus, subscribes to dnsmasq signals (`DHCPLeaseAdded`, `DHCPLeaseDeleted`, `DHCPLeaseUpdated`) and firewalld signals (`Reloaded`), and monitors `NameOwnerChanged` for service lifecycle detection. On dnsmasq signals, it re-reads `/var/lib/misc/dnsmasq.leases` and calls `GetMetrics()` via D-Bus method call, combining lease data and packet counters into the YANG tree. On firewalld signals, it re-reads the full firewall state via firewalld D-Bus method calls (`getDefaultZone()`, `getActiveZones()`, `getZoneSettings2()`, `getPolicies()`, `getPolicySettings()`, `listServices()`, `getServiceSettings2()`, `getLogDenied()`, `queryPanicMode()`) and transforms the results into the YANG tree. Service absence is handled via `NameOwnerChanged`: when a service stops, the corresponding tree key is cleared to empty; when it starts, a full data refresh is performed. Reconnection to the D-Bus bus is automatic with exponential backoff (100ms initial, 30s max, factor 2x). Follows the same `Run(ctx context.Context) error` signature as all other monitors. Uses `github.com/godbus/dbus/v5` for D-Bus connectivity. See Section 4.1novies. + +**`internal/iwmonitor/`** — Manages the persistent `iw event -t` subprocess for reactive 802.11 wireless monitoring. The `monitor.go` file contains the `IWMonitor` struct, the event parsing logic (`parseIWEvent`), and the main event loop goroutine. The `query.go` file provides helper functions that spawn short-lived `exec.Command("iw", ...)` subprocesses to re-query WiFi state (interface info, station list) in response to events. The package is initialized only when `YANGERD_ENABLE_WIFI=true`; when WiFi is not included in the build, it is not started. + +**`internal/ethmonitor/`** — Implements the native Go genetlink subscription to the kernel's ethtool `ETHNL_MCGRP_MONITOR` multicast group for reactive Ethernet settings monitoring. 
The `ethmonitor.go` file contains the `EthMonitor` struct, which holds a `genetlink.Conn` (for the multicast subscription) and an `ethtool.Client` (for typed re-queries). On receiving `ETHTOOL_MSG_LINKINFO_NTF` or `ETHTOOL_MSG_LINKMODES_NTF` notifications, the monitor re-queries speed, duplex, and auto-negotiation via the ethtool client and writes updated values to the in-memory tree. Unlike `iwmonitor` and `brmonitor`, this package does not manage a subprocess—it uses a native genetlink socket. The `RefreshInterface(ifname)` public method is called by the link monitor on every RTM_NEWLINK event, since ETHNL_MCGRP_MONITOR does not fire on link state transitions. On Infix's target kernel (6.18), ethtool netlink is unconditionally available — the ethmonitor is always active. If the `ETHNL_MCGRP_MONITOR` subscription fails at startup (e.g., on a developer machine with an older kernel), ethmonitor logs a fatal error and does not start. There is no fallback to polling — developers must test on the target kernel or a kernel version that supports ethtool genetlink (5.6+). + +**`internal/ipc/`** — The Unix domain socket server (`AF_UNIX SOCK_STREAM`). `server.go` accepts connections in a loop and dispatches each to a short-lived goroutine that reads the 4-byte big-endian length header, reads the JSON request body, acquires a tree read lock, serializes the response, and writes the framed reply. `protocol.go` defines the `Request` and `Response` structs and their JSON marshalling. +The IPC server always serves whatever data is currently in the in-memory tree, regardless of whether underlying subprocesses (ip batch, bridge batch) are temporarily unavailable due to restarts. During a subprocess restart window, the tree contains the last successfully collected state. 
This means IPC responses may reflect slightly stale data during the restart gap (typically under 30 seconds), but the server never blocks or returns errors due to subprocess unavailability — only due to protocol-level issues (malformed request, unknown path). + + +**`internal/config/`** — The `Config` struct read from a TOML or environment-variable source. Fields include socket path (default `/run/yangerd.sock`), per-collector polling intervals, log level, startup timeout before the IPC server begins accepting connections, and three boolean feature flags (`EnableWiFi`, `EnableContainers`, `EnableGPS`) parsed from the `YANGERD_ENABLE_WIFI`, `YANGERD_ENABLE_CONTAINERS`, and `YANGERD_ENABLE_GPS` environment variables. Parsing defaults each flag to `true` when the env var is unset. In production, Buildroot writes `/etc/default/yangerd` explicitly and sets unsupported features to `false`; therefore disabled packages are still disabled. The phrase “missing file enables all features” refers specifically to `/etc/default/yangerd` being absent (all vars unset → parser defaults apply). + +### Key Dependencies + +| Module | Version | Purpose | +|--------|---------|---------| +| iproute2 (`ip`, `bridge`) | system | Persistent `ip -json -force -batch -` subprocess for link/addr/neigh state queries; `bridge -json -batch -` for VLAN/FDB/STP queries. Event monitoring is handled natively via `vishvananda/netlink`, not via `ip monitor`. | +| github.com/fsnotify/fsnotify | v1.x | Cross-platform inotify for reactive file watching (procfs forwarding flags). Bridge STP state is handled via netlink events, not fsnotify. DHCP leases and firewall state are handled via D-Bus signals, not fsnotify. Note: sysfs pseudo-files (`/sys/class/hwmon/*`, `/sys/class/thermal/*`) are NOT monitored via fsnotify — they do not emit inotify events and are polled by `collector/hardware.go` instead. 
| +| github.com/vishvananda/netlink | v1.x | Native Go netlink subscriptions for reactive event monitoring: `LinkSubscribeWithOptions`, `AddrSubscribeWithOptions`, `NeighSubscribeWithOptions`. Also provides bridge FDB events (via NDA_MASTER flag on NeighUpdate), bridge VLAN events (via LinkUpdate), bridge STP port state events (via LinkUpdate with IFLA_BRPORT_STATE in IFLA_PROTINFO), and bridge MDB events (via raw RTNLGRP_MDB subscription). All bridge events are triggers only — full state re-read via `bridge -json -batch -`. Route data comes from the ZAPI watcher — no netlink route group subscription. | +| iw (iw tool) | system | Persistent `iw event -t` subprocess for reactive 802.11 wireless events; short-lived `iw dev info` and `iw dev station dump` for re-queries. Feature-gated: only active when `YANGERD_ENABLE_WIFI=true`. | +| github.com/mdlayher/ethtool | v0.x | Ethernet speed/duplex/autoneg queries via ethtool genetlink; used by both the polling collector (stats) and the ethmonitor (re-queries after notifications) | +| github.com/mdlayher/genetlink | v1.x | Generic netlink socket for subscribing to ETHNL_MCGRP_MONITOR multicast group; provides `Conn.JoinGroup()` + `Conn.Receive()` for native ethtool notification reception | +| github.com/godbus/dbus/v5 | v5.x | D-Bus system bus connection for the D-Bus Monitor Subsystem (Section 4.1novies): signal subscriptions for dnsmasq DHCP lease events and firewalld reload notifications, plus `GetMetrics()` method calls on dnsmasq for DHCP packet counters | +| github.com/facebook/time/ntp/chrony | latest | Native Go implementation of chrony's cmdmon protocol v6. Provides typed request/response structs for tracking, sources, sourcestats, and activity queries over the Unix socket at `/var/run/chrony/chronyd.sock`. Eliminates `exec chronyc` subprocess spawning for NTP data collection. Used by `internal/collector/ntp.go`. Apache-2.0 license; production-tested in Facebook's ntpcheck and Prometheus chrony_exporter. 
| +| golang.zx2c4.com/wireguard/wgctrl | v0.x | WireGuard peer statistics | +| golang.org/x/sync/errgroup | latest | Monitor goroutine lifecycle management | +| github.com/osrg/gobgp/v4 | latest | ZAPI client library for connecting to FRR zebra's zserv unix socket. Provides ZAPI v6 message framing, `ZEBRA_REDISTRIBUTE_ADD` subscription, and `IPRouteBody` parsing for route prefix, protocol, distance, metric, nexthops, and flags. Used by `internal/zapiwatcher/`. | + +All Go dependencies are pure Go with no CGo requirement. The module graph avoids C bindings entirely, which is a hard constraint for cross-compilation in Buildroot. The `iproute2` runtime dependency is always present on Infix targets as part of the base system. + +## 7. Deployment & Operations + + +The `go.mod` module path `github.com/kernelkit/infix/src/yangerd` matches the Infix directory structure, making the module self-describing with respect to its source location. + +Cross-compilation for embedded targets requires only standard Go toolchain invocation — no CGo means no cross-C-compiler complexity: + +```bash +GOARCH=arm64 GOOS=linux go build ./cmd/yangerd +GOARCH=arm GOOS=linux GOARM=7 go build ./cmd/yangerd +``` + +The canonical target build is via a Buildroot package at `package/yangerd/yangerd.mk`, using the standard `golang-package` infrastructure with `BR2_PACKAGE_YANGERD`. The `Makefile` in `src/yangerd/` is for local host development and static analysis only, mirroring the pattern used by other native daemons under `src/`. + +## Deployment +### Finit Service File + +yangerd is managed by finit, Infix's init system. 
The service definition lives at `/etc/finit.d/yangerd.conf`: + +``` +# yangerd — operational data daemon +service [S12345] env:-/etc/default/yangerd \ + log:prio:daemon.notice,tag:yangerd \ + yangerd -- yangerd operational data daemon +``` + +Key attributes: + +- **Runlevels `S,1,2,3,4,5`**: yangerd starts during system initialisation (runlevel S) and remains running through all multi-user runlevels. +- **Condition ``**: yangerd starts as soon as PID 1 (finit itself) is fully running. No external network or storage condition is required — yangerd only needs the kernel's netlink subsystem, which is always available. +- **Ordering relative to statd**: statd declares yangerd as a hard dependency via its finit condition. statd will not start until yangerd's service condition is satisfied. The finit condition for statd is: + +``` +service [2345] env:-/etc/default/statd \ + log:prio:daemon.notice,tag:statd \ + statd -- statd operational datastore daemon +``` + +The `` condition is *mandatory*: statd requires yangerd to be running. If yangerd fails to start, statd will not proceed -- there is no Python fallback path. This ensures that operational data always comes from yangerd's in-memory tree. + +### Socket + +yangerd creates and owns `/run/yangerd.sock` (type `SOCK_STREAM`, Unix domain) at startup. **There is no socket activation**: yangerd creates the socket itself by calling `net.Listen("unix", "/run/yangerd.sock")` before accepting connections. The file is removed on clean shutdown via a `defer os.Remove(...)` registered in `main()`. If a stale socket file exists at startup (e.g. after a crash), yangerd removes it before binding. + +Access to the socket is restricted to the `statd` group (`chmod 0660`). statd runs as a member of the `statd` group to permit reads without requiring root. + +### Environment Variables + +Three environment variables (`YANGERD_ENABLE_WIFI`, `YANGERD_ENABLE_CONTAINERS`, `YANGERD_ENABLE_GPS`) act as runtime feature flags for optional subsystems. 
+ +| Variable | Default | Description | +|----------|---------|-------------| +| `YANGERD_SOCKET` | `/run/yangerd.sock` | Path to the Unix domain socket yangerd creates and listens on. Override in tests or multi-instance setups. | +| `YANGERD_LOG_LEVEL` | `info` | Log verbosity: `trace`, `debug`, `info`, `warn`, `error`. Parsed at startup; changing the file requires a restart. | +| `YANGERD_TIMEOUT_MS` | `50` | Milliseconds statd waits for a response from yangerd on a single IPC request. Exceeding this timeout causes `ly_add_yangerd_data()` to return `SR_ERR_INTERNAL` for that request. | +| `YANGERD_STARTUP_TIMEOUT` | `5s` | How long yangerd waits, after launching all monitors, for the initial state dump goroutine to complete before marking itself `ready`. A Go `time.Duration` string (e.g. `5s`, `10s`). | +| `YANGERD_POLL_INTERVAL_OSPF` | `10s` | Interval between OSPF collector runs (executes `vtysh -c 'show ip ospf json'`). Longer interval reduces load from FRR queries. | +| `YANGERD_POLL_INTERVAL_NTP` | `60s` | Interval between NTP collector runs (queries chrony via native cmdmon protocol). Default is higher than OSPF because NTP state changes are infrequent. | +| `YANGERD_ENABLE_WIFI` | `true` | Enable WiFi operational data collection (IW Event Monitor + WiFi polling collector). Set to `false` by the Buildroot recipe when WiFi support (`BR2_PACKAGE_IW`) is not included in the build. When `false`, the `iwmonitor` and `collector/wifi.go` subsystems are not started and no WiFi-related data appears in the tree. | +| `YANGERD_ENABLE_CONTAINERS` | `true` | Enable container operational data collection (podman collector). Set to `false` by the Buildroot recipe when container support (`BR2_PACKAGE_PODMAN`) is not included in the build. When `false`, the `collector/containers.go` subsystem is not started and no container data appears in the tree. | +| `YANGERD_ENABLE_GPS` | `true` | Enable GPS/GNSS operational data collection. 
Set to `false` by the Buildroot recipe when GPS support is not included in the build. When `false`, GPS-related data in the hardware collector is skipped. | + +**Configuration reload policy**: All configuration (socket path, log level, polling intervals, feature flags) is read once at startup from environment variables. There is no hot-reload mechanism — changing any setting requires a daemon restart (`initctl restart yangerd`). This simplifies the implementation by avoiding concurrent access to configuration values and ensures that the daemon's behavior is deterministic for its entire lifetime. + + +### Startup Sequence + +The following steps occur in order during yangerd startup: + +1. **Parse environment**: read `YANGERD_SOCKET`, `YANGERD_LOG_LEVEL`, all interval variables, and the three feature flags (`YANGERD_ENABLE_WIFI`, `YANGERD_ENABLE_CONTAINERS`, `YANGERD_ENABLE_GPS`). +2. **Create socket**: call `net.Listen("unix", socketPath)` and `chmod 0660` the resulting file. From this point, connection attempts from statd will queue in the kernel backlog. +3. **Initialise tree**: create the `tree.Tree` (empty per-model-locked map). +4. **Launch monitors**: start the NLMonitor (netlink subscriptions for link, addr, neigh, and bridge events), goroutines for `monitor.Link`, `monitor.Addr`, and `monitor.Neigh` (always active), and the ZAPI watcher (`internal/zapiwatcher/`, connects to zebra's zserv socket for route data). 
Conditionally start feature-gated subsystems: + - **Always**: NLMonitor (netlink subscriptions), `monitor.Link`, `monitor.Addr`, `monitor.Neigh`, ZAPI watcher (route data via zserv), bridge batch, `ethmonitor`, `fswatcher` + - **If `YANGERD_ENABLE_WIFI=true`**: start `iwmonitor` (persistent `iw event -t` subprocess) and `collector/wifi.go` + - **If `YANGERD_ENABLE_CONTAINERS=true`**: start `collector/containers.go` and `collector/container_ifaces.go` + - **If `YANGERD_ENABLE_GPS=true`**: enable GPS data collection in `collector/hardware.go` + Each monitor immediately calls its corresponding list API to populate an initial snapshot: + - `link.LinkList()` — enumerates all existing links + - `addr.AddrList(nil, netlink.FAMILY_ALL)` — enumerates all addresses on all links + - `neigh.NeighList(0, netlink.FAMILY_ALL)` — enumerates all ARP/NDP entries + - ZAPI watcher: sends `ZEBRA_HELLO` + `ZEBRA_ROUTER_ID_ADD` + `ZEBRA_REDISTRIBUTE_ADD` per route type; zebra responds with a full RIB dump + - fswatcher: calls `InitialRead()` after glob expansion and watch setup — reads every watched procfs file (forwarding flags) and populates the tree (sub-millisecond, synchronous) + Note: only `LinkSubscribeWithOptions{ListExisting: true}` auto-delivers existing entries via the event channel; the address and neighbour monitors must call their respective list APIs explicitly. Route data is bootstrapped by the ZAPI watcher's redistribution subscription, which triggers a full dump from zebra. +5. **Launch initial dump goroutine**: a separate goroutine waits for the three netlink monitor goroutines and the ZAPI watcher to signal completion of their initial data load (via a `sync.WaitGroup`). Once all four complete, it sets the daemon-wide `ready` flag to `true`. +6. **Start IPC accept loop**: the main goroutine begins accepting connections from `/run/yangerd.sock`. +7. 
**While `ready == false`**: any incoming IPC request returns immediately with a JSON response `{"status": "starting", "code": 503}`. statd treats `code == 503` as a transient unavailability signal and returns `SR_ERR_INTERNAL` to sysrepo for that request. The management client sees an empty operational subtree during this brief window (typically under one second). + +#### Startup Readiness Protocol + +The `ready` flag transitions from `false` to `true` only when ALL of the following initial data loads complete: + +1. **NLMonitor**: `LinkList()`, `AddrList()`, and `NeighList()` have each returned and populated the tree. +2. **ZAPI watcher**: The initial RIB dump from zebra is complete (all `REDISTRIBUTE_ROUTE_ADD` messages for the initial dump have been received and processed). +3. **BridgeBatch**: Initial `vlan show`, `fdb show`, and `mdb show` queries have completed. +4. **FSWatcher**: `InitialRead()` has completed — all watched procfs files (forwarding flags) have been read and their values stored in the tree. This completes sub-millisecond but is included in the WaitGroup for correctness, ensuring forwarding state is never absent from the tree when the daemon begins serving queries. + +Each component signals completion via a shared `sync.WaitGroup`. The `main()` goroutine calls `wg.Wait()` with a timeout of `YANGERD_STARTUP_TIMEOUT` (default `5s`). If the timeout expires before all components signal, the daemon logs a warning identifying which components have not yet completed and sets `ready = true` anyway — serving partial data is preferable to blocking statd indefinitely. + +If a required data source (e.g., zebra) is permanently unavailable, the startup timeout ensures the daemon becomes ready within a bounded time. The affected tree keys will be empty until the source becomes available, and the health endpoint will report the specific subsystem as `"failed"`. 
+ +#### Graceful Shutdown Sequence + +On SIGTERM or SIGINT, yangerd performs an ordered shutdown: + +1. **Stop accepting**: Close the `net.Listener` to stop accepting new IPC connections. In-flight handler goroutines continue to completion. +2. **Cancel context**: Cancel the root `context.Context`, which propagates to all monitor and collector goroutines. +3. **Drain monitors**: Wait for all `Run()` methods to return (they return on `ctx.Done()`). The `errgroup.Wait()` call blocks until all goroutines exit. +4. **Drain batch subprocesses**: `IPBatch` and `BridgeBatch` close their stdin pipes, causing the subprocesses to exit. Wait for process exit to avoid zombies. +5. **Remove socket**: `os.Remove("/run/yangerd.sock")` via `defer` in `main()`. + +The entire shutdown completes within 5 seconds under normal conditions. If a subprocess hangs, `exec.CommandContext` ensures it is killed when the context is cancelled. + +#### Signal Handling + +yangerd handles the following signals: +- **SIGTERM**: Initiates graceful shutdown (see Graceful Shutdown Sequence above). +- **SIGINT**: Same as SIGTERM — initiates graceful shutdown. Useful for interactive debugging. + +All other signals use their default kernel behavior. Notably: +- **SIGHUP**: Not handled — does not trigger config reload (see Configuration reload policy above). +- **SIGUSR1/SIGUSR2**: Not handled. Use `yangerctl health` or `yangerctl dump` for runtime diagnostics instead of signal-based debug dumps. + +8. **Once `ready == true`**: IPC requests are served from the in-memory tree. Monitors and the ZAPI watcher continue running indefinitely, updating the tree on every netlink event or ZAPI route notification. + +### Local Development Build + +```bash +cd src/yangerd +go build ./cmd/yangerd # build the daemon binary +go build ./cmd/yangerctl # build the CLI diagnostic tool +go vet ./... # static analysis +go test ./... 
# run all unit tests +``` + +For cross-compilation to Infix targets: + +```bash +GOARCH=arm64 GOOS=linux go build ./cmd/yangerd +GOARCH=arm GOOS=linux GOARM=7 go build ./cmd/yangerd +GOARCH=riscv64 GOOS=linux go build ./cmd/yangerd +``` + +No `CGO_ENABLED=0` flag is needed because yangerd contains no CGo code, but setting it explicitly (`CGO_ENABLED=0`) is recommended in CI to enforce the constraint. + +### Buildroot Package + +The canonical target build is via `package/yangerd/yangerd.mk` using the standard `golang-package` Buildroot infrastructure: + +```makefile +################################################################################ +# +# yangerd +# +################################################################################ + +YANGERD_VERSION = 1.0.0 +YANGERD_SITE = $(BR2_EXTERNAL_INFIX_PATH)/src/yangerd +YANGERD_SITE_METHOD = local +YANGERD_LICENSE = BSD-2-Clause +YANGERD_LICENSE_FILES = LICENSE + +YANGERD_BUILD_TARGETS = cmd/yangerd cmd/yangerctl + +define YANGERD_INSTALL_INIT_FINIT + $(INSTALL) -D -m 0644 $(YANGERD_PKGDIR)/yangerd.conf \ + $(TARGET_DIR)/etc/finit.d/yangerd.conf +endef + +# Generate /etc/default/yangerd with build-time feature flags. +# Each flag is derived from the corresponding BR2_PACKAGE_* selection. +# When a feature is not selected in the Buildroot config, its flag is +# set to false and the corresponding collectors are not started at runtime. 
+define YANGERD_INSTALL_TARGET_CMDS + $(INSTALL) -d $(TARGET_DIR)/etc/default + echo '# yangerd build-time feature flags (generated by yangerd.mk)' \ + > $(TARGET_DIR)/etc/default/yangerd + echo 'YANGERD_ENABLE_WIFI=$(if $(BR2_PACKAGE_IW),true,false)' \ + >> $(TARGET_DIR)/etc/default/yangerd + echo 'YANGERD_ENABLE_CONTAINERS=$(if $(BR2_PACKAGE_PODMAN),true,false)' \ + >> $(TARGET_DIR)/etc/default/yangerd + echo 'YANGERD_ENABLE_GPS=$(if $(BR2_PACKAGE_GPSD),true,false)' \ + >> $(TARGET_DIR)/etc/default/yangerd +endef + +$(eval $(golang-package)) +``` + +The `golang-package` macro handles `GOARCH`/`GOOS` setting from the Buildroot target tuple, vendor directory management, and stripping of debug symbols from the installed binary. + + +## 8. Testing Strategy + +Testing yangerd spans six levels: testability contracts defining interface boundaries for all external dependencies, unit tests for internal packages, integration tests for the full daemon, regression tests comparing output parity with existing Python yanger scripts, CI enforcement via the race detector, and a concrete verification loop defining the per-module definition of done. + +### Unit Tests (`go test ./internal/...`) + +Unit tests exercise each internal package in isolation, with no external process invocations and no kernel dependencies. + +#### `internal/tree` + +| Test | Description | +|------|-------------| +| `TestTreeSetGet` | Call `tree.Set("ietf-interfaces:interfaces", data)`, then `tree.Get("ietf-interfaces:interfaces")`; assert returned bytes are byte-identical to `data`. | +| `TestTreeConcurrentReadWrite` | Spawn 100 goroutines each calling `tree.Set()` with unique module keys, and 100 goroutines each calling `tree.Get()` on the same keys concurrently. Run with `-race`; no data race must be reported. Verify that per-model locks allow concurrent writes to different modules without blocking. 
| +| `TestTreeGetMissing` | Call `tree.Get("/nonexistent")` on an empty tree; assert the result is `nil` and no panic occurs. | +| `TestTreeSetOverwrite` | Call `tree.Set("/k", data1)`, then `tree.Set("/k", data2)`; assert `tree.Get("/k")` returns `data2`. | +| `TestTreePrefixScan` | Set three keys under `/a/` and two under `/b/`; call `tree.Scan("/a/")` and assert exactly three entries are returned. | + +#### `internal/ipc` + +| Test | Description | +|------|-------------| +| `TestProtocolMarshal` | Construct a `Request{Method: "get", Path: "/ietf-interfaces:interfaces"}`, marshal to JSON, unmarshal into a new struct, assert field equality. Repeat for `Response`. | +| `TestFraming` | Write a 1-byte version header followed by a 4-byte big-endian length header followed by a JSON payload into a `bytes.Buffer`. Read back using the IPC protocol reader. Assert the recovered payload is byte-exact and the version byte matches `YANGERD_VERSION`. Also test with a payload of exactly 0 bytes and a payload of 65535 bytes. Test that a mismatched version byte causes the reader to return a version-mismatch error. | +| `TestIPCServer` | Start a server on a `tmpdir` socket using `ipc.Listen()`. Connect a client using `ipc.Dial()`. Send a `get /` request. Assert the response is a valid JSON object. Shut down the server and verify the client receives an `io.EOF` or connection-closed error. | +| `TestIPCServerConcurrent` | Start a server, connect 50 clients simultaneously, each sending one request and reading one response. Assert no request is lost, no response is misrouted to the wrong client. | + +#### `internal/collector` + +| Test | Description | +|------|-------------| +| `TestParseDnsmasqLeases` | Provide a sample `/var/lib/misc/dnsmasq.leases` file (three entries, mixed IPv4/IPv6). Call `collector.ParseDnsmasqLeases(reader)`. Assert the returned slice has length 3 and each entry's MAC, IP, and hostname fields match expectations. 
| +| `TestParseVtyshOspf` | Provide sample JSON output from `vtysh -c 'show ip ospf json'` (two neighbors, one `Full` and one `ExStart`). Call `collector.ParseVtyshOspfJSON(data)`. Assert that the neighbor count is 2 and that one neighbor has state `"Full"`. | +| `TestCollectorRunAll` | Create a mock collector that returns an error on the first call and succeeds on the second. Call `RunAll([]Collector{mockCollector})` twice. Assert the tree is updated on the second call, and the first error is logged but does not terminate the collector loop or affect other collectors. | +| `TestCollectorTimeout` | Create a mock collector that blocks for 5 seconds. Call it with a context having a 100-millisecond deadline. Assert the call returns within 200 milliseconds with a context error, and the tree retains the previous value for that key. | + +### Integration Tests + +Integration tests launch a real yangerd binary (built by `go test -v -run TestIntegration...`) against a controlled environment: + +- **Netlink injection (full interface re-read)**: use Go's `net.Pipe()` to create a mock netlink event source and a mock `IPBatch` that records queries written to it. Inject a synthetic `RTM_NEWLINK` message (interface `eth99` transitioning to `UP`). Assert that: + 1. Within 100 milliseconds, the mock `IPBatch` receives exactly three queries: `link show dev eth99`, `-s link show dev eth99`, and `addr show dev eth99` (the full interface re-read set). + 2. `yangerctl get /ietf-interfaces:interfaces/interface[name='eth99']/oper-status` returns `"up"`. + 3. `yangerctl get /ietf-interfaces:interfaces/interface[name='eth99']/statistics` contains rx/tx counters from the `-s link show` response. + 4. A mock `EthMonitor` records that `RefreshInterface("eth99")` was called exactly once (validating the cross-subsystem ethtool re-query trigger). + 5. If oper-status changed, `yangerctl get /ietf-interfaces:interfaces/interface[name='eth99']/last-change` returns a timestamp within 100ms of the injection time. 
+ This validates the full RTM_NEWLINK path: netlink event -> monitor goroutine -> full re-read (3 ip batch queries) -> tree write -> ethtool re-query -> IPC response. + +- **IPC end-to-end**: start yangerd against the host's real netlink (requires root or `CAP_NET_ADMIN`; run as a privileged CI job step). Send `get /ietf-interfaces:interfaces` over the Unix socket. Assert the JSON response contains at least one interface entry with a `name` field. + +- **503 during startup**: intercept the `ready` flag using a test hook (a `testing.T`-injected boolean gate). Verify that requests received before the flag is set receive `{"code": 503, "status": "starting"}`. Verify that the first request after the flag transitions receives a normal `{"code": 200}` response. + +### Regression Tests + +Regression tests verify that yangerd's output is structurally correct according to the YANG schema and matches golden-file reference data captured from a known-good system state. The test matrix is: + +| Module | Golden File | yangerd path | Architecture | +|--------|-------------|--------------|-------------| +| ietf-interfaces | `golden/interfaces.json` | /ietf-interfaces:interfaces | x86_64, aarch64 | +| ietf-routing (routes) | `golden/routing-ribs.json` | /ietf-routing:routing/ribs | x86_64, aarch64 | +| ietf-routing (neighbors) | `golden/routing-neigh.json` | /ietf-routing:routing (arp) | x86_64, aarch64 | +| ethtool statistics | `golden/ethtool-stats.json` | /ietf-interfaces:interfaces/.../statistics | x86_64 | + +For each cell in the matrix: +1. Boot a QEMU x86_64 or aarch64 Infix image. +2. Call `yangerctl get <yangerd path>` (the path from the matrix row) and capture the JSON output to `actual.json`. +3. Compare against the golden file using a YANG-aware structural comparator (`jd` or a custom Go tool) that treats list ordering as insignificant and ignores ephemeral counters (byte counts, uptime) that legitimately differ between runs. +4. 
Pass `actual.json` through `yanglint` (libyang validation) to verify it is accepted as valid YANG instance data. +5. Assert zero structural differences in non-counter fields. + +Golden files are generated once from a reference system with a known network configuration and committed to the test suite. They are updated whenever the YANG model changes or yangerd's output format is intentionally modified. + +### Race Detector Policy + +All unit tests and integration tests run with `-race` enabled in CI: + +```yaml +# In .github/workflows/build.yml +- name: yangerd unit tests + run: | + cd src/yangerd + go test -race ./... +``` + +The per-model `sync.RWMutex` locks in `internal/tree`, the buffered channels in each monitor, and the context-based shutdown in collector goroutines are all required to be race-free under this policy. Any PR introducing a data race (as detected by `-race`) is automatically blocked. + +Specific race-sensitive areas tested: +- `tree.Set()` called from a monitor goroutine concurrently with `tree.Get()` from an IPC handler goroutine. +- Monitor goroutine shutdown via `ctx.Done()` while an IPC handler holds a read lock. +- `ready` flag transition from `false` to `true` visible to all IPC handler goroutines without stale reads. + +### Testability Contracts (Interface Boundaries) + +Every external dependency is abstracted behind a Go interface. This enables unit testing with mock implementations -- no kernel, no D-Bus, no FRR, no hardware. The production binary uses the real implementations; `go test` uses mocks. 
+ +| Dependency | Interface | Package | Production Implementation | Mock | +|------------|-----------|---------|--------------------------|------| +| Netlink subscriptions | `NetlinkSubscriber` | `internal/nlmonitor/` | `vishvananda/netlink` channels | Channel-fed fake with injectable events | +| ip batch subprocess | `Executor` | `internal/ipbatch/` | Persistent `ip -json -force -batch -` process | In-memory map returning canned JSON per query | +| bridge batch subprocess | `Executor` | `internal/bridgebatch/` | Persistent `bridge -json -batch -` process | In-memory map returning canned JSON per query | +| D-Bus connection | `DBusConnector` | `internal/dbusmonitor/` | `godbus/dbus/v5` system bus | Fake bus with configurable method returns and injectable signals | +| ZAPI (zebra) socket | `ZAPIDialer` | `internal/zapiwatcher/` | `net.Dial("unix", "/var/run/frr/zserv.api")` | `net.Pipe()` with scripted ZAPI v6 messages | +| Ethtool genetlink | `EthtoolQuerier` | `internal/ethmonitor/` | `mdlayher/ethtool` client | Struct literal returns | +| Chrony cmdmon | `ChronyClient` | `internal/collector/` | `facebook/time/ntp/chrony` | Struct literal returns | +| Command execution | `CommandRunner` | `internal/collector/` | `exec.CommandContext` | Canned stdout/stderr per command | +| File reads | `FileReader` | `internal/collector/` | `os.ReadFile` / `filepath.Glob` | `fstest.MapFS` or in-memory bytes | + +Interface definitions: + +```go +// internal/ipbatch/batch.go (also used by internal/bridgebatch/) +type Executor interface { + Query(ctx context.Context, cmd string) (json.RawMessage, error) + Close() error +} + +// internal/nlmonitor/nlmonitor.go +type NetlinkSubscriber interface { + LinkSubscribe(ch chan<- netlink.LinkUpdate, done <-chan struct{}) error + AddrSubscribe(ch chan<- netlink.AddrUpdate, done <-chan struct{}) error + NeighSubscribe(ch chan<- netlink.NeighUpdate, done <-chan struct{}) error +} + +// internal/dbusmonitor/dbusmonitor.go +type DBusConnector 
interface { + Signal(ch chan<- *dbus.Signal) + AddMatchSignal(opts ...dbus.MatchOption) error + Object(dest string, path dbus.ObjectPath) DBusObject + Close() error +} + +type DBusObject interface { + Call(method string, flags dbus.Flags, args ...interface{}) *dbus.Call +} + +// internal/zapiwatcher/zapiwatcher.go +type ZAPIDialer interface { + Dial(ctx context.Context) (net.Conn, error) +} + +// internal/ethmonitor/ethmonitor.go +type EthtoolQuerier interface { + LinkInfo(ifi int) (*ethtool.LinkInfo, error) + LinkMode(ifi int) (*ethtool.LinkMode, error) +} + +// internal/collector/ntp.go +type ChronyClient interface { + Tracking(ctx context.Context) (*chrony.ReplyTracking, error) + Sources(ctx context.Context) ([]chrony.ReplySourceData, error) +} + +// internal/collector/runner.go (shared by vtysh, iw, podman, dmidecode) +type CommandRunner interface { + Run(ctx context.Context, name string, args ...string) ([]byte, error) +} + +// internal/collector/reader.go (shared by /proc, /sys, lease file readers) +type FileReader interface { + ReadFile(path string) ([]byte, error) + Glob(pattern string) ([]string, error) +} +``` + +**Import restriction rule**: No `internal/` package may import `os/exec`, `os.ReadFile`, `vishvananda/netlink`, `godbus/dbus`, `mdlayher/ethtool`, or `facebook/time/ntp/chrony` directly in production code outside of the interface implementation files. All access goes through the interface. This is enforced by a `go vet` linter check (or `depguard` via `golangci-lint`) in CI. + +**Mock location**: Reusable mock implementations live in `internal/testutil/`. Package-specific mocks live in `_test.go` files within their package. + +### Verification Loop (Definition of Done) + +A module is complete when all of the following pass on a developer workstation with no target hardware, no kernel dependencies, and no running services: + +```bash +# 1. Compiles with zero errors +go build ./cmd/yangerd ./cmd/yangerctl + +# 2. Static analysis clean +go vet ./... 
+ +# 3. All tests pass, no data races +go test -race -count=1 ./... +``` + +Step 3 implicitly validates golden-file parity: every collector's test function loads canned input from `testdata/`, runs it through the collector with mocked dependencies, and compares the resulting YANG JSON against a `.golden` file committed to the repository. A mismatch fails the test. + +**Golden-file capture process** (one-time, from a running Infix system with the current Python yanger scripts): + +1. Capture expected output for each module: + ```bash + # On target, for each yanger module: + yangerctl get /ietf-interfaces:interfaces > golden/interfaces.json + yangerctl get /ietf-routing:routing > golden/routing.json + yangerctl get /ietf-hardware:hardware > golden/hardware.json + # ... for all 14 modules + ``` +2. Capture the corresponding raw inputs that produced that output: + ```bash + ip -json link show > testdata/interfaces/ip-link.json + ip -json addr show > testdata/interfaces/ip-addr.json + ip -json -s link show > testdata/interfaces/ip-link-stats.json + vtysh -c 'show ip ospf json' > testdata/ospf/vtysh-ospf.json + cat /var/lib/misc/dnsmasq.leases > testdata/dhcp/leases.txt + # ... for all data sources + ``` +3. Commit both `testdata/` (inputs) and `golden/` (expected outputs) to the repository. +4. Each collector's unit test creates a mock with the canned inputs, runs the collector, and asserts the output matches the golden file using a structural JSON diff (key structure must match; volatile fields like counters and timestamps are ignored). + +**YANG schema validation** (CI only -- requires `yanglint`): + +```bash +# Validate each golden file against the YANG schema +for f in golden/*.json; do + yanglint --format json -t data -m yang/*.yang "$f" +done +``` + +This runs in CI but not on every developer `go test` invocation, since `yanglint` requires libyang (a C dependency). 
The golden-file structural comparison in `go test` catches output regressions; `yanglint` catches schema violations. + +**Per-module completion checklist**: + +1. Go interface defined in the consuming package +2. Production implementation wired in `cmd/yangerd/main.go` +3. Mock implementation in `internal/testutil/` or `_test.go` +4. Canned inputs captured in `testdata/<module>/` +5. Golden output captured in `golden/<module>.json` +6. Unit test: canned input -> mock -> collector -> assert output == golden +7. `go test -race` passes for the package +8. No direct imports of external libraries outside interface implementation files + +When all migration modules and new modules pass this checklist (13 migrated modules + 1 new module = 14 total YANG modules), yangerd is feature-complete and ready for integration testing on target hardware. + +## 9. Migration Plan + + +yangerd ships as a single, complete delivery covering all 14 YANG modules. There is no phased rollout -- yangerd completely replaces the Python yanger scripts in one step. Migration scope is 13 modules (12 existing Python modules plus new `infix-services:mdns` migrated from `statd/avahi.c`). + +### Module Inventory +**Text parser test fixtures**: The `iw event` and `vtysh` output parsers process human-readable text that varies across tool versions. Test fixtures capture known-good outputs from specific versions (iw 6.9, vtysh from FRR 10.5.1) including edge cases: truncated output, empty responses, multi-line entries, and malformed lines. Each fixture is stored as a `.txt` file in `testdata/` alongside the expected parsed Go struct as a `.golden` JSON file.
+ + +All 14 modules are implemented and delivered together (with additional supporting bridge and WireGuard collectors listed for completeness): + +| Module | YANG Path | Data Source | Go File | +|--------|-----------|-------------|---------| +| ietf-interfaces | `/ietf-interfaces:interfaces` | Netlink RTNLGRP_LINK + RTNLGRP_*IFADDR | `internal/monitor/link.go`, `addr.go` | +| ietf-routing (RIBs) | `/ietf-routing:routing/ribs` | ZAPI watcher (streaming from zebra zserv socket) | `internal/zapiwatcher/zapiwatcher.go` | +| ietf-routing (ARP/NDP) | `/ietf-routing:routing` (neighbor tables) | Netlink RTNLGRP_NEIGH | `internal/monitor/neigh.go` | +| Interface statistics | `/ietf-interfaces:interfaces/interface/statistics` | mdlayher/ethtool genetlink | `internal/collector/ethtool.go` | +| ietf-routing (OSPF) | `.../control-plane-protocol/ietf-ospf:ospf` | `vtysh -c 'show ip ospf json'` | `internal/collector/ospf.go` | +| ietf-routing (RIP) | `.../control-plane-protocol/ietf-rip:rip` | `vtysh -c 'show ip rip json'` | `internal/collector/rip.go` | +| ietf-routing (BFD) | `.../control-plane-protocol/ietf-bfd:bfd` | `vtysh -c 'show bfd peers json'` | `internal/collector/bfd.go` | +| ietf-hardware | `/ietf-hardware:hardware` | `/sys/class/hwmon`, `dmidecode` | `internal/collector/hardware.go` | +| ietf-system | `/ietf-system:system-state` | `/proc/uptime`, `/etc/os-release`, `/proc/loadavg` | `internal/collector/system.go` | +| ietf-ntp | `/ietf-ntp:ntp/state` | chrony cmdmon protocol (tracking + sources) | `internal/collector/ntp.go` | +| ieee802-dot1ab-lldp | `/ieee802-dot1ab-lldp:lldp` | `lldpcli -f json0 watch` | `internal/lldpmonitor/monitor.go` | +| infix-containers | `/infix-containers:containers` | `podman ps --format json` | `internal/collector/containers.go` (feature-gated) | +| infix-dhcp-server | `/infix-dhcp-server:dhcp-server` | `/var/lib/misc/dnsmasq.leases` | `internal/collector/dhcp.go` | +| infix-firewall | `/infix-firewall:firewall` | firewalld D-Bus 
method calls (zones, policies, services, global state) | `internal/collector/firewall.go` | +| infix-services (mDNS) | `/infix-services:mdns/neighbors` | Avahi D-Bus (`org.freedesktop.Avahi`) ServiceBrowser/ServiceResolver signals | `internal/mdnsmonitor/mdnsmonitor.go` | +| bridge STP/VLAN/FDB/MDB | bridge state | Netlink event triggers + `bridge -json -batch -` re-reads | `internal/collector/bridge.go` | +| WireGuard | WireGuard tunnels | `wgctrl.Client.Devices()` | `internal/collector/wireguard.go` | + +**Initial state bootstrap** (required because only `LinkSubscribeWithOptions{ListExisting: true}` auto-delivers existing entries; address, neighbour, and route monitors must bootstrap explicitly): + +```go +// In monitor/addr.go -- startup bootstrap +existing, err := netlink.AddrList(nil, netlink.FAMILY_ALL) +if err != nil { + log.Warnf("addr bootstrap failed: %v", err) +} else { + for _, a := range existing { + tree.Set(addrToPath(a), marshalAddr(a)) + } +} +``` + +The same pattern applies to `NeighList()`. Route data is bootstrapped by the ZAPI watcher's redistribution subscription (see Section 4.1octies), not by `RouteListFiltered()`. + +All collectors use `context.WithTimeout()` with per-command timeouts to bound each external process invocation: vtysh commands 5s, nft 5s, iw queries 2s, dmidecode 5s (see Section 4.7 Design Rationale for the full timeout table). On timeout, the previous tree value is retained and a warning is logged. Collectors are registered in `cmd/yangerd/main.go` and scheduled by `internal/scheduler/scheduler.go`, which runs each collector at its configured poll interval using `time.NewTicker`. 
+ +### Deliverables + +- `internal/monitor/{link,addr,neigh}.go` +- `internal/zapiwatcher/zapiwatcher.go` +- `internal/collector/{ethtool,ospf,rip,bfd,hardware,system,ntp,containers,dhcp,firewall,wifi,bridge,wireguard}.go` +- `internal/lldpmonitor/monitor.go` +- `internal/mdnsmonitor/mdnsmonitor.go` +- `internal/tree/tree.go` +- `internal/ipc/{server,client,protocol}.go` +- `cmd/yangerd/main.go` +- `cmd/yangerctl/main.go` +- `package/yangerd/yangerd.mk` + `yangerd.conf` +- Unit tests + integration tests for all modules +- Regression tests on x86_64, aarch64, and armv7 +- Removal of Python yanger scripts from Buildroot package +- Updated finit service file with `group frr` for vtysh access + +### Milestone Criteria + +All 14 modules pass regression tests across x86_64, aarch64, and armv7 in CI. The Python yanger scripts are removed from the Buildroot package. statd's `get_oper_data()` function calls only `ly_add_yangerd_data()` -- there is no Python fallback path. +## 10. Risk Assessment + +### 10.1 Detailed Risks + +**Risk 1 — ip batch subprocess crash or netlink subscription failure** +The `ip -json -force -batch -` subprocess is a long-lived external process managed by yangerd for state queries. If it crashes unexpectedly (segfault, OOM-killed) or hangs (blocked on a kernel call), yangerd loses its ability to query link, address, and neighbor state until the subprocess is restarted. A hung subprocess could also leave stale file descriptors or pipe buffers that interfere with the replacement process. Additionally, if the ip binary is upgraded on disk while yangerd is running, the replacement subprocess may exhibit different JSON output format or behavior. Separately, the native Go netlink subscriptions (via `vishvananda/netlink`) could fail if the kernel's netlink buffer overflows under heavy load, causing dropped events and temporarily stale data. + + +To mitigate this risk, the `internal/ipbatch/` package implements health monitoring for the batch subprocess. 
The subprocess is supervised by a dedicated goroutine that detects unexpected EOF on stdout (indicating process exit) and restarts the subprocess with exponential backoff starting at one hundred milliseconds and capping at thirty seconds. Before accepting the restarted subprocess, yangerd performs a canary query (`link show dev lo`) to verify it produces valid JSON. For the native netlink subscriptions in `internal/nlmonitor/`, the shared error callback triggers context cancellation and full re-subscription. On re-subscription, a full state resynchronization is triggered by writing bulk dump commands to the ip batch subprocess, ensuring the in-memory tree is consistent with the current kernel state. Context cancellation provides clean shutdown of both the subprocess and netlink subscriptions. The health endpoint reports subprocess uptime, restart count, netlink subscription status, and last error for operational visibility. + +**Risk 2 — Memory pressure under high-frequency netlink event storms** +On large Layer 2 segments or during periods of network instability such as Address Resolution Protocol storms, the kernel can generate thousands of neighbor events per second. Each event causes a tree write including a mutex lock, JSON serialization, and map insertion, which may trigger frequent garbage collection cycles. Under sustained heavy load, this can produce elevated central processing unit usage and garbage collection pause times that are visible as increased latency for IPC requests from statd, potentially causing management timeouts and service degradation. + +We mitigate this risk by debouncing tree writes for each module key using a one hundred millisecond coalescing window, ensuring that only the final value in a burst of events is committed to the shared tree. 
Additionally, the netlink subscription channels are buffered to hold up to two hundred and fifty-six events; any events beyond this limit are dropped, and a counter is incremented to provide visibility into the loss. A per-monitor event rate gauge is also exposed via the health endpoint to make storm conditions visible to operators and automated monitoring systems, allowing for proactive troubleshooting of network anomalies and preventing cascading failures in the management plane. + +**Risk 3 — dbus/external process query timeouts** +Phase 2 collectors that invoke external processes such as vtysh or podman, or query native protocols such as chrony cmdmon and D-Bus APIs, may block if those processes are slow to start, waiting for a file lock, or unresponsive due to extreme system resource contention. A blocked collector goroutine could cause the corresponding tree key to remain in a stale state indefinitely if the collection logic does not account for execution delays. This would result in incorrect or outdated operational data being served to management clients, which could lead to incorrect diagnostic conclusions or automated system failures. + +Every collection operation is wrapped with a context that enforces a per-command timeout (2-5 seconds depending on source: e.g., iw 2s; vtysh/dmidecode/podman 5s; D-Bus calls 2-5s). If a collector exceeds this deadline, the operation is aborted, a warning is logged to the system journal, and the last known good value is retained in the in-memory tree to prevent serving empty data. The collector then waits for the next scheduled interval before attempting the operation again. This ensures that a single slow or hung process cannot block other collectors or degrade the responsiveness of the IPC server, maintaining the overall stability of the daemon under various failure modes and ensuring that the system remains manageable even under duress. 
+ +**Risk 4 — Incomplete tree state at first statd query (startup race)** +Both statd and yangerd start concurrently during the system initialization process managed by finit. It is highly probable that statd's first operational data request will fire before yangerd has completed its initial state snapshot from the kernel using the bootstrap listing APIs. If yangerd responded with an empty or partial tree in this state, sysrepo might cache incorrect operational data, leading to a misleading view of the system state for the first few seconds after boot and potentially causing monitoring alerts to trigger unnecessarily. + +To prevent this, yangerd maintains a ready flag that is only set to true once all initial netlink dumps have successfully completed and populated the tree. While this flag is false, every IPC response is returned with a five hundred and three service unavailable status code. The statd daemon is configured to treat this status code as a transient error and will retry on the next sysrepo callback invocation. During the brief startup window (typically under one second), statd logs a warning indicating that yangerd is still initializing and returns `SR_ERR_INTERNAL` to sysrepo, which causes the management client to see an empty operational subtree for that brief period. Once yangerd signals readiness, all subsequent queries are served from the fully populated in-memory tree. + +**Risk 5 — FRR group membership** +The vtysh utility and the zebra zserv socket are used to query state from the FRRouting suite. vtysh connects to per-daemon control sockets, and the ZAPI watcher connects to the zserv unix socket (`/var/run/frr/zserv.api`). Both socket paths are owned by the frr user and group with restricted permissions that prevent unauthorized access. 
If the yangerd process is not running with the correct group memberships, OSPF/RIP/BFD collector queries via vtysh and the ZAPI watcher's route data stream will both fail with permission denied errors, resulting in empty operational subtrees for routing protocols and routes. + +This risk is addressed through the deployment configuration in the finit service file and the Buildroot package definition. The service file explicitly specifies that yangerd should run with membership in the frr group, granting it the necessary permissions to communicate with both vtysh control sockets and the zebra zserv socket. Furthermore, the post-installation script in the Buildroot package ensures that the yangerd system user is correctly added to the frr group on the target filesystem. These configuration steps are verified during the integration testing phase to ensure that both protocol state collection via vtysh and route data collection via the ZAPI watcher are functional across all supported hardware platforms and software configurations. + +**Risk 6 — dmidecode privilege** +The dmidecode utility requires elevated privileges to read System Management BIOS data from physical memory or the specialized sysfs interface. On many platforms, this requires the CAP_SYS_RAWIO capability to access low-memory addresses that are not otherwise exposed to unprivileged users. Without this capability, the utility will exit with a permission denied error, causing the hardware inventory collector to fail and leaving the inventory tree empty, which prevents identification of the specific hardware revision or serial number. + +We provide two mitigation options that can be selected based on the specific security requirements of the deployment. The first option is to grant the necessary capability to the yangerd service through the finit configuration, allowing it to run the utility directly with the required privileges. 
The second, more restrictive option is to pre-cache the hardware inventory data during the system build process or at initial boot from a privileged context, saving the output to a file that yangerd can read as an unprivileged user. This avoids the need for elevated privileges at runtime while still providing accurate hardware inventory information to management clients through the YANG models, maintaining a strict security posture. + +**Risk 7 — inotify watch limit exhaustion** +The Linux kernel maintains a per-user limit on the number of active inotify watches (`/proc/sys/fs/inotify/max_user_watches`). On systems with many DHCP lease files, it is possible for yangerd to exhaust this limit, especially if other daemons are also using inotify. When the limit is reached, any attempt to add a new watch will fail with `ENOSPC`. Note: hardware sensors are not watched via inotify (sysfs pseudo-files do not emit inotify events), and bridge STP state is not watched via inotify (it uses netlink events), so neither contributes to watch exhaustion. + +To mitigate this, the `internal/fswatcher/` package logs a clear warning identifying the specific path that failed to be watched. For every such failure, yangerd automatically falls back to the polling collector for that data source. This ensures that data collection continues at the configured polling interval, maintaining operational visibility at the cost of increased latency and CPU wake-ups, rather than failing entirely. +These paths map to specific YANG leaves: `/proc/sys/net/ipv4/conf/*/forwarding` maps to `ietf-ip:ipv4/forwarding`, `/proc/sys/net/ipv6/conf/*/forwarding` maps to `ietf-ip:ipv6/forwarding`, and `/proc/sys/net/ipv6/conf/*/accept_redirects` maps to neighbor discovery configuration leaves. + + +**Risk 8 — bridge batch subprocess failure or bridge netlink event loss** +yangerd manages a persistent `bridge -json -batch -` subprocess for bridge-specific state queries (VLANs, MDB, FDB, STP). 
Bridge events are received natively via `vishvananda/netlink`: FDB entries arrive as NeighUpdate events with the NDA_MASTER flag, VLAN changes arrive as LinkUpdate events, STP port state changes arrive as LinkUpdate events carrying IFLA_BRPORT_STATE in IFLA_PROTINFO, and MDB entries are received via a raw netlink socket subscribed to RTNLGRP_MDB. All events are used as triggers only -- full state is re-read via bridge batch. A crash in the bridge batch subprocess would prevent state re-queries from completing. A failure in the netlink subscriptions would stop reactive updates for bridge state. + +The bridge batch subprocess uses the same robust health monitoring as the ip batch subprocess, providing automatic restarts with exponential backoff and canary-query validation. For bridge netlink events, the shared NLMonitor error callback handles subscription failures by triggering re-subscription and a full re-query of bridge state via the batch subprocess. This ensures the in-memory tree remains synchronized with the kernel after any failure. The health endpoint reports bridge batch subprocess status and bridge netlink subscription status separately. + +**Risk 9 — iw event subprocess failure (when WiFi enabled)** + +The `iw event -t` subprocess may exit unexpectedly due to kernel driver issues or nl80211 subsystem errors. This risk applies only when WiFi support is included in the build (`YANGERD_ENABLE_WIFI=true`). If the subprocess exits during operation, WiFi event notifications stop and the in-memory tree retains stale wireless data until the subprocess is restarted. + +The `internal/iwmonitor/` package mitigates this with the same exponential backoff restart pattern used by the NLMonitor re-subscription and batch subprocess restarts (initial delay 100ms, max 30s, factor 2x). Upon restart, a full re-query of all known wireless interfaces is performed. 
When WiFi is not included in the build (`YANGERD_ENABLE_WIFI=false`), the IW Event Monitor is not started at all and no WiFi data appears in the tree. + +**Risk 10 — ethtool genetlink subscription failure** + +The `internal/ethmonitor/` package subscribes to the kernel's `ETHNL_MCGRP_MONITOR` genetlink multicast group at startup. Since Infix targets Linux kernel 6.18, ethtool netlink is unconditionally available and the subscription is expected to always succeed. If the `genetlink.Conn` dial or `JoinGroup()` call fails, it indicates a system misconfiguration (e.g., missing kernel module, permission denied) rather than a kernel version issue. Such failures are logged at ERROR. + +If the genetlink subscription succeeds initially but the connection is later broken (e.g., due to a kernel module reload or netlink buffer overflow), the ethmonitor logs a warning and attempts to re-establish the subscription with exponential backoff (initial delay 100ms, max 30s, factor 2x). During the reconnection window, the ethtool collector's 30-second polling cycle for statistics continues to provide counter data. Settings data (speed, duplex, autoneg) may be briefly stale until the subscription is restored or until the next RTM_NEWLINK event triggers a RefreshInterface() call. + + +**Risk 11 — ZAPI watcher failure (zebra unavailability or restart)** +The ZAPI watcher connects to zebra's zserv unix domain socket to receive route redistribution notifications. If zebra is not running at yangerd startup (e.g., delayed start, crash, or intentional restart), the watcher cannot establish its initial connection and the route subtree will be empty until zebra becomes available. If zebra restarts while the watcher is connected, the watcher receives an EOF on its receive channel and must reconnect and re-subscribe. During the reconnection window, no route updates are received and the in-memory tree retains stale route data from the previous session. 
+ +The `internal/zapiwatcher/` package mitigates this with exponential backoff reconnection (initial delay 100ms, max 30s, factor 2x). On each successful reconnection, the full ZAPI subscription handshake is replayed (HELLO + ROUTER_ID_ADD + REDISTRIBUTE_ADD per route type), which causes zebra to send a complete RIB dump. The watcher uses a full replacement strategy: it builds a new route map from the dump and atomically replaces the route subtree in the tree, ensuring that stale routes from the previous session are cleared. The health endpoint reports ZAPI watcher connection status (connected, reconnecting, failed) and the timestamp of the last successful route update. ZAPI v6 wire format has been stable across FRR 8.x, 9.x, and 10.x (including the target FRR 10.5.1), reducing the risk of protocol version mismatch after FRR upgrades. +### 10.2 Risk Summary + +| # | Risk | Likelihood | Impact | Status | +|---|------|-----------|--------|--------| +| 1 | ip batch subprocess crash or netlink subscription failure | Low–Medium | High | Mitigated — health-monitored subprocess with auto-restart, exponential backoff, canary query; netlink re-subscription with full resync | +| 2 | Netlink event storm (memory/CPU) | Low–Medium | Medium | Mitigated — 100 ms debounce per key; 256-event buffer; health metrics | +| 3 | dbus/process query timeout | Low | Medium | Mitigated — 2-5 seconds `context.WithTimeout` (command-specific); stale value retained; retry on next tick | +| 4 | Startup race (incomplete tree at first query) | High | Low | Mitigated — `code 503` response; statd retries on next callback; brief empty window during init | +| 5 | FRR group membership (vtysh + zserv socket) | High | Medium | Deployment requirement — finit `group frr`; Buildroot adds user to group | +| 6 | dmidecode privilege (CAP_SYS_RAWIO) | Low | Medium | Mitigated — pre-cache at build time or grant CAP_SYS_RAWIO via finit | +| 7 | inotify watch limit exhaustion | Low | Medium | Mitigated
— logs warning and falls back to polling collector | +| 8 | bridge batch subprocess failure or bridge netlink event loss | Low | Medium | Mitigated — health-monitored batch subprocess with auto-restart; netlink re-subscription with full bridge state resync | +| 9 | iw event subprocess failure (WiFi enabled) | Low | Low | Mitigated — feature-gated subsystem (`YANGERD_ENABLE_WIFI`); exponential backoff restart when enabled | +| 10 | ethtool genetlink subscription failure | Low | Low | Mitigated — unconditionally available on kernel 6.18; failure indicates misconfiguration, not kernel gap; exponential backoff reconnection | +| 11 | ZAPI watcher failure (zebra unavailability, reconnection gap, protocol mismatch) | Medium | Medium | Mitigated — exponential backoff reconnection; full RIB re-sync on reconnect; stale route data cleared atomically; ZAPI v6 stable across FRR 8.x–10.x | +| 12 | D-Bus service unavailability (dnsmasq or firewalld not running, D-Bus daemon restart) | Low | Low | Mitigated — `NameOwnerChanged` signal detects service disappearance and reappearance; full data refresh on service (re)start; stale data retained until refresh succeeds; exponential backoff reconnection to D-Bus system bus | +## Appendices + +### A.1 Netlink Group Reference + +The following table lists all `RTNLGRP_*` multicast groups monitored by yangerd via native Go netlink subscriptions (`vishvananda/netlink`). These groups are subscribed to directly using `LinkSubscribeWithOptions`, `AddrSubscribeWithOptions`, and `NeighSubscribeWithOptions`, plus a raw netlink socket for `RTNLGRP_MDB`. The constant values are from the Linux kernel's `rtnetlink.h` header. 
+ +| Group Name | Constant Value | Event Types | Monitor File | Behavior | +|------------|---------------|-------------|-------------|----------| +| `RTNLGRP_LINK` | 1 | `RTM_NEWLINK`, `RTM_DELLINK` | `monitor/link.go` | On `RTM_NEWLINK`: triggers full interface re-read (3 `ip -json -batch` queries) + `ethmonitor.RefreshInterface()` to re-query ethtool settings, since `ETHNL_MCGRP_MONITOR` does not fire on link state transitions. Updates `last-change` timestamp. | +| `RTNLGRP_NEIGH` | 3 | `RTM_NEWNEIGH`, `RTM_DELNEIGH` | `monitor/neigh.go` | On any neigh event (add or remove): triggers full neighbor re-read via `neigh show dev <ifname>` through ip batch. Event is trigger only — not parsed for data. Delete events produce a re-read that omits the removed neighbor. | +| `RTNLGRP_IPV4_IFADDR` | 5 | `RTM_NEWADDR`, `RTM_DELADDR` (IPv4) | `monitor/addr.go` | On any addr event (add or remove): triggers full address re-read via `addr show dev <ifname>` through ip batch. Event is trigger only — not parsed for data. | +| `RTNLGRP_IPV6_IFADDR` | 9 | `RTM_NEWADDR`, `RTM_DELADDR` (IPv6) | `monitor/addr.go` | Same re-read pattern as IPv4; both groups dispatched by AF inside `monitor/addr.go`. | +| `RTNLGRP_MDB` | 26 | `RTM_NEWMDB`, `RTM_DELMDB` | `nlmonitor/nlmonitor.go` | On any MDB event: triggers full MDB state re-read via `mdb show` through bridge batch. Event is trigger only — not parsed for data. | + +Notes: +- `RTNLGRP_IPV4_IFADDR` and `RTNLGRP_IPV6_IFADDR` are subscribed together in a single `netlink.Subscribe()` call by passing both group constants. The resulting events are dispatched by address family inside `monitor/addr.go`. +- `RTNLGRP_NEIGH` covers both ARP (IPv4) and NDP (IPv6) neighbour events — no separate IPv6 group is needed. +- Kernel buffer overflow (`ENOBUFS`) on any of these subscriptions is handled by logging a warning and performing a full re-list (e.g. `AddrList()`) to recover any dropped events, followed by re-subscription.
+ +### A.2 YANG Module Registry + +All 14 YANG modules that yangerd handles, with their canonical YANG path prefix and the corresponding Python predecessor (if any): + +| YANG Module | Path Prefix | Replaces | +|-------------|------------|----------| +| `ietf-interfaces` | `/ietf-interfaces:interfaces` | `interface.py` | +| `ietf-routing` (RIBs/routes) | `/ietf-routing:routing/ribs` | `routing.py` | +| `ietf-routing` (ARP/NDP neighbors) | `/ietf-routing:routing` (neighbor tables) | `routing.py` | +| `ietf-routing` (OSPF) | `/ietf-routing:routing/control-plane-protocols/control-plane-protocol/ietf-ospf:ospf` | `ospf.py` | +| `ietf-routing` (RIP) | `/ietf-routing:routing/control-plane-protocols/control-plane-protocol/ietf-rip:rip` | `rip.py` | +| `ietf-routing` (BFD) | `/ietf-routing:routing/control-plane-protocols/control-plane-protocol/ietf-bfd:bfd` | `bfd.py` | +| `ietf-hardware` | `/ietf-hardware:hardware` | `hardware.py` | +| `ietf-system` | `/ietf-system:system-state` | `system.py` | +| `ietf-ntp` | `/ietf-ntp:ntp/state` | `ntp.py` | +| `ieee802-dot1ab-lldp` | `/ieee802-dot1ab-lldp:lldp` | `lldp.py` (served reactively via `lldpcli -f json0 watch`) | +| `infix-containers` | `/infix-containers:containers` | `containers.py` (feature-gated: `YANGERD_ENABLE_CONTAINERS`) | +| `infix-dhcp-server` | `/infix-dhcp-server:dhcp-server` | `dhcp-server.py` | +| `infix-firewall` | `/infix-firewall:firewall` | `firewall.py` | +| `infix-services` (mDNS) | `/infix-services:mdns` | `(new — migrated from statd/avahi.c via Avahi D-Bus)` | +Note: the registry lists 14 rows because `ietf-routing` covers three distinct sub-trees (RIBs, neighbors, and routing protocol instances) that correspond to distinct `sr_oper_get_subscribe()` paths, while `infix-services:mdns` is an additional module entry beyond legacy Python parity. 
+ +### A.3 Glossary +**inotify** +A Linux kernel subsystem that provides notifications about filesystem events (creation, modification, deletion) to user-space applications. yangerd uses inotify to implement reactive file watching for procfs forwarding flags. Bridge STP state is handled via netlink events, not inotify. DHCP lease and firewall state changes are handled via D-Bus signal subscriptions (see Section 4.1novies), not inotify. Note: sysfs pseudo-files (hwmon sensors, thermal zones) do not emit inotify events and are polled instead. + +**fsnotify** +The cross-platform Go library (`github.com/fsnotify/fsnotify`) that wraps Linux inotify (and other OS-specific equivalents) to provide a high-level API for filesystem events. + +**bridge netlink events** +Bridge-specific kernel events received by yangerd via native Go netlink subscriptions. FDB (forwarding database) events arrive as `NeighUpdate` messages with the `NDA_MASTER` flag set, indicating they belong to a bridge master device. VLAN membership changes arrive as `LinkUpdate` messages with bridge VLAN attributes. STP port state changes arrive as `LinkUpdate` messages carrying `IFLA_BRPORT_STATE` in `IFLA_PROTINFO`. MDB (multicast database) events are received via a raw netlink socket subscribed to `RTNLGRP_MDB` (group 26). All bridge events are used as triggers only -- full state is re-read via the `bridge -json -batch -` subprocess. + +**D-Bus Monitor** +The yangerd subsystem (`internal/dbusmonitor/`) that subscribes to D-Bus system bus signals for reactive monitoring of service-managed data. It watches `DHCPLeaseAdded`, `DHCPLeaseDeleted`, and `DHCPLeaseUpdated` signals from dnsmasq, and `Reloaded` signals from firewalld. 
Each signal triggers a full data refresh: DHCP refreshes re-read `/var/lib/misc/dnsmasq.leases` and call dnsmasq's `GetMetrics()` D-Bus method; firewall refreshes query firewalld via D-Bus method calls (`getDefaultZone()`, `getActiveZones()`, `getZoneSettings2()`, `getPolicies()`, `getPolicySettings()`, `listServices()`, `getServiceSettings2()`, `getLogDenied()`, `queryPanicMode()`). The D-Bus Monitor also watches `NameOwnerChanged` on `org.freedesktop.DBus` to detect service restarts and trigger immediate data re-reads. Implemented using `godbus/dbus/v5` with `AddMatchSignal()` for signal subscriptions. See Section 4.1novies. +If the lease file is unreadable or contains malformed data, `refreshDHCP()` logs a warning and leaves the tree unchanged (serving last-known-good data). The `GetMetrics()` D-Bus method call uses a 2-second timeout; on timeout or error, the metrics portion is omitted from the tree update while the lease data (if successfully parsed) is still applied. Similarly, `refreshFirewall()` applies a 5-second timeout to the firewalld D-Bus method calls; on timeout or error, the firewall tree retains its previous state. + + +**iw event** +The `iw event -t` command from the `iw` tool, which subscribes to the Linux kernel's nl80211 netlink family and emits timestamped, human-readable text lines on stdout for each 802.11 wireless event. Events include station associations/disassociations, connection/disconnection, channel switches, scan activity, and regulatory domain changes. Unlike yangerd's core netlink subscriptions (which use native Go via `vishvananda/netlink`), `iw event` is run as a subprocess because there is no mature Go nl80211 library. `iw event` does not produce JSON output—it requires custom text parsing. yangerd runs this as the only persistent event-monitoring subprocess (all other event monitoring uses native Go netlink channels). 
+ +**nl80211** +The Linux kernel's netlink-based interface for 802.11 wireless device configuration and monitoring. It is the successor to the older Wireless Extensions (WEXT) interface. The `iw` tool communicates with the kernel via nl80211 generic netlink messages. nl80211 defines over 300 attributes for wireless device state, including station information, scan results, regulatory domains, and channel configuration. yangerd accesses nl80211 indirectly through the `iw` command-line tool rather than implementing a Go-native nl80211 client, avoiding the complexity of parsing the extensive attribute set. + +**ethnl (ethtool netlink)** +The Linux kernel's genetlink family for querying and configuring Ethernet device settings. It provides a structured netlink interface to ethtool functionality that was previously only accessible via ioctl. The family name is `"ethtool"` and it exposes commands for link info, link modes, features, WOL, rings, channels, coalesce, pause, EEE, FEC, module parameters, and more. Unconditionally available on Infix's target kernel (6.18). yangerd uses the ethtool netlink family both for typed queries (via `mdlayher/ethtool`) and for reactive monitoring (via `mdlayher/genetlink` subscription to the monitor multicast group). + +**ETHTOOL_MSG_*_NTF** +Notification message types emitted by the kernel's ethtool netlink family when Ethernet device settings change. Each corresponds to a specific settings domain: `ETHTOOL_MSG_LINKINFO_NTF` (command 28) for link info changes (speed, PHY type, transceiver), `ETHTOOL_MSG_LINKMODES_NTF` (command 29) for link mode changes (advertised speeds, autoneg, duplex), `ETHTOOL_MSG_FEATURES_NTF` for offload feature changes, etc. Statistics and counters do not have NTF message types—they must be polled. + +**genetlink multicast** +A mechanism in the Linux generic netlink subsystem that allows user-space processes to subscribe to named multicast groups and receive asynchronous notifications from the kernel. 
Each genetlink family can define one or more multicast groups. The ethtool family defines a single group named `"monitor"` (constant `ETHNL_MCGRP_MONITOR`) that delivers all `_NTF` notification messages. yangerd subscribes to this group via `genetlink.Conn.JoinGroup()` to receive ethtool setting change notifications. + +**ETHNL_MCGRP_MONITOR** +The single multicast group defined by the kernel's ethtool genetlink family, named `"monitor"`. Subscribing to this group via `genetlink.Conn.JoinGroup(groupID)` delivers all ethtool notification messages (`ETHTOOL_MSG_LINKINFO_NTF`, `ETHTOOL_MSG_LINKMODES_NTF`, `ETHTOOL_MSG_FEATURES_NTF`, etc.) to the subscriber. The group ID is obtained at runtime by looking up the `"monitor"` group in the ethtool family's multicast group list via `genetlink.Family.Groups`. + + +**Full Interface Re-read** +The pattern where the link event handler (`monitor/link.go`) responds to an RTM_NEWLINK event by writing three queries to the persistent `ip -json -force -batch -` subprocess: `link show dev <ifname>` (link state), `-s link show dev <ifname>` (link state + hardware counters), and `addr show dev <ifname>` (IP addresses). This captures the complete interface state at a single coherent point in time and updates the entire YANG subtree for that interface atomically. The full re-read also triggers a cross-subsystem ethtool re-query via `ethmonitor.RefreshInterface()`, since `ETHNL_MCGRP_MONITOR` does not fire on link up/down events. + +**IPC Indirection** +The architecture pattern where statd does not directly collect operational data but delegates to a separate long-running daemon (yangerd) via a Unix socket. This decouples data collection timing from sysrepo callback timing: collection is reactive (driven by kernel events) or periodic (driven by a scheduler), while sysrepo callbacks are pull-on-demand. The indirection boundary is the socket — statd knows only the request/response protocol, not the collection mechanism.
+ +**Reactive** +Data that is updated in response to asynchronous events rather than on a fixed timer. Reactive event sources include kernel netlink multicast messages, ethtool genetlink notifications, ZAPI route redistribution messages, bridge netlink triggers, and D-Bus signals (for service-managed data such as DHCP leases and firewall rules). A reactive update path has event-driven latency: the tree entry is updated within microseconds of the event, making the data current without polling. Contrast with *polling*. + +**Polling** +Data collected on a fixed interval by querying a native protocol (e.g. chrony cmdmon), running an external process (e.g. `vtysh`), or reading a file. Polling is necessary for data sources that do not emit asynchronous events (neither kernel events nor D-Bus signals). yangerd uses polling only for data that cannot be obtained reactively (Phase 2 collectors). NTP data is polled via the chrony cmdmon protocol (native Go, no subprocess) because chrony has no event/subscription mechanism -- the protocol is strictly request-response. Polling interval is configurable per-collector via environment variables. + +**RTNLGRP** +Routing Netlink Group — a numbered multicast group in the Linux netlink subsystem. Processes subscribe to one or more groups when opening a `NETLINK_ROUTE` socket. The kernel sends a copy of each matching event to every subscribed socket. yangerd uses four RTNLGRP groups (LINK, NEIGH, IPV4_IFADDR, IPV6_IFADDR) to receive notifications about changes to link state, addresses, and neighbours. + +**ip -json -force -batch -** +A persistent `iproute2` subprocess that reads commands from stdin and produces JSON arrays on stdout. yangerd uses this as its primary mechanism for querying kernel network state for links, addresses, and neighbors, replacing direct netlink socket access via Go libraries. The `-json` flag enables JSON output, `-force` continues past errors (reporting them on stderr), and `-batch -` reads from stdin. 
Each command written to stdin produces exactly one JSON array on stdout (one per line). This approach delegates all netlink TLV attribute parsing to iproute2, which is always compiled against the running kernel's headers and supports every netlink attribute the kernel exposes. Note: route data is NOT queried via ip batch — route data comes from the ZAPI watcher's streaming connection to zebra's zserv socket (see Section 4.1octies). + +**ip monitor -json (historical)** +The `iproute2` command `ip monitor -json` was originally considered for event monitoring but was found to NOT produce JSON output (confirmed by iproute2 source code analysis: `ip/ipmonitor.c` never calls `new_json_obj()`; see also Ubuntu bug #2116779). yangerd uses native Go netlink subscriptions via `vishvananda/netlink` instead. The `ip` binary is still used for state queries via `ip -json -force -batch -`, where the `-json` flag works correctly. + +**wgctrl** +A pure-Go library (`golang.zx2c4.com/wireguard/wgctrl`) for querying WireGuard interface state via the WireGuard netlink family (`WireGuard genl family`). It returns typed Go structs for each WireGuard interface, its peers, allowed IPs, and handshake timestamps. Used by yangerd's Phase 2 WireGuard collector. + +**vtysh** +FRRouting's integrated virtual shell — a CLI that connects to the control sockets of FRR daemons (ospfd, ripd, bfdd, bgpd) and forwards commands. yangerd's Phase 2 routing protocol collectors invoke `vtysh -c 'show ... json'` to obtain JSON-formatted protocol state. Requires membership in the `frr` Unix group. + +**IPC frame** +The wire unit of the yangerd IPC protocol: a 1-byte protocol version (currently `1`), followed by a 4-byte big-endian unsigned integer encoding the payload length in bytes, followed immediately by that many bytes of JSON-encoded payload. The version byte enables future protocol changes to be detected unambiguously; a receiver that encounters an unknown version must close the connection. 
The maximum payload size enforced by yangerd is **4 MiB** (4 × 1024 × 1024 bytes); this is a software limit, not a protocol limit. Both request and response use the same framing. Partial reads (TCP-style) are handled by reading in a loop until exactly `length` bytes are accumulated before parsing. + +**Operational datastore** +The sysrepo datastore holding current runtime state, as opposed to the `running`, `candidate`, and `startup` configuration datastores. The operational datastore is read-only from the management protocol perspective (NETCONF `<get>`, RESTCONF GET) and is populated by `sr_oper_get_subscribe()` callbacks registered by statd. yangerd's data ultimately reaches operators via this datastore after statd parses it with libyang and pushes it into sysrepo. + +**sr_oper_get_subscribe** +The sysrepo C API function that registers a callback for operational data subtree queries. Signature: `sr_error_t sr_oper_get_subscribe(sr_session_ctx_t *session, const char *module_name, const char *path, sr_oper_get_items_cb callback, void *private_data, uint32_t opts, sr_subscription_ctx_t **subscription)`. Current legacy statd code calls this 13 times in `subscribe_to_all()`. The target yangerd design covers 14 YANG modules total (13 migration modules + `infix-services:mdns`). Each registered callback calls `ly_add_yangerd_data()` to populate the operational tree from yangerd's IPC response. + +## Troubleshooting Guide + +### IPC Connection Issues +If statd is unable to connect to yangerd, first verify that the daemon is running using the `initctl status yangerd` command. If the daemon is active, check the permissions on `/run/yangerd.sock`; it should be owned by `root:yangerd` with `0660` permissions. If the socket file is missing, check the system logs for any startup errors that might have caused the daemon to exit prematurely. You can also attempt to connect manually using the `yangerctl health` command to verify that the IPC server is responding to requests.
Network namespace isolation can also interfere with socket communication if not correctly configured. + +### Stale Data in the Tree +When a collector fails to update its designated module in the in-memory tree, yangerd retains the last known good value to prevent serving empty data. If you suspect that the data for a particular module like OSPF or LLDP is stale, use `yangerctl health` to check the timestamp of the last successful collection for that specific collector. A failure in a collector is usually accompanied by a warning message in the system log. Common causes for stale data include incorrect group memberships, unresponsive background services that the collectors depend on (e.g., FRRouting not yet running for OSPF/RIP/BFD), or feature-gated subsystems that are disabled in the build. Also verify that the kernel module for protocols like WireGuard is loaded. + +### Performance Bottlenecks +Although yangerd is designed for high performance, extreme conditions can lead to increased latency. Use the `top` or `htop` utility to monitor the CPU and memory usage of the yangerd process. If memory usage is unexpectedly high, it may indicate a leak in a collector or an exceptionally large routing table that exceeds typical deployment scales. High CPU usage during event storms is mitigated by debouncing, but sustained storms may still impact responsiveness. Monitoring the drop counters in the health endpoint will indicate if the netlink event buffer is being exceeded, suggesting that the system is under more load than it can handle reactively. + +## Detailed IPC Examples + +### Example 1: Full Interface List Query +A client wishing to retrieve the entire operational state for all interfaces sends the following framed request. The length field in the frame header would be 56, the size in bytes of the JSON payload shown below.
+```json +{"method": "get", "path": "/ietf-interfaces:interfaces"} +``` +The server responds with a success message containing the list of all interfaces and their associated statistics, carrier status, and assigned addresses. The response is encapsulated in the same length-prefixed framing format. + +### Example 2: Routing Table Query +To retrieve only the IPv4 routing table, the path should be specified as follows in the request body. +```json +{"method": "get", "path": "/ietf-routing:routing/ribs/rib[name='ipv4-master']"} +``` +The response will contain a structured representation of all IPv4 routes currently installed in the kernel's routing table, including destination prefixes, next-hop addresses, and outgoing interface names. diff --git a/src/yangerd/go.mod b/src/yangerd/go.mod new file mode 100644 index 000000000..ee8dc783e --- /dev/null +++ b/src/yangerd/go.mod @@ -0,0 +1,24 @@ +module github.com/kernelkit/infix/src/yangerd + +go 1.23.0 + +require ( + github.com/fsnotify/fsnotify v1.9.0 + github.com/godbus/dbus/v5 v5.2.2 + github.com/mdlayher/ethtool v0.4.1 + github.com/mdlayher/genetlink v1.3.2 + github.com/mdlayher/netlink v1.8.0 + github.com/vishvananda/netlink v1.3.1 + golang.zx2c4.com/wireguard/wgctrl v0.0.0-20241231184526-a9ab2273dd10 +) + +require ( + github.com/google/go-cmp v0.7.0 // indirect + github.com/mdlayher/socket v0.5.1 // indirect + github.com/vishvananda/netns v0.0.5 // indirect + golang.org/x/crypto v0.41.0 // indirect + golang.org/x/net v0.43.0 // indirect + golang.org/x/sync v0.10.0 // indirect + golang.org/x/sys v0.35.0 // indirect + golang.zx2c4.com/wireguard v0.0.0-20231211153847-12269c276173 // indirect +) diff --git a/src/yangerd/go.sum b/src/yangerd/go.sum new file mode 100644 index 000000000..3422b6466 --- /dev/null +++ b/src/yangerd/go.sum @@ -0,0 +1,34 @@ +github.com/fsnotify/fsnotify v1.9.0 h1:2Ml+OJNzbYCTzsxtv8vKSFD9PbJjmhYF14k/jKC7S9k= +github.com/fsnotify/fsnotify v1.9.0/go.mod 
h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0= +github.com/godbus/dbus/v5 v5.2.2 h1:TUR3TgtSVDmjiXOgAAyaZbYmIeP3DPkld3jgKGV8mXQ= +github.com/godbus/dbus/v5 v5.2.2/go.mod h1:3AAv2+hPq5rdnr5txxxRwiGjPXamgoIHgz9FPBfOp3c= +github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= +github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= +github.com/mdlayher/ethtool v0.4.1 h1:BSUOGbnNNuPfGRFlKwtONPLJAQiLw8JdgS7E+SuN3/M= +github.com/mdlayher/ethtool v0.4.1/go.mod h1:9XP/78xTcyrfP8HvM0pI5ETaGHAuxrHRs+xkst29NlY= +github.com/mdlayher/genetlink v1.3.2 h1:KdrNKe+CTu+IbZnm/GVUMXSqBBLqcGpRDa0xkQy56gw= +github.com/mdlayher/genetlink v1.3.2/go.mod h1:tcC3pkCrPUGIKKsCsp0B3AdaaKuHtaxoJRz3cc+528o= +github.com/mdlayher/netlink v1.8.0 h1:e7XNIYJKD7hUct3Px04RuIGJbBxy1/c4nX7D5YyvvlM= +github.com/mdlayher/netlink v1.8.0/go.mod h1:UhgKXUlDQhzb09DrCl2GuRNEglHmhYoWAHid9HK3594= +github.com/mdlayher/socket v0.5.1 h1:VZaqt6RkGkt2OE9l3GcC6nZkqD3xKeQLyfleW/uBcos= +github.com/mdlayher/socket v0.5.1/go.mod h1:TjPLHI1UgwEv5J1B5q0zTZq12A/6H7nKmtTanQE37IQ= +github.com/mikioh/ipaddr v0.0.0-20190404000644-d465c8ab6721 h1:RlZweED6sbSArvlE924+mUcZuXKLBHA35U7LN621Bws= +github.com/mikioh/ipaddr v0.0.0-20190404000644-d465c8ab6721/go.mod h1:Ickgr2WtCLZ2MDGd4Gr0geeCH5HybhRJbonOgQpvSxc= +github.com/vishvananda/netlink v1.3.1 h1:3AEMt62VKqz90r0tmNhog0r/PpWKmrEShJU0wJW6bV0= +github.com/vishvananda/netlink v1.3.1/go.mod h1:ARtKouGSTGchR8aMwmkzC0qiNPrrWO5JS/XMVl45+b4= +github.com/vishvananda/netns v0.0.5 h1:DfiHV+j8bA32MFM7bfEunvT8IAqQ/NzSJHtcmW5zdEY= +github.com/vishvananda/netns v0.0.5/go.mod h1:SpkAiCQRtJ6TvvxPnOSyH3BMl6unz3xZlaprSwhNNJM= +golang.org/x/crypto v0.41.0 h1:WKYxWedPGCTVVl5+WHSSrOBT0O8lx32+zxmHxijgXp4= +golang.org/x/crypto v0.41.0/go.mod h1:pO5AFd7FA68rFak7rOAGVuygIISepHftHnr8dr6+sUc= +golang.org/x/net v0.43.0 h1:lat02VYK2j4aLzMzecihNvTlJNQUq316m2Mr9rnM6YE= +golang.org/x/net v0.43.0/go.mod h1:vhO1fvI4dGsIjh73sWfUVjj3N7CA9WkKJNQm2svM6Jg= 
+golang.org/x/sync v0.10.0 h1:3NQrjDixjgGwUOCaF8w2+VYHv0Ve/vGYSbdkTa98gmQ= +golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.10.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.35.0 h1:vz1N37gP5bs89s7He8XuIYXpyY0+QlsKmzipCbUtyxI= +golang.org/x/sys v0.35.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= +golang.zx2c4.com/wireguard v0.0.0-20231211153847-12269c276173 h1:/jFs0duh4rdb8uIfPMv78iAJGcPKDeqAFnaLBropIC4= +golang.zx2c4.com/wireguard v0.0.0-20231211153847-12269c276173/go.mod h1:tkCQ4FQXmpAgYVh++1cq16/dH4QJtmvpRv19DWGAHSA= +golang.zx2c4.com/wireguard/wgctrl v0.0.0-20241231184526-a9ab2273dd10 h1:3GDAcqdIg1ozBNLgPy4SLT84nfcBjr6rhGtXYtrkWLU= +golang.zx2c4.com/wireguard/wgctrl v0.0.0-20241231184526-a9ab2273dd10/go.mod h1:T97yPqesLiNrOYxkwmhMI0ZIlJDm+p0PMR8eRVeR5tQ= diff --git a/src/yangerd/internal/backoff/backoff.go b/src/yangerd/internal/backoff/backoff.go new file mode 100644 index 000000000..a12a2951d --- /dev/null +++ b/src/yangerd/internal/backoff/backoff.go @@ -0,0 +1,51 @@ +// Package backoff provides exponential backoff retry logic with +// context-aware sleep, shared across reactive monitors. +package backoff + +import ( + "context" + "math" + "time" +) + +// Backoff implements exponential backoff with a configurable initial +// delay, maximum delay, and growth factor. +type Backoff struct { + Initial time.Duration + Max time.Duration + Factor float64 +} + +// Default returns a Backoff with the standard yangerd parameters: +// 100ms initial, 30s max, factor 2. +func Default() *Backoff { + return &Backoff{ + Initial: 100 * time.Millisecond, + Max: 30 * time.Second, + Factor: 2.0, + } +} + +// Next returns the next delay value after current. If current is +// zero, Initial is returned. 
+func (b *Backoff) Next(current time.Duration) time.Duration { + if current <= 0 { + return b.Initial + } + next := time.Duration(math.Min(float64(current)*b.Factor, float64(b.Max))) + if next <= 0 { + return b.Initial + } + return next +} + +// Sleep waits for duration d or until ctx is cancelled, whichever +// comes first. Returns ctx.Err() if the context was cancelled. +func Sleep(ctx context.Context, d time.Duration) error { + select { + case <-ctx.Done(): + return ctx.Err() + case <-time.After(d): + return nil + } +} diff --git a/src/yangerd/internal/bridgebatch/bridgebatch.go b/src/yangerd/internal/bridgebatch/bridgebatch.go new file mode 100644 index 000000000..2d5795f29 --- /dev/null +++ b/src/yangerd/internal/bridgebatch/bridgebatch.go @@ -0,0 +1,223 @@ +// Package bridgebatch manages a persistent `bridge -json -batch -` +// subprocess for querying bridge FDB, VLAN, MDB, and STP state. +// Identical design to ipbatch: mutex-serialized queries, dead/alive +// state management, and exponential backoff restart. +// +// Like ipbatch, `bridge -batch -` produces NO stdout for commands +// that fail. Query uses a read timeout to detect this and kills +// the subprocess so restartLoop can recover. +package bridgebatch + +import ( + "bufio" + "context" + "encoding/json" + "errors" + "fmt" + "io" + "log/slog" + "math" + "os/exec" + "sync" + "sync/atomic" + "time" +) + +// ErrBatchDead is returned by Query when the subprocess is not running. +var ErrBatchDead = errors.New("bridge batch process is dead") + +const ( + canaryCommand = "vlan show" + + queryTimeout = 5 * time.Second + reconnectInitial = 100 * time.Millisecond + reconnectMax = 30 * time.Second + reconnectFactor = 2.0 +) + +// BridgeBatch wraps a persistent `bridge -json -batch -` subprocess. 
+type BridgeBatch struct { + cmd *exec.Cmd + stdin io.WriteCloser + lines chan []byte + stderr io.ReadCloser + mu sync.Mutex + alive atomic.Bool + log *slog.Logger + ctx context.Context + cancel context.CancelFunc +} + +// New spawns the bridge batch subprocess. +func New(ctx context.Context, log *slog.Logger) (*BridgeBatch, error) { + ctx, cancel := context.WithCancel(ctx) + b := &BridgeBatch{ + log: log, + ctx: ctx, + cancel: cancel, + } + if err := b.start(); err != nil { + cancel() + return nil, err + } + go b.restartLoop() + return b, nil +} + +func (b *BridgeBatch) start() error { + cmd := exec.CommandContext(b.ctx, "bridge", "-json", "-batch", "-") + stdin, err := cmd.StdinPipe() + if err != nil { + return fmt.Errorf("stdin pipe: %w", err) + } + stdout, err := cmd.StdoutPipe() + if err != nil { + return fmt.Errorf("stdout pipe: %w", err) + } + stderr, err := cmd.StderrPipe() + if err != nil { + return fmt.Errorf("stderr pipe: %w", err) + } + if err := cmd.Start(); err != nil { + return fmt.Errorf("start bridge batch: %w", err) + } + b.mu.Lock() + b.cmd = cmd + b.stdin = stdin + b.lines = make(chan []byte, 8) + b.stderr = stderr + b.alive.Store(true) + b.mu.Unlock() + go b.readLines(stdout) + go b.drainStderr() + return nil +} + +func (b *BridgeBatch) readLines(r io.Reader) { + scanner := bufio.NewScanner(r) + scanner.Buffer(make([]byte, 0, 4*1024*1024), 4*1024*1024) + for scanner.Scan() { + line := make([]byte, len(scanner.Bytes())) + copy(line, scanner.Bytes()) + b.lines <- line + } + b.alive.Store(false) +} + +// Query sends a command to the bridge batch process and returns the +// JSON response. 
+func (b *BridgeBatch) Query(command string) (json.RawMessage, error) { + if !b.alive.Load() { + return nil, ErrBatchDead + } + b.mu.Lock() + defer b.mu.Unlock() + + if !b.alive.Load() { + return nil, ErrBatchDead + } + + if _, err := fmt.Fprintf(b.stdin, "%s\n", command); err != nil { + b.alive.Store(false) + return nil, fmt.Errorf("write command: %w", err) + } + + select { + case line, ok := <-b.lines: + if !ok { + b.alive.Store(false) + return nil, fmt.Errorf("bridge batch process exited") + } + return json.RawMessage(line), nil + case <-time.After(queryTimeout): + b.log.Warn("bridge batch query timeout, killing subprocess", "cmd", command) + b.alive.Store(false) + if b.cmd != nil && b.cmd.Process != nil { + b.cmd.Process.Kill() + } + return nil, fmt.Errorf("timeout waiting for response to: %s", command) + } +} + +// Close terminates the subprocess and cancels the restart loop. +func (b *BridgeBatch) Close() { + b.cancel() + b.mu.Lock() + if b.stdin != nil { + b.stdin.Close() + } + if b.cmd != nil && b.cmd.Process != nil { + b.cmd.Process.Kill() + } + b.alive.Store(false) + b.mu.Unlock() +} + +// Status returns "running" or "restarting". 
+func (b *BridgeBatch) Status() string { + if b.alive.Load() { + return "running" + } + return "restarting" +} + +func (b *BridgeBatch) drainStderr() { + scanner := bufio.NewScanner(b.stderr) + for scanner.Scan() { + b.log.Warn("bridge batch stderr", "line", scanner.Text()) + } +} + +func (b *BridgeBatch) restartLoop() { + delay := reconnectInitial + for { + select { + case <-b.ctx.Done(): + return + default: + } + + if b.alive.Load() { + select { + case <-b.ctx.Done(): + return + case <-time.After(200 * time.Millisecond): + continue + } + } + + b.log.Info("bridge batch: subprocess died, restarting", "delay", delay) + select { + case <-b.ctx.Done(): + return + case <-time.After(delay): + } + + b.mu.Lock() + if b.cmd != nil && b.cmd.Process != nil { + b.cmd.Process.Kill() + b.cmd.Wait() + } + b.mu.Unlock() + + if err := b.start(); err != nil { + b.log.Warn("bridge batch: restart failed", "err", err) + delay = time.Duration(math.Min( + float64(delay)*reconnectFactor, + float64(reconnectMax))) + continue + } + + if _, err := b.Query(canaryCommand); err != nil { + b.log.Warn("bridge batch: canary query failed", "err", err) + b.alive.Store(false) + delay = time.Duration(math.Min( + float64(delay)*reconnectFactor, + float64(reconnectMax))) + continue + } + + b.log.Info("bridge batch: restarted successfully") + delay = reconnectInitial + } +} diff --git a/src/yangerd/internal/collector/boot.go b/src/yangerd/internal/collector/boot.go new file mode 100644 index 000000000..5c03c6cad --- /dev/null +++ b/src/yangerd/internal/collector/boot.go @@ -0,0 +1,170 @@ +package collector + +import ( + "context" + "encoding/json" + "log" + "strconv" + "strings" +) + +func BootPlatform(fs FileReader) json.RawMessage { + data, err := fs.ReadFile("/etc/os-release") + if err != nil { + log.Printf("boot: os-release: %v", err) + return nil + } + platform := make(map[string]interface{}) + for _, line := range strings.Split(string(data), "\n") { + idx := strings.IndexByte(line, '=') + if idx < 
0 { + continue + } + key := line[:idx] + val := strings.Trim(line[idx+1:], "\"") + if mapped, ok := platformKeyMap[key]; ok { + platform[mapped] = val + } + } + result, _ := json.Marshal(map[string]interface{}{"platform": platform}) + return result +} + +func BootSoftware(ctx context.Context, cmd CommandRunner) json.RawMessage { + software := make(map[string]interface{}) + + raucOut, err := cmd.Run(ctx, "rauc", "status", "--detailed", "--output-format=json") + if err == nil { + var raucData map[string]interface{} + if json.Unmarshal(raucOut, &raucData) == nil { + if v, ok := raucData["compatible"]; ok { + software["compatible"] = v + } + if v, ok := raucData["variant"]; ok { + software["variant"] = v + } + if v, ok := raucData["booted"]; ok { + software["booted"] = v + } + bootSoftwareSlots(software, raucData) + } + } + + bootOrder := ReadBootOrder(ctx, cmd) + if bootOrder != nil { + software["boot-order"] = bootOrder + } + + result, _ := json.Marshal(map[string]interface{}{"infix-system:software": software}) + return result +} + +func ReadBootOrder(ctx context.Context, cmd CommandRunner) []string { + out, err := cmd.Run(ctx, "fw_printenv", "BOOT_ORDER") + if err == nil { + for _, line := range strings.Split(string(out), "\n") { + if strings.Contains(line, "BOOT_ORDER") { + parts := strings.SplitN(line, "=", 2) + if len(parts) == 2 { + return strings.Fields(parts[1]) + } + } + } + } + + out, err = cmd.Run(ctx, "grub-editenv", "/mnt/aux/grub/grubenv", "list") + if err == nil { + for _, line := range strings.Split(string(out), "\n") { + if strings.Contains(line, "ORDER") { + parts := strings.SplitN(line, "=", 2) + if len(parts) == 2 { + return strings.Fields(strings.TrimSpace(parts[1])) + } + } + } + } + + return nil +} + +func bootSoftwareSlots(software map[string]interface{}, raucData map[string]interface{}) { + slotsRaw, ok := raucData["slots"] + if !ok { + return + } + slotsArr, ok := slotsRaw.([]interface{}) + if !ok { + return + } + + var slots []interface{} + 
for _, slotItem := range slotsArr { + slotMap, ok := slotItem.(map[string]interface{}) + if !ok { + continue + } + for name, valRaw := range slotMap { + val, ok := valRaw.(map[string]interface{}) + if !ok { + continue + } + s := map[string]interface{}{ + "name": name, + "bootname": val["bootname"], + "class": val["class"], + "state": val["state"], + } + + slotStatus, _ := val["slot_status"].(map[string]interface{}) + if slotStatus == nil { + slots = append(slots, s) + continue + } + + bundle := make(map[string]interface{}) + if b, ok := slotStatus["bundle"].(map[string]interface{}); ok { + if v := b["compatible"]; v != nil { + bundle["compatible"] = v + } + if v := b["version"]; v != nil { + bundle["version"] = v + } + } + s["bundle"] = bundle + + if ck, ok := slotStatus["checksum"].(map[string]interface{}); ok { + if v := ck["size"]; v != nil { + s["size"] = strconv.FormatInt(int64(toInt(v)), 10) + } + if v := ck["sha256"]; v != nil { + s["sha256"] = v + } + } + + installed := make(map[string]interface{}) + if inst, ok := slotStatus["installed"].(map[string]interface{}); ok { + if v := inst["timestamp"]; v != nil { + installed["datetime"] = v + } + if v := inst["count"]; v != nil { + installed["count"] = toInt(v) + } + } + s["installed"] = installed + + activated := make(map[string]interface{}) + if act, ok := slotStatus["activated"].(map[string]interface{}); ok { + if v := act["timestamp"]; v != nil { + activated["datetime"] = v + } + if v := act["count"]; v != nil { + activated["count"] = toInt(v) + } + } + s["activated"] = activated + + slots = append(slots, s) + } + } + software["slot"] = slots +} diff --git a/src/yangerd/internal/collector/boot_test.go b/src/yangerd/internal/collector/boot_test.go new file mode 100644 index 000000000..9b53c0bf2 --- /dev/null +++ b/src/yangerd/internal/collector/boot_test.go @@ -0,0 +1,236 @@ +package collector + +import ( + "context" + "encoding/json" + "fmt" + "testing" + + 
"github.com/kernelkit/infix/src/yangerd/internal/testutil" +) + +const ( + testOSRelease = `NAME="Infix" +VERSION_ID="25.01.0" +BUILD_ID="v25.01.0" +ARCHITECTURE="x86_64" +HOME_URL="https://kernelkit.github.io" +` + + testRaucStatus = `{ + "compatible": "Infix x86_64", + "variant": "", + "booted": "rootfs.0", + "slots": [ + { + "rootfs.0": { + "bootname": "A", + "class": "rootfs", + "state": "booted", + "slot_status": { + "bundle": { + "compatible": "Infix x86_64", + "version": "25.01.0" + }, + "checksum": { + "sha256": "abc123", + "size": 134217728 + }, + "installed": { + "timestamp": "2025-01-15T10:30:00Z", + "count": 3 + }, + "activated": { + "timestamp": "2025-01-15T10:31:00Z", + "count": 3 + } + } + } + }, + { + "rootfs.1": { + "bootname": "B", + "class": "rootfs", + "state": "inactive", + "slot_status": { + "bundle": { + "compatible": "Infix x86_64", + "version": "24.10.0" + }, + "checksum": { + "sha256": "def456", + "size": 130000000 + }, + "installed": { + "timestamp": "2024-10-01T08:00:00Z", + "count": 1 + }, + "activated": { + "timestamp": "2024-10-01T08:01:00Z", + "count": 1 + } + } + } + } + ] +}` + + testBootOrder = "BOOT_ORDER=A B\n" +) + +func TestBootPlatform(t *testing.T) { + fs := &testutil.MockFileReader{ + Files: map[string][]byte{ + "/etc/os-release": []byte(testOSRelease), + }, + } + + raw := BootPlatform(fs) + if raw == nil { + t.Fatal("BootPlatform returned nil") + } + + var result map[string]interface{} + if err := json.Unmarshal(raw, &result); err != nil { + t.Fatalf("unmarshal: %v", err) + } + + platform, ok := result["platform"].(map[string]interface{}) + if !ok { + t.Fatal("missing platform key") + } + + checks := map[string]string{ + "os-name": "Infix", + "os-version": "25.01.0", + "os-release": "v25.01.0", + "machine": "x86_64", + } + for key, expected := range checks { + got, ok := platform[key].(string) + if !ok || got != expected { + t.Fatalf("platform[%q]: expected %q, got %v", key, expected, platform[key]) + } + } +} + +func 
TestBootPlatformMissingFile(t *testing.T) { + fs := &testutil.MockFileReader{Files: map[string][]byte{}} + raw := BootPlatform(fs) + if raw != nil { + t.Fatalf("expected nil for missing os-release, got %s", raw) + } +} + +func TestBootSoftware(t *testing.T) { + runner := &testutil.MockRunner{ + Results: map[string][]byte{ + "rauc status --detailed --output-format=json": []byte(testRaucStatus), + "fw_printenv BOOT_ORDER": []byte(testBootOrder), + }, + Errors: map[string]error{}, + } + + raw := BootSoftware(context.Background(), runner) + if raw == nil { + t.Fatal("BootSoftware returned nil") + } + + var result map[string]interface{} + if err := json.Unmarshal(raw, &result); err != nil { + t.Fatalf("unmarshal: %v", err) + } + + sw, ok := result["infix-system:software"].(map[string]interface{}) + if !ok { + t.Fatal("missing infix-system:software key") + } + + if sw["compatible"] != "Infix x86_64" { + t.Fatalf("compatible: expected 'Infix x86_64', got %v", sw["compatible"]) + } + if sw["booted"] != "rootfs.0" { + t.Fatalf("booted: expected 'rootfs.0', got %v", sw["booted"]) + } + + bootOrder, ok := sw["boot-order"].([]interface{}) + if !ok || len(bootOrder) != 2 { + t.Fatalf("expected boot-order [A B], got %v", sw["boot-order"]) + } + if bootOrder[0] != "A" || bootOrder[1] != "B" { + t.Fatalf("boot-order: expected [A B], got %v", bootOrder) + } + + slots, ok := sw["slot"].([]interface{}) + if !ok || len(slots) != 2 { + t.Fatalf("expected 2 slots, got %v", sw["slot"]) + } +} + +func TestBootSoftwareAllCommandsFail(t *testing.T) { + runner := &testutil.MockRunner{ + Results: map[string][]byte{}, + Errors: map[string]error{ + "rauc status --detailed --output-format=json": fmt.Errorf("not found"), + "fw_printenv BOOT_ORDER": fmt.Errorf("not found"), + "grub-editenv /mnt/aux/grub/grubenv list": fmt.Errorf("not found"), + }, + } + + raw := BootSoftware(context.Background(), runner) + if raw == nil { + t.Fatal("BootSoftware should return non-nil even when all commands fail") 
+ } + + var result map[string]interface{} + json.Unmarshal(raw, &result) + sw := result["infix-system:software"].(map[string]interface{}) + if _, ok := sw["boot-order"]; ok { + t.Fatal("boot-order should not be present when commands fail") + } +} + +func TestReadBootOrderFwPrintenv(t *testing.T) { + runner := &testutil.MockRunner{ + Results: map[string][]byte{ + "fw_printenv BOOT_ORDER": []byte("BOOT_ORDER=A B\n"), + }, + Errors: map[string]error{}, + } + + order := ReadBootOrder(context.Background(), runner) + if len(order) != 2 || order[0] != "A" || order[1] != "B" { + t.Fatalf("expected [A B], got %v", order) + } +} + +func TestReadBootOrderGrubFallback(t *testing.T) { + runner := &testutil.MockRunner{ + Results: map[string][]byte{ + "grub-editenv /mnt/aux/grub/grubenv list": []byte("ORDER=B A\n"), + }, + Errors: map[string]error{ + "fw_printenv BOOT_ORDER": fmt.Errorf("command not found"), + }, + } + + order := ReadBootOrder(context.Background(), runner) + if len(order) != 2 || order[0] != "B" || order[1] != "A" { + t.Fatalf("expected [B A], got %v", order) + } +} + +func TestReadBootOrderBothFail(t *testing.T) { + runner := &testutil.MockRunner{ + Results: map[string][]byte{}, + Errors: map[string]error{ + "fw_printenv BOOT_ORDER": fmt.Errorf("not found"), + "grub-editenv /mnt/aux/grub/grubenv list": fmt.Errorf("not found"), + }, + } + + order := ReadBootOrder(context.Background(), runner) + if order != nil { + t.Fatalf("expected nil, got %v", order) + } +} diff --git a/src/yangerd/internal/collector/collector.go b/src/yangerd/internal/collector/collector.go new file mode 100644 index 000000000..8dc2abb26 --- /dev/null +++ b/src/yangerd/internal/collector/collector.go @@ -0,0 +1,55 @@ +// Package collector defines the Collector interface and the RunAll +// scheduler that drives periodic data collection into the Tree. 
+package collector
+
+import (
+	"context"
+	"log"
+	"sync"
+	"time"
+
+	"github.com/kernelkit/infix/src/yangerd/internal/tree"
+)
+
+// Collector gathers operational data and writes it to the Tree.
+type Collector interface {
+	// Name identifies the collector in log messages.
+	Name() string
+	// Interval is the period between two scheduled collections.
+	Interval() time.Duration
+	// Collect gathers data once and stores it in t. A returned error
+	// is logged by the scheduler; it does not stop later collections.
+	Collect(ctx context.Context, t *tree.Tree) error
+}
+
+// RunAll starts one goroutine per Collector, each ticking at the
+// collector's configured interval. A failed Collect is logged and
+// retried on the next tick. All goroutines exit when ctx is cancelled.
+//
+// RunAll adds one count to wg per collector and returns immediately;
+// each goroutine releases its count when it exits. The same pokeCh is
+// handed to every goroutine, so a value sent on it is received by only
+// one of them.
+func RunAll(ctx context.Context, wg *sync.WaitGroup, t *tree.Tree, collectors []Collector, pokeCh <-chan struct{}) {
+	for _, c := range collectors {
+		wg.Add(1)
+		go runOne(ctx, wg, t, c, pokeCh)
+	}
+}
+
+// runOne drives a single collector until ctx is cancelled. It collects
+// once immediately, then again on every interval tick and every time a
+// value arrives on pokeCh. Collect errors are logged and never end the
+// loop.
+func runOne(ctx context.Context, wg *sync.WaitGroup, t *tree.Tree, c Collector, pokeCh <-chan struct{}) {
+	defer wg.Done()
+
+	if err := c.Collect(ctx, t); err != nil {
+		log.Printf("collector %s: initial: %v", c.Name(), err)
+	}
+
+	// time.NewTicker panics on a non-positive duration, which would
+	// take the whole daemon down. Without a usable interval the
+	// collector is still refreshed by pokes: tick stays nil, and a
+	// receive from a nil channel never becomes ready in select.
+	var tick <-chan time.Time
+	if d := c.Interval(); d > 0 {
+		ticker := time.NewTicker(d)
+		defer ticker.Stop()
+		tick = ticker.C
+	} else {
+		log.Printf("collector %s: interval %v is not positive, periodic collection disabled", c.Name(), d)
+	}
+
+	for {
+		select {
+		case <-ctx.Done():
+			return
+		case <-tick:
+			if err := c.Collect(ctx, t); err != nil {
+				log.Printf("collector %s: %v", c.Name(), err)
+			}
+		case <-pokeCh:
+			if err := c.Collect(ctx, t); err != nil {
+				log.Printf("collector %s: poke: %v", c.Name(), err)
+			}
+		}
+	}
+}
diff --git a/src/yangerd/internal/collector/containers.go b/src/yangerd/internal/collector/containers.go
new file mode 100644
index 000000000..7cd2f5091
--- /dev/null
+++ b/src/yangerd/internal/collector/containers.go
@@ -0,0 +1,501 @@
+package collector
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"log"
+	"path/filepath"
+	"regexp"
+	"strconv"
+	"strings"
+	"time"
+
+	"github.com/kernelkit/infix/src/yangerd/internal/tree"
+)
+
+var sizeRe = regexp.MustCompile(`(?i)^\s*([0-9.]+)\s*([KMGT]?I?B)?\s*$`)
+
+// ContainerCollector gathers infix-containers operational data.
+type ContainerCollector struct {
+	cmd      CommandRunner // runs the podman commands
+	fs       FileReader    // reads the cgroup limit files
+	interval time.Duration // value reported by Interval
+}
+
+// NewContainerCollector creates a ContainerCollector with the given dependencies.
+func NewContainerCollector(cmd CommandRunner, fs FileReader, interval time.Duration) *ContainerCollector {
+	return &ContainerCollector{cmd: cmd, fs: fs, interval: interval}
+}
+
+// Name implements Collector.
+func (c *ContainerCollector) Name() string { return "containers" }
+
+// Interval implements Collector.
+func (c *ContainerCollector) Interval() time.Duration { return c.interval }
+
+// Collect implements Collector. It produces one tree key:
+// "infix-containers:containers". The key is set when at least one
+// container was collected and deleted otherwise. Collect always
+// returns nil.
+func (c *ContainerCollector) Collect(ctx context.Context, t *tree.Tree) error {
+	const key = "infix-containers:containers"
+
+	data := c.collectJSON(ctx)
+	if data == nil {
+		// No containers: remove the key rather than leaving a bare
+		// "containers" node, so clients see the feature as absent.
+		t.Delete(key)
+		return nil
+	}
+	t.Set(key, data)
+	return nil
+}
+
+// collectJSON returns the operational containers subtree, or nil when no
+// container exists. Returning nil (not an empty {"container":[]}) lets the
+// caller drop the key entirely so an enabled-but-idle container feature does
+// not surface as operational data. nil is also returned when the result
+// cannot be marshalled; that case is logged.
+func (c *ContainerCollector) collectJSON(ctx context.Context) json.RawMessage {
+	containers := []interface{}{}
+	for _, ps := range c.podmanPS(ctx) {
+		// container returns nil for an entry it cannot describe.
+		if cont := c.container(ctx, ps); cont != nil {
+			containers = append(containers, cont)
+		}
+	}
+	if len(containers) == 0 {
+		return nil
+	}
+
+	data, err := json.Marshal(map[string]interface{}{"container": containers})
+	if err != nil {
+		// The caller treats nil as "no containers" and deletes the
+		// key, so leave a trace of why the data went missing.
+		log.Printf("collector containers: marshal: %v", err)
+		return nil
+	}
+	return data
+}
+
+// CollectContainers runs a full container collection and returns the
+// result as JSON suitable for tree.Set("infix-containers:containers").
+func CollectContainers(cmd CommandRunner, fs FileReader) json.RawMessage {
+	// One-shot collection: the collector gets no interval, and the
+	// podman commands run under context.TODO() because this entry
+	// point takes no context from its caller.
+	c := &ContainerCollector{cmd: cmd, fs: fs}
+	return c.collectJSON(context.TODO())
+}
+
+// podmanPS runs "podman ps -a --format=json" and returns one map per
+// JSON object in the output array. It logs and returns nil when the
+// command fails or the output cannot be decoded as a JSON array.
+func (c *ContainerCollector) podmanPS(ctx context.Context) []map[string]interface{} {
+	out, err := c.cmd.Run(ctx, "podman", "ps", "-a", "--format=json")
+	if err != nil {
+		log.Printf("collector containers: ps: %v", err)
+		return nil
+	}
+
+	var list []map[string]interface{}
+	if err := json.Unmarshal(out, &list); err == nil {
+		return list
+	}
+
+	// The typed decode fails when the array holds anything other than
+	// objects. Decode generically and keep only the objects.
+	var generic []interface{}
+	if err := json.Unmarshal(out, &generic); err != nil {
+		log.Printf("collector containers: ps parse: %v", err)
+		return nil
+	}
+
+	// json.Unmarshal stores what it can before reporting a type
+	// mismatch, so list may already hold the objects from the failed
+	// decode above. Start from scratch; appending to it would report
+	// every container twice.
+	list = nil
+	for _, item := range generic {
+		if m, ok := item.(map[string]interface{}); ok {
+			list = append(list, m)
+		}
+	}
+
+	return list
+}
+
+// podmanInspect runs "podman inspect <name>" and returns the first JSON
+// object found in its output. The output may be an array of objects, a
+// mixed array, or a single object. An empty (possibly nil) map is
+// returned when the command fails or no object can be decoded; both
+// failure cases are logged.
+func (c *ContainerCollector) podmanInspect(ctx context.Context, name string) map[string]interface{} {
+	out, err := c.cmd.Run(ctx, "podman", "inspect", name)
+	if err != nil {
+		log.Printf("collector containers: inspect %s: %v", name, err)
+		return map[string]interface{}{}
+	}
+
+	// Usual shape: an array of objects.
+	var list []map[string]interface{}
+	if err := json.Unmarshal(out, &list); err == nil && len(list) > 0 {
+		return list[0]
+	}
+
+	// Array with non-object elements: take the first object in it.
+	var generic []interface{}
+	if err := json.Unmarshal(out, &generic); err == nil {
+		for _, item := range generic {
+			if m, ok := item.(map[string]interface{}); ok {
+				return m
+			}
+		}
+	}
+
+	// Bare object instead of an array.
+	var single map[string]interface{}
+	if err := json.Unmarshal(out, &single); err == nil {
+		return single
+	}
+
+	log.Printf("collector containers: inspect %s parse: invalid json", name)
+	return map[string]interface{}{}
+}
+
+func (c *ContainerCollector) resourceStats(ctx context.Context, name string) map[string]interface{} { + out, err := c.cmd.Run(ctx, "podman", "stats", "--no-stream", "--format", "json", "--no-reset", name) + if err != nil { + log.Printf("collector containers: stats %s: %v", name, err) + return nil + } + + var statsList []map[string]interface{} + if err := 
json.Unmarshal(out, &statsList); err != nil { + var single map[string]interface{} + if err2 := json.Unmarshal(out, &single); err2 != nil { + log.Printf("collector containers: stats %s parse: %v", name, err) + return nil + } + statsList = append(statsList, single) + } + + if len(statsList) == 0 { + return nil + } + + stat := statsList[0] + rusage := make(map[string]interface{}) + + if memUsage, ok := stat["mem_usage"].(string); ok { + parts := strings.SplitN(memUsage, "/", 2) + if len(parts) == 2 { + memKiB := parseSizeKiB(strings.TrimSpace(parts[0])) + rusage["memory"] = strconv.Itoa(memKiB) + } + } + + if cpuPercent, ok := stat["cpu_percent"].(string); ok { + cpuPercent = strings.TrimSpace(strings.TrimSuffix(cpuPercent, "%")) + if cpuVal, err := strconv.ParseFloat(cpuPercent, 64); err == nil { + rusage["cpu"] = fmt.Sprintf("%.2f", cpuVal) + } + } + + if blockIO, ok := stat["block_io"].(string); ok { + parts := strings.SplitN(blockIO, "/", 2) + if len(parts) == 2 { + readKiB := parseSizeKiB(strings.TrimSpace(parts[0])) + writeKiB := parseSizeKiB(strings.TrimSpace(parts[1])) + + bio := make(map[string]interface{}) + if readKiB > 0 { + bio["read"] = strconv.Itoa(readKiB) + } + if writeKiB > 0 { + bio["write"] = strconv.Itoa(writeKiB) + } + rusage["block-io"] = bio + } + } + + if netIO, ok := stat["net_io"].(string); ok { + parts := strings.SplitN(netIO, "/", 2) + if len(parts) == 2 { + rxKiB := parseSizeKiB(strings.TrimSpace(parts[0])) + txKiB := parseSizeKiB(strings.TrimSpace(parts[1])) + + nio := make(map[string]interface{}) + if rxKiB > 0 { + nio["received"] = strconv.Itoa(rxKiB) + } + if txKiB > 0 { + nio["sent"] = strconv.Itoa(txKiB) + } + rusage["net-io"] = nio + } + } + + if pids, ok := stat["pids"]; ok { + pidInt := toInt(pids) + rusage["pids"] = pidInt + } + + if len(rusage) == 0 { + return nil + } + + return rusage +} + +func (c *ContainerCollector) readCgroupLimits(inspect map[string]interface{}) map[string]interface{} { + stateRaw, ok := inspect["State"] 
+ if !ok { + return nil + } + state, ok := stateRaw.(map[string]interface{}) + if !ok { + return nil + } + + cgroupPath, ok := state["CgroupPath"].(string) + if !ok || cgroupPath == "" { + return nil + } + + cgroupBase := "/sys/fs/cgroup" + cgroupPath + memVal := 0 + cpuVal := 0 + + if data, err := c.fs.ReadFile(filepath.Join(cgroupBase, "memory.max")); err == nil { + memVal = parseCgroupMemory(strings.TrimSpace(string(data))) + } + + if data, err := c.fs.ReadFile(filepath.Join(cgroupBase, "cpu.max")); err == nil { + cpuVal = parseCgroupCPU(strings.TrimSpace(string(data))) + } + + if memVal <= 0 && cpuVal <= 0 { + return nil + } + + result := make(map[string]interface{}) + if memVal > 0 { + result["memory"] = strconv.Itoa(memVal) + } + if cpuVal > 0 { + result["cpu"] = cpuVal + } + + return result +} + +func (c *ContainerCollector) network(ps map[string]interface{}, inspect map[string]interface{}) map[string]interface{} { + networkSettingsRaw, hasNetworkSettings := inspect["NetworkSettings"] + if hasNetworkSettings { + if networkSettings, ok := networkSettingsRaw.(map[string]interface{}); ok { + if networksRaw, ok := networkSettings["Networks"]; ok { + if networks, ok := networksRaw.(map[string]interface{}); ok { + if _, ok := networks["host"]; ok { + return map[string]interface{}{"host": true} + } + } + } + } + } + + net := map[string]interface{}{ + "interface": []interface{}{}, + "publish": []interface{}{}, + } + + networks := asStringSlice(ps["Networks"]) + ifaces := net["interface"].([]interface{}) + for _, n := range networks { + ifaces = append(ifaces, map[string]interface{}{"name": n}) + } + net["interface"] = ifaces + + running := strings.EqualFold(asString(ps["State"]), "running") + if !running { + return net + } + + portsRaw, ok := ps["Ports"] + if !ok { + return net + } + + ports, ok := portsRaw.([]interface{}) + if !ok || len(ports) == 0 { + return net + } + + publish := net["publish"].([]interface{}) + for _, portRaw := range ports { + port, ok := 
portRaw.(map[string]interface{}) + if !ok { + continue + } + + hostIP := asString(port["host_ip"]) + hostPort := asString(port["host_port"]) + if hostPort == "" { + hostPort = strconv.Itoa(toInt(port["host_port"])) + } + containerPort := asString(port["container_port"]) + if containerPort == "" { + containerPort = strconv.Itoa(toInt(port["container_port"])) + } + protocol := asString(port["protocol"]) + + if hostPort == "0" || hostPort == "" || containerPort == "0" || containerPort == "" || protocol == "" { + continue + } + + addr := "" + if hostIP != "" { + addr = hostIP + ":" + } + + publish = append(publish, fmt.Sprintf("%s%s:%s/%s", addr, hostPort, containerPort, protocol)) + } + net["publish"] = publish + + return net +} + +func (c *ContainerCollector) container(ctx context.Context, ps map[string]interface{}) map[string]interface{} { + names := asStringSlice(ps["Names"]) + if len(names) == 0 { + return nil + } + + name := names[0] + running := strings.EqualFold(asString(ps["State"]), "running") + + out := map[string]interface{}{ + "name": name, + "id": asString(ps["Id"]), + "image": asString(ps["Image"]), + "image-id": asString(ps["ImageID"]), + "running": running, + "status": asString(ps["Status"]), + } + + inspect := c.podmanInspect(ctx, name) + + // Report the actual running command line as the config-false + // "cmdline" leaf (an unrestricted string), built from inspect's + // Path + Args like the legacy yanger collector. Do NOT report it + // into the config-true "command" leaf: that leaf has a restrictive + // pattern and a real command line (e.g. one containing "&&" or + // quotes) fails YANG validation, which rejects the entire + // containers subtree on read. + if path := asString(inspect["Path"]); path != "" { + parts := append([]string{path}, asStringSlice(inspect["Args"])...) 
+ out["cmdline"] = strings.Join(parts, " ") + } + + if net := c.network(ps, inspect); len(net) > 0 { + out["network"] = net + } + + if limits := c.readCgroupLimits(inspect); limits != nil { + out["resource-limit"] = limits + } + + if running { + if usage := c.resourceStats(ctx, name); usage != nil { + out["resource-usage"] = usage + } + } + + return out +} + +func asString(v interface{}) string { + s, ok := v.(string) + if ok { + return s + } + return "" +} + +func asStringSlice(v interface{}) []string { + switch vv := v.(type) { + case []string: + return vv + case []interface{}: + out := make([]string, 0, len(vv)) + for _, e := range vv { + if s, ok := e.(string); ok && s != "" { + out = append(out, s) + } + } + return out + case string: + if vv == "" { + return nil + } + return splitLines(vv) + default: + return nil + } +} + +func parseSizeKiB(sizeStr string) int { + if strings.TrimSpace(sizeStr) == "" { + return 0 + } + + m := sizeRe.FindStringSubmatch(strings.ToUpper(strings.TrimSpace(sizeStr))) + if len(m) < 2 { + return 0 + } + + value, err := strconv.ParseFloat(m[1], 64) + if err != nil { + return 0 + } + + unit := "B" + if len(m) >= 3 && m[2] != "" { + unit = strings.ToUpper(m[2]) + } + + multipliers := map[string]float64{ + "B": 1.0 / 1024.0, + "KB": 1000.0 / 1024.0, + "KIB": 1, + "MB": (1000.0 * 1000.0) / 1024.0, + "MIB": 1024, + "GB": (1000.0 * 1000.0 * 1000.0) / 1024.0, + "GIB": 1024 * 1024, + "TB": (1000.0 * 1000.0 * 1000.0 * 1000.0) / 1024.0, + "TIB": 1024 * 1024 * 1024, + } + + mult, ok := multipliers[unit] + if !ok { + mult = 1 + } + + return int(value * mult) +} + +func parseCgroupMemory(memStr string) int { + memStr = strings.TrimSpace(memStr) + if memStr == "" || memStr == "max" { + return 0 + } + + memBytes, err := strconv.ParseUint(memStr, 10, 64) + if err != nil { + return 0 + } + + return int(memBytes / 1024) +} + +func parseCgroupCPU(cpuStr string) int { + cpuStr = strings.TrimSpace(cpuStr) + if cpuStr == "" { + return 0 + } + + parts := 
strings.Fields(cpuStr) + if len(parts) != 2 || parts[0] == "max" { + return 0 + } + + quota, err := strconv.Atoi(parts[0]) + if err != nil { + return 0 + } + period, err := strconv.Atoi(parts[1]) + if err != nil || period == 0 { + return 0 + } + + return (quota * 1000) / period +} diff --git a/src/yangerd/internal/collector/containers_test.go b/src/yangerd/internal/collector/containers_test.go new file mode 100644 index 000000000..fced4437e --- /dev/null +++ b/src/yangerd/internal/collector/containers_test.go @@ -0,0 +1,452 @@ +package collector + +import ( + "context" + "encoding/json" + "fmt" + "testing" + "time" + + "github.com/kernelkit/infix/src/yangerd/internal/testutil" + "github.com/kernelkit/infix/src/yangerd/internal/tree" +) + +func collectContainers(t *testing.T, runner *testutil.MockRunner, fs *testutil.MockFileReader) map[string]interface{} { + t.Helper() + + c := NewContainerCollector(runner, fs, 30*time.Second) + tr := tree.New() + if err := c.Collect(context.Background(), tr); err != nil { + t.Fatalf("Collect failed: %v", err) + } + + raw := tr.Get("infix-containers:containers") + if raw == nil { + t.Fatal("missing infix-containers:containers in tree") + } + + out := make(map[string]interface{}) + if err := json.Unmarshal(raw, &out); err != nil { + t.Fatalf("unmarshal containers: %v", err) + } + + return out +} + +func containerList(t *testing.T, data map[string]interface{}) []interface{} { + t.Helper() + + containers, ok := data["container"].([]interface{}) + if !ok { + t.Fatalf("missing container list: %v", data) + } + + return containers +} + +func TestContainerBasicInfo(t *testing.T) { + runner := &testutil.MockRunner{ + Results: map[string][]byte{ + "podman ps -a --format=json": []byte(`[ + { + "Names": ["web"], + "Id": "abc123", + "Image": "docker.io/library/nginx:latest", + "ImageID": "sha256:image", + "State": "running", + "Status": "Up 2 hours", + "Command": ["nginx", "-g", "daemon off;"], + "Networks": ["podman0"], + "Ports": [] + } + 
]`), + "podman inspect web": []byte(`[{"Path":"nginx","Args":["-g","daemon off;"]}]`), + "podman stats --no-stream --format json --no-reset web": []byte(`[]`), + }, + Errors: map[string]error{}, + } + + fs := &testutil.MockFileReader{Files: map[string][]byte{}, Globs: map[string][]string{}} + + out := collectContainers(t, runner, fs) + containers := containerList(t, out) + if len(containers) != 1 { + t.Fatalf("expected 1 container, got %d", len(containers)) + } + + c := containers[0].(map[string]interface{}) + if c["name"] != "web" { + t.Fatalf("name: expected web, got %v", c["name"]) + } + if c["id"] != "abc123" { + t.Fatalf("id: expected abc123, got %v", c["id"]) + } + if c["image"] != "docker.io/library/nginx:latest" { + t.Fatalf("image mismatch: %v", c["image"]) + } + if c["status"] != "Up 2 hours" { + t.Fatalf("status mismatch: %v", c["status"]) + } + if c["cmdline"] != "nginx -g daemon off;" { + t.Fatalf("cmdline mismatch: %v", c["cmdline"]) + } + if c["running"] != true { + t.Fatalf("running expected true, got %v", c["running"]) + } +} + +func TestContainerHostNetwork(t *testing.T) { + runner := &testutil.MockRunner{ + Results: map[string][]byte{ + "podman ps -a --format=json": []byte(`[ + { + "Names": ["hostnet"], + "Id": "id1", + "Image": "img", + "ImageID": "sha256:1", + "State": "running", + "Status": "Up", + "Command": ["sleep", "60"], + "Networks": ["podman0"], + "Ports": [{"host_ip":"", "host_port":8080, "container_port":80, "protocol":"tcp"}] + } + ]`), + "podman inspect hostnet": []byte(`[{"NetworkSettings":{"Networks":{"host":{}}}}]`), + "podman stats --no-stream --format json --no-reset hostnet": []byte(`[]`), + }, + Errors: map[string]error{}, + } + + fs := &testutil.MockFileReader{Files: map[string][]byte{}, Globs: map[string][]string{}} + out := collectContainers(t, runner, fs) + + c := containerList(t, out)[0].(map[string]interface{}) + net, ok := c["network"].(map[string]interface{}) + if !ok { + t.Fatalf("missing network: %v", c) + } + if 
net["host"] != true { + t.Fatalf("expected host network true, got %v", net["host"]) + } +} + +func TestContainerBridgeNetwork(t *testing.T) { + runner := &testutil.MockRunner{ + Results: map[string][]byte{ + "podman ps -a --format=json": []byte(`[ + { + "Names": ["bridge"], + "Id": "id2", + "Image": "img", + "ImageID": "sha256:2", + "State": "running", + "Status": "Up", + "Command": ["app"], + "Networks": ["podman0", "br0"], + "Ports": [ + {"host_ip":"127.0.0.1", "host_port":8080, "container_port":80, "protocol":"tcp"}, + {"host_ip":"", "host_port":8443, "container_port":443, "protocol":"tcp"} + ] + } + ]`), + "podman inspect bridge": []byte(`[{"NetworkSettings":{"Networks":{"bridge":{}}}}]`), + "podman stats --no-stream --format json --no-reset bridge": []byte(`[]`), + }, + Errors: map[string]error{}, + } + + fs := &testutil.MockFileReader{Files: map[string][]byte{}, Globs: map[string][]string{}} + out := collectContainers(t, runner, fs) + + c := containerList(t, out)[0].(map[string]interface{}) + net := c["network"].(map[string]interface{}) + + ifaces := net["interface"].([]interface{}) + if len(ifaces) != 2 { + t.Fatalf("expected 2 interfaces, got %d", len(ifaces)) + } + if ifaces[0].(map[string]interface{})["name"] != "podman0" { + t.Fatalf("interface[0] mismatch: %v", ifaces[0]) + } + + publish := net["publish"].([]interface{}) + if len(publish) != 2 { + t.Fatalf("expected 2 published ports, got %d", len(publish)) + } + if publish[0] != "127.0.0.1:8080:80/tcp" { + t.Fatalf("publish[0] mismatch: %v", publish[0]) + } + if publish[1] != "8443:443/tcp" { + t.Fatalf("publish[1] mismatch: %v", publish[1]) + } +} + +func TestContainerCgroupLimits(t *testing.T) { + cgroupPath := "/machine.slice/libpod-abc.scope" + runner := &testutil.MockRunner{ + Results: map[string][]byte{ + "podman ps -a --format=json": []byte(`[ + {"Names":["limited"],"Id":"id3","Image":"img","ImageID":"sha256:3","State":"exited","Status":"Exited","Command":["app"],"Networks":[],"Ports":[]} + ]`), 
+ "podman inspect limited": []byte(fmt.Sprintf(`[{"State":{"CgroupPath":%q},"NetworkSettings":{"Networks":{"bridge":{}}}}]`, cgroupPath)), + }, + Errors: map[string]error{}, + } + + fs := &testutil.MockFileReader{ + Files: map[string][]byte{ + "/sys/fs/cgroup/machine.slice/libpod-abc.scope/memory.max": []byte("1073741824\n"), + "/sys/fs/cgroup/machine.slice/libpod-abc.scope/cpu.max": []byte("200000 100000\n"), + }, + Globs: map[string][]string{}, + } + + out := collectContainers(t, runner, fs) + c := containerList(t, out)[0].(map[string]interface{}) + + limit, ok := c["resource-limit"].(map[string]interface{}) + if !ok { + t.Fatalf("missing resource-limit: %v", c) + } + if limit["memory"] != "1048576" { + t.Fatalf("memory limit expected 1048576, got %v", limit["memory"]) + } + if toInt(limit["cpu"]) != 2000 { + t.Fatalf("cpu limit expected 2000, got %v", limit["cpu"]) + } +} + +func TestContainerCgroupUnlimited(t *testing.T) { + cgroupPath := "/machine.slice/libpod-unlimited.scope" + runner := &testutil.MockRunner{ + Results: map[string][]byte{ + "podman ps -a --format=json": []byte(`[ + {"Names":["nolimit"],"Id":"id4","Image":"img","ImageID":"sha256:4","State":"exited","Status":"Exited","Command":["app"],"Networks":[],"Ports":[]} + ]`), + "podman inspect nolimit": []byte(fmt.Sprintf(`[{"State":{"CgroupPath":%q},"NetworkSettings":{"Networks":{"bridge":{}}}}]`, cgroupPath)), + }, + Errors: map[string]error{}, + } + + fs := &testutil.MockFileReader{ + Files: map[string][]byte{ + "/sys/fs/cgroup/machine.slice/libpod-unlimited.scope/memory.max": []byte("max\n"), + "/sys/fs/cgroup/machine.slice/libpod-unlimited.scope/cpu.max": []byte("max 100000\n"), + }, + Globs: map[string][]string{}, + } + + out := collectContainers(t, runner, fs) + c := containerList(t, out)[0].(map[string]interface{}) + if _, ok := c["resource-limit"]; ok { + t.Fatalf("resource-limit should be omitted for unlimited cgroup values: %v", c["resource-limit"]) + } +} + +func TestContainerResourceStats(t 
*testing.T) { + runner := &testutil.MockRunner{ + Results: map[string][]byte{ + "podman ps -a --format=json": []byte(`[ + { + "Names":["stats"],"Id":"id5","Image":"img","ImageID":"sha256:5", + "State":"running","Status":"Up","Command":["app"],"Networks":["podman0"],"Ports":[] + } + ]`), + "podman inspect stats": []byte(`[{"NetworkSettings":{"Networks":{"bridge":{}}}}]`), + "podman stats --no-stream --format json --no-reset stats": []byte(`[ + { + "mem_usage":"123.4MB / 1.5GB", + "cpu_percent":"12.34%", + "block_io":"1.2MB / 3.4GB", + "net_io":"1.2MB / 3.4GB", + "pids":5 + } + ]`), + }, + Errors: map[string]error{}, + } + + fs := &testutil.MockFileReader{Files: map[string][]byte{}, Globs: map[string][]string{}} + out := collectContainers(t, runner, fs) + + c := containerList(t, out)[0].(map[string]interface{}) + usage, ok := c["resource-usage"].(map[string]interface{}) + if !ok { + t.Fatalf("missing resource-usage: %v", c) + } + + if usage["memory"] != "120507" { + t.Fatalf("memory usage expected 120507, got %v", usage["memory"]) + } + if usage["cpu"] != "12.34" { + t.Fatalf("cpu usage expected 12.34, got %v", usage["cpu"]) + } + bio := usage["block-io"].(map[string]interface{}) + if bio["read"] != "1171" { + t.Fatalf("block-io read expected 1171, got %v", bio["read"]) + } + if bio["write"] != "3320312" { + t.Fatalf("block-io write expected 3320312, got %v", bio["write"]) + } + nio := usage["net-io"].(map[string]interface{}) + if nio["received"] != "1171" { + t.Fatalf("net-io received expected 1171, got %v", nio["received"]) + } + if nio["sent"] != "3320312" { + t.Fatalf("net-io sent expected 3320312, got %v", nio["sent"]) + } + if toInt(usage["pids"]) != 5 { + t.Fatalf("pids expected 5, got %v", usage["pids"]) + } +} + +func TestContainerStopped(t *testing.T) { + runner := &testutil.MockRunner{ + Results: map[string][]byte{ + "podman ps -a --format=json": []byte(`[ + { + "Names":["stopped"],"Id":"id6","Image":"img","ImageID":"sha256:6", + 
"State":"exited","Status":"Exited (0)","Command":["app"], + "Networks":["podman0"],"Ports":[{"host_ip":"","host_port":8080,"container_port":80,"protocol":"tcp"}] + } + ]`), + "podman inspect stopped": []byte(`[{"NetworkSettings":{"Networks":{"bridge":{}}}}]`), + }, + Errors: map[string]error{}, + } + + fs := &testutil.MockFileReader{Files: map[string][]byte{}, Globs: map[string][]string{}} + out := collectContainers(t, runner, fs) + + c := containerList(t, out)[0].(map[string]interface{}) + if _, ok := c["resource-usage"]; ok { + t.Fatalf("stopped container must not include resource-usage: %v", c["resource-usage"]) + } + + net := c["network"].(map[string]interface{}) + publish := net["publish"].([]interface{}) + if len(publish) != 0 { + t.Fatalf("stopped container must not include published ports, got %v", publish) + } +} + +func TestContainerMultiple(t *testing.T) { + runner := &testutil.MockRunner{ + Results: map[string][]byte{ + "podman ps -a --format=json": []byte(`[ + {"Names":["one"],"Id":"id7","Image":"img1","ImageID":"sha256:7","State":"running","Status":"Up","Command":["a"],"Networks":[],"Ports":[]}, + {"Names":["two"],"Id":"id8","Image":"img2","ImageID":"sha256:8","State":"exited","Status":"Exited","Command":["b"],"Networks":[],"Ports":[]} + ]`), + "podman inspect one": []byte(`[{}]`), + "podman stats --no-stream --format json --no-reset one": []byte(`[]`), + "podman inspect two": []byte(`[{}]`), + }, + Errors: map[string]error{}, + } + + fs := &testutil.MockFileReader{Files: map[string][]byte{}, Globs: map[string][]string{}} + out := collectContainers(t, runner, fs) + + containers := containerList(t, out) + if len(containers) != 2 { + t.Fatalf("expected 2 containers, got %d", len(containers)) + } + if containers[0].(map[string]interface{})["name"] != "one" { + t.Fatalf("first container name mismatch: %v", containers[0]) + } + if containers[1].(map[string]interface{})["name"] != "two" { + t.Fatalf("second container name mismatch: %v", containers[1]) + } 
+} + +func TestParseSizeKiB(t *testing.T) { + tests := []struct { + name string + input string + expected int + }{ + {name: "mb", input: "1.5MB", expected: 1464}, + {name: "kb", input: "512kB", expected: 500}, + {name: "gib", input: "2GiB", expected: 2097152}, + {name: "mib", input: "64MiB", expected: 65536}, + {name: "bytes", input: "2048B", expected: 2}, + {name: "empty", input: "", expected: 0}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := parseSizeKiB(tt.input) + if got != tt.expected { + t.Errorf("parseSizeKiB(%q): expected %d, got %d", tt.input, tt.expected, got) + } + }) + } +} + +func TestParseCgroupMemory(t *testing.T) { + tests := []struct { + name string + input string + expected int + }{ + {name: "max", input: "max", expected: 0}, + {name: "bytes", input: "1073741824", expected: 1048576}, + {name: "empty", input: "", expected: 0}, + {name: "invalid", input: "abc", expected: 0}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := parseCgroupMemory(tt.input) + if got != tt.expected { + t.Errorf("parseCgroupMemory(%q): expected %d, got %d", tt.input, tt.expected, got) + } + }) + } +} + +func TestParseCgroupCPU(t *testing.T) { + tests := []struct { + name string + input string + expected int + }{ + {name: "max", input: "max 100000", expected: 0}, + {name: "limited", input: "50000 100000", expected: 500}, + {name: "empty", input: "", expected: 0}, + {name: "invalid", input: "abc", expected: 0}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := parseCgroupCPU(tt.input) + if got != tt.expected { + t.Errorf("parseCgroupCPU(%q): expected %d, got %d", tt.input, tt.expected, got) + } + }) + } +} + +func TestContainerGracefulDegradation(t *testing.T) { + runner := &testutil.MockRunner{ + Results: map[string][]byte{}, + Errors: map[string]error{ + "podman ps -a --format=json": fmt.Errorf("podman not found"), + }, + } + + fs := &testutil.MockFileReader{Files: 
map[string][]byte{}, Globs: map[string][]string{}} + + c := NewContainerCollector(runner, fs, 30*time.Second) + tr := tree.New() + if err := c.Collect(context.Background(), tr); err != nil { + t.Fatalf("Collect failed: %v", err) + } + + // With no containers the key must be absent, not a bare + // {"container":[]} node -- otherwise an enabled-but-idle container + // feature surfaces as operational data. + if raw := tr.Get("infix-containers:containers"); raw != nil { + t.Fatalf("expected no containers key when podman ps fails, got %s", raw) + } +} diff --git a/src/yangerd/internal/collector/hardware.go b/src/yangerd/internal/collector/hardware.go new file mode 100644 index 000000000..48ed695df --- /dev/null +++ b/src/yangerd/internal/collector/hardware.go @@ -0,0 +1,1123 @@ +package collector + +import ( + "bufio" + "bytes" + "context" + "encoding/json" + "fmt" + "log" + "net" + "regexp" + "sort" + "strconv" + "strings" + "time" + + "github.com/kernelkit/infix/src/yangerd/internal/nl80211" + "github.com/kernelkit/infix/src/yangerd/internal/tree" +) + +var hwSensorTypeRe = regexp.MustCompile(`.*_(phy|sfp|fan|temp|sensor|psu|cpu|gpu|memory|disk)\d*$`) +var hwSensorSuffixExtractRe = regexp.MustCompile(`.*_((?:phy|sfp|fan|temp|sensor|psu|cpu|gpu|memory|disk)\d*)$`) +var hwTrailingNumUnderscoreRe = regexp.MustCompile(`_(\d+)$`) +var hwPhyNumRe = regexp.MustCompile(`(\d+)$`) + +// HardwareCollector gathers ietf-hardware operational data. +type HardwareCollector struct { + cmd CommandRunner + fs FileReader + interval time.Duration + + enableWifi bool + enableGPS bool +} + +// NewHardwareCollector creates a HardwareCollector with the given dependencies. +func NewHardwareCollector(cmd CommandRunner, fs FileReader, interval time.Duration, enableWifi, enableGPS bool) *HardwareCollector { + return &HardwareCollector{ + cmd: cmd, + fs: fs, + interval: interval, + enableWifi: enableWifi, + enableGPS: enableGPS, + } +} + +// Name implements Collector. 
+func (c *HardwareCollector) Name() string { return "hardware" } + +// Interval implements Collector. +func (c *HardwareCollector) Interval() time.Duration { return c.interval } + +// Collect implements Collector. It produces one tree key: +// "ietf-hardware:hardware". +func (c *HardwareCollector) Collect(ctx context.Context, t *tree.Tree) error { + systemjson := c.readSystemJSON() + + components := make([]interface{}, 0) + components = append(components, c.motherboard_component(systemjson)...) + components = append(components, c.vpd_components(systemjson)...) + components = append(components, c.usb_port_components(systemjson)...) + components = append(components, c.hwmon_sensor_components(ctx)...) + components = append(components, c.thermal_sensor_components(ctx)...) + + if c.enableWifi { + components = append(components, c.wifi_radio_components(ctx)...) + } + if c.enableGPS { + components = append(components, c.gps_receiver_components(ctx)...) + } + + hardware := map[string]interface{}{ + "component": components, + } + + if data, err := json.Marshal(hardware); err == nil { + t.Set("ietf-hardware:hardware", data) + } + + return nil +} + +func (c *HardwareCollector) readSystemJSON() map[string]interface{} { + data, err := c.fs.ReadFile("/run/system.json") + if err != nil { + return map[string]interface{}{} + } + + out := make(map[string]interface{}) + if err := json.Unmarshal(data, &out); err != nil { + log.Printf("collector hardware: system.json: %v", err) + return map[string]interface{}{} + } + + return out +} + +func (c *HardwareCollector) motherboard_component(systemjson map[string]interface{}) []interface{} { + if len(systemjson) == 0 { + return nil + } + + component := map[string]interface{}{ + "name": "mainboard", + "class": "iana-hardware:chassis", + "state": map[string]interface{}{ + "admin-state": "unknown", + "oper-state": "enabled", + }, + } + + if v, ok := systemjson["vendor"].(string); ok && v != "" { + component["mfg-name"] = v + } + if v, ok := 
// vpd_vendor_extensions converts the raw "vendor-extension" value of a VPD
// record into a list of maps with the keys "iana-enterprise-number" and
// "extension-data".
//
// data is expected to be a list of [number, payload] pairs. Anything that is
// not a list yields nil; list items that are not lists of at least two
// elements are skipped.
func vpd_vendor_extensions(data interface{}) []interface{} {
	entries, isList := data.([]interface{})
	if !isList {
		return nil
	}

	result := make([]interface{}, 0, len(entries))
	for i := range entries {
		tuple, isTuple := entries[i].([]interface{})
		if isTuple && len(tuple) >= 2 {
			ext := make(map[string]interface{}, 2)
			ext["iana-enterprise-number"] = tuple[0]
			ext["extension-data"] = tuple[1]
			result = append(result, ext)
		}
	}

	return result
}
component["model-name"] = model + } + if serial, ok := dataRaw["serial-number"].(string); ok && serial != "" { + component["serial-num"] = serial + } + + vpdData, ok := component["infix-hardware:vpd-data"].(map[string]interface{}) + if !ok { + vpdData = make(map[string]interface{}) + component["infix-hardware:vpd-data"] = vpdData + } + for key, val := range dataRaw { + if val == nil { + continue + } + if key == "vendor-extension" { + if ext := vpd_vendor_extensions(val); len(ext) > 0 { + vpdData["infix-hardware:vendor-extension"] = ext + } + continue + } + vpdData[key] = val + } + } + + if _, ok := component["name"]; ok { + components = append(components, component) + } + } + + return components +} + +func (c *HardwareCollector) usb_port_components(systemjson map[string]interface{}) []interface{} { + usbPortsRaw, ok := systemjson["usb-ports"].([]interface{}) + if !ok { + return nil + } + + components := make([]interface{}, 0, len(usbPortsRaw)) + for _, usbPortRaw := range usbPortsRaw { + usbPort, ok := usbPortRaw.(map[string]interface{}) + if !ok { + continue + } + + name, ok := usbPort["name"].(string) + if !ok || name == "" { + continue + } + path, ok := usbPort["path"].(string) + if !ok || path == "" { + continue + } + + authorizedDefault, err := c.fs.ReadFile(path + "/authorized_default") + if err != nil { + continue + } + + state := "locked" + if strings.TrimSpace(string(authorizedDefault)) == "1" { + state = "unlocked" + } + + components = append(components, map[string]interface{}{ + "name": name, + "class": "infix-hardware:usb", + "state": map[string]interface{}{ + "admin-state": state, + "oper-state": "enabled", + }, + }) + } + + return components +} + +func normalize_sensor_name(name string) string { + name = strings.TrimSuffix(name, "-thermal") + name = strings.TrimSuffix(name, "_thermal") + + if m := hwSensorSuffixExtractRe.FindStringSubmatch(name); len(m) > 1 { + name = m[1] + } + + name = hwTrailingNumUnderscoreRe.ReplaceAllString(name, "$1") + return 
// humanizeSensorLabel turns a raw sensor label into a display string.
//
// Underscores are treated as spaces and the label is split on whitespace.
// Words that are unchanged by upper-casing (acronyms such as "VCC", but also
// purely numeric words) are kept verbatim; every other word is lower-cased
// and given an upper-case first letter. The words are re-joined with single
// spaces. An empty label yields an empty string.
func humanizeSensorLabel(label string) string {
	if label == "" {
		return ""
	}

	words := strings.Fields(strings.ReplaceAll(label, "_", " "))
	pretty := make([]string, 0, len(words))
	for _, word := range words {
		if strings.ToUpper(word) == word {
			pretty = append(pretty, word)
			continue
		}
		runes := []rune(strings.ToLower(word))
		if len(runes) == 0 {
			continue
		}
		pretty = append(pretty, strings.ToUpper(string(runes[:1]))+string(runes[1:]))
	}

	return strings.Join(pretty, " ")
}
:= make(map[string]map[string]interface{}) + if err := ctx.Err(); err != nil { + return phyInfo + } + + phys, err := client.ListPhys() + if err != nil { + return phyInfo + } + + for _, phy := range phys { + if phy == "" { + continue + } + phyInfo[phy] = map[string]interface{}{ + "band": "Unknown", + "iface": "", + "description": "WiFi Radio", + } + } + + phyNumToName := make(map[string]string) + for phyName := range phyInfo { + m := hwPhyNumRe.FindStringSubmatch(phyName) + if len(m) > 1 { + phyNumToName[m[1]] = phyName + } + } + + devMap, err := client.PhyInterfaces() + if err == nil { + for phyNum, ifaces := range devMap { + phyName, ok := phyNumToName[phyNum] + if !ok { + continue + } + if len(ifaces) == 0 { + continue + } + if entry, ok := phyInfo[phyName]; ok { + entry["iface"] = ifaces[0] + } + } + } + + for phy, info := range phyInfo { + band := strDefault(info["band"], "Unknown") + iface := strDefault(info["iface"], "") + switch { + case iface != "" && band != "Unknown": + info["description"] = "WiFi Radio " + phy + case band != "Unknown": + info["description"] = "WiFi Radio (" + band + ")" + case iface != "": + info["description"] = "WiFi Radio " + phy + default: + info["description"] = "WiFi Radio" + } + } + + return phyInfo +} + +func (c *HardwareCollector) hwmon_sensor_components(ctx context.Context) []interface{} { + components := make([]interface{}, 0) + deviceSensors := make(map[string][]map[string]interface{}) + + hwmonEntries, err := c.listDir(ctx, "/sys/class/hwmon") + if err != nil { + return components + } + + for _, entry := range hwmonEntries { + if !strings.HasPrefix(entry, "hwmon") { + continue + } + hwmonPath := "/sys/class/hwmon/" + entry + + deviceName, ok := c.readSensorString(hwmonPath + "/name") + if !ok || deviceName == "" { + continue + } + if devName, ok := c.readSensorString(hwmonPath + "/device/name"); ok && devName != "" { + deviceName = devName + } + + baseName := normalize_sensor_name(deviceName) + if baseName == "" { + continue 
+ } + + entries, err := c.listDir(ctx, hwmonPath) + if err != nil { + continue + } + + fanFiles := make([]string, 0) + for _, e := range entries { + if strings.HasPrefix(e, "fan") && strings.HasSuffix(e, "_input") { + fanFiles = append(fanFiles, e) + } + } + + for _, e := range entries { + if !strings.HasPrefix(e, "temp") || !strings.HasSuffix(e, "_input") { + continue + } + sensorNum := strings.TrimPrefix(strings.SplitN(e, "_", 2)[0], "temp") + value, ok := c.readSensorInt(hwmonPath + "/" + e) + if !ok { + continue + } + label := "" + sensor := "" + if rawLabel, ok := c.readSensorString(fmt.Sprintf("%s/temp%s_label", hwmonPath, sensorNum)); ok { + label = rawLabel + sensor = baseName + "-" + normalize_sensor_name(rawLabel) + } else { + sensor = sensorName(baseName, sensorNum) + } + deviceSensors[baseName] = append(deviceSensors[baseName], sensorComponent(sensor, value, "celsius", "milli", label)) + } + + for _, e := range fanFiles { + sensorNum := strings.TrimPrefix(strings.SplitN(e, "_", 2)[0], "fan") + value, ok := c.readSensorInt(hwmonPath + "/" + e) + if !ok { + continue + } + label := "" + sensor := "" + if rawLabel, ok := c.readSensorString(fmt.Sprintf("%s/fan%s_label", hwmonPath, sensorNum)); ok { + label = rawLabel + sensor = baseName + "-" + normalize_sensor_name(rawLabel) + } else { + sensor = sensorName(baseName, sensorNum) + } + deviceSensors[baseName] = append(deviceSensors[baseName], sensorComponent(sensor, value, "rpm", "units", label)) + } + + if len(fanFiles) == 0 { + for _, e := range entries { + if !strings.HasPrefix(e, "pwm") { + continue + } + n := strings.TrimPrefix(e, "pwm") + if _, err := strconv.Atoi(n); err != nil { + continue + } + pwmRaw, ok := c.readSensorInt(hwmonPath + "/" + e) + if !ok { + continue + } + sensorNum := n + value := int((float64(pwmRaw) / 255.0) * 100.0 * 1000.0) + label := "PWM Fan" + sensor := "" + if rawLabel, ok := c.readSensorString(fmt.Sprintf("%s/pwm%s_label", hwmonPath, sensorNum)); ok { + label = rawLabel + 
sensor = baseName + "-" + normalize_sensor_name(rawLabel) + } else { + sensor = sensorName(baseName, sensorNum) + } + deviceSensors[baseName] = append(deviceSensors[baseName], sensorComponent(sensor, value, "other", "milli", label)) + } + } + + for _, e := range entries { + if !strings.HasPrefix(e, "in") || !strings.HasSuffix(e, "_input") { + continue + } + sensorNum := strings.TrimPrefix(strings.SplitN(e, "_", 2)[0], "in") + value, ok := c.readSensorInt(hwmonPath + "/" + e) + if !ok { + continue + } + label := "voltage" + sensor := "" + if rawLabel, ok := c.readSensorString(fmt.Sprintf("%s/in%s_label", hwmonPath, sensorNum)); ok { + label = rawLabel + sensor = baseName + "-" + normalize_sensor_name(rawLabel) + } else { + if sensorNum == "0" { + sensor = baseName + "-voltage" + } else { + sensor = baseName + "-voltage" + sensorNum + } + } + deviceSensors[baseName] = append(deviceSensors[baseName], sensorComponent(sensor, value, "volts-DC", "milli", label)) + } + + for _, e := range entries { + if !strings.HasPrefix(e, "curr") || !strings.HasSuffix(e, "_input") { + continue + } + sensorNum := strings.TrimPrefix(strings.SplitN(e, "_", 2)[0], "curr") + value, ok := c.readSensorInt(hwmonPath + "/" + e) + if !ok { + continue + } + label := "current" + sensor := "" + if rawLabel, ok := c.readSensorString(fmt.Sprintf("%s/curr%s_label", hwmonPath, sensorNum)); ok { + label = rawLabel + sensor = baseName + "-" + normalize_sensor_name(rawLabel) + } else { + if sensorNum == "1" { + sensor = baseName + "-current" + } else { + sensor = baseName + "-current" + sensorNum + } + } + deviceSensors[baseName] = append(deviceSensors[baseName], sensorComponent(sensor, value, "amperes", "milli", label)) + } + + for _, e := range entries { + if !strings.HasPrefix(e, "power") || !strings.HasSuffix(e, "_input") { + continue + } + sensorNum := strings.TrimPrefix(strings.SplitN(e, "_", 2)[0], "power") + value, ok := c.readSensorInt(hwmonPath + "/" + e) + if !ok { + continue + } + label := 
"power" + sensor := "" + if rawLabel, ok := c.readSensorString(fmt.Sprintf("%s/power%s_label", hwmonPath, sensorNum)); ok { + label = rawLabel + sensor = baseName + "-" + normalize_sensor_name(rawLabel) + } else { + if sensorNum == "1" { + sensor = baseName + "-power" + } else { + sensor = baseName + "-power" + sensorNum + } + } + deviceSensors[baseName] = append(deviceSensors[baseName], sensorComponent(sensor, value, "watts", "micro", label)) + } + } + + for baseName, sensors := range deviceSensors { + if len(sensors) > 1 { + components = append(components, map[string]interface{}{ + "name": baseName, + "class": "iana-hardware:module", + }) + for _, sensor := range sensors { + sensor["parent"] = baseName + components = append(components, sensor) + } + continue + } + for _, sensor := range sensors { + components = append(components, sensor) + } + } + + wifiInfo := make(map[string]map[string]interface{}) + if client, err := nl80211.Dial(); err == nil { + wifiInfo = c.get_wifi_phy_info(ctx, client) + _ = client.Close() + } + for _, componentRaw := range components { + component, ok := componentRaw.(map[string]interface{}) + if !ok { + continue + } + name, ok := component["name"].(string) + if !ok { + continue + } + if strings.HasPrefix(name, "radio") { + if phy, ok := wifiInfo[name]; ok { + if desc, ok := phy["description"].(string); ok && desc != "" { + component["description"] = desc + } + } + } + } + + return components +} + +func (c *HardwareCollector) thermal_sensor_components(ctx context.Context) []interface{} { + components := make([]interface{}, 0) + + entries, err := c.listDir(ctx, "/sys/class/thermal") + if err != nil { + return components + } + + for _, entry := range entries { + if !strings.HasPrefix(entry, "thermal_zone") { + continue + } + zonePath := "/sys/class/thermal/" + entry + zoneType, ok := c.readSensorString(zonePath + "/type") + if !ok || zoneType == "" { + continue + } + temp, ok := c.readSensorInt(zonePath + "/temp") + if !ok { + continue + } 
+ + components = append(components, sensorComponent(normalize_sensor_name(zoneType), temp, "celsius", "milli", "")) + } + + return components +} + +func (c *HardwareCollector) get_survey_data(ctx context.Context, client *nl80211.Client, ifname string) []interface{} { + if err := ctx.Err(); err != nil { + return nil + } + if ifname == "" { + return nil + } + iface, err := net.InterfaceByName(ifname) + if err != nil { + return nil + } + + survey, err := client.Survey(iface.Index) + if err != nil { + return nil + } + + channels := make([]interface{}, 0, len(survey)) + for _, entry := range survey { + channel := map[string]interface{}{ + "frequency": entry["frequency"], + "in-use": entry["in_use"], + } + setIfPresent(channel, "noise", entry, "noise") + setIfPresent(channel, "active-time", entry, "active_time") + setIfPresent(channel, "busy-time", entry, "busy_time") + setIfPresent(channel, "receive-time", entry, "receive_time") + setIfPresent(channel, "transmit-time", entry, "transmit_time") + channels = append(channels, channel) + } + + return channels +} + +func (c *HardwareCollector) get_phy_info(ctx context.Context, client *nl80211.Client, phyName string) map[string]interface{} { + if err := ctx.Err(); err != nil { + return map[string]interface{}{} + } + phyInfo, err := client.PhyInfo(phyName) + if err != nil { + return map[string]interface{}{} + } + + return phyInfo +} + +func convert_iw_phy_info_for_yanger(phyInfo map[string]interface{}) map[string]interface{} { + result := map[string]interface{}{ + "bands": []interface{}{}, + "driver": nil, + "manufacturer": "Unknown", + "max-interfaces": map[string]interface{}{}, + } + + bandsRaw, _ := phyInfo["bands"].([]interface{}) + bands := make([]interface{}, 0, len(bandsRaw)) + for _, bandRaw := range bandsRaw { + band, ok := bandRaw.(map[string]interface{}) + if !ok { + continue + } + bandData := map[string]interface{}{ + "band": strconv.Itoa(toInt(band["band"])), + "name": strDefault(band["name"], "Unknown"), + } + if 
// channelFromFrequency maps a WiFi centre frequency in MHz to its channel
// number. The second return value is false when freq lies outside the
// handled 2.4 GHz (2412-2484), 5 GHz (5170-5825) and 6 GHz (5955-7115)
// ranges.
func channelFromFrequency(freq int) (int, bool) {
	switch {
	case freq == 2484:
		// Channel 14 sits 12 MHz above channel 13, not 5 MHz, so the
		// linear 2.4 GHz formula below would wrongly yield 15.
		return 14, true
	case freq >= 2412 && freq <= 2484:
		return (freq - 2407) / 5, true
	case freq >= 5170 && freq <= 5825:
		return (freq - 5000) / 5, true
	case freq >= 5955 && freq <= 7115:
		return (freq - 5950) / 5, true
	default:
		return 0, false
	}
}
map[string]interface{}{ + "name": phyName, + "class": "infix-hardware:wifi", + "description": strDefault(phyData["description"], "WiFi Radio"), + } + + wifiRadioData := make(map[string]interface{}) + iwInfo := c.get_phy_info(ctx, client, phyName) + phyDetails := convert_iw_phy_info_for_yanger(iwInfo) + + if manufacturer := strDefault(phyDetails["manufacturer"], "Unknown"); manufacturer != "Unknown" { + component["mfg-name"] = manufacturer + } + + if bands, ok := phyDetails["bands"].([]interface{}); ok && len(bands) > 0 { + wifiRadioData["bands"] = bands + } + if driver := strDefault(phyDetails["driver"], ""); driver != "" { + wifiRadioData["driver"] = driver + } + if maxIf, ok := phyDetails["max-interfaces"].(map[string]interface{}); ok && len(maxIf) > 0 { + wifiRadioData["max-interfaces"] = maxIf + } + + setIfPresent(wifiRadioData, "max-txpower", iwInfo, "max_txpower") + + supportedChannelsMap := make(map[int]bool) + bandsRaw, _ := iwInfo["bands"].([]interface{}) + for _, bandRaw := range bandsRaw { + band, ok := bandRaw.(map[string]interface{}) + if !ok { + continue + } + freqsRaw, _ := band["frequencies"].([]interface{}) + for _, freqRaw := range freqsRaw { + freq := toInt(freqRaw) + if channel, ok := channelFromFrequency(freq); ok { + supportedChannelsMap[channel] = true + } + } + } + if len(supportedChannelsMap) > 0 { + supported := make([]int, 0, len(supportedChannelsMap)) + for ch := range supportedChannelsMap { + supported = append(supported, ch) + } + sort.Ints(supported) + supportedIface := make([]interface{}, 0, len(supported)) + for _, ch := range supported { + supportedIface = append(supportedIface, ch) + } + wifiRadioData["supported-channels"] = supportedIface + } + + wifiRadioData["num-virtual-interfaces"] = toInt(iwInfo["num_virtual_interfaces"]) + + iface := strDefault(phyData["iface"], "") + if channels := c.get_survey_data(ctx, client, iface); len(channels) > 0 { + wifiRadioData["survey"] = map[string]interface{}{ + "channel": channels, + } + } + 
// countUsedSatellites returns how many entries of sats are maps whose "used"
// field is the boolean true. Entries of any other shape are ignored.
func countUsedSatellites(sats []interface{}) int {
	total := 0
	for i := range sats {
		// A failed assertion leaves entry nil; reading from a nil map is
		// safe and yields no "used" flag.
		entry, _ := sats[i].(map[string]interface{})
		if inUse, _ := entry["used"].(bool); inUse {
			total++
		}
	}
	return total
}
devPath, + } + } + + if len(gpsDevices) == 0 { + return components + } + + poll := gpsd_poll(ctx) + active := toInt(poll["active"]) + + tpvByDev := make(map[string]map[string]interface{}) + tpvRaw, _ := poll["tpv"].([]interface{}) + for _, itemRaw := range tpvRaw { + item, ok := itemRaw.(map[string]interface{}) + if !ok { + continue + } + dev, _ := item["device"].(string) + if dev != "" { + tpvByDev[dev] = item + } + } + + skyByDev := make(map[string]map[string]interface{}) + skyRaw, _ := poll["sky"].([]interface{}) + for _, itemRaw := range skyRaw { + item, ok := itemRaw.(map[string]interface{}) + if !ok { + continue + } + dev, _ := item["device"].(string) + if dev != "" { + skyByDev[dev] = item + } + } + + for actualPath, dev := range gpsDevices { + name := dev["name"] + symlink := dev["symlink"] + + component := map[string]interface{}{ + "name": name, + "class": "infix-hardware:gps", + "description": "GPS/GNSS Receiver", + } + + gpsData := make(map[string]interface{}) + gpsData["device"] = symlink + + tpv := tpvByDev[actualPath] + if tpv == nil { + tpv = tpvByDev[symlink] + } + if tpv == nil && len(tpvByDev) == 1 { + for _, v := range tpvByDev { + tpv = v + } + } + + sky := skyByDev[actualPath] + if sky == nil { + sky = skyByDev[symlink] + } + if sky == nil && len(skyByDev) == 1 { + for _, v := range skyByDev { + sky = v + } + } + + gpsData["activated"] = active > 0 && len(tpv) > 0 + + if driver, ok := tpv["driver"].(string); ok && driver != "" { + gpsData["driver"] = driver + } + + switch toInt(tpv["mode"]) { + case 2: + gpsData["fix-mode"] = "2d" + case 3: + gpsData["fix-mode"] = "3d" + default: + gpsData["fix-mode"] = "none" + } + + if lat, ok := tpv["lat"]; ok { + gpsData["latitude"] = fmt.Sprintf("%.6f", toFloat64(lat)) + } + if lon, ok := tpv["lon"]; ok { + gpsData["longitude"] = fmt.Sprintf("%.6f", toFloat64(lon)) + } + if alt, ok := tpv["altHAE"]; ok { + gpsData["altitude"] = fmt.Sprintf("%.1f", toFloat64(alt)) + } + + satVis := 0 + satUsed := 0 + if sky 
!= nil { + sats, _ := sky["satellites"].([]interface{}) + if len(sats) > 0 { + satVis = len(sats) + satUsed = countUsedSatellites(sats) + } + if satVis == 0 { + satVis = toInt(zeroIfNil(sky["nSat"])) + if satVis == 0 { + satVis = toInt(zeroIfNil(sky["satellites_visible"])) + } + } + if satUsed == 0 { + satUsed = toInt(zeroIfNil(sky["uSat"])) + if satUsed == 0 { + satUsed = toInt(zeroIfNil(sky["satellites_used"])) + } + } + } + + if satVis == 0 { + satVis = toInt(zeroIfNil(tpv["nSat"])) + if satVis == 0 { + satVis = toInt(zeroIfNil(tpv["satellites_visible"])) + } + } + if satUsed == 0 { + satUsed = toInt(zeroIfNil(tpv["uSat"])) + if satUsed == 0 { + satUsed = toInt(zeroIfNil(tpv["satellites_used"])) + } + } + + if satUsed > satVis { + satVis = satUsed + } + gpsData["satellites-visible"] = satVis + gpsData["satellites-used"] = satUsed + + ppsPath := fmt.Sprintf("/dev/pps%s", strings.TrimPrefix(name, "gps")) + if _, err := c.cmd.Run(ctx, "ls", ppsPath); err == nil { + gpsData["pps-available"] = true + } else { + gpsData["pps-available"] = false + } + + component["infix-hardware:gps-receiver"] = gpsData + components = append(components, component) + } + + return components +} + +func toFloat64(v interface{}) float64 { + switch n := v.(type) { + case float64: + return n + case float32: + return float64(n) + case int: + return float64(n) + case int64: + return float64(n) + case json.Number: + f, _ := n.Float64() + return f + case string: + f, _ := strconv.ParseFloat(n, 64) + return f + default: + return 0 + } +} diff --git a/src/yangerd/internal/collector/hardware_test.go b/src/yangerd/internal/collector/hardware_test.go new file mode 100644 index 000000000..deec42f3d --- /dev/null +++ b/src/yangerd/internal/collector/hardware_test.go @@ -0,0 +1,398 @@ +package collector + +import ( + "context" + "encoding/json" + "fmt" + "testing" + "time" + + "github.com/kernelkit/infix/src/yangerd/internal/testutil" + "github.com/kernelkit/infix/src/yangerd/internal/tree" +) + +func 
// getComponentByName returns the first element of components that is a map
// whose "name" entry equals name, or nil when there is none.
func getComponentByName(components []interface{}, name string) map[string]interface{} {
	for idx := 0; idx < len(components); idx++ {
		entry, isMap := components[idx].(map[string]interface{})
		if isMap && entry["name"] == name {
			return entry
		}
	}
	return nil
}
mb["mfg-name"] != "Acme" || mb["model-name"] != "Router-1" || mb["serial-num"] != "SN123" { + t.Fatalf("mainboard identity fields mismatch: %v", mb) + } + if mb["hardware-rev"] != "PN99" { + t.Fatalf("mainboard hardware-rev mismatch: %v", mb["hardware-rev"]) + } + if mb["infix-hardware:phys-address"] != "00:11:22:33:44:55" { + t.Fatalf("mainboard phys-address mismatch: %v", mb["infix-hardware:phys-address"]) + } + + state, ok := mb["state"].(map[string]interface{}) + if !ok { + t.Fatalf("mainboard state missing: %v", mb["state"]) + } + if state["admin-state"] != "unknown" || state["oper-state"] != "enabled" { + t.Fatalf("mainboard state mismatch: %v", state) + } +} + +func TestHardwareVPD(t *testing.T) { + runner := &testutil.MockRunner{Results: map[string][]byte{}, Errors: map[string]error{}} + fs := &testutil.MockFileReader{Files: map[string][]byte{ + "/run/system.json": []byte(`{ + "vpd": { + "slot0": { + "board": "board0", + "data": { + "manufacture-date": "04/11/2026 13:14:15", + "manufacturer": "VPD Inc", + "product-name": "X1", + "serial-number": "VPD-123", + "foo": "bar", + "vendor-extension": [[32473, "aa55"]] + } + } + } + }`), + }, Globs: map[string][]string{}} + + components := collectHardware(t, newHardwareCollector(runner, fs)) + vpd := getComponentByName(components, "board0") + if vpd == nil { + t.Fatal("vpd component board0 not found") + } + if vpd["class"] != "infix-hardware:vpd" { + t.Fatalf("vpd class mismatch: %v", vpd["class"]) + } + if vpd["mfg-date"] != "2026-04-11T13:14:15Z" { + t.Fatalf("mfg-date mismatch: %v", vpd["mfg-date"]) + } + if vpd["serial-num"] != "VPD-123" { + t.Fatalf("serial-num mismatch: %v", vpd["serial-num"]) + } + + vpdData, ok := vpd["infix-hardware:vpd-data"].(map[string]interface{}) + if !ok { + t.Fatalf("vpd-data missing: %v", vpd["infix-hardware:vpd-data"]) + } + if vpdData["foo"] != "bar" { + t.Fatalf("vpd-data foo mismatch: %v", vpdData["foo"]) + } + extList, ok := 
vpdData["infix-hardware:vendor-extension"].([]interface{}) + if !ok || len(extList) != 1 { + t.Fatalf("vendor-extension missing: %v", vpdData["infix-hardware:vendor-extension"]) + } + ext := extList[0].(map[string]interface{}) + if toInt(ext["iana-enterprise-number"]) != 32473 || ext["extension-data"] != "aa55" { + t.Fatalf("vendor-extension mismatch: %v", ext) + } +} + +func TestHardwareUSBPorts(t *testing.T) { + runner := &testutil.MockRunner{Results: map[string][]byte{}, Errors: map[string]error{}} + fs := &testutil.MockFileReader{Files: map[string][]byte{ + "/run/system.json": []byte(`{"usb-ports":[{"name":"usb-a","path":"/sys/devices/usb-a"},{"name":"usb-b","path":"/sys/devices/usb-b"}]}`), + "/sys/devices/usb-a/authorized_default": []byte("1\n"), + "/sys/devices/usb-b/authorized_default": []byte("0\n"), + }, Globs: map[string][]string{}} + + components := collectHardware(t, newHardwareCollector(runner, fs)) + usbA := getComponentByName(components, "usb-a") + usbB := getComponentByName(components, "usb-b") + if usbA == nil || usbB == nil { + t.Fatalf("usb components missing: usb-a=%v usb-b=%v", usbA, usbB) + } + aState := usbA["state"].(map[string]interface{}) + bState := usbB["state"].(map[string]interface{}) + if aState["admin-state"] != "unlocked" || bState["admin-state"] != "locked" { + t.Fatalf("usb admin-state mismatch: a=%v b=%v", aState, bState) + } +} + +func TestHardwareHwmonTemp(t *testing.T) { + runner := &testutil.MockRunner{Results: map[string][]byte{ + "ls /sys/class/hwmon": []byte("hwmon0\n"), + "ls /sys/class/hwmon/hwmon0": []byte("name\ntemp1_input\ntemp1_label\n"), + }, Errors: map[string]error{}} + fs := &testutil.MockFileReader{Files: map[string][]byte{ + "/run/system.json": []byte(`{}`), + "/sys/class/hwmon/hwmon0/name": []byte("cpu_thermal\n"), + "/sys/class/hwmon/hwmon0/temp1_input": []byte("42000\n"), + "/sys/class/hwmon/hwmon0/temp1_label": []byte("cpu_temp\n"), + }, Globs: map[string][]string{}} + + components := collectHardware(t, 
newHardwareCollector(runner, fs)) + if !containsComponentWithClass(components, "iana-hardware:sensor") { + t.Fatalf("expected at least one sensor component: %v", components) + } + sensor := getComponentByName(components, "cpu-temp") + if sensor == nil { + t.Fatalf("expected temp sensor cpu-temp, got: %v", components) + } + sd := sensor["sensor-data"].(map[string]interface{}) + if toInt(sd["value"]) != 42000 || sd["value-type"] != "celsius" || sd["value-scale"] != "milli" { + t.Fatalf("temp sensor-data mismatch: %v", sd) + } +} + +func TestHardwareHwmonFan(t *testing.T) { + runner := &testutil.MockRunner{Results: map[string][]byte{ + "ls /sys/class/hwmon": []byte("hwmon1\n"), + "ls /sys/class/hwmon/hwmon1": []byte("name\nfan1_input\n"), + }, Errors: map[string]error{}} + fs := &testutil.MockFileReader{Files: map[string][]byte{ + "/run/system.json": []byte(`{}`), + "/sys/class/hwmon/hwmon1/name": []byte("pwmfan\n"), + "/sys/class/hwmon/hwmon1/fan1_input": []byte("3200\n"), + }, Globs: map[string][]string{}} + + components := collectHardware(t, newHardwareCollector(runner, fs)) + sensor := getComponentByName(components, "pwmfan") + if sensor == nil { + t.Fatalf("expected fan sensor pwmfan, got: %v", components) + } + sd := sensor["sensor-data"].(map[string]interface{}) + if toInt(sd["value"]) != 3200 || sd["value-type"] != "rpm" || sd["value-scale"] != "units" { + t.Fatalf("fan sensor-data mismatch: %v", sd) + } +} + +func TestHardwareHwmonVoltage(t *testing.T) { + runner := &testutil.MockRunner{Results: map[string][]byte{ + "ls /sys/class/hwmon": []byte("hwmon2\n"), + "ls /sys/class/hwmon/hwmon2": []byte("name\nin1_input\nin1_label\n"), + }, Errors: map[string]error{}} + fs := &testutil.MockFileReader{Files: map[string][]byte{ + "/run/system.json": []byte(`{}`), + "/sys/class/hwmon/hwmon2/name": []byte("ina3221\n"), + "/sys/class/hwmon/hwmon2/in1_input": []byte("12000\n"), + "/sys/class/hwmon/hwmon2/in1_label": []byte("VCC\n"), + }, Globs: map[string][]string{}} + + 
components := collectHardware(t, newHardwareCollector(runner, fs)) + sensor := getComponentByName(components, "ina3221-VCC") + if sensor == nil { + t.Fatalf("expected voltage sensor ina3221-VCC, got: %v", components) + } + if sensor["description"] != "VCC" { + t.Fatalf("expected VCC description, got %v", sensor["description"]) + } + sd := sensor["sensor-data"].(map[string]interface{}) + if toInt(sd["value"]) != 12000 || sd["value-type"] != "volts-DC" || sd["value-scale"] != "milli" { + t.Fatalf("voltage sensor-data mismatch: %v", sd) + } +} + +func TestHardwareHwmonMultiSensor(t *testing.T) { + runner := &testutil.MockRunner{Results: map[string][]byte{ + "ls /sys/class/hwmon": []byte("hwmon3\n"), + "ls /sys/class/hwmon/hwmon3": []byte("name\ntemp1_input\ntemp1_label\nfan1_input\nfan1_label\ncurr1_input\npower1_input\n"), + }, Errors: map[string]error{}} + fs := &testutil.MockFileReader{Files: map[string][]byte{ + "/run/system.json": []byte(`{}`), + "/sys/class/hwmon/hwmon3/name": []byte("sfp_2\n"), + "/sys/class/hwmon/hwmon3/temp1_input": []byte("33000\n"), + "/sys/class/hwmon/hwmon3/temp1_label": []byte("temp1\n"), + "/sys/class/hwmon/hwmon3/fan1_input": []byte("2000\n"), + "/sys/class/hwmon/hwmon3/fan1_label": []byte("fan1\n"), + "/sys/class/hwmon/hwmon3/curr1_input": []byte("1500\n"), + "/sys/class/hwmon/hwmon3/power1_input": []byte("2500000\n"), + }, Globs: map[string][]string{}} + + components := collectHardware(t, newHardwareCollector(runner, fs)) + parent := getComponentByName(components, "sfp2") + if parent == nil || parent["class"] != "iana-hardware:module" { + t.Fatalf("expected sfp2 parent module, got: %v", parent) + } + + children := 0 + hasCurrent := false + hasPower := false + for _, compRaw := range components { + comp, ok := compRaw.(map[string]interface{}) + if !ok { + continue + } + if comp["parent"] != "sfp2" { + continue + } + children++ + sd, _ := comp["sensor-data"].(map[string]interface{}) + if sd != nil && sd["value-type"] == "amperes" { + 
hasCurrent = true + } + if sd != nil && sd["value-type"] == "watts" { + hasPower = true + } + } + if children < 4 { + t.Fatalf("expected at least 4 child sensors, got %d", children) + } + if !hasCurrent || !hasPower { + t.Fatalf("expected current and power sensors under parent: current=%v power=%v", hasCurrent, hasPower) + } +} + +func TestHardwareThermalZone(t *testing.T) { + runner := &testutil.MockRunner{Results: map[string][]byte{ + "ls /sys/class/thermal": []byte("thermal_zone0\n"), + }, Errors: map[string]error{}} + fs := &testutil.MockFileReader{Files: map[string][]byte{ + "/run/system.json": []byte(`{}`), + "/sys/class/thermal/thermal_zone0/type": []byte("cpu-thermal\n"), + "/sys/class/thermal/thermal_zone0/temp": []byte("39000\n"), + }, Globs: map[string][]string{}} + + components := collectHardware(t, newHardwareCollector(runner, fs)) + sensor := getComponentByName(components, "cpu") + if sensor == nil { + t.Fatalf("expected thermal sensor cpu, got %v", components) + } + sd := sensor["sensor-data"].(map[string]interface{}) + if toInt(sd["value"]) != 39000 || sd["value-type"] != "celsius" { + t.Fatalf("thermal sensor mismatch: %v", sd) + } +} + +func TestHardwareNormalizeSensorName(t *testing.T) { + tests := []struct { + in string + want string + }{ + {in: "sfp_2", want: "sfp2"}, + {in: "mt7915_phy0", want: "phy0"}, + {in: "marvell_alaska_tomte_phy7", want: "phy7"}, + {in: "cpu_thermal", want: "cpu"}, + {in: "gpu-thermal", want: "gpu"}, + {in: "pwmfan", want: "pwmfan"}, + } + + for _, tt := range tests { + tt := tt + t.Run(tt.in, func(t *testing.T) { + got := normalize_sensor_name(tt.in) + if got != tt.want { + t.Errorf("normalize_sensor_name(%q): expected %q, got %q", tt.in, tt.want, got) + } + }) + } +} + +func TestHardwareGracefulDegradation(t *testing.T) { + runner := &testutil.MockRunner{Results: map[string][]byte{}, Errors: map[string]error{}} + fs := &testutil.MockFileReader{Files: map[string][]byte{}, Globs: map[string][]string{}} + + tr := 
tree.New() + c := newHardwareCollector(runner, fs) + if err := c.Collect(context.Background(), tr); err != nil { + t.Fatalf("Collect should not fail when all probes fail: %v", err) + } + + raw := tr.Get("ietf-hardware:hardware") + if raw == nil { + t.Fatal("expected ietf-hardware:hardware key even on probe failures") + } + + var out map[string]interface{} + if err := json.Unmarshal(raw, &out); err != nil { + t.Fatalf("unmarshal hardware: %v", err) + } + components, ok := out["component"].([]interface{}) + if !ok { + t.Fatalf("component list missing: %v", out["component"]) + } + if len(components) != 0 { + t.Fatalf("expected empty component list on total failure, got %d (%v)", len(components), components) + } +} + +func TestHardwareGPSDeviceNotFound(t *testing.T) { + // Bug 5: When /dev/gps* doesn't exist, readlink -f still succeeds + // (returns canonical form of non-existent path). Verify the existence + // check prevents phantom GPS components. + runner := &testutil.MockRunner{Results: map[string][]byte{}, Errors: map[string]error{ + "ls /dev/gps0": fmt.Errorf("No such file or directory"), + "ls /dev/gps1": fmt.Errorf("No such file or directory"), + "ls /dev/gps2": fmt.Errorf("No such file or directory"), + "ls /dev/gps3": fmt.Errorf("No such file or directory"), + }} + fs := &testutil.MockFileReader{Files: map[string][]byte{ + "/run/system.json": []byte(`{}`), + }, Globs: map[string][]string{}} + + components := collectHardware(t, newHardwareCollector(runner, fs)) + for _, c := range components { + m, ok := c.(map[string]interface{}) + if !ok { + continue + } + if m["class"] == "infix-hardware:gps" { + t.Fatalf("phantom GPS component should not exist when /dev/gps* missing: %v", m) + } + } +} diff --git a/src/yangerd/internal/collector/live.go b/src/yangerd/internal/collector/live.go new file mode 100644 index 000000000..764fcf59e --- /dev/null +++ b/src/yangerd/internal/collector/live.go @@ -0,0 +1,208 @@ +package collector + +import ( + "encoding/json" + 
"strconv" + "strings" + "syscall" + "time" +) + +// LiveSystemState computes the on-demand portion of ietf-system:system-state. +// It reads uptime, current time, memory, load average from procfs and +// filesystem usage via statfs — all computed fresh on each call. +// +// Installer status is handled separately via MergeInstaller to avoid +// shallow-merge clobbering the boot-time software data. +func LiveSystemState(fs FileReader) json.RawMessage { + state := make(map[string]interface{}) + + if clock := liveClock(fs); len(clock) > 0 { + state["clock"] = clock + } + + resource := make(map[string]interface{}) + if mem := liveMemory(fs); len(mem) > 0 { + resource["memory"] = mem + } + if la := liveLoadAvg(fs); len(la) > 0 { + resource["load-average"] = la + } + if filesys := liveFilesystems(); len(filesys) > 0 { + resource["filesystem"] = filesys + } + if len(resource) > 0 { + state["infix-system:resource-usage"] = resource + } + + data, err := json.Marshal(state) + if err != nil { + return nil + } + return data +} + +// MergeInstaller reads the cached software data from the tree and +// overlays the live installer status into it, returning the merged +// infix-system:software object as a top-level system-state fragment. 
+func MergeInstaller(cached json.RawMessage, inst InstallerStatus) json.RawMessage { + if inst == nil { + return nil + } + installer := liveInstaller(inst) + if len(installer) == 0 { + return nil + } + + var base map[string]json.RawMessage + if len(cached) > 0 { + json.Unmarshal(cached, &base) + } + if base == nil { + base = make(map[string]json.RawMessage) + } + + sw := make(map[string]interface{}) + if raw, ok := base["infix-system:software"]; ok { + json.Unmarshal(raw, &sw) + } + if sw == nil { + sw = make(map[string]interface{}) + } + sw["installer"] = installer + + swJSON, err := json.Marshal(sw) + if err != nil { + return nil + } + + result := map[string]json.RawMessage{ + "infix-system:software": swJSON, + } + out, err := json.Marshal(result) + if err != nil { + return nil + } + return out +} + +func liveInstaller(inst InstallerStatus) map[string]interface{} { + op, lastErr, pct, msg, err := inst.GetInstallStatus() + if err != nil { + return nil + } + installer := make(map[string]interface{}) + if op != "" { + installer["operation"] = op + } + if lastErr != "" { + installer["last-error"] = lastErr + } + if pct > 0 || msg != "" { + progress := make(map[string]interface{}) + if pct > 0 { + progress["percentage"] = pct + } + if msg != "" { + progress["message"] = msg + } + installer["progress"] = progress + } + return installer +} + +func liveClock(fs FileReader) map[string]interface{} { + data, err := fs.ReadFile("/proc/uptime") + if err != nil { + return nil + } + parts := strings.Fields(string(data)) + if len(parts) < 1 { + return nil + } + upSec, err := strconv.ParseFloat(parts[0], 64) + if err != nil { + return nil + } + + now := time.Now() + boot := now.Add(-time.Duration(upSec * float64(time.Second))) + + return map[string]interface{}{ + "current-datetime": yangDateTime(now), + "boot-datetime": yangDateTime(boot), + } +} + +func liveMemory(fs FileReader) map[string]interface{} { + data, err := fs.ReadFile("/proc/meminfo") + if err != nil { + return nil + 
} + + memFields := map[string]string{ + "MemTotal": "total", + "MemFree": "free", + "MemAvailable": "available", + } + + memory := make(map[string]interface{}) + for _, line := range strings.Split(string(data), "\n") { + parts := strings.SplitN(line, ":", 2) + if len(parts) != 2 { + continue + } + key := strings.TrimSpace(parts[0]) + jsonKey, ok := memFields[key] + if !ok { + continue + } + valStr := strings.TrimSpace(parts[1]) + fields := strings.Fields(valStr) + if len(fields) < 1 { + continue + } + memory[jsonKey] = fields[0] + } + return memory +} + +func liveLoadAvg(fs FileReader) map[string]interface{} { + data, err := fs.ReadFile("/proc/loadavg") + if err != nil { + return nil + } + fields := strings.Fields(string(data)) + if len(fields) < 3 { + return nil + } + return map[string]interface{}{ + "load-1min": fields[0], + "load-5min": fields[1], + "load-15min": fields[2], + } +} + +func liveFilesystems() []interface{} { + mounts := []string{"/", "/var", "/cfg"} + var filesystems []interface{} + + for _, mount := range mounts { + var stat syscall.Statfs_t + if err := syscall.Statfs(mount, &stat); err != nil { + continue + } + bsize := uint64(stat.Bsize) + sizeKB := (stat.Blocks * bsize) / 1024 + availKB := (stat.Bavail * bsize) / 1024 + usedKB := sizeKB - (stat.Bfree*bsize)/1024 + + filesystems = append(filesystems, map[string]interface{}{ + "mount-point": mount, + "size": strconv.FormatUint(sizeKB, 10), + "used": strconv.FormatUint(usedKB, 10), + "available": strconv.FormatUint(availKB, 10), + }) + } + return filesystems +} diff --git a/src/yangerd/internal/collector/live_test.go b/src/yangerd/internal/collector/live_test.go new file mode 100644 index 000000000..5daa0b06f --- /dev/null +++ b/src/yangerd/internal/collector/live_test.go @@ -0,0 +1,264 @@ +package collector + +import ( + "encoding/json" + "testing" + "time" + + "github.com/kernelkit/infix/src/yangerd/internal/testutil" +) + +func TestLiveClock(t *testing.T) { + fs := &testutil.MockFileReader{ + 
Files: map[string][]byte{ + "/proc/uptime": []byte("12345.67 23456.78\n"), + }, + } + + before := time.Now().Truncate(time.Second) + clock := liveClock(fs) + after := time.Now().Truncate(time.Second).Add(time.Second) + + if clock == nil { + t.Fatal("expected non-nil clock") + } + + cur, ok := clock["current-datetime"].(string) + if !ok || cur == "" { + t.Fatal("missing current-datetime") + } + parsed, err := time.Parse("2006-01-02T15:04:05-07:00", cur) + if err != nil { + t.Fatalf("invalid datetime format: %v", err) + } + if parsed.Before(before) || parsed.After(after) { + t.Fatalf("current-datetime %v not between %v and %v", parsed, before, after) + } + + boot, ok := clock["boot-datetime"].(string) + if !ok || boot == "" { + t.Fatal("missing boot-datetime") + } + _, err = time.Parse("2006-01-02T15:04:05-07:00", boot) + if err != nil { + t.Fatalf("invalid boot-datetime format: %v", err) + } +} + +func TestLiveClockMissingFile(t *testing.T) { + fs := &testutil.MockFileReader{ + Files: map[string][]byte{}, + } + if clock := liveClock(fs); clock != nil { + t.Fatalf("expected nil on missing /proc/uptime, got %v", clock) + } +} + +func TestLiveMemory(t *testing.T) { + fs := &testutil.MockFileReader{ + Files: map[string][]byte{ + "/proc/meminfo": []byte("MemTotal: 1024000 kB\nMemFree: 512000 kB\nMemAvailable: 768000 kB\nBuffers: 64000 kB\n"), + }, + } + + mem := liveMemory(fs) + if mem == nil { + t.Fatal("expected non-nil memory") + } + + checks := map[string]string{ + "total": "1024000", + "free": "512000", + "available": "768000", + } + for key, expected := range checks { + got, ok := mem[key].(string) + if !ok || got != expected { + t.Fatalf("memory[%q]: expected %q, got %v", key, expected, mem[key]) + } + } + + if _, has := mem["Buffers"]; has { + t.Fatal("unexpected Buffers field in memory output") + } +} + +func TestLiveMemoryMissingFile(t *testing.T) { + fs := &testutil.MockFileReader{ + Files: map[string][]byte{}, + } + if mem := liveMemory(fs); mem != nil { + 
t.Fatalf("expected nil on missing /proc/meminfo, got %v", mem) + } +} + +func TestLiveLoadAvg(t *testing.T) { + fs := &testutil.MockFileReader{ + Files: map[string][]byte{ + "/proc/loadavg": []byte("0.42 0.31 0.15 2/123 4567\n"), + }, + } + + la := liveLoadAvg(fs) + if la == nil { + t.Fatal("expected non-nil load average") + } + + checks := map[string]string{ + "load-1min": "0.42", + "load-5min": "0.31", + "load-15min": "0.15", + } + for key, expected := range checks { + got, ok := la[key].(string) + if !ok || got != expected { + t.Fatalf("load-average[%q]: expected %q, got %v", key, expected, la[key]) + } + } +} + +func TestLiveLoadAvgMissingFile(t *testing.T) { + fs := &testutil.MockFileReader{ + Files: map[string][]byte{}, + } + if la := liveLoadAvg(fs); la != nil { + t.Fatalf("expected nil on missing /proc/loadavg, got %v", la) + } +} + +func TestLiveSystemState(t *testing.T) { + fs := &testutil.MockFileReader{ + Files: map[string][]byte{ + "/proc/uptime": []byte("100.0 200.0\n"), + "/proc/meminfo": []byte("MemTotal: 2048000 kB\nMemFree: 1024000 kB\nMemAvailable: 1536000 kB\n"), + "/proc/loadavg": []byte("1.00 0.50 0.25 3/200 9999\n"), + }, + } + + raw := LiveSystemState(fs) + if raw == nil { + t.Fatal("expected non-nil LiveSystemState output") + } + + var state map[string]interface{} + if err := json.Unmarshal(raw, &state); err != nil { + t.Fatalf("unmarshal: %v", err) + } + + if _, ok := state["clock"]; !ok { + t.Fatal("missing clock in live state") + } + + resource, ok := state["infix-system:resource-usage"].(map[string]interface{}) + if !ok { + t.Fatal("missing infix-system:resource-usage in live state") + } + if _, ok := resource["memory"]; !ok { + t.Fatal("missing memory in resource-usage") + } + if _, ok := resource["load-average"]; !ok { + t.Fatal("missing load-average in resource-usage") + } +} + +func TestLiveSystemStatePartialFailure(t *testing.T) { + fs := &testutil.MockFileReader{ + Files: map[string][]byte{ + "/proc/loadavg": []byte("0.10 0.20 
0.30 1/50 1234\n"), + }, + } + + raw := LiveSystemState(fs) + if raw == nil { + t.Fatal("expected non-nil even with partial data") + } + + var state map[string]interface{} + json.Unmarshal(raw, &state) + + if _, ok := state["clock"]; ok { + t.Fatal("clock should be absent when /proc/uptime is missing") + } + + resource := state["infix-system:resource-usage"].(map[string]interface{}) + if _, ok := resource["memory"]; ok { + t.Fatal("memory should be absent when /proc/meminfo is missing") + } + if _, ok := resource["load-average"]; !ok { + t.Fatal("load-average should be present") + } +} + +type mockInstaller struct { + op, lastErr, msg string + pct int + err error +} + +func (m *mockInstaller) GetInstallStatus() (string, string, int, string, error) { + return m.op, m.lastErr, m.pct, m.msg, m.err +} + +func TestMergeInstaller(t *testing.T) { + cached := json.RawMessage(`{"infix-system:software":{"compatible":"infix-x86_64","booted":{"slot":"rootfs.0"}}}`) + inst := &mockInstaller{op: "installing", pct: 45, msg: "Writing rootfs"} + + raw := MergeInstaller(cached, inst) + if raw == nil { + t.Fatal("expected non-nil") + } + + var result map[string]json.RawMessage + json.Unmarshal(raw, &result) + + var sw map[string]interface{} + json.Unmarshal(result["infix-system:software"], &sw) + if sw == nil { + t.Fatal("missing infix-system:software") + } + if sw["compatible"] != "infix-x86_64" { + t.Fatalf("cached 'compatible' was lost: %v", sw["compatible"]) + } + if sw["booted"] == nil { + t.Fatal("cached 'booted' was lost") + } + installer, ok := sw["installer"].(map[string]interface{}) + if !ok { + t.Fatal("missing installer") + } + if installer["operation"] != "installing" { + t.Fatalf("operation = %v, want 'installing'", installer["operation"]) + } + progress, ok := installer["progress"].(map[string]interface{}) + if !ok { + t.Fatal("missing progress") + } + if toInt(progress["percentage"]) != 45 { + t.Fatalf("percentage = %v, want 45", progress["percentage"]) + } + if 
progress["message"] != "Writing rootfs" { + t.Fatalf("message = %v", progress["message"]) + } +} + +func TestMergeInstallerNilCached(t *testing.T) { + inst := &mockInstaller{op: "idle"} + raw := MergeInstaller(nil, inst) + if raw == nil { + t.Fatal("expected non-nil even with nil cached") + } + var result map[string]json.RawMessage + json.Unmarshal(raw, &result) + var sw map[string]interface{} + json.Unmarshal(result["infix-system:software"], &sw) + if sw["installer"] == nil { + t.Fatal("missing installer") + } +} + +func TestMergeInstallerNilInst(t *testing.T) { + cached := json.RawMessage(`{"infix-system:software":{"compatible":"infix-x86_64"}}`) + if raw := MergeInstaller(cached, nil); raw != nil { + t.Fatalf("expected nil with nil installer, got %s", raw) + } +} diff --git a/src/yangerd/internal/collector/ntp.go b/src/yangerd/internal/collector/ntp.go new file mode 100644 index 000000000..e8b1a8374 --- /dev/null +++ b/src/yangerd/internal/collector/ntp.go @@ -0,0 +1,542 @@ +package collector + +import ( + "context" + "encoding/json" + "fmt" + "math" + "strconv" + "strings" + "time" + + "github.com/kernelkit/infix/src/yangerd/internal/tree" +) + +// NTPCollector gathers ietf-ntp operational data by running chronyc +// commands (sources, sourcestats, tracking, serverstats) and ss to +// detect the NTP listening port. +type NTPCollector struct { + cmd CommandRunner + interval time.Duration +} + +// NewNTPCollector creates an NTPCollector with the given dependencies. +func NewNTPCollector(cmd CommandRunner, interval time.Duration) *NTPCollector { + return &NTPCollector{cmd: cmd, interval: interval} +} + +// Name implements Collector. +func (c *NTPCollector) Name() string { return "ntp" } + +// Interval implements Collector. +func (c *NTPCollector) Interval() time.Duration { return c.interval } + +// Collect implements Collector. It produces two tree keys: +// - "ietf-ntp:ntp" — associations, clock state, server status, and +// server statistics (RFC 9249). 
+// - "ietf-system:system-state" — merged infix-system:ntp/sources/source +// list with address, mode, state, stratum and poll for each chrony +// source (Infix augmentation of ietf-system). +func (c *NTPCollector) Collect(ctx context.Context, t *tree.Tree) error { + // Run chronyc sources once and share between addAssociations and addSources. + sourcesOut, _ := c.cmd.Run(ctx, "chronyc", "-c", "sources") + + ntp := make(map[string]interface{}) + + c.addAssociations(ctx, ntp, sourcesOut) + c.addClockState(ctx, ntp) + c.addServerStatus(ctx, ntp) + c.addServerStats(ctx, ntp) + + if len(ntp) > 0 { + if data, err := json.Marshal(ntp); err == nil { + t.Set("ietf-ntp:ntp", data) + } + } else { + // chronyd is not running (NTP unconfigured, or disabled by a + // config change). Drop the key so data from a previous run does + // not linger -- yangerd outlives config resets, so stale state + // would otherwise survive until restart. + t.Delete("ietf-ntp:ntp") + } + + // Always refresh the Infix NTP sources under system-state, even when + // empty. Otherwise a source that disappears from chrony (e.g. a DHCP + // lease without option 42, or NTP turned off) lingers as stale + // operational data -- a phantom "selected" server chronyc no longer + // reports. Merge only overwrites the keys it is given, so we must + // hand it an empty source list to clear a previously-populated one. + sources := c.addSources(sourcesOut) + if sources == nil { + sources = map[string]interface{}{ + "sources": map[string]interface{}{ + "source": []interface{}{}, + }, + } + } + if data, err := json.Marshal(map[string]interface{}{ + "infix-system:ntp": sources, + }); err == nil { + t.Merge("ietf-system:system-state", data) + } + + return nil +} + +// addAssociations parses chronyc sources and sourcestats CSV output +// into the associations/association list. 
+//
+// chronyc -c sources format (comma-separated):
+//
+//	[0] Mode: ^ server, = peer, # refclock (skipped)
+//	[1] State: * selected, + candidate, - outlier, ? unusable, x falseticker, ~ unstable
+//	[2] Address (IP)
+//	[3] Stratum
+//	[4] Poll interval (log2 seconds)
+//	[5] Reach (octal reachability register)
+//	[6] LastRx (seconds since last response)
+//	[7] Last offset (seconds)
+//	[8] Offset at last update (seconds)
+//	[9] Error estimate (seconds)
+//
+// chronyc -c sourcestats format:
+//
+//	[0] Address
+//	[1] NP
+//	[2] NR
+//	[3] Span
+//	[4] Frequency (ppm)
+//	[5] Freq Skew (ppm)
+//	[6] Offset (seconds)
+//	[7] Std Dev (seconds)
+//
+// Rows with fewer than 10 fields, reference clocks (mode "#") and rows
+// whose stratum is not an integer in 1..16 are skipped. Nothing is added
+// to ntp when sourcesOut is empty or when no row survives the filtering.
+// A failing sourcestats call is tolerated: offset then falls back to
+// sources[7] and dispersion is omitted.
+func (c *NTPCollector) addAssociations(ctx context.Context, ntp map[string]interface{}, sourcesOut []byte) {
+	if len(sourcesOut) == 0 {
+		return
+	}
+
+	// Build stats map from sourcestats for offset/dispersion
+	statsMap := make(map[string]map[string]string)
+	statsOut, err := c.cmd.Run(ctx, "chronyc", "-c", "sourcestats")
+	if err == nil {
+		for _, line := range splitLines(string(statsOut)) {
+			parts := strings.Split(line, ",")
+			if len(parts) >= 8 {
+				statsMap[parts[0]] = map[string]string{
+					"offset":  parts[6],
+					"std_dev": parts[7],
+				}
+			}
+		}
+	}
+
+	modeMap := map[string]string{
+		"^": "ietf-ntp:client",
+		"=": "ietf-ntp:active",
+		"#": "ietf-ntp:broadcast-client",
+	}
+
+	var associations []interface{}
+	for _, line := range splitLines(string(sourcesOut)) {
+		parts := strings.Split(line, ",")
+		if len(parts) < 10 {
+			continue
+		}
+
+		modeIndicator := parts[0]
+		// Skip reference clocks — they have names like "GPS", not IP addresses
+		if modeIndicator == "#" {
+			continue
+		}
+
+		stateIndicator := parts[1]
+		address := parts[2]
+		stratum, err := strconv.Atoi(parts[3])
+		if err != nil {
+			continue
+		}
+		// YANG requires stratum 1..16
+		if stratum < 1 || stratum > 16 {
+			continue
+		}
+
+		// Unknown mode indicators fall back to client. The fallback has to
+		// use the comma-ok lookup: a missing key yields "", and "" stored
+		// in a map[string]interface{} is a non-nil interface value, so a
+		// later "== nil" test on the map entry would never fire.
+		localMode, known := modeMap[modeIndicator]
+		if !known {
+			localMode = "ietf-ntp:client"
+		}
+
+		assoc := map[string]interface{}{
+			"address":      address,
+			"local-mode":   localMode,
+			"isconfigured": true,
+			"stratum":      stratum,
+		}
+
+		// Current sync source
+		if stateIndicator == "*" {
+			assoc["prefer"] = true
+		}
+
+		// Reachability register (octal → decimal)
+		if reach, err := strconv.ParseInt(parts[5], 8, 32); err == nil {
+			assoc["reach"] = int(reach)
+		}
+
+		// Poll interval (log2 seconds)
+		if poll, err := strconv.Atoi(parts[4]); err == nil {
+			assoc["poll"] = poll
+		}
+
+		// Time since last packet
+		if now, err := strconv.Atoi(parts[6]); err == nil {
+			assoc["now"] = now
+		}
+
+		// Offset: prefer sourcestats if available, else sources[7]
+		// Convert seconds → milliseconds with 3 fraction digits
+		if stats, ok := statsMap[address]; ok {
+			if offsetSec, err := strconv.ParseFloat(stats["offset"], 64); err == nil {
+				assoc["offset"] = fmt.Sprintf("%.3f", offsetSec*1000.0)
+			}
+		} else if offsetSec, err := strconv.ParseFloat(parts[7], 64); err == nil {
+			assoc["offset"] = fmt.Sprintf("%.3f", offsetSec*1000.0)
+		}
+
+		// Delay: error estimate from sources[9], seconds → milliseconds
+		if delaySec, err := strconv.ParseFloat(parts[9], 64); err == nil {
+			assoc["delay"] = fmt.Sprintf("%.3f", math.Abs(delaySec)*1000.0)
+		}
+
+		// Dispersion: std_dev from sourcestats, seconds → milliseconds
+		if stats, ok := statsMap[address]; ok {
+			if dispSec, err := strconv.ParseFloat(stats["std_dev"], 64); err == nil {
+				assoc["dispersion"] = fmt.Sprintf("%.3f", dispSec*1000.0)
+			}
+		}
+
+		associations = append(associations, assoc)
+	}
+
+	if len(associations) > 0 {
+		ntp["associations"] = map[string]interface{}{
+			"association": associations,
+		}
+	}
+}
+
+// sourceStateMap maps chronyc source-state indicators to YANG
+// infix-system source-state enum values.
+var sourceStateMap = map[string]string{ + "*": "selected", + "+": "candidate", + "-": "outlier", + "?": "unusable", + "x": "falseticker", + "~": "unstable", +} + +// sourceModeMap maps chronyc mode indicators to YANG +// infix-system source-mode enum values. +var sourceModeMap = map[string]string{ + "^": "server", + "=": "peer", + "#": "local-clock", +} + +// addSources builds the infix-system:ntp/sources/source list from +// chronyc -c sources output. Reference clocks (mode #) and sources +// with invalid stratum are skipped, matching the Python yanger +// ietf_system.py add_ntp() behaviour. +func (c *NTPCollector) addSources(sourcesOut []byte) map[string]interface{} { + if len(sourcesOut) == 0 { + return nil + } + + var sources []interface{} + for _, line := range splitLines(string(sourcesOut)) { + parts := strings.Split(line, ",") + if len(parts) < 10 { + continue + } + + modeIndicator := parts[0] + if modeIndicator == "#" { + continue + } + + stratum, err := strconv.Atoi(parts[3]) + if err != nil || stratum > 16 { + continue + } + + mode := sourceModeMap[modeIndicator] + if mode == "" { + mode = "server" + } + state := sourceStateMap[parts[1]] + if state == "" { + continue + } + + src := map[string]interface{}{ + "address": parts[2], + "mode": mode, + "state": state, + "stratum": stratum, + } + if poll, err := strconv.Atoi(parts[4]); err == nil { + src["poll"] = poll + } + + sources = append(sources, src) + } + + if len(sources) == 0 { + return nil + } + + return map[string]interface{}{ + "sources": map[string]interface{}{ + "source": sources, + }, + } +} + +// addClockState parses chronyc tracking CSV output into the clock-state +// container. +// +// chronyc -c tracking format (comma-separated): +// +// [0] Ref-ID (hex IP, e.g. "C0A80101") +// [1] Ref-ID name (e.g. 
"router.local")
+//	[2] Stratum
+//	[3] Ref time (seconds since epoch)
+//	[4] System time offset (seconds)
+//	[5] Last offset (seconds)
+//	[6] RMS offset (seconds)
+//	[7] Frequency (ppm)
+//	[8] Residual frequency (ppm)
+//	[9] Skew (ppm)
+//	[10] Root delay (seconds)
+//	[11] Root dispersion (seconds)
+//	[12] Update interval (seconds)
+//	[13] Leap status (e.g. "Normal", "Not synchronised")
+//
+// Only the first non-empty output line is parsed. If the command fails,
+// prints nothing, or yields fewer than 14 fields, ntp is left untouched.
+// Optional numeric leaves whose field fails to parse are omitted; an
+// unparsable or out-of-range stratum is reported as 16.
+func (c *NTPCollector) addClockState(ctx context.Context, ntp map[string]interface{}) {
+	out, err := c.cmd.Run(ctx, "chronyc", "-c", "tracking")
+	if err != nil || len(out) == 0 {
+		return
+	}
+
+	lines := splitLines(string(out))
+	if len(lines) == 0 {
+		return
+	}
+
+	parts := strings.Split(lines[0], ",")
+	if len(parts) < 14 {
+		return
+	}
+
+	ss := make(map[string]interface{})
+
+	// setScaled stores parts[idx]*scale under key, rendered with format.
+	// Fields that do not parse as a float are skipped.
+	setScaled := func(key string, idx int, scale float64, format string) {
+		if v, err := strconv.ParseFloat(parts[idx], 64); err == nil {
+			ss[key] = fmt.Sprintf(format, v*scale)
+		}
+	}
+
+	// Stratum: chronyd uses 0 for "not synchronized", YANG requires 1-16.
+	// Anything unparsable or outside that range is reported as 16 as well,
+	// so the leaf never carries a value the model would reject.
+	stratum, err := strconv.Atoi(parts[2])
+	if err != nil || stratum < 1 || stratum > 16 {
+		stratum = 16
+	}
+
+	if stratum == 16 {
+		ss["clock-state"] = "ietf-ntp:unsynchronized"
+	} else {
+		ss["clock-state"] = "ietf-ntp:synchronized"
+	}
+	ss["clock-stratum"] = stratum
+
+	// Reference ID
+	refidIP := parts[0]
+	refidName := parts[1]
+	switch {
+	case refidName != "":
+		// NTP refids are always 4 bytes; pad/truncate to exactly 4 chars.
+		// %-4.4s left-justifies to width 4 and truncates to 4 characters,
+		// so names of any length are handled without slicing.
+		ss["clock-refid"] = fmt.Sprintf("%-4.4s", refidName)
+	case len(refidIP) == 8:
+		// Eight hex digits: render as a dotted quad, most significant byte
+		// first. ParseUint rejects signs, so only real hex digits convert.
+		if v, err := strconv.ParseUint(refidIP, 16, 32); err == nil {
+			ss["clock-refid"] = fmt.Sprintf("%d.%d.%d.%d",
+				byte(v>>24), byte(v>>16), byte(v>>8), byte(v))
+		} else {
+			ss["clock-refid"] = refidIP
+		}
+	case refidIP != "":
+		ss["clock-refid"] = refidIP
+	default:
+		ss["clock-refid"] = "0.0.0.0"
+	}
+
+	// Frequencies (ppm → Hz with nominal 1GHz)
+	if freqPPM, err := strconv.ParseFloat(parts[7], 64); err == nil {
+		nominal := 1000000000.0
+		actual := nominal * (1.0 + freqPPM/1000000.0)
+		ss["nominal-freq"] = fmt.Sprintf("%.4f", nominal)
+		ss["actual-freq"] = fmt.Sprintf("%.4f", actual)
+	}
+
+	// Clock precision (fixed estimate, ~1µs)
+	ss["clock-precision"] = -20
+
+	// Seconds → milliseconds
+	setScaled("clock-offset", 4, 1000.0, "%.3f")
+	setScaled("root-delay", 10, 1000.0, "%.3f")
+	setScaled("root-dispersion", 11, 1000.0, "%.3f")
+
+	// Reference time (epoch seconds → ISO 8601); a zero or negative value
+	// is not reported.
+	if refTime, err := strconv.ParseFloat(parts[3], 64); err == nil && refTime > 0 {
+		sec := int64(refTime)
+		nsec := int64((refTime - float64(sec)) * 1e9)
+		t := time.Unix(sec, nsec).UTC()
+		ss["reference-time"] = t.Format("2006-01-02T15:04:05.000") + "Z"
+	}
+
+	// Sync state based on leap status
+	leapStatus := strings.TrimSpace(parts[13])
+	if leapStatus == "Not synchronised" || stratum == 16 {
+		ss["sync-state"] = "ietf-ntp:clock-never-set"
+	} else {
+		ss["sync-state"] = "ietf-ntp:clock-synchronized"
+	}
+
+	// Infix augmentations (values passed through in chrony's own units)
+	setScaled("infix-ntp:last-offset", 5, 1.0, "%.9f")
+	setScaled("infix-ntp:rms-offset", 6, 1.0, "%.9f")
+	setScaled("infix-ntp:residual-freq", 8, 1.0, "%.3f")
+	setScaled("infix-ntp:skew", 9, 1.0, "%.3f")
+	setScaled("infix-ntp:update-interval", 12, 1.0, "%.1f")
+
+	ntp["clock-state"] = map[string]interface{}{
+		"system-status": ss,
+	}
+}
+
+// addServerStatus adds the refclock-master stratum and listening port.
+// Must be called after addClockState so clock-state is available.
+//
+// The port is taken from the first `ss -ulnp` line that mentions chronyd
+// and is not bound to loopback. If ss fails or no such line has a numeric
+// port, "port" is not set.
+func (c *NTPCollector) addServerStatus(ctx context.Context, ntp map[string]interface{}) {
+	// Reuse stratum from clock-state if already populated
+	if cs, ok := ntp["clock-state"].(map[string]interface{}); ok {
+		if ss, ok := cs["system-status"].(map[string]interface{}); ok {
+			if stratum, ok := ss["clock-stratum"]; ok {
+				ntp["refclock-master"] = map[string]interface{}{
+					"master-stratum": stratum,
+				}
+			}
+		}
+	}
+
+	// Detect NTP listening port via ss
+	ssOut, err := c.cmd.Run(ctx, "ss", "-ulnp")
+	if err != nil {
+		return
+	}
+
+	for _, line := range splitLines(string(ssOut)) {
+		if !strings.Contains(line, "chronyd") {
+			continue
+		}
+		// Skip loopback (command socket)
+		if strings.Contains(line, "127.0.0.1") || strings.Contains(line, "[::1]") {
+			continue
+		}
+
+		fields := strings.Fields(line)
+		if len(fields) >= 5 {
+			// Fourth column is "local-address:port"; the port follows the
+			// last colon, which also works for bracketed IPv6 addresses.
+			localAddr := fields[3]
+			idx := strings.LastIndex(localAddr, ":")
+			if idx >= 0 {
+				portStr := localAddr[idx+1:]
+				if port, err := strconv.Atoi(portStr); err == nil {
+					ntp["port"] = port
+					break
+				}
+			}
+		}
+	}
+}
+
+// addServerStats parses chronyc serverstats CSV into ntp-statistics.
+//
+// chronyc -c serverstats format:
+//
+//	[0] NTP packets received
+//	[1] NTP packets dropped
+//	[2] Cmd packets received
+//	[3] Cmd packets dropped
+//	[4] Client log size active
+//	[5] Client log memory
+//	[6] Rate limit drops
+//	[7] NTP packets sent
+//	[8] NTP packets send fail
+//
+// Only columns 0, 1, 7 and 8 are exported. Only the first non-empty output
+// line is parsed. If the command fails, prints nothing, or yields fewer
+// than 9 fields, ntp is left untouched. A column that is not an unsigned
+// decimal number is skipped, and "ntp-statistics" is only set when at
+// least one counter parsed.
+func (c *NTPCollector) addServerStats(ctx context.Context, ntp map[string]interface{}) {
+	out, err := c.cmd.Run(ctx, "chronyc", "-c", "serverstats")
+	if err != nil || len(out) == 0 {
+		return
+	}
+
+	lines := splitLines(string(out))
+	if len(lines) == 0 {
+		return
+	}
+
+	parts := strings.Split(lines[0], ",")
+	if len(parts) < 9 {
+		return
+	}
+
+	// CSV column → ntp-statistics leaf name.
+	leaves := []struct {
+		col  int
+		name string
+	}{
+		{0, "packet-received"},
+		{1, "packet-dropped"},
+		{7, "packet-sent"},
+		{8, "packet-sent-fail"},
+	}
+
+	stats := make(map[string]interface{})
+	for _, leaf := range leaves {
+		// Parse as unsigned 64-bit rather than with strconv.Atoi: Atoi is
+		// limited to the platform int, so on 32-bit targets a counter above
+		// 2^31-1 would fail to parse and the leaf would silently vanish.
+		// Packet counters are never negative, so a sign is rejected too.
+		v, err := strconv.ParseUint(parts[leaf.col], 10, 64)
+		if err != nil {
+			continue
+		}
+		stats[leaf.name] = v
+	}
+
+	if len(stats) > 0 {
+		ntp["ntp-statistics"] = stats
+	}
+}
+
+// splitLines splits text into non-empty lines.
+func splitLines(text string) []string { + var lines []string + for _, line := range strings.Split(text, "\n") { + line = strings.TrimSpace(line) + if line != "" { + lines = append(lines, line) + } + } + return lines +} diff --git a/src/yangerd/internal/collector/ntp_test.go b/src/yangerd/internal/collector/ntp_test.go new file mode 100644 index 000000000..75a7629fc --- /dev/null +++ b/src/yangerd/internal/collector/ntp_test.go @@ -0,0 +1,494 @@ +package collector + +import ( + "context" + "encoding/json" + "strings" + "testing" + "time" + + "github.com/kernelkit/infix/src/yangerd/internal/testutil" + "github.com/kernelkit/infix/src/yangerd/internal/tree" +) + +const testChronycSources = `^,*,10.0.0.1,2,6,377,32,+0.000123,,0.000456 +^,+,10.0.0.2,3,7,377,64,-0.000789,,0.001234 +=,-,10.0.0.3,4,6,177,128,+0.001500,,0.002000 +#,,GPS,1,4,377,16,+0.000001,,0.000010 +^,?,10.0.0.4,0,6,0,0,+0.000000,,0.000000` + +const testChronycSourcestats = `10.0.0.1,15,8,256,0.001,-0.002,+0.000050,0.000100 +10.0.0.2,12,6,128,-0.005,0.003,-0.000300,0.000200 +10.0.0.3,8,4,64,0.010,-0.008,+0.001000,0.000500` + +const testChronycTracking = `C0A80001,router.local,2,1700000000.123,0.000045,-0.000012,0.000025,-1.500,0.003,0.050,0.004500,0.001200,64.0,Normal` + +const testChronycTrackingUnsync = `00000000,,0,0.000,0.000000,0.000000,0.000000,0.000,0.000,0.000,0.000000,0.000000,0.0,Not synchronised` + +const testChronycServerstats = `1000,5,200,3,1,8192,2,950,10` + +const testSSOutput = `State Recv-Q Send-Q Local Address:Port Peer Address:Port Process +UNCONN 0 0 0.0.0.0:123 0.0.0.0:* users:(("chronyd",pid=5441,fd=5)) +UNCONN 0 0 127.0.0.1:323 0.0.0.0:* users:(("chronyd",pid=5441,fd=1)) +` + +func newNTPCollector(runner *testutil.MockRunner) *NTPCollector { + return NewNTPCollector(runner, 60*time.Second) +} + +func ntpCollect(t *testing.T, runner *testutil.MockRunner) (map[string]interface{}, *tree.Tree) { + t.Helper() + c := newNTPCollector(runner) + tr := tree.New() + if err := 
c.Collect(context.Background(), tr); err != nil { + t.Fatalf("Collect failed: %v", err) + } + raw := tr.Get("ietf-ntp:ntp") + if raw == nil { + t.Fatal("missing ietf-ntp:ntp in tree") + } + var out map[string]interface{} + if err := json.Unmarshal(raw, &out); err != nil { + t.Fatalf("unmarshal ntp: %v", err) + } + return out, tr +} + +func fullNTPRunner() *testutil.MockRunner { + return &testutil.MockRunner{ + Results: map[string][]byte{ + "chronyc -c sources": []byte(testChronycSources), + "chronyc -c sourcestats": []byte(testChronycSourcestats), + "chronyc -c tracking": []byte(testChronycTracking), + "chronyc -c serverstats": []byte(testChronycServerstats), + "ss -ulnp": []byte(testSSOutput), + }, + Errors: map[string]error{}, + } +} + +func TestNTPCollectorNameAndInterval(t *testing.T) { + c := newNTPCollector(fullNTPRunner()) + if c.Name() != "ntp" { + t.Fatalf("expected name 'ntp', got %q", c.Name()) + } + if c.Interval() != 60*time.Second { + t.Fatalf("expected interval 60s, got %v", c.Interval()) + } +} + +func TestNTPAssociations(t *testing.T) { + out, _ := ntpCollect(t, fullNTPRunner()) + assocContainer := out["associations"].(map[string]interface{}) + assocs := assocContainer["association"].([]interface{}) + + // 5 sources minus GPS refclock (#) minus stratum-0 (10.0.0.4) = 3 + if len(assocs) != 3 { + t.Fatalf("expected 3 associations (refclock+stratum0 filtered), got %d", len(assocs)) + } + + byAddr := make(map[string]map[string]interface{}) + for _, a := range assocs { + am := a.(map[string]interface{}) + byAddr[am["address"].(string)] = am + } + + // 10.0.0.1: selected server (*), stratum 2 + a1 := byAddr["10.0.0.1"] + if a1 == nil { + t.Fatal("missing association for 10.0.0.1") + } + if a1["local-mode"] != "ietf-ntp:client" { + t.Fatalf("10.0.0.1 mode: expected ietf-ntp:client, got %v", a1["local-mode"]) + } + if a1["prefer"] != true { + t.Fatalf("10.0.0.1 should be preferred (selected source)") + } + if toInt(a1["stratum"]) != 2 { + 
t.Fatalf("10.0.0.1 stratum: expected 2, got %v", a1["stratum"]) + } + // Reach: 377 octal = 255 decimal + if toInt(a1["reach"]) != 255 { + t.Fatalf("10.0.0.1 reach: expected 255, got %v", a1["reach"]) + } + // Offset should come from sourcestats (0.000050s → 0.050ms) + if a1["offset"] != "0.050" { + t.Fatalf("10.0.0.1 offset: expected '0.050', got %v", a1["offset"]) + } + // Dispersion from sourcestats std_dev (0.000100s → 0.100ms) + if a1["dispersion"] != "0.100" { + t.Fatalf("10.0.0.1 dispersion: expected '0.100', got %v", a1["dispersion"]) + } + + // 10.0.0.3: peer mode (=) + a3 := byAddr["10.0.0.3"] + if a3 == nil { + t.Fatal("missing association for 10.0.0.3") + } + if a3["local-mode"] != "ietf-ntp:active" { + t.Fatalf("10.0.0.3 mode: expected ietf-ntp:active, got %v", a3["local-mode"]) + } + // Should NOT be preferred (state is -) + if _, hasPrefer := a3["prefer"]; hasPrefer { + t.Fatal("10.0.0.3 should not be preferred") + } +} + +func TestNTPSources(t *testing.T) { + _, tr := ntpCollect(t, fullNTPRunner()) + + raw := tr.Get("ietf-system:system-state") + if raw == nil { + t.Fatal("missing ietf-system:system-state in tree") + } + var state map[string]interface{} + if err := json.Unmarshal(raw, &state); err != nil { + t.Fatalf("unmarshal system-state: %v", err) + } + + ntpData, ok := state["infix-system:ntp"].(map[string]interface{}) + if !ok { + t.Fatal("missing infix-system:ntp in system-state") + } + sourcesContainer, ok := ntpData["sources"].(map[string]interface{}) + if !ok { + t.Fatal("missing sources in infix-system:ntp") + } + sources, ok := sourcesContainer["source"].([]interface{}) + if !ok { + t.Fatal("missing source list in sources") + } + + // 5 sources minus GPS refclock (#) = 4 (stratum 0 is kept) + if len(sources) != 4 { + t.Fatalf("expected 4 sources, got %d", len(sources)) + } + + byAddr := make(map[string]map[string]interface{}) + for _, s := range sources { + sm := s.(map[string]interface{}) + byAddr[sm["address"].(string)] = sm + } + + // 
10.0.0.1: selected server + s1 := byAddr["10.0.0.1"] + if s1 == nil { + t.Fatal("missing source 10.0.0.1") + } + if s1["state"] != "selected" { + t.Fatalf("10.0.0.1 state: expected selected, got %v", s1["state"]) + } + if s1["mode"] != "server" { + t.Fatalf("10.0.0.1 mode: expected server, got %v", s1["mode"]) + } + if toInt(s1["stratum"]) != 2 { + t.Fatalf("10.0.0.1 stratum: expected 2, got %v", s1["stratum"]) + } + if toInt(s1["poll"]) != 6 { + t.Fatalf("10.0.0.1 poll: expected 6, got %v", s1["poll"]) + } + + // 10.0.0.2: candidate server + s2 := byAddr["10.0.0.2"] + if s2 == nil { + t.Fatal("missing source 10.0.0.2") + } + if s2["state"] != "candidate" { + t.Fatalf("10.0.0.2 state: expected candidate, got %v", s2["state"]) + } + if s2["mode"] != "server" { + t.Fatalf("10.0.0.2 mode: expected server, got %v", s2["mode"]) + } + + // 10.0.0.3: outlier peer + s3 := byAddr["10.0.0.3"] + if s3 == nil { + t.Fatal("missing source 10.0.0.3") + } + if s3["state"] != "outlier" { + t.Fatalf("10.0.0.3 state: expected outlier, got %v", s3["state"]) + } + if s3["mode"] != "peer" { + t.Fatalf("10.0.0.3 mode: expected peer, got %v", s3["mode"]) + } + + // 10.0.0.4: unreachable server (stratum 0) + s4 := byAddr["10.0.0.4"] + if s4 == nil { + t.Fatal("missing source 10.0.0.4") + } + if s4["state"] != "unusable" { + t.Fatalf("10.0.0.4 state: expected unusable, got %v", s4["state"]) + } + if toInt(s4["stratum"]) != 0 { + t.Fatalf("10.0.0.4 stratum: expected 0, got %v", s4["stratum"]) + } +} + +// ntpSourceCount returns the number of infix-system:ntp sources in the +// system-state tree key, failing the test if the subtree is missing. 
+func ntpSourceCount(t *testing.T, tr *tree.Tree) int {
+	t.Helper()
+
+	raw := tr.Get("ietf-system:system-state")
+	if raw == nil {
+		t.Fatal("system-state not set")
+	}
+
+	// Decode one level at a time so a missing infix-system:ntp container
+	// is reported as such instead of being read as "zero sources".
+	var data map[string]json.RawMessage
+	if err := json.Unmarshal(raw, &data); err != nil {
+		t.Fatalf("unmarshal system-state: %v", err)
+	}
+	ntpRaw, ok := data["infix-system:ntp"]
+	if !ok {
+		t.Fatal("infix-system:ntp not present")
+	}
+
+	// Only the length of sources/source matters here, so the entries are
+	// kept as raw JSON.
+	var ntp struct {
+		Sources struct {
+			Source []json.RawMessage `json:"source"`
+		} `json:"sources"`
+	}
+	if err := json.Unmarshal(ntpRaw, &ntp); err != nil {
+		t.Fatalf("unmarshal infix-system:ntp: %v", err)
+	}
+	return len(ntp.Sources.Source)
+}
+
+// TestNTPSourcesEmpty runs a single poll in which only "chronyc -c tracking"
+// has a canned result and checks that the reported source list is empty.
+func TestNTPSourcesEmpty(t *testing.T) {
+	runner := &testutil.MockRunner{
+		Results: map[string][]byte{
+			"chronyc -c tracking": []byte(testChronycTracking),
+		},
+		Errors: map[string]error{},
+	}
+
+	c := newNTPCollector(runner)
+	tr := tree.New()
+	// Fail right here if the poll itself errors, rather than later with a
+	// less specific "system-state not set".
+	if err := c.Collect(context.Background(), tr); err != nil {
+		t.Fatalf("Collect failed: %v", err)
+	}
+
+	// With no chrony sources the collector must still write an empty
+	// source list, so a previously-reported source cannot linger as
+	// stale operational data.
+	if n := ntpSourceCount(t, tr); n != 0 {
+		t.Fatalf("expected empty NTP source list, got %d", n)
+	}
+}
+
+// When chronyd stops (NTP disabled via config reset), the whole
+// ietf-ntp:ntp key must disappear -- yangerd outlives config resets, so
+// a key that is only ever Set when non-empty would keep stale data from
+// a previous run forever.
+func TestNTPTreeKeyRemovedWhenChronydStops(t *testing.T) { + tr := tree.New() + + running := &testutil.MockRunner{ + Results: map[string][]byte{ + "chronyc -c sources": []byte(testChronycSources), + "chronyc -c tracking": []byte(testChronycTracking), + }, + Errors: map[string]error{}, + } + newNTPCollector(running).Collect(context.Background(), tr) + if tr.Get("ietf-ntp:ntp") == nil { + t.Fatal("expected ietf-ntp:ntp after first poll") + } + + stopped := &testutil.MockRunner{ + Results: map[string][]byte{}, + Errors: map[string]error{}, + } + newNTPCollector(stopped).Collect(context.Background(), tr) + if data := tr.Get("ietf-ntp:ntp"); data != nil { + t.Fatalf("stale ietf-ntp:ntp survived chronyd stop: %s", data) + } +} + +// A source that disappears from chrony (e.g. a DHCP NTP server that is no +// longer offered) must be cleared from operational, not left stale. +func TestNTPSourcesClearedWhenGone(t *testing.T) { + tr := tree.New() + + withSources := &testutil.MockRunner{ + Results: map[string][]byte{ + "chronyc -c sources": []byte(testChronycSources), + "chronyc -c tracking": []byte(testChronycTracking), + }, + Errors: map[string]error{}, + } + newNTPCollector(withSources).Collect(context.Background(), tr) + if ntpSourceCount(t, tr) == 0 { + t.Fatal("expected NTP sources after first poll") + } + + noSources := &testutil.MockRunner{ + Results: map[string][]byte{ + "chronyc -c tracking": []byte(testChronycTracking), + }, + Errors: map[string]error{}, + } + newNTPCollector(noSources).Collect(context.Background(), tr) + if n := ntpSourceCount(t, tr); n != 0 { + t.Fatalf("stale NTP sources not cleared: got %d, want 0", n) + } +} + +func TestNTPClockStateSynchronized(t *testing.T) { + out, _ := ntpCollect(t, fullNTPRunner()) + cs := out["clock-state"].(map[string]interface{}) + ss := cs["system-status"].(map[string]interface{}) + + if ss["clock-state"] != "ietf-ntp:synchronized" { + t.Fatalf("clock-state: expected synchronized, got %v", ss["clock-state"]) + } + if 
toInt(ss["clock-stratum"]) != 2 { + t.Fatalf("clock-stratum: expected 2, got %v", ss["clock-stratum"]) + } + // refid from name "router.local" → padded/truncated to 4 chars: "rout" + if ss["clock-refid"] != "rout" { + t.Fatalf("clock-refid: expected 'rout', got %v", ss["clock-refid"]) + } + if ss["sync-state"] != "ietf-ntp:clock-synchronized" { + t.Fatalf("sync-state: expected clock-synchronized, got %v", ss["sync-state"]) + } + if toInt(ss["clock-precision"]) != -20 { + t.Fatalf("clock-precision: expected -20, got %v", ss["clock-precision"]) + } + + // Verify nominal/actual freq strings + if ss["nominal-freq"] != "1000000000.0000" { + t.Fatalf("nominal-freq: expected '1000000000.0000', got %v", ss["nominal-freq"]) + } + + // Infix augmentations + if ss["infix-ntp:update-interval"] != "64.0" { + t.Fatalf("update-interval: expected '64.0', got %v", ss["infix-ntp:update-interval"]) + } + + // Reference time should be an ISO timestamp + refTime, ok := ss["reference-time"].(string) + if !ok || !strings.HasPrefix(refTime, "2023-") { + t.Fatalf("reference-time should be 2023-* ISO timestamp, got %v", ss["reference-time"]) + } +} + +func TestNTPClockStateUnsynchronized(t *testing.T) { + runner := &testutil.MockRunner{ + Results: map[string][]byte{ + "chronyc -c tracking": []byte(testChronycTrackingUnsync), + }, + Errors: map[string]error{}, + } + + c := newNTPCollector(runner) + tr := tree.New() + c.Collect(context.Background(), tr) + + raw := tr.Get("ietf-ntp:ntp") + if raw == nil { + t.Fatal("expected ietf-ntp:ntp even when unsynchronized") + } + var out map[string]interface{} + json.Unmarshal(raw, &out) + + cs := out["clock-state"].(map[string]interface{}) + ss := cs["system-status"].(map[string]interface{}) + + if ss["clock-state"] != "ietf-ntp:unsynchronized" { + t.Fatalf("clock-state: expected unsynchronized, got %v", ss["clock-state"]) + } + // Stratum 0 → 16 + if toInt(ss["clock-stratum"]) != 16 { + t.Fatalf("clock-stratum: expected 16 (mapped from 0), got %v", 
ss["clock-stratum"]) + } + if ss["sync-state"] != "ietf-ntp:clock-never-set" { + t.Fatalf("sync-state: expected clock-never-set, got %v", ss["sync-state"]) + } +} + +func TestNTPServerPort(t *testing.T) { + out, _ := ntpCollect(t, fullNTPRunner()) + + // Should find port 123 from the non-loopback ss line + if toInt(out["port"]) != 123 { + t.Fatalf("port: expected 123, got %v", out["port"]) + } +} + +func TestNTPRefclockMaster(t *testing.T) { + out, _ := ntpCollect(t, fullNTPRunner()) + master := out["refclock-master"].(map[string]interface{}) + if toInt(master["master-stratum"]) != 2 { + t.Fatalf("master-stratum: expected 2, got %v", master["master-stratum"]) + } +} + +func TestNTPServerStats(t *testing.T) { + out, _ := ntpCollect(t, fullNTPRunner()) + stats := out["ntp-statistics"].(map[string]interface{}) + + if toInt(stats["packet-received"]) != 1000 { + t.Fatalf("packet-received: expected 1000, got %v", stats["packet-received"]) + } + if toInt(stats["packet-dropped"]) != 5 { + t.Fatalf("packet-dropped: expected 5, got %v", stats["packet-dropped"]) + } + if toInt(stats["packet-sent"]) != 950 { + t.Fatalf("packet-sent: expected 950, got %v", stats["packet-sent"]) + } + if toInt(stats["packet-sent-fail"]) != 10 { + t.Fatalf("packet-sent-fail: expected 10, got %v", stats["packet-sent-fail"]) + } +} + +func TestNTPAllCommandsFail(t *testing.T) { + runner := &testutil.MockRunner{ + Results: map[string][]byte{}, + Errors: map[string]error{}, + } + + c := newNTPCollector(runner) + tr := tree.New() + err := c.Collect(context.Background(), tr) + if err != nil { + t.Fatalf("Collect should not error when chronyc unavailable: %v", err) + } + if tr.Get("ietf-ntp:ntp") != nil { + t.Fatal("should not set ietf-ntp:ntp when nothing to report") + } +} + +func TestNTPRefidHexToIPv4(t *testing.T) { + // When refid name is empty, hex ref-ID should be converted to dotted notation + runner := &testutil.MockRunner{ + Results: map[string][]byte{ + "chronyc -c tracking": 
[]byte("C0A80101,,2,1700000000.0,0.000001,0.000000,0.000000,-1.0,0.0,0.0,0.001,0.001,64.0,Normal"), + }, + Errors: map[string]error{}, + } + + c := newNTPCollector(runner) + tr := tree.New() + c.Collect(context.Background(), tr) + + var out map[string]interface{} + json.Unmarshal(tr.Get("ietf-ntp:ntp"), &out) + cs := out["clock-state"].(map[string]interface{}) + ss := cs["system-status"].(map[string]interface{}) + + // C0A80101 → 192.168.1.1 + if ss["clock-refid"] != "192.168.1.1" { + t.Fatalf("clock-refid: expected '192.168.1.1', got %v", ss["clock-refid"]) + } +} + +func TestSplitLines(t *testing.T) { + input := "line1\n\nline2\n \nline3\n" + got := splitLines(input) + if len(got) != 3 { + t.Fatalf("expected 3 lines, got %d: %v", len(got), got) + } + if got[0] != "line1" || got[1] != "line2" || got[2] != "line3" { + t.Fatalf("unexpected lines: %v", got) + } +} diff --git a/src/yangerd/internal/collector/routing.go b/src/yangerd/internal/collector/routing.go new file mode 100644 index 000000000..410a2e4b3 --- /dev/null +++ b/src/yangerd/internal/collector/routing.go @@ -0,0 +1,748 @@ +package collector + +import ( + "context" + "encoding/json" + "regexp" + "strconv" + "strings" + "time" + + "github.com/kernelkit/infix/src/yangerd/internal/tree" +) + +// RoutingCollector gathers ietf-routing operational data by merging +// OSPF, RIP, and BFD control-plane protocols into a single tree key. +// Each protocol contributes entries to the control-plane-protocol list +// under ietf-routing:routing. +type RoutingCollector struct { + cmd CommandRunner + interval time.Duration +} + +// NewRoutingCollector creates a RoutingCollector with the given dependencies. +func NewRoutingCollector(cmd CommandRunner, interval time.Duration) *RoutingCollector { + return &RoutingCollector{cmd: cmd, interval: interval} +} + +// Name implements Collector. +func (c *RoutingCollector) Name() string { return "routing" } + +// Interval implements Collector. 
+func (c *RoutingCollector) Interval() time.Duration { return c.interval } + +// Collect implements Collector. It produces one tree key: +// "ietf-routing:routing" containing merged OSPF, RIP, and BFD data. +func (c *RoutingCollector) Collect(ctx context.Context, t *tree.Tree) error { + var protocols []interface{} + + if p := c.collectOSPF(ctx); p != nil { + protocols = append(protocols, p) + } + if p := c.collectRIP(ctx); p != nil { + protocols = append(protocols, p) + } + if p := c.collectBFD(ctx); p != nil { + protocols = append(protocols, p) + } + + if len(protocols) == 0 { + return nil + } + + routing := map[string]interface{}{ + "control-plane-protocols": map[string]interface{}{ + "control-plane-protocol": protocols, + }, + } + + if data, err := json.Marshal(routing); err == nil { + t.Merge("ietf-routing:routing", data) + } + return nil +} + +// --- OSPF --- + +var ospfIfaceStateMap = map[string]string{ + "DependUpon": "down", + "Down": "down", + "Waiting": "waiting", + "Loopback": "loopback", + "Point-To-Point": "point-to-point", + "DROther": "dr-other", + "Backup": "bdr", + "DR": "dr", +} + +func frrToIETFNeighborState(state string) string { + parts := strings.SplitN(state, "/", 2) + s := parts[0] + if s == "TwoWay" { + return "2-way" + } + return strings.ToLower(s) +} + +func frrToIETFNeighborRole(role string) string { + if role == "Backup" { + return "BDR" + } + return role +} + +func ospfNetworkType(nt string, p2mpNonBroadcast bool) string { + switch nt { + case "POINTOPOINT": + return "point-to-point" + case "BROADCAST": + return "broadcast" + case "POINTOMULTIPOINT": + if p2mpNonBroadcast { + return "point-to-multipoint" + } + return "hybrid" + case "NBMA": + return "non-broadcast" + default: + return "" + } +} + +func (c *RoutingCollector) collectOSPF(ctx context.Context) interface{} { + out, err := c.cmd.Run(ctx, "/usr/libexec/statd/ospf-status") + if err != nil { + return nil + } + + var data map[string]interface{} + if json.Unmarshal(out, &data) != 
nil || len(data) == 0 { + return nil + } + + ospf := map[string]interface{}{ + "ietf-ospf:areas": map[string]interface{}{}, + } + + if rid, ok := data["routerId"]; ok { + ospf["ietf-ospf:router-id"] = rid + } + ospf["ietf-ospf:address-family"] = "ipv4" + + areas := make([]interface{}, 0) + areasRaw, _ := data["areas"].(map[string]interface{}) + for areaID, valRaw := range areasRaw { + values, ok := valRaw.(map[string]interface{}) + if !ok { + continue + } + + area := map[string]interface{}{ + "ietf-ospf:area-id": areaID, + "ietf-ospf:interfaces": map[string]interface{}{}, + } + if at, ok := values["area-type"]; ok && at != nil { + area["ietf-ospf:area-type"] = at + } + + interfaces := make([]interface{}, 0) + ifacesRaw, _ := values["interfaces"].([]interface{}) + for _, ifaceRaw := range ifacesRaw { + iface, ok := ifaceRaw.(map[string]interface{}) + if !ok { + continue + } + + intf := map[string]interface{}{ + "name": iface["name"], + "ietf-ospf:neighbors": map[string]interface{}{}, + } + + setIfPresent(intf, "dr-router-id", iface, "drId") + setIfPresent(intf, "dr-ip-addr", iface, "drAddress") + setIfPresent(intf, "bdr-router-id", iface, "bdrId") + setIfPresent(intf, "bdr-ip-addr", iface, "bdrAddress") + + if v, ok := iface["timerPassiveIface"]; ok && v != nil { + intf["passive"] = true + } else { + intf["passive"] = false + } + + if v, ok := iface["ospfEnabled"]; ok { + intf["enabled"] = v + } + + if nt, ok := iface["networkType"].(string); ok { + p2mpNB, _ := iface["p2mpNonBroadcast"].(bool) + if it := ospfNetworkType(nt, p2mpNB); it != "" { + intf["interface-type"] = it + } + } + + if s, ok := iface["state"].(string); ok { + if mapped, ok := ospfIfaceStateMap[s]; ok { + intf["state"] = mapped + } else { + intf["state"] = "unknown" + } + } + + setIfPresentInt(intf, "priority", iface, "priority") + setIfPresentInt(intf, "cost", iface, "cost") + setIfPresentInt(intf, "dead-interval", iface, "timerDeadSecs") + setIfPresentInt(intf, "retransmit-interval", iface, 
"timerRetransmitSecs") + setIfPresentInt(intf, "transmit-delay", iface, "transmitDelaySecs") + + // Hello interval: milliseconds to seconds + if v := iface["timerMsecs"]; v != nil { + helloSec := toInt(v) / 1000 + if helloSec >= 1 { + intf["hello-interval"] = helloSec + } + } + + // Hello timer: remaining time in ms to seconds + if v := iface["timerHelloInMsecs"]; v != nil { + helloTimerSec := toInt(v) / 1000 + if helloTimerSec >= 1 { + intf["hello-timer"] = helloTimerSec + } + } + + // Wait timer + if v := iface["timerWaitSecs"]; v != nil { + waitSec := toInt(v) + if waitSec >= 1 { + intf["wait-timer"] = waitSec + } + } + + neighbors := make([]interface{}, 0) + neighsRaw, _ := iface["neighbors"].([]interface{}) + for _, neighRaw := range neighsRaw { + neigh, ok := neighRaw.(map[string]interface{}) + if !ok { + continue + } + + neighbor := map[string]interface{}{ + "neighbor-router-id": neigh["neighborIp"], + "address": neigh["ifaceAddress"], + } + + setIfPresentInt(neighbor, "priority", neigh, "nbrPriority") + + // Uptime: ms to seconds (infix augmentation) + if v := neigh["lastPrgrsvChangeMsec"]; v != nil { + neighbor["infix-routing:uptime"] = toInt(v) / 1000 + } + + // Dead timer: ms to seconds + if v := neigh["routerDeadIntervalTimerDueMsec"]; v != nil { + deadSec := toInt(v) / 1000 + if deadSec >= 1 { + neighbor["dead-timer"] = deadSec + } + } + + if s, ok := neigh["nbrState"].(string); ok { + neighbor["state"] = frrToIETFNeighborState(s) + } + + if role, ok := neigh["role"].(string); ok && role != "" { + neighbor["infix-routing:role"] = frrToIETFNeighborRole(role) + } + + // Interface name (infix augmentation) + ifName, _ := neigh["ifaceName"].(string) + localAddr, _ := neigh["localIfaceAddress"].(string) + if ifName != "" && localAddr != "" { + neighbor["infix-routing:interface-name"] = ifName + ":" + localAddr + } else if ifName != "" { + neighbor["infix-routing:interface-name"] = ifName + } + + setIfPresent(neighbor, "dr-router-id", neigh, 
"routerDesignatedId") + setIfPresent(neighbor, "bdr-router-id", neigh, "routerDesignatedBackupId") + + neighbors = append(neighbors, neighbor) + } + + intf["ietf-ospf:neighbors"] = map[string]interface{}{ + "ietf-ospf:neighbor": neighbors, + } + interfaces = append(interfaces, intf) + } + + area["ietf-ospf:interfaces"] = map[string]interface{}{ + "ietf-ospf:interface": interfaces, + } + areas = append(areas, area) + } + + // Add routes + c.addOSPFRoutes(ctx, ospf) + + ospf["ietf-ospf:areas"] = map[string]interface{}{ + "ietf-ospf:area": areas, + } + + return map[string]interface{}{ + "type": "infix-routing:ospfv2", + "name": "default", + "ietf-ospf:ospf": ospf, + } +} + +func (c *RoutingCollector) addOSPFRoutes(ctx context.Context, ospf map[string]interface{}) { + out, err := c.cmd.Run(ctx, "vtysh", "-c", "show ip ospf route json") + if err != nil { + return + } + + var data map[string]interface{} + if json.Unmarshal(out, &data) != nil { + return + } + + var routes []interface{} + for prefix, infoRaw := range data { + if !strings.Contains(prefix, "/") { + continue + } + + info, ok := infoRaw.(map[string]interface{}) + if !ok { + continue + } + + route := map[string]interface{}{ + "prefix": prefix, + } + + if rt, ok := info["routeType"].(string); ok { + parts := strings.Fields(rt) + if len(parts) > 1 { + switch parts[1] { + case "E1": + route["route-type"] = "external-1" + case "E2": + route["route-type"] = "external-2" + case "IA": + route["route-type"] = "inter-area" + } + } else if len(parts) > 0 && parts[0] == "N" { + route["route-type"] = "intra-area" + } + } + + if v := info["area"]; v != nil { + route["infix-routing:area-id"] = v + } + + if v := info["cost"]; v != nil { + route["metric"] = v + } else if v := info["metric"]; v != nil { + route["metric"] = v + } + + if v := info["tag"]; v != nil { + route["route-tag"] = v + } + + nexthops := make([]interface{}, 0) + hopsRaw, _ := info["nexthops"].([]interface{}) + for _, hopRaw := range hopsRaw { + hop, ok := 
hopRaw.(map[string]interface{}) + if !ok { + continue + } + nh := make(map[string]interface{}) + ip, _ := hop["ip"].(string) + if ip != "" && ip != " " { + nh["next-hop"] = ip + } else if da, ok := hop["directlyAttachedTo"].(string); ok { + nh["outgoing-interface"] = da + } + nexthops = append(nexthops, nh) + } + + route["next-hops"] = map[string]interface{}{ + "next-hop": nexthops, + } + routes = append(routes, route) + } + + if len(routes) > 0 { + ospf["ietf-ospf:local-rib"] = map[string]interface{}{ + "ietf-ospf:route": routes, + } + } +} + +// --- RIP --- + +var ripStatusUpdateRe = regexp.MustCompile(`Sending updates every (\d+) seconds`) +var ripStatusTimeoutRe = regexp.MustCompile(`Timeout after (\d+) seconds`) +var ripStatusFlushRe = regexp.MustCompile(`garbage collect after (\d+) seconds`) +var ripStatusMetricRe = regexp.MustCompile(`Default redistribution metric is (\d+)`) +var ripStatusDistanceRe = regexp.MustCompile(`Distance: \(default is (\d+)\)`) + +func (c *RoutingCollector) collectRIP(ctx context.Context) interface{} { + statusOut, err := c.cmd.Run(ctx, "vtysh", "-c", "show ip rip status") + if err != nil { + return nil + } + statusText := string(statusOut) + if statusText == "" { + return nil + } + + status := parseRIPStatus(statusText) + if len(status) == 0 { + return nil + } + + rip := make(map[string]interface{}) + + if v, ok := status["distance"]; ok { + rip["distance"] = v + } + if v, ok := status["default-metric"]; ok { + rip["default-metric"] = v + } + + timers := make(map[string]interface{}) + if v, ok := status["update-interval"]; ok { + timers["update-interval"] = v + } + if v, ok := status["invalid-interval"]; ok { + timers["invalid-interval"] = v + } + if v, ok := status["flush-interval"]; ok { + timers["flush-interval"] = v + } + if len(timers) > 0 { + rip["timers"] = timers + } + + if ifaces, ok := status["interfaces"].([]interface{}); ok && len(ifaces) > 0 { + var ifaceList []interface{} + for _, ifRaw := range ifaces { + ifData, ok 
:= ifRaw.(map[string]interface{}) + if !ok { + continue + } + entry := map[string]interface{}{ + "interface": ifData["name"], + "oper-status": "up", + } + if sv, ok := ifData["send-version"].(int); ok { + entry["send-version"] = strconv.Itoa(sv) + } + if rv, ok := ifData["recv-version"].(int); ok { + entry["receive-version"] = strconv.Itoa(rv) + } + ifaceList = append(ifaceList, entry) + } + if len(ifaceList) > 0 { + rip["interfaces"] = map[string]interface{}{ + "interface": ifaceList, + } + } + } + + routeOut, err := c.cmd.Run(ctx, "vtysh", "-c", "show ip route rip json") + if err == nil { + var routeData map[string]interface{} + if json.Unmarshal(routeOut, &routeData) == nil { + var routes []interface{} + for prefix, entriesRaw := range routeData { + if !strings.Contains(prefix, "/") { + continue + } + entries, ok := entriesRaw.([]interface{}) + if !ok || len(entries) == 0 { + continue + } + entry, ok := entries[0].(map[string]interface{}) + if !ok { + continue + } + + route := map[string]interface{}{ + "ipv4-prefix": prefix, + "route-type": "rip", + } + if m, ok := entry["metric"]; ok { + route["metric"] = toInt(m) + } + + nexthops, _ := entry["nexthops"].([]interface{}) + if len(nexthops) > 0 { + firstHop, _ := nexthops[0].(map[string]interface{}) + if ip, ok := firstHop["ip"].(string); ok && ip != "" { + route["next-hop"] = ip + } + if ifName, ok := firstHop["interfaceName"].(string); ok && ifName != "" { + route["interface"] = ifName + } + } + routes = append(routes, route) + } + + if len(routes) > 0 { + if _, ok := rip["ipv4"]; !ok { + rip["ipv4"] = make(map[string]interface{}) + } + rip["ipv4"].(map[string]interface{})["routes"] = map[string]interface{}{ + "route": routes, + } + rip["num-of-routes"] = len(routes) + } + } + } + + if neighs, ok := status["neighbors"].([]interface{}); ok && len(neighs) > 0 { + var neighborList []interface{} + for _, nRaw := range neighs { + nd, ok := nRaw.(map[string]interface{}) + if !ok { + continue + } + entry := 
map[string]interface{}{ + "ipv4-address": nd["address"], + } + if v, ok := nd["bad-packets"].(int); ok { + entry["bad-packets-rcvd"] = v + } + if v, ok := nd["bad-routes"].(int); ok { + entry["bad-routes-rcvd"] = v + } + neighborList = append(neighborList, entry) + } + if len(neighborList) > 0 { + if _, ok := rip["ipv4"]; !ok { + rip["ipv4"] = make(map[string]interface{}) + } + rip["ipv4"].(map[string]interface{})["neighbors"] = map[string]interface{}{ + "neighbor": neighborList, + } + } + } + + return map[string]interface{}{ + "type": "infix-routing:ripv2", + "name": "default", + "ietf-rip:rip": rip, + } +} + +// parseRIPStatus parses the text output of 'show ip rip status'. +func parseRIPStatus(text string) map[string]interface{} { + status := make(map[string]interface{}) + + if m := ripStatusUpdateRe.FindStringSubmatch(text); m != nil { + v, _ := strconv.Atoi(m[1]) + status["update-interval"] = v + } + if m := ripStatusTimeoutRe.FindStringSubmatch(text); m != nil { + v, _ := strconv.Atoi(m[1]) + status["invalid-interval"] = v + } + if m := ripStatusFlushRe.FindStringSubmatch(text); m != nil { + v, _ := strconv.Atoi(m[1]) + status["flush-interval"] = v + } + if m := ripStatusMetricRe.FindStringSubmatch(text); m != nil { + v, _ := strconv.Atoi(m[1]) + status["default-metric"] = v + } + if m := ripStatusDistanceRe.FindStringSubmatch(text); m != nil { + v, _ := strconv.Atoi(m[1]) + status["distance"] = v + } + + // Parse interface table + lines := strings.Split(text, "\n") + var interfaces []interface{} + inIfaceSection := false + for _, line := range lines { + line = strings.TrimSpace(line) + if strings.Contains(line, "Interface") && strings.Contains(line, "Send") && strings.Contains(line, "Recv") { + inIfaceSection = true + continue + } + if inIfaceSection && (strings.HasPrefix(line, "Routing for Networks:") || strings.HasPrefix(line, "Routing Information Sources:")) { + break + } + if inIfaceSection && line != "" { + parts := strings.Fields(line) + if len(parts) 
>= 3 && !strings.HasPrefix(line, "Interface") { + sendVer, err1 := strconv.Atoi(parts[1]) + recvVer, err2 := strconv.Atoi(parts[2]) + if err1 == nil && err2 == nil { + interfaces = append(interfaces, map[string]interface{}{ + "name": parts[0], + "send-version": sendVer, + "recv-version": recvVer, + }) + } + } + } + } + if len(interfaces) > 0 { + status["interfaces"] = interfaces + } + + // Parse Routing Information Sources table (neighbors) + var neighbors []interface{} + inNeighborSection := false + for _, line := range lines { + line = strings.TrimSpace(line) + if strings.HasPrefix(line, "Routing Information Sources:") { + inNeighborSection = true + continue + } + if inNeighborSection && strings.Contains(line, "Gateway") && strings.Contains(line, "BadPackets") { + continue + } + if inNeighborSection && (strings.HasPrefix(line, "Distance:") || (line == "" && len(neighbors) > 0)) { + break + } + if inNeighborSection && line != "" { + parts := strings.Fields(line) + if len(parts) >= 5 { + badPkts, err1 := strconv.Atoi(parts[1]) + badRoutes, err2 := strconv.Atoi(parts[2]) + if err1 == nil && err2 == nil { + neighbors = append(neighbors, map[string]interface{}{ + "address": parts[0], + "bad-packets": badPkts, + "bad-routes": badRoutes, + }) + } + } + } + } + if len(neighbors) > 0 { + status["neighbors"] = neighbors + } + + return status +} + +// --- BFD --- + +var bfdStateMap = map[string]string{ + "up": "up", + "down": "down", + "init": "init", + "adminDown": "adminDown", +} + +func (c *RoutingCollector) collectBFD(ctx context.Context) interface{} { + out, err := c.cmd.Run(ctx, "vtysh", "-c", "show bfd peers json") + if err != nil { + return nil + } + + var data []interface{} + if json.Unmarshal(out, &data) != nil || len(data) == 0 { + return nil + } + + var sessions []interface{} + for _, peerRaw := range data { + peer, ok := peerRaw.(map[string]interface{}) + if !ok { + continue + } + // Only process single-hop sessions (multihop == false) + if mh, _ := 
// strDefault returns v when it holds a non-empty string. For every other
// value (nil, a non-string type, or the empty string) it returns def.
func strDefault(v interface{}, def string) string {
	text, isString := v.(string)
	if !isString || text == "" {
		return def
	}
	return text
}
{ + "routeType": "N E2", + "area": "0.0.0.0", + "cost": 100, + "tag": 42, + "nexthops": [ + {"ip": " ", "directlyAttachedTo": "e0"} + ] + } +}` + +const testRIPStatus = `Routing Protocol is "rip" + Sending updates every 30 seconds with +/-50%, next due in 12 seconds + Timeout after 180 seconds, garbage collect after 120 seconds + Outgoing update filter list for all interface is not set + Incoming update filter list for all interface is not set + Default redistribution metric is 1 + Redistributing: + Default version control: send version 2, receive version 2 + Interface Send Recv Key-chain + e0 2 2 + e1 2 2 + Routing for Networks: + 10.0.0.0/24 + 10.0.1.0/24 + Routing Information Sources: + Gateway BadPackets BadRoutes Distance Last Update + 10.0.0.2 0 0 120 00:00:12 + 10.0.0.3 1 2 120 00:00:25 + Distance: (default is 120) +` + +const testRIPRoutes = `{ + "10.0.0.0/24": [ + { + "prefix": "10.0.0.0/24", + "protocol": "rip", + "metric": 1, + "nexthops": [ + {"ip": "10.0.0.2", "interfaceName": "e0"} + ] + } + ], + "10.0.1.0/24": [ + { + "prefix": "10.0.1.0/24", + "protocol": "rip", + "metric": 2, + "nexthops": [ + {"ip": "10.0.0.3", "interfaceName": "e1"} + ] + } + ] +}` + +const testBFDPeers = `[ + { + "multihop": false, + "peer": "10.0.0.2", + "interface": "e0", + "id": 1, + "remote-id": 2, + "status": "up", + "receive-interval": 300, + "transmit-interval": 300, + "detect-multiplier": 3 + }, + { + "multihop": true, + "peer": "10.0.0.99", + "interface": "e1", + "id": 5, + "remote-id": 6, + "status": "down" + } +]` + +func newRoutingCollector(runner *testutil.MockRunner) *RoutingCollector { + return NewRoutingCollector(runner, 10*time.Second) +} + +func routingCollect(t *testing.T, runner *testutil.MockRunner) map[string]interface{} { + t.Helper() + c := newRoutingCollector(runner) + tr := tree.New() + if err := c.Collect(context.Background(), tr); err != nil { + t.Fatalf("Collect failed: %v", err) + } + raw := tr.Get("ietf-routing:routing") + if raw == nil { + 
t.Fatal("missing ietf-routing:routing in tree") + } + var out map[string]interface{} + if err := json.Unmarshal(raw, &out); err != nil { + t.Fatalf("unmarshal routing: %v", err) + } + return out +} + +func fullRunner() *testutil.MockRunner { + return &testutil.MockRunner{ + Results: map[string][]byte{ + "/usr/libexec/statd/ospf-status": []byte(testOSPFStatus), + "vtysh -c show ip ospf route json": []byte(testOSPFRoutes), + "vtysh -c show ip rip status": []byte(testRIPStatus), + "vtysh -c show ip route rip json": []byte(testRIPRoutes), + "vtysh -c show bfd peers json": []byte(testBFDPeers), + }, + Errors: map[string]error{}, + } +} + +func TestRoutingCollectorNameAndInterval(t *testing.T) { + c := newRoutingCollector(fullRunner()) + if c.Name() != "routing" { + t.Fatalf("expected name 'routing', got %q", c.Name()) + } + if c.Interval() != 10*time.Second { + t.Fatalf("expected interval 10s, got %v", c.Interval()) + } +} + +func TestRoutingCollectorMergesThreeProtocols(t *testing.T) { + out := routingCollect(t, fullRunner()) + cpp := out["control-plane-protocols"].(map[string]interface{}) + protocols := cpp["control-plane-protocol"].([]interface{}) + if len(protocols) != 3 { + t.Fatalf("expected 3 protocols (OSPF+RIP+BFD), got %d", len(protocols)) + } + + types := make(map[string]bool) + for _, p := range protocols { + pm := p.(map[string]interface{}) + types[pm["type"].(string)] = true + } + for _, expected := range []string{"infix-routing:ospfv2", "infix-routing:ripv2", "infix-routing:bfdv1"} { + if !types[expected] { + t.Fatalf("missing protocol type %q; got %v", expected, types) + } + } +} + +// --- OSPF tests --- + +func getOSPFProtocol(t *testing.T, out map[string]interface{}) map[string]interface{} { + t.Helper() + cpp := out["control-plane-protocols"].(map[string]interface{}) + for _, p := range cpp["control-plane-protocol"].([]interface{}) { + pm := p.(map[string]interface{}) + if pm["type"] == "infix-routing:ospfv2" { + return pm + } + } + t.Fatal("OSPF 
protocol not found") + return nil +} + +func TestOSPFRouterID(t *testing.T) { + out := routingCollect(t, fullRunner()) + ospfProto := getOSPFProtocol(t, out) + ospf := ospfProto["ietf-ospf:ospf"].(map[string]interface{}) + if ospf["ietf-ospf:router-id"] != "10.0.0.1" { + t.Fatalf("router-id: expected 10.0.0.1, got %v", ospf["ietf-ospf:router-id"]) + } + if ospf["ietf-ospf:address-family"] != "ipv4" { + t.Fatalf("address-family: expected ipv4, got %v", ospf["ietf-ospf:address-family"]) + } +} + +func TestOSPFAreaAndInterfaces(t *testing.T) { + out := routingCollect(t, fullRunner()) + ospfProto := getOSPFProtocol(t, out) + ospf := ospfProto["ietf-ospf:ospf"].(map[string]interface{}) + areasContainer := ospf["ietf-ospf:areas"].(map[string]interface{}) + areas := areasContainer["ietf-ospf:area"].([]interface{}) + if len(areas) != 1 { + t.Fatalf("expected 1 area, got %d", len(areas)) + } + + area := areas[0].(map[string]interface{}) + if area["ietf-ospf:area-id"] != "0.0.0.0" { + t.Fatalf("area-id: expected 0.0.0.0, got %v", area["ietf-ospf:area-id"]) + } + + ifacesContainer := area["ietf-ospf:interfaces"].(map[string]interface{}) + ifaces := ifacesContainer["ietf-ospf:interface"].([]interface{}) + if len(ifaces) != 2 { + t.Fatalf("expected 2 interfaces, got %d", len(ifaces)) + } + + // First interface: e0 (DR) + e0 := ifaces[0].(map[string]interface{}) + if e0["name"] != "e0" { + t.Fatalf("interface[0] name: expected e0, got %v", e0["name"]) + } + if e0["state"] != "dr" { + t.Fatalf("interface[0] state: expected dr, got %v", e0["state"]) + } + if e0["interface-type"] != "broadcast" { + t.Fatalf("interface[0] type: expected broadcast, got %v", e0["interface-type"]) + } + if e0["passive"] != false { + t.Fatalf("e0 passive: expected false, got %v", e0["passive"]) + } + if e0["enabled"] != true { + t.Fatalf("e0 enabled: expected true, got %v", e0["enabled"]) + } + if e0["dr-router-id"] != "10.0.0.1" { + t.Fatalf("e0 dr-router-id: expected 10.0.0.1, got %v", 
e0["dr-router-id"]) + } + + // Check timer conversions (ms → seconds) + if toInt(e0["hello-interval"]) != 10 { + t.Fatalf("e0 hello-interval: expected 10, got %v", e0["hello-interval"]) + } + if toInt(e0["hello-timer"]) != 7 { + t.Fatalf("e0 hello-timer: expected 7, got %v", e0["hello-timer"]) + } + if toInt(e0["cost"]) != 10 { + t.Fatalf("e0 cost: expected 10, got %v", e0["cost"]) + } + + // Second interface: lo (passive loopback) + lo := ifaces[1].(map[string]interface{}) + if lo["name"] != "lo" { + t.Fatalf("interface[1] name: expected lo, got %v", lo["name"]) + } + if lo["state"] != "loopback" { + t.Fatalf("lo state: expected loopback, got %v", lo["state"]) + } + if lo["passive"] != true { + t.Fatalf("lo passive: expected true, got %v", lo["passive"]) + } + if lo["interface-type"] != "point-to-point" { + t.Fatalf("lo type: expected point-to-point, got %v", lo["interface-type"]) + } +} + +func TestOSPFNeighbors(t *testing.T) { + out := routingCollect(t, fullRunner()) + ospfProto := getOSPFProtocol(t, out) + ospf := ospfProto["ietf-ospf:ospf"].(map[string]interface{}) + areasContainer := ospf["ietf-ospf:areas"].(map[string]interface{}) + area := areasContainer["ietf-ospf:area"].([]interface{})[0].(map[string]interface{}) + ifacesContainer := area["ietf-ospf:interfaces"].(map[string]interface{}) + e0 := ifacesContainer["ietf-ospf:interface"].([]interface{})[0].(map[string]interface{}) + + neighborsContainer := e0["ietf-ospf:neighbors"].(map[string]interface{}) + neighbors := neighborsContainer["ietf-ospf:neighbor"].([]interface{}) + if len(neighbors) != 1 { + t.Fatalf("expected 1 neighbor, got %d", len(neighbors)) + } + + n := neighbors[0].(map[string]interface{}) + if n["neighbor-router-id"] != "10.0.0.2" { + t.Fatalf("neighbor router-id: expected 10.0.0.2, got %v", n["neighbor-router-id"]) + } + if n["address"] != "192.168.1.2" { + t.Fatalf("neighbor address: expected 192.168.1.2, got %v", n["address"]) + } + if n["state"] != "full" { + t.Fatalf("neighbor state: 
expected full, got %v", n["state"]) + } + if n["infix-routing:role"] != "BDR" { + t.Fatalf("neighbor role: expected BDR, got %v", n["infix-routing:role"]) + } + // Uptime: 120000ms → 120s + if toInt(n["infix-routing:uptime"]) != 120 { + t.Fatalf("neighbor uptime: expected 120, got %v", n["infix-routing:uptime"]) + } + // Dead timer: 35000ms → 35s + if toInt(n["dead-timer"]) != 35 { + t.Fatalf("neighbor dead-timer: expected 35, got %v", n["dead-timer"]) + } + // Interface name augmentation + if n["infix-routing:interface-name"] != "e0:192.168.1.1" { + t.Fatalf("neighbor interface-name: expected e0:192.168.1.1, got %v", n["infix-routing:interface-name"]) + } +} + +func TestOSPFRoutes(t *testing.T) { + out := routingCollect(t, fullRunner()) + ospfProto := getOSPFProtocol(t, out) + ospf := ospfProto["ietf-ospf:ospf"].(map[string]interface{}) + rib := ospf["ietf-ospf:local-rib"].(map[string]interface{}) + routes := rib["ietf-ospf:route"].([]interface{}) + if len(routes) != 2 { + t.Fatalf("expected 2 OSPF routes, got %d", len(routes)) + } + + routeByPrefix := make(map[string]map[string]interface{}) + for _, r := range routes { + rm := r.(map[string]interface{}) + routeByPrefix[rm["prefix"].(string)] = rm + } + + // Inter-area route + r1 := routeByPrefix["10.0.0.0/24"] + if r1 == nil { + t.Fatal("missing route 10.0.0.0/24") + } + if r1["route-type"] != "inter-area" { + t.Fatalf("route 10.0.0.0/24 type: expected inter-area, got %v", r1["route-type"]) + } + + // External-2 route with tag + r2 := routeByPrefix["10.0.1.0/24"] + if r2 == nil { + t.Fatal("missing route 10.0.1.0/24") + } + if r2["route-type"] != "external-2" { + t.Fatalf("route 10.0.1.0/24 type: expected external-2, got %v", r2["route-type"]) + } + // tag should be present + if r2["route-tag"] == nil { + t.Fatal("route 10.0.1.0/24 should have route-tag") + } + // Directly attached nexthop + nhs := r2["next-hops"].(map[string]interface{}) + nhList := nhs["next-hop"].([]interface{}) + if len(nhList) != 1 { + 
// getRIPProtocol returns the control-plane-protocol entry whose "type" is
// "infix-routing:ripv2" from a decoded routing subtree, failing the test
// when no such entry exists.
func getRIPProtocol(t *testing.T, out map[string]interface{}) map[string]interface{} {
	t.Helper()

	container := out["control-plane-protocols"].(map[string]interface{})
	entries := container["control-plane-protocol"].([]interface{})
	for i := range entries {
		entry := entries[i].(map[string]interface{})
		if entry["type"] != "infix-routing:ripv2" {
			continue
		}
		return entry
	}

	t.Fatal("RIP protocol not found")
	return nil
}
// getBFDProtocol returns the control-plane-protocol entry whose "type" is
// "infix-routing:bfdv1" from a decoded routing subtree, failing the test
// when no such entry exists.
func getBFDProtocol(t *testing.T, out map[string]interface{}) map[string]interface{} {
	t.Helper()

	container := out["control-plane-protocols"].(map[string]interface{})
	entries := container["control-plane-protocol"].([]interface{})
	for i := range entries {
		entry := entries[i].(map[string]interface{})
		if entry["type"] != "infix-routing:bfdv1" {
			continue
		}
		return entry
	}

	t.Fatal("BFD protocol not found")
	return nil
}
running["negotiated-tx-interval"]) + } + // detection-time = detect-multiplier * receive-interval * 1000 = 3 * 300 * 1000 = 900000 + if toInt(running["detection-time"]) != 900000 { + t.Fatalf("BFD detection-time: expected 900000, got %v", running["detection-time"]) + } +} + +// --- Graceful degradation tests --- + +func TestRoutingCollectorOSPFOnly(t *testing.T) { + runner := &testutil.MockRunner{ + Results: map[string][]byte{ + "/usr/libexec/statd/ospf-status": []byte(testOSPFStatus), + "vtysh -c show ip ospf route json": []byte(testOSPFRoutes), + }, + Errors: map[string]error{}, + } + + out := routingCollect(t, runner) + cpp := out["control-plane-protocols"].(map[string]interface{}) + protocols := cpp["control-plane-protocol"].([]interface{}) + if len(protocols) != 1 { + t.Fatalf("expected 1 protocol when only OSPF available, got %d", len(protocols)) + } + pm := protocols[0].(map[string]interface{}) + if pm["type"] != "infix-routing:ospfv2" { + t.Fatalf("expected OSPF protocol, got %v", pm["type"]) + } +} + +func TestRoutingCollectorAllFail(t *testing.T) { + runner := &testutil.MockRunner{ + Results: map[string][]byte{}, + Errors: map[string]error{}, + } + + c := newRoutingCollector(runner) + tr := tree.New() + err := c.Collect(context.Background(), tr) + if err != nil { + t.Fatalf("Collect should not error when all protocols fail: %v", err) + } + // No tree key should be set when there's nothing to report + if tr.Get("ietf-routing:routing") != nil { + t.Fatal("expected no ietf-routing:routing key when all protocols fail") + } +} + +func TestRIPStatusParsing(t *testing.T) { + status := parseRIPStatus(testRIPStatus) + + if status["update-interval"] != 30 { + t.Fatalf("update-interval: expected 30, got %v", status["update-interval"]) + } + if status["invalid-interval"] != 180 { + t.Fatalf("invalid-interval: expected 180, got %v", status["invalid-interval"]) + } + if status["flush-interval"] != 120 { + t.Fatalf("flush-interval: expected 120, got %v", 
status["flush-interval"]) + } + if status["default-metric"] != 1 { + t.Fatalf("default-metric: expected 1, got %v", status["default-metric"]) + } + if status["distance"] != 120 { + t.Fatalf("distance: expected 120, got %v", status["distance"]) + } + + ifaces := status["interfaces"].([]interface{}) + if len(ifaces) != 2 { + t.Fatalf("expected 2 parsed interfaces, got %d", len(ifaces)) + } + + neighs := status["neighbors"].([]interface{}) + if len(neighs) != 2 { + t.Fatalf("expected 2 parsed neighbors, got %d", len(neighs)) + } +} + +func TestFrrToIETFNeighborState(t *testing.T) { + tests := []struct { + input string + expected string + }{ + {"Full/DR", "full"}, + {"TwoWay/DROther", "2-way"}, + {"Init/DROther", "init"}, + {"Down/DROther", "down"}, + {"ExStart", "exstart"}, + } + for _, tt := range tests { + got := frrToIETFNeighborState(tt.input) + if got != tt.expected { + t.Fatalf("frrToIETFNeighborState(%q): expected %q, got %q", tt.input, tt.expected, got) + } + } +} + +func TestOSPFNetworkType(t *testing.T) { + tests := []struct { + nt string + p2mpNB bool + expected string + }{ + {"POINTOPOINT", false, "point-to-point"}, + {"BROADCAST", false, "broadcast"}, + {"POINTOMULTIPOINT", false, "hybrid"}, + {"POINTOMULTIPOINT", true, "point-to-multipoint"}, + {"NBMA", false, "non-broadcast"}, + {"UNKNOWN", false, ""}, + } + for _, tt := range tests { + got := ospfNetworkType(tt.nt, tt.p2mpNB) + if got != tt.expected { + t.Fatalf("ospfNetworkType(%q, %v): expected %q, got %q", tt.nt, tt.p2mpNB, tt.expected, got) + } + } +} + +func TestBFDMultihopFiltered(t *testing.T) { + // Ensure multihop peers don't appear in output + runner := &testutil.MockRunner{ + Results: map[string][]byte{ + "vtysh -c show bfd peers json": []byte(`[ + {"multihop": true, "peer": "10.0.0.99", "interface": "e1", "id": 5, "status": "up"} + ]`), + }, + Errors: map[string]error{}, + } + + c := newRoutingCollector(runner) + tr := tree.New() + c.Collect(context.Background(), tr) + + // BFD should not 
set anything when all peers are multihop + raw := tr.Get("ietf-routing:routing") + if raw != nil { + // If routing is set, BFD should not be present + var out map[string]interface{} + json.Unmarshal(raw, &out) + cpp := out["control-plane-protocols"].(map[string]interface{}) + protocols := cpp["control-plane-protocol"].([]interface{}) + for _, p := range protocols { + pm := p.(map[string]interface{}) + if pm["type"] == "infix-routing:bfdv1" { + t.Fatal("multihop-only BFD should not produce a protocol entry") + } + } + } +} diff --git a/src/yangerd/internal/collector/runner.go b/src/yangerd/internal/collector/runner.go new file mode 100644 index 000000000..6776b4e7c --- /dev/null +++ b/src/yangerd/internal/collector/runner.go @@ -0,0 +1,83 @@ +package collector + +import ( + "context" + "os" + "os/exec" + "path/filepath" + + "github.com/godbus/dbus/v5" +) + +// CommandRunner executes external commands and returns their stdout. +type CommandRunner interface { + Run(ctx context.Context, name string, args ...string) ([]byte, error) +} + +// FileReader reads files and globs paths on the filesystem. +type FileReader interface { + ReadFile(path string) ([]byte, error) + Glob(pattern string) ([]string, error) +} + +// InstallerStatus queries RAUC installation progress. +type InstallerStatus interface { + GetInstallStatus() (operation string, lastError string, percentage int, message string, err error) +} + +// ExecRunner is the production CommandRunner using os/exec. +type ExecRunner struct{} + +func (ExecRunner) Run(ctx context.Context, name string, args ...string) ([]byte, error) { + return exec.CommandContext(ctx, name, args...).Output() +} + +// OSFileReader is the production FileReader using the os package. 
+type OSFileReader struct{} + +func (OSFileReader) ReadFile(path string) ([]byte, error) { + return os.ReadFile(path) +} + +func (OSFileReader) Glob(pattern string) ([]string, error) { + return filepath.Glob(pattern) +} + +// DBusInstaller reads RAUC installation status from D-Bus properties. +type DBusInstaller struct{} + +func (DBusInstaller) GetInstallStatus() (string, string, int, string, error) { + conn, err := dbus.ConnectSystemBus() + if err != nil { + return "", "", 0, "", err + } + defer conn.Close() + + obj := conn.Object("de.pengutronix.rauc", "/") + + operation, _ := obj.GetProperty("de.pengutronix.rauc.Installer.Operation") + lastError, _ := obj.GetProperty("de.pengutronix.rauc.Installer.LastError") + + var pct int + var msg string + progress, err := obj.GetProperty("de.pengutronix.rauc.Installer.Progress") + if err == nil { + if vals, ok := progress.Value().([]interface{}); ok && len(vals) >= 2 { + if p, ok := vals[0].(int32); ok { + pct = int(p) + } + if s, ok := vals[1].(string); ok { + msg = s + } + } + } + + return variantString(operation), variantString(lastError), pct, msg, nil +} + +func variantString(v dbus.Variant) string { + if s, ok := v.Value().(string); ok { + return s + } + return "" +} diff --git a/src/yangerd/internal/collector/system.go b/src/yangerd/internal/collector/system.go new file mode 100644 index 000000000..fdae4c413 --- /dev/null +++ b/src/yangerd/internal/collector/system.go @@ -0,0 +1,120 @@ +package collector + +import ( + "context" + "encoding/json" + "strconv" + "time" + + "github.com/kernelkit/infix/src/yangerd/internal/tree" +) + +var platformKeyMap = map[string]string{ + "NAME": "os-name", + "VERSION_ID": "os-version", + "BUILD_ID": "os-release", + "ARCHITECTURE": "machine", +} + +// SystemCollector gathers ietf-system operational data. +type SystemCollector struct { + cmd CommandRunner + fs FileReader + interval time.Duration +} + +// NewSystemCollector creates a SystemCollector with the given dependencies. 
+func NewSystemCollector(cmd CommandRunner, fs FileReader, interval time.Duration) *SystemCollector { + return &SystemCollector{cmd: cmd, fs: fs, interval: interval} +} + +// Name implements Collector. +func (c *SystemCollector) Name() string { return "system" } + +// Interval implements Collector. +func (c *SystemCollector) Interval() time.Duration { return c.interval } + +// Collect implements Collector. It merges service data into +// "ietf-system:system-state". DNS is handled reactively by +// fswatcher on /var/lib/misc/resolv.conf. Other system-state +// subtrees (platform, software, users, hostname, timezone, clock, +// memory, load, filesystems) are populated by boot-once, reactive, +// or on-demand providers. +func (c *SystemCollector) Collect(ctx context.Context, t *tree.Tree) error { + state := make(map[string]interface{}) + + c.addServices(ctx, state) + + if data, err := json.Marshal(state); err == nil { + t.Merge("ietf-system:system-state", data) + } + return nil +} + +func (c *SystemCollector) addServices(ctx context.Context, state map[string]interface{}) { + out, err := c.cmd.Run(ctx, "initctl", "-j") + if err != nil { + return + } + + var initData []map[string]interface{} + if json.Unmarshal(out, &initData) != nil { + return + } + + var services []interface{} + for _, d := range initData { + pid, ok := d["pid"] + if !ok { + continue + } + identity, ok := d["identity"] + if !ok { + continue + } + svc := map[string]interface{}{ + "pid": toInt(pid), + "name": identity, + "status": d["status"], + "description": d["description"], + "statistics": map[string]interface{}{ + "memory-usage": strconv.Itoa(toInt(zeroIfNil(d["memory"]))), + "uptime": strconv.Itoa(toInt(zeroIfNil(d["uptime"]))), + "restart-count": toInt(zeroIfNil(d["restarts"])), + }, + } + services = append(services, svc) + } + + state["infix-system:services"] = map[string]interface{}{ + "service": services, + } +} + +func yangDateTime(t time.Time) string { + return 
t.Format("2006-01-02T15:04:05-07:00") +} + +func toInt(v interface{}) int { + switch n := v.(type) { + case float64: + return int(n) + case int: + return n + case json.Number: + i, _ := n.Int64() + return int(i) + case string: + i, _ := strconv.Atoi(n) + return i + default: + return 0 + } +} + +func zeroIfNil(v interface{}) interface{} { + if v == nil { + return 0 + } + return v +} diff --git a/src/yangerd/internal/collector/system_test.go b/src/yangerd/internal/collector/system_test.go new file mode 100644 index 000000000..a1fe43e39 --- /dev/null +++ b/src/yangerd/internal/collector/system_test.go @@ -0,0 +1,194 @@ +package collector + +import ( + "context" + "encoding/json" + "fmt" + "testing" + "time" + + "github.com/kernelkit/infix/src/yangerd/internal/testutil" + "github.com/kernelkit/infix/src/yangerd/internal/tree" +) + +const ( + testInitctlJSON = `[ + { + "identity": "sshd", + "pid": 123, + "status": "running", + "description": "OpenSSH daemon", + "memory": 4096000, + "uptime": 3600, + "restarts": 2 + }, + { + "identity": "sysklogd", + "pid": 456, + "status": "running", + "description": "System logger", + "memory": 2048000, + "uptime": 7200, + "restarts": 0 + } +]` +) + +func newTestCollector() (*SystemCollector, *testutil.MockRunner, *testutil.MockFileReader) { + runner := &testutil.MockRunner{ + Results: map[string][]byte{ + "initctl -j": []byte(testInitctlJSON), + }, + Errors: map[string]error{}, + } + + fs := &testutil.MockFileReader{ + Files: map[string][]byte{}, + Globs: map[string][]string{}, + } + + c := NewSystemCollector(runner, fs, 60*time.Second) + return c, runner, fs +} + +func collectToState(t *testing.T, c *SystemCollector) map[string]interface{} { + t.Helper() + tr := tree.New() + if err := c.Collect(context.Background(), tr); err != nil { + t.Fatalf("Collect failed: %v", err) + } + + stateRaw := tr.Get("ietf-system:system-state") + if stateRaw == nil { + t.Fatal("missing ietf-system:system-state in tree") + } + + state := 
make(map[string]interface{}) + if err := json.Unmarshal(stateRaw, &state); err != nil { + t.Fatalf("unmarshal system-state: %v", err) + } + return state +} + +func TestSystemCollectorName(t *testing.T) { + c, _, _ := newTestCollector() + if c.Name() != "system" { + t.Fatalf("expected name 'system', got %q", c.Name()) + } +} + +func TestSystemCollectorInterval(t *testing.T) { + c, _, _ := newTestCollector() + if c.Interval() != 60*time.Second { + t.Fatalf("expected interval 60s, got %v", c.Interval()) + } +} + +func TestSystemCollectorServices(t *testing.T) { + c, _, _ := newTestCollector() + state := collectToState(t, c) + + svcs, ok := state["infix-system:services"].(map[string]interface{}) + if !ok { + t.Fatal("missing infix-system:services in system-state") + } + + serviceList, ok := svcs["service"].([]interface{}) + if !ok || len(serviceList) != 2 { + t.Fatalf("expected 2 services, got %v", svcs["service"]) + } + + svc0 := serviceList[0].(map[string]interface{}) + if svc0["name"] != "sshd" { + t.Fatalf("service[0] name: expected sshd, got %v", svc0["name"]) + } + if int(svc0["pid"].(float64)) != 123 { + t.Fatalf("service[0] pid: expected 123, got %v", svc0["pid"]) + } + + stats := svc0["statistics"].(map[string]interface{}) + if stats["memory-usage"] != "4096000" { + t.Fatalf("service[0] memory-usage: expected '4096000', got %v", stats["memory-usage"]) + } + if stats["uptime"] != "3600" { + t.Fatalf("service[0] uptime: expected '3600', got %v", stats["uptime"]) + } + if int(stats["restart-count"].(float64)) != 2 { + t.Fatalf("service[0] restart-count: expected 2, got %v", stats["restart-count"]) + } +} + +func TestSystemCollectorCommandFailureGraceful(t *testing.T) { + runner := &testutil.MockRunner{ + Results: map[string][]byte{}, + Errors: map[string]error{ + "initctl -j": fmt.Errorf("not available"), + }, + } + + fs := &testutil.MockFileReader{ + Files: map[string][]byte{}, + Globs: map[string][]string{}, + } + + c := NewSystemCollector(runner, fs, 
60*time.Second) + tr := tree.New() + err := c.Collect(context.Background(), tr) + if err != nil { + t.Fatalf("Collect should not return error on partial failures: %v", err) + } + + if tr.Get("ietf-system:system-state") == nil { + t.Fatal("ietf-system:system-state should be set even with command failures") + } +} + +func TestSystemCollectorTreeKeys(t *testing.T) { + c, _, _ := newTestCollector() + tr := tree.New() + c.Collect(context.Background(), tr) + + keys := tr.Keys() + if len(keys) != 1 { + t.Fatalf("expected exactly 1 tree key, got %d: %v", len(keys), keys) + } + if keys[0] != "ietf-system:system-state" { + t.Fatalf("expected tree key 'ietf-system:system-state', got %q", keys[0]) + } +} + +func TestSystemCollectorServicesNilFields(t *testing.T) { + runner := &testutil.MockRunner{ + Results: map[string][]byte{ + "initctl -j": []byte(`[{"identity":"minimal","pid":999,"status":"running","description":"Minimal service"}]`), + }, + Errors: map[string]error{}, + } + + fs := &testutil.MockFileReader{ + Files: map[string][]byte{}, + Globs: map[string][]string{}, + } + + c := NewSystemCollector(runner, fs, 60*time.Second) + state := collectToState(t, c) + + svcs := state["infix-system:services"].(map[string]interface{}) + serviceList := svcs["service"].([]interface{}) + if len(serviceList) != 1 { + t.Fatalf("expected 1 service, got %d", len(serviceList)) + } + + svc := serviceList[0].(map[string]interface{}) + stats := svc["statistics"].(map[string]interface{}) + + if stats["memory-usage"] != "0" { + t.Fatalf("nil memory should become '0', got %v", stats["memory-usage"]) + } + if stats["uptime"] != "0" { + t.Fatalf("nil uptime should become '0', got %v", stats["uptime"]) + } + if int(stats["restart-count"].(float64)) != 0 { + t.Fatalf("nil restarts should become 0, got %v", stats["restart-count"]) + } +} diff --git a/src/yangerd/internal/config/config.go b/src/yangerd/internal/config/config.go new file mode 100644 index 000000000..3d0ea7c0b --- /dev/null +++ 
b/src/yangerd/internal/config/config.go @@ -0,0 +1,75 @@ +package config + +import ( + "os" + "strconv" + "time" +) + +// Config holds all yangerd runtime configuration, populated from +// environment variables with sensible defaults. +type Config struct { + Socket string + LogLevel string + PollSystem time.Duration + PollRouting time.Duration + PollNTP time.Duration + PollHardware time.Duration + PollSTP time.Duration + EnableWifi bool + EnableLLDP bool + EnableFirewall bool + EnableDHCP bool + EnableContainers bool + EnableGPS bool +} + +// Load reads configuration from the environment. +func Load() *Config { + return &Config{ + Socket: envStr("YANGERD_SOCKET", "/run/yangerd.sock"), + LogLevel: envStr("YANGERD_LOG_LEVEL", "info"), + PollSystem: envDur("YANGERD_POLL_INTERVAL_SYSTEM", 60*time.Second), + PollRouting: envDur("YANGERD_POLL_INTERVAL_ROUTING", 10*time.Second), + PollNTP: envDur("YANGERD_POLL_INTERVAL_NTP", 60*time.Second), + PollHardware: envDur("YANGERD_POLL_INTERVAL_HARDWARE", 10*time.Second), + PollSTP: envDur("YANGERD_POLL_INTERVAL_STP", 5*time.Second), + EnableWifi: envBool("YANGERD_ENABLE_WIFI", false), + EnableLLDP: envBool("YANGERD_ENABLE_LLDP", true), + EnableFirewall: envBool("YANGERD_ENABLE_FIREWALL", true), + EnableDHCP: envBool("YANGERD_ENABLE_DHCP", true), + EnableContainers: envBool("YANGERD_ENABLE_CONTAINERS", false), + EnableGPS: envBool("YANGERD_ENABLE_GPS", false), + } +} + +func envStr(key, def string) string { + if v := os.Getenv(key); v != "" { + return v + } + return def +} + +func envBool(key string, def bool) bool { + v := os.Getenv(key) + if v == "" { + return def + } + b, err := strconv.ParseBool(v) + if err != nil { + return def + } + return b +} + +func envDur(key string, def time.Duration) time.Duration { + v := os.Getenv(key) + if v == "" { + return def + } + d, err := time.ParseDuration(v) + if err != nil { + return def + } + return d +} diff --git a/src/yangerd/internal/containermonitor/containermonitor.go 
b/src/yangerd/internal/containermonitor/containermonitor.go new file mode 100644 index 000000000..70e1556b3 --- /dev/null +++ b/src/yangerd/internal/containermonitor/containermonitor.go @@ -0,0 +1,181 @@ +// Package containermonitor keeps the infix-containers subtree in the tree +// in sync with podman. A persistent `podman events` subprocess is used +// purely as a change trigger; on every event the full container table is +// re-read with `podman ps` (via collector.CollectContainers) and the +// subtree replaced, so removed containers disappear and containers present +// before yangerd started are picked up. +// +// This replaces an earlier inotify watch on /run/libpod/events, which was +// reactive-only and silently went stale whenever an event was missed +// (debounce coalescing, inotify overflow, a removal racing the re-read, or +// yangerd starting after the container). `podman events` reads whichever +// events backend podman is configured for (file or journald), so it does +// not depend on a specific on-disk layout. +package containermonitor + +import ( + "bufio" + "context" + "encoding/json" + "fmt" + "io" + "log/slog" + "os/exec" + "time" + + "github.com/kernelkit/infix/src/yangerd/internal/backoff" + "github.com/kernelkit/infix/src/yangerd/internal/collector" + "github.com/kernelkit/infix/src/yangerd/internal/tree" +) + +const ( + treeKey = "infix-containers:containers" + + // debounceDelay coalesces bursts of events into one re-read. + debounceDelay = 200 * time.Millisecond +) + +// ContainerMonitor subscribes to container lifecycle events via a +// persistent `podman events` subprocess and re-reads the full container +// table on every event. +type ContainerMonitor struct { + tree *tree.Tree + log *slog.Logger + refresh chan struct{} + + // collect returns the current container subtree, or nil when there are + // no containers; overridable in tests. + collect func() json.RawMessage +} + +// New creates a ContainerMonitor. 
+func New(t *tree.Tree, cmd collector.CommandRunner, fs collector.FileReader, log *slog.Logger) *ContainerMonitor { + if log == nil { + log = slog.Default() + } + return &ContainerMonitor{ + tree: t, + log: log, + refresh: make(chan struct{}, 1), + collect: func() json.RawMessage { return collector.CollectContainers(cmd, fs) }, + } +} + +// Run starts the container monitor. It blocks until ctx is cancelled, +// restarting the events subprocess with backoff if it exits. +func (m *ContainerMonitor) Run(ctx context.Context) error { + go m.refreshLoop(ctx) + + bo := backoff.Default() + delay := bo.Initial + + for { + err := m.runOnce(ctx) + if ctx.Err() != nil { + return ctx.Err() + } + + m.log.Warn("container monitor: subprocess exited, restarting", + "err", err, "delay", delay) + if err := backoff.Sleep(ctx, delay); err != nil { + return err + } + delay = bo.Next(delay) + } +} + +func (m *ContainerMonitor) runOnce(ctx context.Context) error { + cmd := exec.CommandContext(ctx, "podman", "events", "--filter", "type=container", "--format", "json") + stdout, err := cmd.StdoutPipe() + if err != nil { + return fmt.Errorf("stdout pipe: %w", err) + } + if err := cmd.Start(); err != nil { + return fmt.Errorf("start podman events: %w", err) + } + defer cmd.Wait() + + // Pick up containers that existed before we attached. + m.triggerRefresh() + + return m.readEvents(stdout) +} + +// readEvents consumes the newline-delimited JSON event stream. Each event +// is only a trigger; the payload is never used to build state. 
+func (m *ContainerMonitor) readEvents(r io.Reader) error { + scanner := bufio.NewScanner(r) + scanner.Buffer(make([]byte, 0, 64*1024), 1*1024*1024) + + for scanner.Scan() { + line := scanner.Bytes() + if len(line) == 0 { + continue + } + if status := eventStatus(line); status != "" { + m.log.Debug("container monitor: event", "status", status) + } + m.triggerRefresh() + } + if err := scanner.Err(); err != nil { + return fmt.Errorf("read podman events: %w", err) + } + return fmt.Errorf("podman events process exited") +} + +// eventStatus extracts the event status for logging; best-effort only. +func eventStatus(line []byte) string { + var ev struct { + Status string `json:"Status"` + } + if json.Unmarshal(line, &ev) != nil { + return "" + } + return ev.Status +} + +// triggerRefresh requests a table re-read; the buffered channel collapses +// pending requests into one. +func (m *ContainerMonitor) triggerRefresh() { + select { + case m.refresh <- struct{}{}: + default: + } +} + +func (m *ContainerMonitor) refreshLoop(ctx context.Context) { + for { + select { + case <-ctx.Done(): + return + case <-m.refresh: + } + + // Let a burst of events settle before reading. + select { + case <-ctx.Done(): + return + case <-time.After(debounceDelay): + } + select { + case <-m.refresh: + default: + } + + m.updateTree() + } +} + +// updateTree re-reads the full container table and replaces the subtree. +// With no containers the key is deleted rather than left as an empty node, +// so an idle-but-enabled container feature reads as absent. 
+func (m *ContainerMonitor) updateTree() { + data := m.collect() + if len(data) == 0 { + m.tree.Delete(treeKey) + m.log.Debug("container monitor: no containers, key removed") + return + } + m.tree.Set(treeKey, data) + m.log.Debug("container monitor: tree updated") +} diff --git a/src/yangerd/internal/containermonitor/containermonitor_test.go b/src/yangerd/internal/containermonitor/containermonitor_test.go new file mode 100644 index 000000000..6634f4bd3 --- /dev/null +++ b/src/yangerd/internal/containermonitor/containermonitor_test.go @@ -0,0 +1,88 @@ +package containermonitor + +import ( + "context" + "encoding/json" + "strings" + "testing" + "time" + + "github.com/kernelkit/infix/src/yangerd/internal/tree" +) + +// newTestMonitor builds a monitor whose collect() is driven by the test. +// cmd/fs are nil since collect is overridden, so the default closure that +// would use them is never called. +func newTestMonitor(t *testing.T, collect func() json.RawMessage) (*ContainerMonitor, *tree.Tree) { + t.Helper() + tr := tree.New() + m := New(tr, nil, nil, nil) + m.collect = collect + return m, tr +} + +func TestUpdateTreeSetsContainers(t *testing.T) { + m, tr := newTestMonitor(t, func() json.RawMessage { + return json.RawMessage(`{"container":[{"name":"web"}]}`) + }) + + m.updateTree() + + got := tr.Get(treeKey) + if got == nil || !strings.Contains(string(got), "web") { + t.Fatalf("expected container data, got %s", got) + } +} + +// With no containers the key must be deleted, not left as an empty node, +// so an idle-but-enabled container feature reads as absent. 
+func TestUpdateTreeDeletesWhenEmpty(t *testing.T) { + m, tr := newTestMonitor(t, func() json.RawMessage { return nil }) + + tr.Set(treeKey, json.RawMessage(`{"container":[{"name":"old"}]}`)) + m.updateTree() + + if got := tr.Get(treeKey); got != nil { + t.Fatalf("expected key removed when no containers, got %s", got) + } +} + +// An event in the stream must trigger a re-read; here the re-read clears a +// previously-present container, proving the stream drives reconciliation. +func TestEventTriggersRefresh(t *testing.T) { + calls := 0 + m, tr := newTestMonitor(t, func() json.RawMessage { + calls++ + return nil // container is gone + }) + tr.Set(treeKey, json.RawMessage(`{"container":[{"name":"gone"}]}`)) + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + go m.refreshLoop(ctx) + + // A container "died" event, newline-framed as podman emits it. + go m.readEvents(strings.NewReader(`{"Type":"container","Status":"died","Name":"gone"}` + "\n")) + + deadline := time.After(2 * time.Second) + for { + if tr.Get(treeKey) == nil && calls > 0 { + break + } + select { + case <-deadline: + t.Fatalf("event did not trigger reconcile; calls=%d tree=%s", calls, tr.Get(treeKey)) + default: + time.Sleep(10 * time.Millisecond) + } + } +} + +func TestEventStatus(t *testing.T) { + if s := eventStatus([]byte(`{"Status":"start"}`)); s != "start" { + t.Errorf("eventStatus = %q, want start", s) + } + if s := eventStatus([]byte(`not json`)); s != "" { + t.Errorf("eventStatus on garbage = %q, want empty", s) + } +} diff --git a/src/yangerd/internal/dbusmonitor/dbusmonitor.go b/src/yangerd/internal/dbusmonitor/dbusmonitor.go new file mode 100644 index 000000000..b66c3b423 --- /dev/null +++ b/src/yangerd/internal/dbusmonitor/dbusmonitor.go @@ -0,0 +1,1048 @@ +// Package dbusmonitor watches D-Bus signals from dnsmasq and firewalld +// and keeps their operational YANG subtrees updated. 
+package dbusmonitor + +import ( + "context" + "encoding/json" + "fmt" + "log/slog" + "os" + "strconv" + "strings" + "time" + + "github.com/godbus/dbus/v5" + "github.com/kernelkit/infix/src/yangerd/internal/backoff" + "github.com/kernelkit/infix/src/yangerd/internal/tree" +) + +const ( + dnsmasqBusName = "uk.org.thekelleys.dnsmasq" + dnsmasqInterface = "uk.org.thekelleys.dnsmasq" + dnsmasqPath = "/uk/org/thekelleys/dnsmasq" + + firewalldBusName = "org.fedoraproject.FirewallD1" + firewalldInterface = "org.fedoraproject.FirewallD1" + firewalldPath = "/org/fedoraproject/FirewallD1" + + dbusInterface = "org.freedesktop.DBus" + dbusPath = "/org/freedesktop/DBus" + + dnsmasqLeaseFile = "/var/lib/misc/dnsmasq.leases" + + dhcpTreeKey = "infix-dhcp-server:dhcp-server" + firewallTreeKey = "infix-firewall:firewall" +) + +// DBusMonitor subscribes to dnsmasq and firewalld D-Bus signals and +// updates the shared operational tree. +type DBusMonitor struct { + tree *tree.Tree + log *slog.Logger +} + +// New creates a DBusMonitor. +func New(t *tree.Tree, log *slog.Logger) *DBusMonitor { + return &DBusMonitor{tree: t, log: log} +} + +// Run starts the monitor loop. It connects to the system bus, subscribes +// to relevant signals, loads initial DHCP/firewall data, and reconnects +// with exponential backoff on failures until ctx is cancelled. 
+func (m *DBusMonitor) Run(ctx context.Context) error { + bo := backoff.Default() + delay := bo.Initial + + for { + if err := ctx.Err(); err != nil { + return err + } + + conn, err := dbus.ConnectSystemBus() + if err != nil { + m.log.Warn("dbus monitor: connect system bus failed", "err", err, "delay", delay) + if err := backoff.Sleep(ctx, delay); err != nil { + return err + } + delay = bo.Next(delay) + continue + } + + if err := m.subscribe(conn); err != nil { + m.log.Warn("dbus monitor: subscribe failed", "err", err, "delay", delay) + _ = conn.Close() + if err := backoff.Sleep(ctx, delay); err != nil { + return err + } + delay = bo.Next(delay) + continue + } + + delay = bo.Initial + + if err := m.refreshDHCP(conn); err != nil { + m.log.Warn("dbus monitor: initial dhcp refresh failed", "err", err) + } + if err := m.refreshFirewall(conn); err != nil { + m.log.Warn("dbus monitor: initial firewall refresh failed", "err", err) + } + + err = m.processSignals(ctx, conn) + _ = conn.Close() + if ctx.Err() != nil { + return ctx.Err() + } + + m.log.Warn("dbus monitor: signal loop ended, reconnecting", "err", err, "delay", delay) + if err := backoff.Sleep(ctx, delay); err != nil { + return err + } + delay = bo.Next(delay) + } +} + +func (m *DBusMonitor) subscribe(conn *dbus.Conn) error { + if err := conn.AddMatchSignal( + dbus.WithMatchInterface(dnsmasqInterface), + dbus.WithMatchMember("DHCPLeaseAdded"), + ); err != nil { + return fmt.Errorf("add dnsmasq DHCPLeaseAdded match: %w", err) + } + + if err := conn.AddMatchSignal( + dbus.WithMatchInterface(dnsmasqInterface), + dbus.WithMatchMember("DHCPLeaseDeleted"), + ); err != nil { + return fmt.Errorf("add dnsmasq DHCPLeaseDeleted match: %w", err) + } + + if err := conn.AddMatchSignal( + dbus.WithMatchInterface(dnsmasqInterface), + dbus.WithMatchMember("DHCPLeaseUpdated"), + ); err != nil { + return fmt.Errorf("add dnsmasq DHCPLeaseUpdated match: %w", err) + } + + if err := conn.AddMatchSignal( + 
dbus.WithMatchInterface(firewalldInterface), + dbus.WithMatchMember("Reloaded"), + ); err != nil { + return fmt.Errorf("add firewalld Reloaded match: %w", err) + } + + if err := conn.AddMatchSignal( + dbus.WithMatchInterface(dbusInterface), + dbus.WithMatchMember("NameOwnerChanged"), + dbus.WithMatchArg(0, dnsmasqBusName), + ); err != nil { + return fmt.Errorf("add NameOwnerChanged dnsmasq match: %w", err) + } + + if err := conn.AddMatchSignal( + dbus.WithMatchInterface(dbusInterface), + dbus.WithMatchMember("NameOwnerChanged"), + dbus.WithMatchArg(0, firewalldBusName), + ); err != nil { + return fmt.Errorf("add NameOwnerChanged firewalld match: %w", err) + } + + return nil +} + +func (m *DBusMonitor) processSignals(ctx context.Context, conn *dbus.Conn) error { + sigCh := make(chan *dbus.Signal, 128) + conn.Signal(sigCh) + defer conn.RemoveSignal(sigCh) + + for { + select { + case <-ctx.Done(): + return ctx.Err() + case sig, ok := <-sigCh: + if !ok { + return fmt.Errorf("dbus signal channel closed") + } + if sig == nil { + continue + } + if err := m.handleSignal(conn, sig); err != nil { + m.log.Warn("dbus monitor: failed handling signal", "name", sig.Name, "path", sig.Path, "err", err) + } + } + } +} + +func (m *DBusMonitor) handleSignal(conn *dbus.Conn, sig *dbus.Signal) error { + switch sig.Name { + case dnsmasqInterface + ".DHCPLeaseAdded", + dnsmasqInterface + ".DHCPLeaseDeleted", + dnsmasqInterface + ".DHCPLeaseUpdated": + if sig.Path != "" && string(sig.Path) != dnsmasqPath { + return nil + } + return m.refreshDHCP(conn) + + case firewalldInterface + ".Reloaded": + if sig.Path != "" && string(sig.Path) != firewalldPath { + return nil + } + return m.refreshFirewall(conn) + + case dbusInterface + ".NameOwnerChanged": + if sig.Path != "" && string(sig.Path) != dbusPath { + return nil + } + if len(sig.Body) < 3 { + return fmt.Errorf("NameOwnerChanged: expected 3 args, got %d", len(sig.Body)) + } + + name, ok1 := sig.Body[0].(string) + oldOwner, ok2 := 
sig.Body[1].(string) + newOwner, ok3 := sig.Body[2].(string) + if !ok1 || !ok2 || !ok3 { + return fmt.Errorf("NameOwnerChanged: unexpected arg types") + } + + switch name { + case dnsmasqBusName: + if newOwner == "" { + m.clearTreeKey(dhcpTreeKey) + return nil + } + if oldOwner == "" { + return m.refreshDHCP(conn) + } + case firewalldBusName: + if newOwner == "" { + m.clearTreeKey(firewallTreeKey) + return nil + } + if oldOwner == "" { + return m.refreshFirewall(conn) + } + } + } + + return nil +} + +func (m *DBusMonitor) refreshDHCP(conn *dbus.Conn) error { + data, err := os.ReadFile(dnsmasqLeaseFile) + if err != nil { + m.log.Warn("dbus monitor: read dnsmasq leases failed", "file", dnsmasqLeaseFile, "err", err) + } + + leases := parseDnsmasqLeases(string(data)) + stats := defaultDHCPStats() + + obj := conn.Object(dnsmasqBusName, dbus.ObjectPath(dnsmasqPath)) + call := obj.Call(dnsmasqInterface+".GetMetrics", 0) + if call.Err != nil { + m.log.Warn("dbus monitor: dnsmasq GetMetrics failed", "err", call.Err) + } else if len(call.Body) > 0 { + stats = mergeDHCPStats(stats, decodeDHCPMetrics(call.Body[0])) + } + + m.tree.Set(dhcpTreeKey, buildDHCPTree(leases, stats)) + return nil +} + +func (m *DBusMonitor) refreshFirewall(conn *dbus.Conn) error { + obj := conn.Object(firewalldBusName, dbus.ObjectPath(firewalldPath)) + + defaultZone := "" + if call := obj.Call(firewalldInterface+".getDefaultZone", 0); call.Err != nil { + m.log.Info("dbus monitor: firewalld not reachable, skipping", "err", call.Err) + return nil + } else if err := call.Store(&defaultZone); err != nil { + m.log.Warn("dbus monitor: firewalld getDefaultZone decode failed", "err", err) + return nil + } + + logDenied := "" + if call := obj.Call(firewalldInterface+".getLogDenied", 0); call.Err != nil { + m.log.Warn("dbus monitor: firewalld getLogDenied failed", "err", call.Err) + } else if err := call.Store(&logDenied); err != nil { + m.log.Warn("dbus monitor: firewalld getLogDenied decode failed", "err", 
err) + } + + lockdown := false + if call := obj.Call(firewalldInterface+".queryPanicMode", 0); call.Err != nil { + m.log.Warn("dbus monitor: firewalld queryPanicMode failed", "err", call.Err) + } else if len(call.Body) > 0 { + lockdown = asBool(call.Body[0]) + } + + zones := m.getFirewallZones(obj) + policies := m.getFirewallPolicies(obj) + services := m.getFirewallServices(obj, referencedServices(zones, policies)) + + m.tree.Set(firewallTreeKey, buildFirewallTree(defaultZone, logDenied, lockdown, zones, policies, services)) + return nil +} + +func (m *DBusMonitor) getFirewallZones(obj dbus.BusObject) []map[string]any { + active := make(map[string]map[string]any) + if call := obj.Call(firewalldInterface+".zone.getActiveZones", 0); call.Err != nil { + m.log.Warn("dbus monitor: firewalld zone.getActiveZones failed", "err", call.Err) + return nil + } else if len(call.Body) > 0 { + active = decodeActiveZones(call.Body[0]) + } + + zones := make([]map[string]any, 0, len(active)) + for name, zoneInfo := range active { + settings := map[string]any{} + if call := obj.Call(firewalldInterface+".zone.getZoneSettings2", 0, name); call.Err != nil { + m.log.Warn("dbus monitor: firewalld zone.getZoneSettings2 failed", "zone", name, "err", call.Err) + continue + } else if len(call.Body) > 0 { + settings = variantMap(call.Body[0]) + } + + zone := map[string]any{ + "name": name, + "immutable": hasImmutableTag(getString(settings, "short")), + "action": mapZoneTarget(getString(settings, "target")), + } + if ifaces := firstStringList(zoneInfo, "interfaces", getStringList(settings, "interfaces")); len(ifaces) > 0 { + zone["interface"] = ifaces + } + if networks := firstStringList(zoneInfo, "sources", getStringList(settings, "sources")); len(networks) > 0 { + zone["network"] = networks + } + if services := getStringList(settings, "services"); len(services) > 0 { + zone["service"] = services + } + if desc := getString(settings, "description"); desc != "" { + zone["description"] = desc + } 
+ + if forwards := getForwardPorts(settings); len(forwards) > 0 { + zone["port-forward"] = forwards + } + + zones = append(zones, zone) + } + + return zones +} + +func (m *DBusMonitor) getFirewallPolicies(obj dbus.BusObject) []map[string]any { + var names []string + if call := obj.Call(firewalldInterface+".policy.getPolicies", 0); call.Err != nil { + m.log.Warn("dbus monitor: firewalld policy.getPolicies failed", "err", call.Err) + } else if err := call.Store(&names); err != nil { + m.log.Warn("dbus monitor: firewalld policy.getPolicies decode failed", "err", err) + } + + policies := make([]map[string]any, 0, len(names)+1) + for _, name := range names { + settings := map[string]any{} + if call := obj.Call(firewalldInterface+".policy.getPolicySettings", 0, name); call.Err != nil { + m.log.Warn("dbus monitor: firewalld policy.getPolicySettings failed", "policy", name, "err", call.Err) + continue + } else if len(call.Body) > 0 { + settings = variantMap(call.Body[0]) + } + + policy := map[string]any{ + "name": name, + "action": mapPolicyTarget(getString(settings, "target")), + "priority": getInt(settings, "priority", 32767), + "immutable": hasImmutableTag(getString(settings, "short")), + "masquerade": asBool(settings["masquerade"]), + } + if ingress := getStringList(settings, "ingress_zones"); len(ingress) > 0 { + policy["ingress"] = ingress + } + if egress := getStringList(settings, "egress_zones"); len(egress) > 0 { + policy["egress"] = egress + } + if desc := getString(settings, "description"); desc != "" { + policy["description"] = desc + } + if services := getStringList(settings, "services"); len(services) > 0 { + policy["service"] = services + } + if custom := parsePolicyCustomFilters(getStringList(settings, "rich_rules")); len(custom) > 0 { + policy["custom"] = map[string]any{"filter": custom} + } + + policies = append(policies, policy) + } + + policies = append(policies, map[string]any{ + "name": "default-drop", + "description": "Default deny rule - drops all 
unmatched traffic", + "action": "drop", + "priority": 32767, + "ingress": []string{"ANY"}, + "egress": []string{"ANY"}, + "immutable": true, + }) + + return policies +} + +func referencedServices(zones, policies []map[string]any) map[string]bool { + refs := map[string]bool{} + for _, z := range zones { + if svcs, ok := z["service"].([]string); ok { + for _, s := range svcs { + refs[s] = true + } + } + } + for _, p := range policies { + if svcs, ok := p["service"].([]string); ok { + for _, s := range svcs { + refs[s] = true + } + } + } + return refs +} + +func (m *DBusMonitor) getFirewallServices(obj dbus.BusObject, wanted map[string]bool) []map[string]any { + var names []string + if call := obj.Call(firewalldInterface+".listServices", 0); call.Err != nil { + m.log.Warn("dbus monitor: firewalld listServices failed", "err", call.Err) + return nil + } else if err := call.Store(&names); err != nil { + m.log.Warn("dbus monitor: firewalld listServices decode failed", "err", err) + return nil + } + + services := make([]map[string]any, 0, len(wanted)) + for _, name := range names { + if !wanted[name] { + continue + } + + settings := map[string]any{} + if call := obj.Call(firewalldInterface+".getServiceSettings2", 0, name); call.Err != nil { + m.log.Warn("dbus monitor: firewalld getServiceSettings2 failed", "service", name, "err", call.Err) + continue + } else if len(call.Body) > 0 { + settings = variantMap(call.Body[0]) + } + + ports := parseServicePorts(settings) + if len(ports) == 0 { + continue + } + + service := map[string]any{ + "name": name, + "port": ports, + } + if desc := getString(settings, "description"); desc != "" { + service["description"] = desc + } + + services = append(services, service) + } + + return services +} + +func (m *DBusMonitor) clearTreeKey(key string) { + m.tree.Set(key, json.RawMessage(`{}`)) +} + +func parseDnsmasqLeases(data string) []map[string]any { + leases := make([]map[string]any, 0) + for _, line := range strings.Split(data, "\n") { + 
line = strings.TrimSpace(line) + if line == "" { + continue + } + + fields := strings.Fields(line) + if len(fields) != 5 { + continue + } + + expires := "never" + if fields[0] != "0" { + ts, err := strconv.ParseInt(fields[0], 10, 64) + if err != nil { + continue + } + expires = time.Unix(ts, 0).UTC().Format(time.RFC3339) + } + + hostname := "" + if fields[3] != "*" { + hostname = fields[3] + } + + clientID := "" + if fields[4] != "*" { + clientID = fields[4] + } + + leases = append(leases, map[string]any{ + "expires": expires, + "address": fields[2], + "phys-address": fields[1], + "hostname": hostname, + "client-id": clientID, + }) + } + + return leases +} + +func buildDHCPTree(leases []map[string]any, stats map[string]any) json.RawMessage { + root := map[string]any{ + "statistics": stats, + "leases": map[string]any{ + "lease": leases, + }, + } + raw, err := json.Marshal(root) + if err != nil { + return json.RawMessage(`{}`) + } + return raw +} + +func buildFirewallTree(defaultZone, logDenied string, lockdown bool, zones, policies, services []map[string]any) json.RawMessage { + fw := map[string]any{ + "default": defaultZone, + "logging": logDenied, + "lockdown": lockdown, + } + if len(zones) > 0 { + fw["zone"] = zones + } + if len(policies) > 0 { + fw["policy"] = policies + } + if len(services) > 0 { + fw["service"] = services + } + + raw, err := json.Marshal(fw) + if err != nil { + return json.RawMessage(`{}`) + } + return raw +} + +func defaultDHCPStats() map[string]any { + return map[string]any{ + "out-offers": uint64(0), + "out-acks": uint64(0), + "out-naks": uint64(0), + "in-declines": uint64(0), + "in-discovers": uint64(0), + "in-requests": uint64(0), + "in-releases": uint64(0), + "in-informs": uint64(0), + } +} + +func decodeDHCPMetrics(v any) map[string]any { + metrics := map[string]any{} + + switch raw := v.(type) { + case map[string]dbus.Variant: + for k, val := range raw { + metrics[k] = val.Value() + } + case map[string]any: + for k, val := range raw { 
+ metrics[k] = val + } + } + + return map[string]any{ + "out-offers": toUint64(metrics["dhcp_offer"]), + "out-acks": toUint64(metrics["dhcp_ack"]), + "out-naks": toUint64(metrics["dhcp_nak"]), + "in-declines": toUint64(metrics["dhcp_decline"]), + "in-discovers": toUint64(metrics["dhcp_discover"]), + "in-requests": toUint64(metrics["dhcp_request"]), + "in-releases": toUint64(metrics["dhcp_release"]), + "in-informs": toUint64(metrics["dhcp_inform"]), + } +} + +func mergeDHCPStats(base, override map[string]any) map[string]any { + out := map[string]any{} + for k, v := range base { + out[k] = v + } + for k, v := range override { + out[k] = v + } + return out +} + +func parseServicePorts(settings map[string]any) []map[string]any { + rawPorts, ok := settings["ports"] + if !ok { + return []map[string]any{} + } + + out := []map[string]any{} + for _, entry := range toAnySlice(rawPorts) { + pair := toAnySlice(entry) + if len(pair) < 2 { + continue + } + + portSpec := fmt.Sprint(pair[0]) + proto := fmt.Sprint(pair[1]) + if portSpec == "" || proto == "" { + continue + } + + port := map[string]any{"proto": proto} + if strings.Contains(portSpec, "-") { + parts := strings.SplitN(portSpec, "-", 2) + lower, err1 := strconv.Atoi(strings.TrimSpace(parts[0])) + upper, err2 := strconv.Atoi(strings.TrimSpace(parts[1])) + if err1 != nil || err2 != nil { + continue + } + port["lower"] = lower + port["upper"] = upper + } else { + lower, err := strconv.Atoi(strings.TrimSpace(portSpec)) + if err != nil { + continue + } + port["lower"] = lower + } + + out = append(out, port) + } + + return out +} + +func parsePolicyCustomFilters(rules []string) []map[string]any { + filters := []map[string]any{} + for _, rule := range rules { + family := "both" + if strings.Contains(rule, `family="ipv4"`) { + family = "ipv4" + } else if strings.Contains(rule, `family="ipv6"`) { + family = "ipv6" + } + + icmpType := "" + action := "" + prio := -1 + + if idx := strings.Index(rule, "priority="); idx >= 0 { + prio 
= parsePriority(rule[idx+len("priority="):]) + } + + if strings.Contains(rule, "icmp-type") && strings.Contains(rule, `name="`) { + icmpType = parseQuotedName(rule) + action = "accept" + if strings.Contains(rule, " drop") { + action = "drop" + } else if strings.Contains(rule, " reject") { + action = "reject" + } + } else if strings.Contains(rule, "icmp-block") && strings.Contains(rule, `name="`) { + icmpType = parseQuotedName(rule) + action = "reject" + } + + if icmpType == "" || action == "" { + continue + } + + filters = append(filters, map[string]any{ + "name": "icmp-" + icmpType, + "priority": prio, + "family": family, + "action": action, + "icmp": map[string]any{ + "type": icmpType, + }, + }) + } + + return filters +} + +func getForwardPorts(settings map[string]any) []map[string]any { + raw, ok := settings["forward_ports"] + if !ok { + return nil + } + + out := []map[string]any{} + for _, item := range toAnySlice(raw) { + vals := toAnySlice(item) + if len(vals) < 4 { + continue + } + + portStr := fmt.Sprint(vals[0]) + proto := fmt.Sprint(vals[1]) + toPortStr := strings.TrimSpace(fmt.Sprint(vals[2])) + toAddr := fmt.Sprint(vals[3]) + + if portStr == "" || proto == "" { + continue + } + + entry := map[string]any{"proto": proto} + if strings.Contains(portStr, "-") { + parts := strings.SplitN(portStr, "-", 2) + lower, err1 := strconv.Atoi(strings.TrimSpace(parts[0])) + upper, err2 := strconv.Atoi(strings.TrimSpace(parts[1])) + if err1 != nil || err2 != nil { + continue + } + entry["lower"] = lower + entry["upper"] = upper + } else { + lower, err := strconv.Atoi(strings.TrimSpace(portStr)) + if err != nil { + continue + } + entry["lower"] = lower + } + + to := map[string]any{"addr": toAddr} + if toPortStr != "" && !strings.ContainsAny(toPortStr, ".:") { + if p, err := strconv.Atoi(toPortStr); err == nil { + to["port"] = p + } + } + if _, ok := to["port"]; !ok { + to["port"] = entry["lower"] + } + + entry["to"] = to + out = append(out, entry) + } + + return out +} + 
+func decodeActiveZones(v any) map[string]map[string]any { + out := map[string]map[string]any{} + + switch m := v.(type) { + case map[string]map[string]dbus.Variant: + for zone, data := range m { + inner := map[string]any{} + for k, vv := range data { + inner[k] = vv.Value() + } + out[zone] = inner + } + case map[string]map[string]any: + for zone, data := range m { + out[zone] = data + } + case map[string]map[string][]string: + for zone, data := range m { + inner := map[string]any{} + for k, v := range data { + inner[k] = v + } + out[zone] = inner + } + case map[string]any: + for zone, raw := range m { + if mm, ok := raw.(map[string]any); ok { + out[zone] = mm + } + } + } + + return out +} + +func variantMap(v any) map[string]any { + out := map[string]any{} + switch m := v.(type) { + case map[string]dbus.Variant: + for k, vv := range m { + out[k] = vv.Value() + } + case map[string]any: + for k, vv := range m { + if dv, ok := vv.(dbus.Variant); ok { + out[k] = dv.Value() + } else { + out[k] = vv + } + } + } + return out +} + +func getString(m map[string]any, key string) string { + v, ok := m[key] + if !ok || v == nil { + return "" + } + return fmt.Sprint(v) +} + +func getInt(m map[string]any, key string, def int) int { + v, ok := m[key] + if !ok { + return def + } + switch n := v.(type) { + case int: + return n + case int8: + return int(n) + case int16: + return int(n) + case int32: + return int(n) + case int64: + return int(n) + case uint: + return int(n) + case uint8: + return int(n) + case uint16: + return int(n) + case uint32: + return int(n) + case uint64: + return int(n) + case float32: + return int(n) + case float64: + return int(n) + case string: + i, err := strconv.Atoi(strings.TrimSpace(n)) + if err == nil { + return i + } + } + return def +} + +func getStringList(m map[string]any, key string) []string { + v, ok := m[key] + if !ok { + return nil + } + return toStringSlice(v) +} + +func toStringSlice(v any) []string { + vals := toAnySlice(v) + if len(vals) 
== 0 { + if s, ok := v.(string); ok { + if s == "" { + return nil + } + return []string{s} + } + return nil + } + + out := make([]string, 0, len(vals)) + for _, item := range vals { + s := strings.TrimSpace(fmt.Sprint(item)) + if s != "" { + out = append(out, s) + } + } + return out +} + +func toAnySlice(v any) []any { + switch a := v.(type) { + case []any: + return a + case []string: + out := make([]any, 0, len(a)) + for _, item := range a { + out = append(out, item) + } + return out + case [][]any: + out := make([]any, 0, len(a)) + for _, item := range a { + out = append(out, any(item)) + } + return out + case [][]string: + out := make([]any, 0, len(a)) + for _, item := range a { + inner := make([]any, 0, len(item)) + for _, p := range item { + inner = append(inner, p) + } + out = append(out, inner) + } + return out + } + return nil +} + +func firstStringList(a map[string]any, key string, fallback []string) []string { + if list := getStringList(a, key); len(list) > 0 { + return list + } + return fallback +} + +func hasImmutableTag(short string) bool { + return strings.Contains(short, "(immutable)") +} + +func mapZoneTarget(target string) string { + switch strings.ToUpper(strings.TrimSpace(target)) { + case "%%REJECT%%", "REJECT": + return "reject" + case "DROP": + return "drop" + case "ACCEPT", "DEFAULT", "": + return "accept" + default: + return "accept" + } +} + +func mapPolicyTarget(target string) string { + switch strings.ToUpper(strings.TrimSpace(target)) { + case "CONTINUE": + return "continue" + case "ACCEPT": + return "accept" + case "DROP": + return "drop" + case "REJECT", "": + return "reject" + default: + return "reject" + } +} + +func parseQuotedName(rule string) string { + idx := strings.Index(rule, `name="`) + if idx < 0 { + return "" + } + start := idx + len(`name="`) + end := strings.Index(rule[start:], `"`) + if end < 0 { + return "" + } + return rule[start : start+end] +} + +func parsePriority(fragment string) int { + fragment = 
strings.TrimSpace(fragment) + if fragment == "" { + return -1 + } + fields := strings.Fields(fragment) + if len(fields) == 0 { + return -1 + } + p, err := strconv.Atoi(strings.Trim(fields[0], `"`)) + if err != nil { + return -1 + } + return p +} + +func asBool(v any) bool { + switch x := v.(type) { + case bool: + return x + case uint8: + return x != 0 + case uint16: + return x != 0 + case uint32: + return x != 0 + case uint64: + return x != 0 + case int8: + return x != 0 + case int16: + return x != 0 + case int32: + return x != 0 + case int64: + return x != 0 + case int: + return x != 0 + case string: + x = strings.TrimSpace(strings.ToLower(x)) + return x == "1" || x == "true" || x == "yes" || x == "on" + default: + return false + } +} + +func toUint64(v any) uint64 { + switch x := v.(type) { + case uint8: + return uint64(x) + case uint16: + return uint64(x) + case uint32: + return uint64(x) + case uint64: + return x + case uint: + return uint64(x) + case int8: + if x < 0 { + return 0 + } + return uint64(x) + case int16: + if x < 0 { + return 0 + } + return uint64(x) + case int32: + if x < 0 { + return 0 + } + return uint64(x) + case int64: + if x < 0 { + return 0 + } + return uint64(x) + case int: + if x < 0 { + return 0 + } + return uint64(x) + case float32: + if x < 0 { + return 0 + } + return uint64(x) + case float64: + if x < 0 { + return 0 + } + return uint64(x) + case string: + u, err := strconv.ParseUint(strings.TrimSpace(x), 10, 64) + if err == nil { + return u + } + } + return 0 +} diff --git a/src/yangerd/internal/dbusmonitor/dbusmonitor_test.go b/src/yangerd/internal/dbusmonitor/dbusmonitor_test.go new file mode 100644 index 000000000..2bddf3eaf --- /dev/null +++ b/src/yangerd/internal/dbusmonitor/dbusmonitor_test.go @@ -0,0 +1,693 @@ +package dbusmonitor + +import ( + "context" + "encoding/json" + "errors" + "reflect" + "testing" + "time" + + "github.com/kernelkit/infix/src/yangerd/internal/backoff" +) + +func TestParseDnsmasqLeases(t *testing.T) { + 
tests := []struct { + name string + input string + want []map[string]any + }{ + { + name: "normal lease line", + input: "1711900000 aa:bb:cc:dd:ee:ff 192.168.1.100 myhost 01:aa:bb:cc:dd:ee:ff", + want: []map[string]any{{ + "expires": time.Unix(1711900000, 0).UTC().Format(time.RFC3339), + "address": "192.168.1.100", + "phys-address": "aa:bb:cc:dd:ee:ff", + "hostname": "myhost", + "client-id": "01:aa:bb:cc:dd:ee:ff", + }}, + }, + { + name: "wildcard hostname and client id", + input: "1711900000 aa:bb:cc:dd:ee:ff 192.168.1.100 * *", + want: []map[string]any{{ + "expires": time.Unix(1711900000, 0).UTC().Format(time.RFC3339), + "address": "192.168.1.100", + "phys-address": "aa:bb:cc:dd:ee:ff", + "hostname": "", + "client-id": "", + }}, + }, + { + name: "never expiring lease", + input: "0 aa:bb:cc:dd:ee:ff 192.168.1.100 host *", + want: []map[string]any{{ + "expires": "never", + "address": "192.168.1.100", + "phys-address": "aa:bb:cc:dd:ee:ff", + "hostname": "host", + "client-id": "", + }}, + }, + { + name: "multiple leases with malformed lines skipped", + input: "1711900000 aa:bb:cc:dd:ee:ff 192.168.1.100 myhost 01:aa:bb:cc:dd:ee:ff\n" + + "bad line with too few fields\n" + + "1711900100 11:22:33:44:55:66 192.168.1.101 host2 *\n", + want: []map[string]any{ + { + "expires": time.Unix(1711900000, 0).UTC().Format(time.RFC3339), + "address": "192.168.1.100", + "phys-address": "aa:bb:cc:dd:ee:ff", + "hostname": "myhost", + "client-id": "01:aa:bb:cc:dd:ee:ff", + }, + { + "expires": time.Unix(1711900100, 0).UTC().Format(time.RFC3339), + "address": "192.168.1.101", + "phys-address": "11:22:33:44:55:66", + "hostname": "host2", + "client-id": "", + }, + }, + }, + { + name: "empty input", + input: "", + want: []map[string]any{}, + }, + { + name: "invalid timestamp skipped", + input: "abc aa:bb:cc:dd:ee:ff 192.168.1.100 host *", + want: []map[string]any{}, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + got := parseDnsmasqLeases(tc.input) + if 
!reflect.DeepEqual(got, tc.want) { + t.Fatalf("parseDnsmasqLeases() mismatch\nwant: %#v\n got: %#v", tc.want, got) + } + }) + } +} + +func TestBuildDHCPTree(t *testing.T) { + tests := []struct { + name string + leases []map[string]any + stats map[string]any + check func(t *testing.T, root map[string]any) + }{ + { + name: "with leases and stats", + leases: []map[string]any{{ + "expires": "never", + "address": "192.168.1.100", + "phys-address": "aa:bb:cc:dd:ee:ff", + "hostname": "host", + "client-id": "", + }}, + stats: map[string]any{"out-offers": 3, "in-requests": 4}, + check: func(t *testing.T, root map[string]any) { + t.Helper() + stats, ok := root["statistics"].(map[string]any) + if !ok { + t.Fatalf("missing statistics map") + } + if stats["out-offers"] != float64(3) || stats["in-requests"] != float64(4) { + t.Fatalf("unexpected statistics: %#v", stats) + } + + leasesNode, ok := root["leases"].(map[string]any) + if !ok { + t.Fatalf("missing leases map") + } + leaseList, ok := leasesNode["lease"].([]any) + if !ok || len(leaseList) != 1 { + t.Fatalf("unexpected lease list: %#v", leasesNode["lease"]) + } + lease, ok := leaseList[0].(map[string]any) + if !ok || lease["address"] != "192.168.1.100" { + t.Fatalf("unexpected lease entry: %#v", leaseList[0]) + } + }, + }, + { + name: "with empty leases", + leases: []map[string]any{}, + stats: map[string]any{"out-offers": 0}, + check: func(t *testing.T, root map[string]any) { + t.Helper() + leasesNode := root["leases"].(map[string]any) + leaseList, ok := leasesNode["lease"].([]any) + if !ok || len(leaseList) != 0 { + t.Fatalf("expected empty lease list, got %#v", leasesNode["lease"]) + } + }, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + raw := buildDHCPTree(tc.leases, tc.stats) + var root map[string]any + if err := json.Unmarshal(raw, &root); err != nil { + t.Fatalf("unmarshal buildDHCPTree output: %v", err) + } + tc.check(t, root) + }) + } +} + +func TestBuildFirewallTree(t *testing.T) 
{ + tests := []struct { + name string + defaultZ string + logDenied string + lockdown bool + zones []map[string]any + policies []map[string]any + services []map[string]any + expectKeys map[string]bool + }{ + { + name: "with zones policies and services", + defaultZ: "public", + logDenied: "all", + lockdown: true, + zones: []map[string]any{{"name": "public"}}, + policies: []map[string]any{{"name": "default-drop"}}, + services: []map[string]any{{"name": "ssh"}}, + expectKeys: map[string]bool{ + "zone": true, + "policy": true, + "service": true, + }, + }, + { + name: "omits empty zone policy service keys", + defaultZ: "trusted", + logDenied: "off", + lockdown: false, + expectKeys: map[string]bool{ + "zone": false, + "policy": false, + "service": false, + }, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + raw := buildFirewallTree(tc.defaultZ, tc.logDenied, tc.lockdown, tc.zones, tc.policies, tc.services) + var root map[string]any + if err := json.Unmarshal(raw, &root); err != nil { + t.Fatalf("unmarshal buildFirewallTree output: %v", err) + } + if root["default"] != tc.defaultZ || root["logging"] != tc.logDenied || root["lockdown"] != tc.lockdown { + t.Fatalf("default/logging/lockdown mismatch: %#v", root) + } + for k, shouldExist := range tc.expectKeys { + _, exists := root[k] + if exists != shouldExist { + t.Fatalf("key %q exists=%v, want %v", k, exists, shouldExist) + } + } + }) + } +} + +func TestParseServicePorts(t *testing.T) { + tests := []struct { + name string + settings map[string]any + want []map[string]any + }{ + { + name: "single port", + settings: map[string]any{"ports": []any{[]any{"80", "tcp"}}}, + want: []map[string]any{{"proto": "tcp", "lower": 80}}, + }, + { + name: "port range", + settings: map[string]any{"ports": []any{[]any{"8080-8090", "tcp"}}}, + want: []map[string]any{{"proto": "tcp", "lower": 8080, "upper": 8090}}, + }, + { + name: "multiple ports", + settings: map[string]any{"ports": []any{ + []any{"80", "tcp"}, 
+ []any{"53", "udp"}, + }}, + want: []map[string]any{ + {"proto": "tcp", "lower": 80}, + {"proto": "udp", "lower": 53}, + }, + }, + { + name: "missing ports", + settings: map[string]any{}, + want: []map[string]any{}, + }, + { + name: "empty ports", + settings: map[string]any{"ports": []any{}}, + want: []map[string]any{}, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + got := parseServicePorts(tc.settings) + if !reflect.DeepEqual(got, tc.want) { + t.Fatalf("parseServicePorts mismatch\nwant: %#v\n got: %#v", tc.want, got) + } + }) + } +} + +func TestParsePolicyCustomFilters(t *testing.T) { + tests := []struct { + name string + rules []string + want []map[string]any + }{ + { + name: "rich rule icmp type accept", + rules: []string{`rule priority="0" family="ipv4" icmp-type name="echo-request" accept`}, + want: []map[string]any{{ + "name": "icmp-echo-request", + "priority": 0, + "family": "ipv4", + "action": "accept", + "icmp": map[string]any{"type": "echo-request"}, + }}, + }, + { + name: "rich rule icmp block reject", + rules: []string{`rule family="ipv6" icmp-block name="router-advertisement" reject`}, + want: []map[string]any{{ + "name": "icmp-router-advertisement", + "priority": -1, + "family": "ipv6", + "action": "reject", + "icmp": map[string]any{"type": "router-advertisement"}, + }}, + }, + { + name: "rule without icmp skipped", + rules: []string{`rule family="ipv4" service name="ssh" accept`}, + want: []map[string]any{}, + }, + { + name: "multiple rules include only icmp", + rules: []string{ + `rule priority="10" family="ipv4" icmp-type name="echo-reply" drop`, + `rule family="ipv4" service name="http" accept`, + `rule family="ipv6" icmp-block name="router-advertisement" reject`, + }, + want: []map[string]any{ + { + "name": "icmp-echo-reply", + "priority": 10, + "family": "ipv4", + "action": "drop", + "icmp": map[string]any{"type": "echo-reply"}, + }, + { + "name": "icmp-router-advertisement", + "priority": -1, + "family": 
"ipv6", + "action": "reject", + "icmp": map[string]any{"type": "router-advertisement"}, + }, + }, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + got := parsePolicyCustomFilters(tc.rules) + if !reflect.DeepEqual(got, tc.want) { + t.Fatalf("parsePolicyCustomFilters mismatch\nwant: %#v\n got: %#v", tc.want, got) + } + }) + } +} + +func TestGetForwardPorts(t *testing.T) { + tests := []struct { + name string + settings map[string]any + want []map[string]any + wantNil bool + }{ + { + name: "single port forward", + settings: map[string]any{"forward_ports": []any{[]any{"80", "tcp", "8080", "192.168.1.1"}}}, + want: []map[string]any{{ + "proto": "tcp", + "lower": 80, + "to": map[string]any{"addr": "192.168.1.1", "port": 8080}, + }}, + }, + { + name: "port range forward", + settings: map[string]any{"forward_ports": []any{[]any{"1000-1005", "udp", "2000", "10.0.0.2"}}}, + want: []map[string]any{{ + "proto": "udp", + "lower": 1000, + "upper": 1005, + "to": map[string]any{"addr": "10.0.0.2", "port": 2000}, + }}, + }, + { + name: "missing to port defaults to lower", + settings: map[string]any{"forward_ports": []any{[]any{"8081", "tcp", "", "192.168.1.1"}}}, + want: []map[string]any{{ + "proto": "tcp", + "lower": 8081, + "to": map[string]any{"addr": "192.168.1.1", "port": 8081}, + }}, + }, + { + name: "missing forward ports", + settings: map[string]any{}, + wantNil: true, + }, + { + name: "empty forward ports", + settings: map[string]any{"forward_ports": []any{}}, + want: []map[string]any{}, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + got := getForwardPorts(tc.settings) + if tc.wantNil { + if got != nil { + t.Fatalf("expected nil, got %#v", got) + } + return + } + if !reflect.DeepEqual(got, tc.want) { + t.Fatalf("getForwardPorts mismatch\nwant: %#v\n got: %#v", tc.want, got) + } + }) + } +} + +func TestMapZoneTarget(t *testing.T) { + tests := []struct { + name string + in string + want string + }{ + {name: 
"percent reject", in: "%%REJECT%%", want: "reject"}, + {name: "reject", in: "REJECT", want: "reject"}, + {name: "drop", in: "DROP", want: "drop"}, + {name: "accept", in: "ACCEPT", want: "accept"}, + {name: "default", in: "DEFAULT", want: "accept"}, + {name: "empty", in: "", want: "accept"}, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + if got := mapZoneTarget(tc.in); got != tc.want { + t.Fatalf("mapZoneTarget(%q) = %q, want %q", tc.in, got, tc.want) + } + }) + } +} + +func TestMapPolicyTarget(t *testing.T) { + tests := []struct { + name string + in string + want string + }{ + {name: "continue", in: "CONTINUE", want: "continue"}, + {name: "accept", in: "ACCEPT", want: "accept"}, + {name: "drop", in: "DROP", want: "drop"}, + {name: "reject", in: "REJECT", want: "reject"}, + {name: "empty", in: "", want: "reject"}, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + if got := mapPolicyTarget(tc.in); got != tc.want { + t.Fatalf("mapPolicyTarget(%q) = %q, want %q", tc.in, got, tc.want) + } + }) + } +} + +func TestAsBool(t *testing.T) { + tests := []struct { + name string + in any + want bool + }{ + {name: "bool true", in: true, want: true}, + {name: "bool false", in: false, want: false}, + {name: "int one", in: 1, want: true}, + {name: "int zero", in: 0, want: false}, + {name: "int8 one", in: int8(1), want: true}, + {name: "int16 zero", in: int16(0), want: false}, + {name: "int64 one", in: int64(1), want: true}, + {name: "uint32 zero", in: uint32(0), want: false}, + {name: "uint64 one", in: uint64(1), want: true}, + {name: "string true", in: "true", want: true}, + {name: "string false", in: "false", want: false}, + {name: "string one", in: "1", want: true}, + {name: "string zero", in: "0", want: false}, + {name: "string yes", in: "yes", want: true}, + {name: "string no", in: "no", want: false}, + {name: "string on", in: "on", want: true}, + {name: "trim and case", in: " TRUE ", want: true}, + } + + for _, tc := range 
tests { + t.Run(tc.name, func(t *testing.T) { + if got := asBool(tc.in); got != tc.want { + t.Fatalf("asBool(%#v) = %v, want %v", tc.in, got, tc.want) + } + }) + } +} + +func TestToUint64(t *testing.T) { + tests := []struct { + name string + in any + want uint64 + }{ + {name: "uint8", in: uint8(8), want: 8}, + {name: "uint16", in: uint16(16), want: 16}, + {name: "uint32", in: uint32(32), want: 32}, + {name: "uint64", in: uint64(64), want: 64}, + {name: "uint", in: uint(7), want: 7}, + {name: "int positive", in: 42, want: 42}, + {name: "int negative", in: -1, want: 0}, + {name: "int64 negative", in: int64(-9), want: 0}, + {name: "float64", in: float64(99.9), want: 99}, + {name: "float64 negative", in: float64(-0.1), want: 0}, + {name: "string number", in: "42", want: 42}, + {name: "string invalid", in: "nope", want: 0}, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + if got := toUint64(tc.in); got != tc.want { + t.Fatalf("toUint64(%#v) = %d, want %d", tc.in, got, tc.want) + } + }) + } +} + +func TestParseHelpers(t *testing.T) { + t.Run("parseQuotedName", func(t *testing.T) { + tests := []struct { + name string + rule string + want string + }{ + {name: "extract name", rule: `rule icmp-type name="echo-request" accept`, want: "echo-request"}, + {name: "missing name", rule: `rule icmp-type accept`, want: ""}, + {name: "unterminated quote", rule: `rule icmp-type name="echo-request accept`, want: ""}, + } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + if got := parseQuotedName(tc.rule); got != tc.want { + t.Fatalf("parseQuotedName(%q) = %q, want %q", tc.rule, got, tc.want) + } + }) + } + }) + + t.Run("parsePriority", func(t *testing.T) { + tests := []struct { + name string + in string + want int + }{ + {name: "quoted value", in: `"0" family="ipv4"`, want: 0}, + {name: "plain value", in: `10 family="ipv6"`, want: 10}, + {name: "empty", in: ``, want: -1}, + {name: "invalid", in: `abc family="ipv4"`, want: -1}, + } + for _, tc 
:= range tests { + t.Run(tc.name, func(t *testing.T) { + if got := parsePriority(tc.in); got != tc.want { + t.Fatalf("parsePriority(%q) = %d, want %d", tc.in, got, tc.want) + } + }) + } + }) + + t.Run("hasImmutableTag", func(t *testing.T) { + tests := []struct { + name string + in string + want bool + }{ + {name: "has tag", in: "Public (immutable)", want: true}, + {name: "no tag", in: "Public", want: false}, + } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + if got := hasImmutableTag(tc.in); got != tc.want { + t.Fatalf("hasImmutableTag(%q) = %v, want %v", tc.in, got, tc.want) + } + }) + } + }) +} + +func TestSleepOrDone(t *testing.T) { + tests := []struct { + name string + cancelNow bool + delay time.Duration + wantErr bool + }{ + {name: "done context returns error", cancelNow: true, delay: time.Millisecond, wantErr: true}, + {name: "sleep completes when context active", cancelNow: false, delay: time.Millisecond, wantErr: false}, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + if tc.cancelNow { + cancel() + } else { + defer cancel() + } + + err := backoff.Sleep(ctx, tc.delay) + if tc.wantErr { + if !errors.Is(err, context.Canceled) { + t.Fatalf("expected context.Canceled, got %v", err) + } + return + } + if err != nil { + t.Fatalf("expected nil error, got %v", err) + } + }) + } +} + +func TestDecodeActiveZones(t *testing.T) { + tests := []struct { + name string + in any + want map[string]map[string]any + }{ + { + name: "godbus concrete type (a{sa{sas}})", + in: map[string]map[string][]string{ + "public": { + "interfaces": {"eth0", "eth1"}, + "sources": {"10.0.0.0/8"}, + }, + "mgmt": { + "interfaces": {"eth2"}, + }, + }, + want: map[string]map[string]any{ + "public": { + "interfaces": []string{"eth0", "eth1"}, + "sources": []string{"10.0.0.0/8"}, + }, + "mgmt": { + "interfaces": []string{"eth2"}, + }, + }, + }, + { + name: "pre-decoded 
map[string]map[string]any", + in: map[string]map[string]any{ + "home": { + "interfaces": []string{"wlan0"}, + }, + }, + want: map[string]map[string]any{ + "home": { + "interfaces": []string{"wlan0"}, + }, + }, + }, + { + name: "nil input", + in: nil, + want: map[string]map[string]any{}, + }, + { + name: "unsupported type", + in: "garbage", + want: map[string]map[string]any{}, + }, + { + name: "empty map", + in: map[string]map[string][]string{}, + want: map[string]map[string]any{}, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + got := decodeActiveZones(tc.in) + if !reflect.DeepEqual(got, tc.want) { + t.Fatalf("decodeActiveZones() =\n %v\nwant:\n %v", got, tc.want) + } + }) + } +} + +func TestNextDelay(t *testing.T) { + b := backoff.Default() + tests := []struct { + name string + in time.Duration + want time.Duration + }{ + {name: "doubles normal delay", in: b.Initial, want: b.Initial * 2}, + {name: "caps at max", in: b.Max, want: b.Max}, + {name: "near max also caps", in: b.Max - time.Second, want: b.Max}, + {name: "zero becomes initial", in: 0, want: b.Initial}, + {name: "negative becomes initial", in: -time.Second, want: b.Initial}, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + if got := b.Next(tc.in); got != tc.want { + t.Fatalf("Next(%v) = %v, want %v", tc.in, got, tc.want) + } + }) + } +} diff --git a/src/yangerd/internal/ethmonitor/ethmonitor.go b/src/yangerd/internal/ethmonitor/ethmonitor.go new file mode 100644 index 000000000..d62460857 --- /dev/null +++ b/src/yangerd/internal/ethmonitor/ethmonitor.go @@ -0,0 +1,391 @@ +// Package ethmonitor subscribes to ethtool genetlink notifications and +// keeps per-interface ethernet settings updated via a callback. +// +// Data is fetched by shelling out to `ethtool --json ` (matching +// the Python yanger approach) while genetlink provides reactive change +// notifications. 
+package ethmonitor + +import ( + "context" + "encoding/json" + "fmt" + "log/slog" + "net" + "strings" + + "github.com/mdlayher/genetlink" + "github.com/mdlayher/netlink" +) + +const ( + ETHTOOL_MSG_LINKINFO_NTF = 28 + ETHTOOL_MSG_LINKMODES_NTF = 29 + + ethtoolFamilyName = "ethtool" + ethtoolMonitorGroupName = "monitor" + + nlaHeaderIfindex = 1 + + ethtoolSpeedUnknown = (1 << 32) - 1 +) + +// CommandRunner executes external commands and returns stdout. +type CommandRunner interface { + Run(ctx context.Context, name string, args ...string) ([]byte, error) +} + +// EthMonitor listens for ethtool genetlink monitor events and updates +// interface ethernet operational state via a callback. +type EthMonitor struct { + conn *genetlink.Conn + family genetlink.Family + groupID uint32 + cmd CommandRunner + ctx context.Context + log *slog.Logger + onUpdate func(ifname string, data json.RawMessage) +} + +// New creates an EthMonitor, resolves the ethtool genetlink family, +// and joins its "monitor" multicast group. 
+func New(log *slog.Logger, cmd CommandRunner) (*EthMonitor, error) { + conn, err := genetlink.Dial(nil) + if err != nil { + return nil, fmt.Errorf("dial genetlink: %w", err) + } + + family, err := conn.GetFamily(ethtoolFamilyName) + if err != nil { + _ = conn.Close() + return nil, fmt.Errorf("resolve %q genetlink family: %w", ethtoolFamilyName, err) + } + + var groupID uint32 + for _, g := range family.Groups { + if g.Name == ethtoolMonitorGroupName { + groupID = g.ID + break + } + } + if groupID == 0 { + _ = conn.Close() + return nil, fmt.Errorf("multicast group %q not found in family %q", ethtoolMonitorGroupName, ethtoolFamilyName) + } + + if err := conn.JoinGroup(groupID); err != nil { + _ = conn.Close() + return nil, fmt.Errorf("join ethtool monitor group %d: %w", groupID, err) + } + + return &EthMonitor{ + conn: conn, + family: family, + groupID: groupID, + cmd: cmd, + log: log, + }, nil +} + +// SetOnUpdate sets the callback invoked when ethernet data changes. +func (m *EthMonitor) SetOnUpdate(fn func(string, json.RawMessage)) { + m.onUpdate = fn +} + +// Run starts the ethtool genetlink receive loop and updates interface +// ethernet settings when link info or link mode notifications are seen. 
+func (m *EthMonitor) Run(ctx context.Context) error { + m.ctx = ctx + defer func() { + if err := m.conn.Close(); err != nil { + m.log.Warn("ethmonitor: close genetlink conn", "err", err) + } + }() + + for { + if err := ctx.Err(); err != nil { + return err + } + + msgs, _, err := m.conn.Receive() + if err != nil { + if cerr := ctx.Err(); cerr != nil { + return cerr + } + return fmt.Errorf("receive ethtool genetlink message: %w", err) + } + + for _, msg := range msgs { + switch msg.Header.Command { + case ETHTOOL_MSG_LINKINFO_NTF, ETHTOOL_MSG_LINKMODES_NTF: + ifname, err := extractIfname(msg.Data) + if err != nil { + m.log.Warn("ethmonitor: extract interface name", "err", err) + continue + } + m.refreshEthernetSettings(ifname) + } + } + } +} + +// RefreshInterface refreshes ethernet settings for ifname. It is intended +// to be called by other subsystems (for example nlmonitor RTM_NEWLINK). +func (m *EthMonitor) RefreshInterface(ifname string) { + m.refreshEthernetSettings(ifname) +} + +// ethtoolJSON represents the relevant fields from `ethtool --json `. +type ethtoolJSON struct { + // Speed must be a 64-bit type: ethtool reports unknown speed as + // 0xFFFFFFFF, which overflows int on 32-bit targets (arm). 
+ Speed int64 `json:"speed"` + Duplex string `json:"duplex"` + Port string `json:"port"` + AutoNegotiation bool `json:"auto-negotiation"` + SupportedLinkModes []string `json:"supported-link-modes"` + AdvertisedLinkModes []string `json:"advertised-link-modes"` +} + +func (m *EthMonitor) refreshEthernetSettings(ifname string) { + ctx := m.ctx + if ctx == nil { + ctx = context.Background() + } + + out, err := m.cmd.Run(ctx, "ethtool", "--json", ifname) + if err != nil { + m.log.Warn("ethmonitor: run ethtool", "ifname", ifname, "err", err) + return + } + + var results []ethtoolJSON + if err := json.Unmarshal(out, &results); err != nil { + m.log.Warn("ethmonitor: parse ethtool json", "ifname", ifname, "err", err) + return + } + if len(results) == 0 { + return + } + + data := results[0] + eth, speedBPS := buildEthernetContainer(data) + + // Marshal the result; include interface-level speed as a special key + // that mergeAugments will lift onto the interface object. + result := map[string]any{"ethernet": eth} + if speedBPS > 0 { + result["speed"] = fmt.Sprintf("%d", speedBPS) + } + + raw, err := json.Marshal(result) + if err != nil { + m.log.Warn("ethmonitor: marshal ethernet settings", "ifname", ifname, "err", err) + return + } + + if m.onUpdate != nil { + m.onUpdate(ifname, json.RawMessage(raw)) + } +} + +// buildEthernetContainer builds the ieee802-ethernet-interface:ethernet +// container and returns (container, interface speed in bits/s or 0). +func buildEthernetContainer(data ethtoolJSON) (map[string]any, int64) { + autoneg := map[string]any{"enable": data.AutoNegotiation} + eth := map[string]any{"auto-negotiation": autoneg} + + duplex := strings.ToLower(data.Duplex) + if duplex == "full" || duplex == "half" { + eth["duplex"] = duplex + } + + // Supported PMD types (config-false leaf-list). 
+ supported := ethtoolModesToPMD(data.SupportedLinkModes) + if len(supported) > 0 { + eth["infix-ethernet-interface:supported-pmd-types"] = supported + } + + // Advertised PMD types — suppress when identical to supported (default). + advertised := ethtoolModesToPMD(data.AdvertisedLinkModes) + if len(advertised) > 0 && !stringSliceEqual(advertised, supported) { + autoneg["infix-ethernet-interface:advertised-pmd-types"] = advertised + } + + // Speed, phy-type, pmd-type. + var speedBPS int64 + speedMbps := data.Speed + if speedMbps > 0 && speedMbps < ethtoolSpeedUnknown { + speedBPS = int64(speedMbps) * 1_000_000 + + // Speed inside the ethernet container (decimal64, Gb/s). + eth["speed"] = fmt.Sprintf("%.3f", float64(speedMbps)/1000.0) + + key := linkModeKey{Port: data.Port, SpeedMbps: speedMbps, Duplex: duplex} + if mapping, ok := linkModes[key]; ok { + eth["phy-type"] = "ieee802-ethernet-phy-type:phy-type-" + mapping.PhyType + if mapping.PMDType != "" { + eth["pmd-type"] = "ieee802-ethernet-phy-type:pmd-type-" + mapping.PMDType + } + } + + // Refine pmd-type when exactly one supported mode (specific SFP). 
+ if len(supported) == 1 { + eth["pmd-type"] = supported[0] + } + } + + return eth, speedBPS +} + +func extractIfname(data []byte) (string, error) { + ad, err := netlink.NewAttributeDecoder(data) + if err != nil { + return "", fmt.Errorf("new decoder: %w", err) + } + + for ad.Next() { + nested, err := netlink.NewAttributeDecoder(ad.Bytes()) + if err != nil { + continue + } + + for nested.Next() { + if nested.Type() != nlaHeaderIfindex { + continue + } + + ifindex := int(nested.Uint32()) + iface, err := net.InterfaceByIndex(ifindex) + if err != nil { + return "", fmt.Errorf("lookup interface index %d: %w", ifindex, err) + } + return iface.Name, nil + } + if err := nested.Err(); err != nil { + return "", fmt.Errorf("decode nested attrs: %w", err) + } + } + + if err := ad.Err(); err != nil { + return "", fmt.Errorf("decode attrs: %w", err) + } + + return "", fmt.Errorf("header ifindex attribute not found") +} + +// linkModeKey is the lookup key for phy-type/pmd-type mapping. +type linkModeKey struct { + Port string + SpeedMbps int64 + Duplex string +} + +// linkModeMapping holds the IEEE identity suffixes. +type linkModeMapping struct { + PhyType string + PMDType string // empty means "cannot determine from this tuple alone" +} + +// linkModes maps (port, speed, duplex) → (phy-type, pmd-type) per +// IEEE Std 802.3.2-2025 (ieee802-ethernet-phy-type). 
+var linkModes = map[linkModeKey]linkModeMapping{ + {"Twisted Pair", 10, "full"}: {"10BASE-T", "10BASE-T"}, + {"Twisted Pair", 10, "half"}: {"10BASE-T", "10BASE-T"}, + {"Twisted Pair", 100, "full"}: {"100BASE-X", "100BASE-TX"}, + {"Twisted Pair", 100, "half"}: {"100BASE-X", "100BASE-TX"}, + {"Twisted Pair", 1000, "full"}: {"1000BASE-T", "1000BASE-T"}, + {"Twisted Pair", 1000, "half"}: {"1000BASE-T", "1000BASE-T"}, + {"Twisted Pair", 2500, "full"}: {"2.5GBASE-T", "2.5GBASE-T"}, + {"Twisted Pair", 5000, "full"}: {"5GBASE-T", "5GBASE-T"}, + {"Twisted Pair", 10000, "full"}: {"10GBASE-T", "10GBASE-T"}, + {"Twisted Pair", 25000, "full"}: {"25GBASE-T", "25GBASE-T"}, + {"Twisted Pair", 40000, "full"}: {"40GBASE-T", "40GBASE-T"}, + {"MII", 10, "full"}: {"10BASE-T", "10BASE-T"}, + {"MII", 10, "half"}: {"10BASE-T", "10BASE-T"}, + {"MII", 100, "full"}: {"100BASE-X", "100BASE-TX"}, + {"MII", 100, "half"}: {"100BASE-X", "100BASE-TX"}, + {"FIBRE", 100, "full"}: {"100BASE-X", ""}, + {"FIBRE", 1000, "full"}: {"1000BASE-X", ""}, + {"FIBRE", 10000, "full"}: {"10GBASE-R", ""}, + {"FIBRE", 25000, "full"}: {"25GBASE-R", ""}, + {"FIBRE", 40000, "full"}: {"40GBASE-R", ""}, + {"FIBRE", 100000, "full"}: {"100GBASE-R", ""}, + {"Direct Attach Copper", 10000, "full"}: {"10GBASE-R", ""}, + {"Direct Attach Copper", 25000, "full"}: {"25GBASE-R", "25GBASE-CR"}, + {"Direct Attach Copper", 40000, "full"}: {"40GBASE-R", "40GBASE-CR4"}, + {"Direct Attach Copper", 100000, "full"}: {"100GBASE-R", "100GBASE-CR4"}, +} + +// ethtoolToPMD maps kernel link-mode base names to IEEE pmd-type +// identity suffixes. The kernel reports modes like "1000baseT/Full"; +// we strip the "/Full" or "/Half" suffix before lookup. 
var ethtoolToPMD = map[string]string{
	// Keys are the part of an ethtool link-mode string before the '/'
	// (the lookup code splits at the first slash); values are the suffix
	// appended to "ieee802-ethernet-phy-type:pmd-type-".  Modes without
	// an entry here are skipped by the translation.

	// 10 Mb/s
	"10baseT":           "10BASE-T",
	"10baseT1L":         "10BASE-T1L",
	// 100 Mb/s
	"100baseT":          "100BASE-TX",
	"100baseT1":         "100BASE-T1",
	"100baseFX":         "100BASE-FX",
	// 1 Gb/s
	"1000baseT":         "1000BASE-T",
	"1000baseT1":        "1000BASE-T1",
	// NOTE(review): the key carries no SX/LX distinction, yet it is
	// mapped to LX specifically -- confirm this is the intended default.
	"1000baseX":         "1000BASE-LX",
	"1000baseKX":        "1000BASE-KX",
	// 2.5 Gb/s and 5 Gb/s
	"2500baseT":         "2.5GBASE-T",
	"2500baseX":         "2.5GBASE-X",
	"5000baseT":         "5GBASE-T",
	// 10 Gb/s
	"10000baseT":        "10GBASE-T",
	"10000baseSR":       "10GBASE-SR",
	"10000baseLR":       "10GBASE-LR",
	"10000baseLRM":      "10GBASE-LRM",
	"10000baseER":       "10GBASE-ER",
	"10000baseKR":       "10GBASE-KR",
	"10000baseKX4":      "10GBASE-KX4",
	// 25 Gb/s
	"25000baseCR":       "25GBASE-CR",
	"25000baseSR":       "25GBASE-SR",
	"25000baseKR":       "25GBASE-KR",
	// 40 Gb/s
	"40000baseCR4":      "40GBASE-CR4",
	"40000baseSR4":      "40GBASE-SR4",
	"40000baseLR4":      "40GBASE-LR4",
	"40000baseKR4":      "40GBASE-KR4",
	// 100 Gb/s
	"100000baseCR4":     "100GBASE-CR4",
	"100000baseSR4":     "100GBASE-SR4",
	// NOTE(review): the combined LR4_ER4 key is reported as LR4 only;
	// an ER4 link would be labelled LR4 -- confirm this is acceptable.
	"100000baseLR4_ER4": "100GBASE-LR4",
	"100000baseKR4":     "100GBASE-KR4",
}

// ethtoolModesToPMD translates a list of ethtool link-mode strings
// (e.g. "1000baseT/Full") into deduped, order-preserving PMD identity
// strings.
+func ethtoolModesToPMD(modes []string) []string { + seen := make(map[string]bool) + var out []string + for _, entry := range modes { + base := entry + if idx := strings.IndexByte(entry, '/'); idx >= 0 { + base = entry[:idx] + } + pmd, ok := ethtoolToPMD[base] + if !ok || seen[pmd] { + continue + } + seen[pmd] = true + out = append(out, "ieee802-ethernet-phy-type:pmd-type-"+pmd) + } + return out +} + +func stringSliceEqual(a, b []string) bool { + if len(a) != len(b) { + return false + } + set := make(map[string]bool, len(a)) + for _, s := range a { + set[s] = true + } + for _, s := range b { + if !set[s] { + return false + } + } + return true +} diff --git a/src/yangerd/internal/ethmonitor/ethmonitor_test.go b/src/yangerd/internal/ethmonitor/ethmonitor_test.go new file mode 100644 index 000000000..2accb1ca9 --- /dev/null +++ b/src/yangerd/internal/ethmonitor/ethmonitor_test.go @@ -0,0 +1,138 @@ +package ethmonitor + +import ( + "testing" +) + +func TestBuildEthernetContainerCopper1G(t *testing.T) { + data := ethtoolJSON{ + Speed: 1000, + Duplex: "Full", + Port: "Twisted Pair", + AutoNegotiation: true, + SupportedLinkModes: []string{ + "10baseT/Half", "10baseT/Full", + "100baseT/Half", "100baseT/Full", + "1000baseT/Full", + }, + AdvertisedLinkModes: []string{ + "10baseT/Half", "10baseT/Full", + "100baseT/Half", "100baseT/Full", + "1000baseT/Full", + }, + } + + eth, speedBPS := buildEthernetContainer(data) + + if speedBPS != 1_000_000_000 { + t.Fatalf("speed = %d, want 1000000000", speedBPS) + } + if eth["phy-type"] != "ieee802-ethernet-phy-type:phy-type-1000BASE-T" { + t.Fatalf("phy-type = %v", eth["phy-type"]) + } + if eth["pmd-type"] != "ieee802-ethernet-phy-type:pmd-type-1000BASE-T" { + t.Fatalf("pmd-type = %v", eth["pmd-type"]) + } + if eth["duplex"] != "full" { + t.Fatalf("duplex = %v", eth["duplex"]) + } + autoneg := eth["auto-negotiation"].(map[string]any) + if autoneg["enable"] != true { + t.Fatal("autoneg should be true") + } + // advertised == supported → 
no advertised-pmd-types key + if _, ok := autoneg["infix-ethernet-interface:advertised-pmd-types"]; ok { + t.Fatal("advertised-pmd-types should be suppressed when equal to supported") + } +} + +func TestBuildEthernetContainerFibre10G(t *testing.T) { + data := ethtoolJSON{ + Speed: 10000, + Duplex: "Full", + Port: "FIBRE", + AutoNegotiation: false, + SupportedLinkModes: []string{"10000baseSR/Full"}, + AdvertisedLinkModes: []string{"10000baseSR/Full"}, + } + + eth, speedBPS := buildEthernetContainer(data) + + if speedBPS != 10_000_000_000 { + t.Fatalf("speed = %d, want 10000000000", speedBPS) + } + // Fibre 10G → phy-type 10GBASE-R, no pmd-type from lookup table + // But exactly one supported mode → pmd-type refined from supported list + if eth["pmd-type"] != "ieee802-ethernet-phy-type:pmd-type-10GBASE-SR" { + t.Fatalf("pmd-type = %v, want refined from single supported mode", eth["pmd-type"]) + } + if eth["phy-type"] != "ieee802-ethernet-phy-type:phy-type-10GBASE-R" { + t.Fatalf("phy-type = %v", eth["phy-type"]) + } +} + +func TestBuildEthernetContainerSpeedUnknown(t *testing.T) { + data := ethtoolJSON{ + Speed: ethtoolSpeedUnknown, + Duplex: "Unknown! 
(255)", + Port: "Twisted Pair", + AutoNegotiation: true, + } + + eth, speedBPS := buildEthernetContainer(data) + + if speedBPS != 0 { + t.Fatalf("speed = %d, want 0 for unknown", speedBPS) + } + if _, ok := eth["speed"]; ok { + t.Fatal("speed should not be set when unknown") + } + if _, ok := eth["phy-type"]; ok { + t.Fatal("phy-type should not be set when speed unknown") + } +} + +func TestBuildEthernetContainerAdvertisedDiffers(t *testing.T) { + data := ethtoolJSON{ + Speed: 1000, + Duplex: "Full", + Port: "Twisted Pair", + SupportedLinkModes: []string{ + "10baseT/Full", "100baseT/Full", "1000baseT/Full", + }, + AdvertisedLinkModes: []string{"1000baseT/Full"}, + } + + eth, _ := buildEthernetContainer(data) + + autoneg := eth["auto-negotiation"].(map[string]any) + adv, ok := autoneg["infix-ethernet-interface:advertised-pmd-types"] + if !ok { + t.Fatal("advertised-pmd-types should be present when != supported") + } + advList := adv.([]string) + if len(advList) != 1 || advList[0] != "ieee802-ethernet-phy-type:pmd-type-1000BASE-T" { + t.Fatalf("advertised = %v", advList) + } +} + +func TestEthtoolModesToPMD(t *testing.T) { + modes := []string{ + "10baseT/Half", "10baseT/Full", + "1000baseT/Full", + "Autoneg", "TP", + } + got := ethtoolModesToPMD(modes) + want := []string{ + "ieee802-ethernet-phy-type:pmd-type-10BASE-T", + "ieee802-ethernet-phy-type:pmd-type-1000BASE-T", + } + if len(got) != len(want) { + t.Fatalf("got %v, want %v", got, want) + } + for i := range want { + if got[i] != want[i] { + t.Fatalf("got[%d] = %q, want %q", i, got[i], want[i]) + } + } +} diff --git a/src/yangerd/internal/frrvty/frrvty.go b/src/yangerd/internal/frrvty/frrvty.go new file mode 100644 index 000000000..4e228b6f9 --- /dev/null +++ b/src/yangerd/internal/frrvty/frrvty.go @@ -0,0 +1,83 @@ +// Package frrvty is a minimal in-process client for an FRR daemon's vty +// Unix socket. +// +// It speaks the same protocol vtysh uses, so yangerd can run "show ..." +// commands (e.g. 
"show ip route json") against zebra without forking +// vtysh. The command is written NUL-terminated; the daemon streams the +// command output followed by a four-byte trailer of three NUL bytes and a +// one-byte CLI return code (\0\0\0). +package frrvty + +import ( + "bytes" + "context" + "fmt" + "io" + "net" +) + +// ZebraVtySocket is the default path to zebra's vty socket. It lives in +// the same runstatedir as the zserv API socket. +const ZebraVtySocket = "/var/run/frr/zebra.vty" + +// Client runs commands against a single FRR daemon vty socket. A fresh +// connection is opened per query, matching vtysh's behaviour. +type Client struct { + socket string +} + +// New returns a Client for the given vty socket path. An empty path +// selects the zebra socket. +func New(socket string) *Client { + if socket == "" { + socket = ZebraVtySocket + } + return &Client{socket: socket} +} + +// Query connects to the vty socket, runs one command, and returns its raw +// output with the protocol trailer stripped. A non-zero CLI return code +// is reported as an error (the partial output is still returned). +func (c *Client) Query(ctx context.Context, command string) ([]byte, error) { + var d net.Dialer + conn, err := d.DialContext(ctx, "unix", c.socket) + if err != nil { + return nil, fmt.Errorf("dial %s: %w", c.socket, err) + } + defer conn.Close() + + if deadline, ok := ctx.Deadline(); ok { + _ = conn.SetDeadline(deadline) + } + + // vtysh writes the command including its trailing NUL terminator. + if _, err := conn.Write(append([]byte(command), 0)); err != nil { + return nil, fmt.Errorf("write %q: %w", command, err) + } + + var buf bytes.Buffer + tmp := make([]byte, 4096) + for { + n, rerr := conn.Read(tmp) + if n > 0 { + buf.Write(tmp[:n]) + // The response ends with \0\0\0. The payload is + // text/JSON and never contains NUL, so testing the last + // four accumulated bytes is unambiguous. 
+ if b := buf.Bytes(); len(b) >= 4 && + b[len(b)-4] == 0 && b[len(b)-3] == 0 && b[len(b)-2] == 0 { + payload := b[:len(b)-4] + if ret := b[len(b)-1]; ret != 0 { + return payload, fmt.Errorf("vty command %q: status %d", command, ret) + } + return payload, nil + } + } + if rerr != nil { + if rerr == io.EOF { + return nil, fmt.Errorf("vty command %q: closed before trailer", command) + } + return nil, fmt.Errorf("vty command %q: read: %w", command, rerr) + } + } +} diff --git a/src/yangerd/internal/frrvty/frrvty_test.go b/src/yangerd/internal/frrvty/frrvty_test.go new file mode 100644 index 000000000..2194e19bf --- /dev/null +++ b/src/yangerd/internal/frrvty/frrvty_test.go @@ -0,0 +1,98 @@ +package frrvty + +import ( + "context" + "net" + "path/filepath" + "sync" + "testing" + "time" +) + +// fakeZebra serves a single vty connection: it reads the NUL-terminated +// command, then writes the configured reply followed by the \0\0\0 +// trailer. +func fakeZebra(t *testing.T, reply string, ret byte) string { + t.Helper() + + sock := filepath.Join(t.TempDir(), "zebra.vty") + ln, err := net.Listen("unix", sock) + if err != nil { + t.Fatalf("listen: %v", err) + } + + var wg sync.WaitGroup + wg.Add(1) + go func() { + defer wg.Done() + conn, err := ln.Accept() + if err != nil { + return + } + defer conn.Close() + + // Read the NUL-terminated command. 
+ buf := make([]byte, 256) + for { + n, err := conn.Read(buf) + if n > 0 && buf[n-1] == 0 { + break + } + if err != nil { + return + } + } + + out := append([]byte(reply), 0, 0, 0, ret) + _, _ = conn.Write(out) + }() + + t.Cleanup(func() { + ln.Close() + wg.Wait() + }) + return sock +} + +func TestQueryStripsTrailer(t *testing.T) { + sock := fakeZebra(t, `{"a":1}`, 0) + c := New(sock) + + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) + defer cancel() + + out, err := c.Query(ctx, "show ip route json") + if err != nil { + t.Fatalf("Query: %v", err) + } + if string(out) != `{"a":1}` { + t.Errorf("output = %q, want %q", out, `{"a":1}`) + } +} + +func TestQueryNonZeroStatus(t *testing.T) { + sock := fakeZebra(t, "Unknown command", 1) + c := New(sock) + + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) + defer cancel() + + out, err := c.Query(ctx, "bogus") + if err == nil { + t.Fatal("expected error for non-zero status") + } + if string(out) != "Unknown command" { + t.Errorf("partial output = %q, want %q", out, "Unknown command") + } +} + +func TestQueryDialError(t *testing.T) { + c := New(filepath.Join(t.TempDir(), "does-not-exist.vty")) + + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) + defer cancel() + + if _, err := c.Query(ctx, "show ip route json"); err == nil { + t.Fatal("expected dial error") + } +} diff --git a/src/yangerd/internal/fswatcher/fswatcher.go b/src/yangerd/internal/fswatcher/fswatcher.go new file mode 100644 index 000000000..f70c17894 --- /dev/null +++ b/src/yangerd/internal/fswatcher/fswatcher.go @@ -0,0 +1,260 @@ +// Package fswatcher provides inotify-based reactive monitoring of +// filesystem paths. It replaces polling for procfs files that support +// inotify (e.g. IP forwarding flags). Each watched path has a +// handler that reads the file and updates the tree, with per-path +// debouncing to coalesce burst writes. 
package fswatcher

import (
	"context"
	"encoding/json"
	"fmt"
	"log/slog"
	"path/filepath"
	"sync"
	"time"

	"github.com/fsnotify/fsnotify"
	"github.com/kernelkit/infix/src/yangerd/internal/tree"
)

// WatchHandler defines the callback for a watched path.
type WatchHandler struct {
	// TreeKey is the tree key the handler's result is stored under
	// (and the key that is deleted when a plain handler's file goes
	// away or its read returns an empty result).
	TreeKey string
	// ReadFunc produces the JSON to store.  It is called with the path
	// the handler was registered under: the file path for file and
	// symlink watches, the directory path for directory watches.  When
	// it returns an error the failure is logged and the tree is left
	// unchanged.
	ReadFunc func(path string) (json.RawMessage, error)
	// Debounce, when greater than zero, delays the read after a
	// write/create event; further events for the same registration
	// restart the delay.  Zero means the read runs immediately.
	Debounce time.Duration
	// UseMerge causes the watcher to call tree.Merge instead of
	// tree.Set, performing a shallow first-level JSON merge into
	// the existing blob at TreeKey.
	UseMerge bool
}

// FSWatcher monitors filesystem paths via inotify and updates the
// tree when files change.
type FSWatcher struct {
	watcher *fsnotify.Watcher // underlying fsnotify instance
	tree    *tree.Tree        // destination for handler results
	// handlers holds per-path registrations (Watch, WatchSymlink),
	// keyed by the exact path passed at registration time.
	handlers    map[string]WatchHandler
	dirHandlers map[string]WatchHandler // directory path → handler
	// debounce holds the pending timer per registered path, for
	// handlers with a non-zero Debounce.
	debounce map[string]*time.Timer
	mu       sync.Mutex // guards handlers, dirHandlers and debounce
	log      *slog.Logger
}

// New creates an FSWatcher backed by an inotify instance.
//
// The returned watcher has no registrations yet.  An error from
// fsnotify.NewWatcher is returned wrapped with an "fsnotify:" prefix.
func New(t *tree.Tree, log *slog.Logger) (*FSWatcher, error) {
	w, err := fsnotify.NewWatcher()
	if err != nil {
		return nil, fmt.Errorf("fsnotify: %w", err)
	}
	return &FSWatcher{
		watcher:     w,
		tree:        t,
		handlers:    make(map[string]WatchHandler),
		dirHandlers: make(map[string]WatchHandler),
		debounce:    make(map[string]*time.Timer),
		log:         log,
	}, nil
}

// Watch registers a handler for a specific filesystem path and adds
// the inotify watch.
//
// The handler is recorded before the watch is added and is not rolled
// back when adding the watch fails, so the returned error only tells the
// caller that no events will arrive for path; the registration itself
// remains in place.
func (fw *FSWatcher) Watch(path string, handler WatchHandler) error {
	fw.mu.Lock()
	fw.handlers[path] = handler
	fw.mu.Unlock()
	// The lock is released first: adding the watch does not touch the maps.
	return fw.watcher.Add(path)
}

// WatchGlob expands a glob pattern and registers a handler for each
// matching path.  Returns the number of paths matched.
+func (fw *FSWatcher) WatchGlob(pattern string, handler WatchHandler) (int, error) { + matches, err := filepath.Glob(pattern) + if err != nil { + return 0, fmt.Errorf("glob %s: %w", pattern, err) + } + for _, path := range matches { + if err := fw.Watch(path, handler); err != nil { + fw.log.Warn("fswatcher: watch failed, skipping", "path", path, "err", err) + } + } + return len(matches), nil +} + +// WatchSymlink registers a handler for a symlink by watching its parent +// directory. fsnotify follows symlinks to the target inode, so replacing +// a symlink (ln -sf) would not trigger events on a direct watch. Watching +// the parent directory catches Create and Rename events for the symlink +// entry itself. +func (fw *FSWatcher) WatchSymlink(path string, handler WatchHandler) error { + dir := filepath.Dir(path) + fw.mu.Lock() + fw.handlers[path] = handler + fw.mu.Unlock() + return fw.watcher.Add(dir) +} + +// WatchDir registers a handler for an entire directory. Any file +// create/write/remove event inside the directory triggers the handler +// with the directory path. The handler's ReadFunc receives the directory +// path (not the individual file), so it can rescan all contents. +func (fw *FSWatcher) WatchDir(dir string, handler WatchHandler) error { + fw.mu.Lock() + fw.dirHandlers[dir] = handler + fw.mu.Unlock() + return fw.watcher.Add(dir) +} + +// InitialRead reads the current value of every watched file and +// populates the tree. Called once after all Watch() calls and glob +// expansion, before Run(). 
+func (fw *FSWatcher) InitialRead() { + fw.mu.Lock() + defer fw.mu.Unlock() + for path, handler := range fw.handlers { + data, err := handler.ReadFunc(path) + if err != nil { + fw.log.Warn("fswatcher: initial read failed", "path", path, "err", err) + continue + } + fw.apply(handler, data) + fw.log.Debug("fswatcher: initial read", "path", path, "key", handler.TreeKey) + } + for dir, handler := range fw.dirHandlers { + data, err := handler.ReadFunc(dir) + if err != nil { + fw.log.Warn("fswatcher: initial read failed", "path", dir, "err", err) + continue + } + fw.apply(handler, data) + fw.log.Debug("fswatcher: initial read", "path", dir, "key", handler.TreeKey) + } +} + +// Run processes inotify events until ctx is cancelled. +func (fw *FSWatcher) Run(ctx context.Context) error { + defer fw.watcher.Close() + for { + select { + case <-ctx.Done(): + return ctx.Err() + case event, ok := <-fw.watcher.Events: + if !ok { + return fmt.Errorf("watcher closed") + } + if event.Has(fsnotify.Write) || event.Has(fsnotify.Create) { + fw.handleEvent(event.Name) + } + if event.Has(fsnotify.Remove) { + fw.handleRemove(event.Name) + } + case err, ok := <-fw.watcher.Errors: + if !ok { + return fmt.Errorf("watcher error channel closed") + } + fw.log.Warn("fsnotify error", "err", err) + } + } +} + +// Close shuts down the inotify watcher and cancels pending timers. 
+func (fw *FSWatcher) Close() { + fw.mu.Lock() + defer fw.mu.Unlock() + for _, timer := range fw.debounce { + timer.Stop() + } + fw.watcher.Close() +} + +func (fw *FSWatcher) handleEvent(path string) { + fw.mu.Lock() + handler, ok := fw.handlers[path] + handlerPath := path + if !ok { + dir := filepath.Dir(path) + handler, ok = fw.dirHandlers[dir] + handlerPath = dir + if !ok { + fw.mu.Unlock() + return + } + } + + if handler.Debounce > 0 { + if timer, exists := fw.debounce[handlerPath]; exists { + timer.Reset(handler.Debounce) + fw.mu.Unlock() + return + } + fw.debounce[handlerPath] = time.AfterFunc(handler.Debounce, func() { + fw.fireHandler(handlerPath, handler) + }) + fw.mu.Unlock() + return + } + fw.mu.Unlock() + fw.fireHandler(handlerPath, handler) +} + +func (fw *FSWatcher) handleRemove(path string) { + fw.mu.Lock() + handler, ok := fw.handlers[path] + if !ok { + dir := filepath.Dir(path) + handler, ok = fw.dirHandlers[dir] + if ok { + fw.mu.Unlock() + fw.fireHandler(dir, handler) + return + } + fw.mu.Unlock() + return + } + fw.mu.Unlock() + + if handler.UseMerge { + fw.fireHandler(path, handler) + } else { + fw.tree.Delete(handler.TreeKey) + fw.log.Debug("fswatcher: removed", "path", path, "key", handler.TreeKey) + } + + if err := fw.watcher.Add(path); err != nil { + fw.mu.Lock() + delete(fw.handlers, path) + if timer, exists := fw.debounce[path]; exists { + timer.Stop() + delete(fw.debounce, path) + } + fw.mu.Unlock() + fw.log.Debug("fswatcher: file gone, handler removed", "path", path) + } +} + +func (fw *FSWatcher) fireHandler(path string, handler WatchHandler) { + data, err := handler.ReadFunc(path) + if err != nil { + fw.log.Warn("fswatcher: read failed", "path", path, "err", err) + return + } + fw.apply(handler, data) + fw.log.Debug("fswatcher: updated", "path", path, "key", handler.TreeKey) +} + +// apply writes a ReadFunc result into the tree. 
For merge handlers it +// merges; for plain handlers an empty result deletes the key rather than +// writing an empty node -- a collector that has "nothing" to report (e.g. +// the containers feature is enabled but no container is running) must not +// leave a bare subtree behind, or clients would see operational data where +// the feature is effectively absent. +func (fw *FSWatcher) apply(handler WatchHandler, data json.RawMessage) { + switch { + case handler.UseMerge: + fw.tree.Merge(handler.TreeKey, data) + case len(data) == 0: + fw.tree.Delete(handler.TreeKey) + default: + fw.tree.Set(handler.TreeKey, data) + } +} diff --git a/src/yangerd/internal/fswatcher/fswatcher_test.go b/src/yangerd/internal/fswatcher/fswatcher_test.go new file mode 100644 index 000000000..0c8cc917a --- /dev/null +++ b/src/yangerd/internal/fswatcher/fswatcher_test.go @@ -0,0 +1,682 @@ +package fswatcher + +import ( + "context" + "encoding/json" + "fmt" + "log/slog" + "os" + "path/filepath" + "strings" + "testing" + "time" + + "github.com/kernelkit/infix/src/yangerd/internal/tree" +) + +func newTestFSWatcher(t *testing.T) (*FSWatcher, *tree.Tree) { + t.Helper() + tr := tree.New() + fw, err := New(tr, slog.Default()) + if err != nil { + t.Fatalf("New: %v", err) + } + t.Cleanup(func() { fw.Close() }) + return fw, tr +} + +func TestNew(t *testing.T) { + tr := tree.New() + fw, err := New(tr, slog.Default()) + if err != nil { + t.Fatalf("New: %v", err) + } + defer fw.Close() + + if fw.tree != tr { + t.Error("tree not stored") + } + if fw.handlers == nil { + t.Error("handlers map nil") + } + if fw.debounce == nil { + t.Error("debounce map nil") + } +} + +func TestWatch(t *testing.T) { + fw, _ := newTestFSWatcher(t) + + tmp := t.TempDir() + path := filepath.Join(tmp, "test.txt") + if err := os.WriteFile(path, []byte("hello"), 0644); err != nil { + t.Fatal(err) + } + + handler := WatchHandler{ + TreeKey: "test/key", + ReadFunc: func(p string) (json.RawMessage, error) { return 
json.RawMessage(`"ok"`), nil }, + } + + if err := fw.Watch(path, handler); err != nil { + t.Fatalf("Watch: %v", err) + } + + fw.mu.Lock() + _, ok := fw.handlers[path] + fw.mu.Unlock() + if !ok { + t.Error("handler not registered") + } +} + +func TestInitialRead(t *testing.T) { + fw, tr := newTestFSWatcher(t) + + tmp := t.TempDir() + p1 := filepath.Join(tmp, "a.txt") + p2 := filepath.Join(tmp, "b.txt") + os.WriteFile(p1, []byte("1"), 0644) + os.WriteFile(p2, []byte("2"), 0644) + + fw.Watch(p1, WatchHandler{ + TreeKey: "key/a", + ReadFunc: func(path string) (json.RawMessage, error) { + return json.RawMessage(`"value-a"`), nil + }, + }) + fw.Watch(p2, WatchHandler{ + TreeKey: "key/b", + ReadFunc: func(path string) (json.RawMessage, error) { + return json.RawMessage(`"value-b"`), nil + }, + }) + + fw.InitialRead() + + if got := tr.Get("key/a"); string(got) != `"value-a"` { + t.Errorf("key/a = %s, want %q", got, `"value-a"`) + } + if got := tr.Get("key/b"); string(got) != `"value-b"` { + t.Errorf("key/b = %s, want %q", got, `"value-b"`) + } +} + +func TestInitialReadError(t *testing.T) { + fw, tr := newTestFSWatcher(t) + + tmp := t.TempDir() + p := filepath.Join(tmp, "fail.txt") + os.WriteFile(p, []byte("x"), 0644) + + fw.Watch(p, WatchHandler{ + TreeKey: "key/fail", + ReadFunc: func(path string) (json.RawMessage, error) { + return nil, fmt.Errorf("read error") + }, + }) + + fw.InitialRead() + + if got := tr.Get("key/fail"); got != nil { + t.Errorf("expected nil for failed read, got %s", got) + } +} + +func TestWatchGlob(t *testing.T) { + fw, _ := newTestFSWatcher(t) + + tmp := t.TempDir() + for _, name := range []string{"x1.conf", "x2.conf", "x3.conf"} { + os.WriteFile(filepath.Join(tmp, name), []byte("data"), 0644) + } + os.WriteFile(filepath.Join(tmp, "y.txt"), []byte("data"), 0644) + + handler := WatchHandler{ + TreeKey: "glob/test", + ReadFunc: func(p string) (json.RawMessage, error) { return json.RawMessage(`"g"`), nil }, + } + + n, err := 
fw.WatchGlob(filepath.Join(tmp, "x*.conf"), handler) + if err != nil { + t.Fatalf("WatchGlob: %v", err) + } + if n != 3 { + t.Errorf("WatchGlob matched %d, want 3", n) + } + + fw.mu.Lock() + count := len(fw.handlers) + fw.mu.Unlock() + if count != 3 { + t.Errorf("handlers count = %d, want 3", count) + } +} + +func TestRunWriteEvent(t *testing.T) { + fw, tr := newTestFSWatcher(t) + + tmp := t.TempDir() + path := filepath.Join(tmp, "watched.txt") + os.WriteFile(path, []byte("initial"), 0644) + + callCount := 0 + fw.Watch(path, WatchHandler{ + TreeKey: "run/test", + ReadFunc: func(p string) (json.RawMessage, error) { + callCount++ + return json.RawMessage(`"updated"`), nil + }, + }) + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + errCh := make(chan error, 1) + go func() { errCh <- fw.Run(ctx) }() + + time.Sleep(50 * time.Millisecond) + + os.WriteFile(path, []byte("changed"), 0644) + + deadline := time.After(2 * time.Second) + for { + if got := tr.Get("run/test"); string(got) == `"updated"` { + break + } + select { + case <-deadline: + t.Fatal("timed out waiting for tree update after write event") + default: + time.Sleep(10 * time.Millisecond) + } + } + + cancel() + err := <-errCh + if err != nil && err != context.Canceled { + t.Errorf("Run returned unexpected error: %v", err) + } +} + +func TestFireHandler(t *testing.T) { + fw, tr := newTestFSWatcher(t) + + handler := WatchHandler{ + TreeKey: "fire/test", + ReadFunc: func(path string) (json.RawMessage, error) { + return json.RawMessage(`{"fired":true}`), nil + }, + } + + fw.fireHandler("/fake/path", handler) + + if got := tr.Get("fire/test"); string(got) != `{"fired":true}` { + t.Errorf("tree value = %s, want %s", got, `{"fired":true}`) + } +} + +func TestFireHandlerReadError(t *testing.T) { + fw, tr := newTestFSWatcher(t) + + handler := WatchHandler{ + TreeKey: "fire/err", + ReadFunc: func(path string) (json.RawMessage, error) { + return nil, fmt.Errorf("broken") + }, + } + + 
fw.fireHandler("/fake/path", handler) + + if got := tr.Get("fire/err"); got != nil { + t.Errorf("expected nil for errored handler, got %s", got) + } +} + +// A plain (non-merge) handler that returns an empty result must delete its +// key, not leave a stale or empty node behind -- e.g. the containers +// collector returns nil when no container is running. +func TestFireHandlerEmptyDeletes(t *testing.T) { + fw, tr := newTestFSWatcher(t) + + tr.Set("fire/gone", json.RawMessage(`{"container":[{"name":"old"}]}`)) + + empty := json.RawMessage(nil) + handler := WatchHandler{ + TreeKey: "fire/gone", + ReadFunc: func(string) (json.RawMessage, error) { return empty, nil }, + } + + fw.fireHandler("/fake/path", handler) + + if got := tr.Get("fire/gone"); got != nil { + t.Errorf("expected key deleted on empty result, got %s", got) + } +} + +func TestDebounce(t *testing.T) { + fw, tr := newTestFSWatcher(t) + + tmp := t.TempDir() + path := filepath.Join(tmp, "debounce.txt") + os.WriteFile(path, []byte("init"), 0644) + + callCount := 0 + fw.Watch(path, WatchHandler{ + TreeKey: "debounce/test", + Debounce: 100 * time.Millisecond, + ReadFunc: func(p string) (json.RawMessage, error) { + callCount++ + return json.RawMessage(fmt.Sprintf(`"call-%d"`, callCount)), nil + }, + }) + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + go fw.Run(ctx) + time.Sleep(50 * time.Millisecond) + + for i := 0; i < 5; i++ { + os.WriteFile(path, []byte(fmt.Sprintf("data-%d", i)), 0644) + time.Sleep(10 * time.Millisecond) + } + + time.Sleep(300 * time.Millisecond) + + got := tr.Get("debounce/test") + if got == nil { + t.Fatal("tree not updated after debounced writes") + } + + if callCount > 3 { + t.Errorf("expected debounce to coalesce writes, but handler called %d times", callCount) + } + + cancel() +} + +func TestRunContextCancellation(t *testing.T) { + fw, _ := newTestFSWatcher(t) + + ctx, cancel := context.WithCancel(context.Background()) + + errCh := make(chan error, 1) + 
go func() { errCh <- fw.Run(ctx) }() + + time.Sleep(20 * time.Millisecond) + cancel() + + err := <-errCh + if err != context.Canceled { + t.Errorf("Run error = %v, want context.Canceled", err) + } +} + +func TestClose(t *testing.T) { + tr := tree.New() + fw, err := New(tr, slog.Default()) + if err != nil { + t.Fatal(err) + } + + tmp := t.TempDir() + path := filepath.Join(tmp, "close.txt") + os.WriteFile(path, []byte("x"), 0644) + + fw.Watch(path, WatchHandler{ + TreeKey: "close/test", + Debounce: time.Second, + ReadFunc: func(p string) (json.RawMessage, error) { return json.RawMessage(`"x"`), nil }, + }) + + fw.handleEvent(path) + + fw.mu.Lock() + timerCount := len(fw.debounce) + fw.mu.Unlock() + if timerCount != 1 { + t.Errorf("expected 1 debounce timer, got %d", timerCount) + } + + fw.Close() +} + +func TestHandleRemoveMergeHandler(t *testing.T) { + fw, tr := newTestFSWatcher(t) + + tmp := t.TempDir() + path := filepath.Join(tmp, "forwarding") + os.WriteFile(path, []byte("1"), 0644) + + fw.Watch(path, WatchHandler{ + TreeKey: "routing", + ReadFunc: func(_ string) (json.RawMessage, error) { + return json.RawMessage(`{"interfaces":{"interface":["eth0"]}}`), nil + }, + UseMerge: true, + }) + + fw.InitialRead() + + got := tr.Get("routing") + if got == nil { + t.Fatal("tree not populated after InitialRead") + } + + os.Remove(path) + fw.handleRemove(path) + + got = tr.Get("routing") + if got == nil { + t.Fatal("tree entry should still exist after merge-remove") + } + if string(got) != `{"interfaces":{"interface":["eth0"]}}` { + t.Errorf("got %s, want updated merge data", got) + } + + fw.mu.Lock() + _, handlerExists := fw.handlers[path] + fw.mu.Unlock() + if handlerExists { + t.Error("handler should be cleaned up after permanent removal") + } +} + +func TestHandleRemovePlainHandler(t *testing.T) { + fw, tr := newTestFSWatcher(t) + + tmp := t.TempDir() + path := filepath.Join(tmp, "value.txt") + os.WriteFile(path, []byte("data"), 0644) + + fw.Watch(path, WatchHandler{ + 
TreeKey: "plain/key", + ReadFunc: func(p string) (json.RawMessage, error) { + return json.RawMessage(`"hello"`), nil + }, + }) + + fw.InitialRead() + + if got := tr.Get("plain/key"); string(got) != `"hello"` { + t.Fatalf("initial = %s, want %q", got, `"hello"`) + } + + os.Remove(path) + fw.handleRemove(path) + + if got := tr.Get("plain/key"); got != nil { + t.Errorf("tree entry should be deleted after remove, got %s", got) + } + + fw.mu.Lock() + _, handlerExists := fw.handlers[path] + fw.mu.Unlock() + if handlerExists { + t.Error("handler should be cleaned up after permanent removal") + } +} + +func TestHandleRemoveUnknownPath(t *testing.T) { + fw, _ := newTestFSWatcher(t) + fw.handleRemove("/nonexistent/path") +} + +func TestHandleRemoveRewatchSuccess(t *testing.T) { + fw, tr := newTestFSWatcher(t) + + tmp := t.TempDir() + path := filepath.Join(tmp, "ephemeral.txt") + os.WriteFile(path, []byte("1"), 0644) + + calls := 0 + fw.Watch(path, WatchHandler{ + TreeKey: "ephem", + ReadFunc: func(_ string) (json.RawMessage, error) { + calls++ + return json.RawMessage(fmt.Sprintf(`"v%d"`, calls)), nil + }, + UseMerge: true, + }) + + fw.InitialRead() + + fw.handleRemove(path) + + fw.mu.Lock() + _, handlerExists := fw.handlers[path] + fw.mu.Unlock() + if !handlerExists { + t.Error("handler should still exist when file still exists (rewatch succeeds)") + } + + got := tr.Get("ephem") + if string(got) != `"v2"` { + t.Errorf("got %s, want %q (handler should have been called again)", got, `"v2"`) + } +} + +func TestWatchSymlink(t *testing.T) { + fw, _ := newTestFSWatcher(t) + + tmp := t.TempDir() + targetA := filepath.Join(tmp, "target-a") + targetB := filepath.Join(tmp, "target-b") + link := filepath.Join(tmp, "link") + os.WriteFile(targetA, []byte("a"), 0644) + os.WriteFile(targetB, []byte("b"), 0644) + os.Symlink(targetA, link) + + handler := WatchHandler{ + TreeKey: "sym/test", + ReadFunc: func(p string) (json.RawMessage, error) { return json.RawMessage(`"sym"`), nil }, + } + + 
if err := fw.WatchSymlink(link, handler); err != nil { + t.Fatalf("WatchSymlink: %v", err) + } + + fw.mu.Lock() + _, ok := fw.handlers[link] + fw.mu.Unlock() + if !ok { + t.Error("handler not registered under symlink path") + } +} + +func TestWatchSymlinkReplace(t *testing.T) { + fw, tr := newTestFSWatcher(t) + + tmp := t.TempDir() + targetA := filepath.Join(tmp, "zone-a") + targetB := filepath.Join(tmp, "zone-b") + link := filepath.Join(tmp, "current") + os.WriteFile(targetA, []byte("a"), 0644) + os.WriteFile(targetB, []byte("b"), 0644) + os.Symlink(targetA, link) + + calls := 0 + fw.WatchSymlink(link, WatchHandler{ + TreeKey: "sym/replace", + ReadFunc: func(p string) (json.RawMessage, error) { + calls++ + target, _ := os.Readlink(p) + return json.RawMessage(fmt.Sprintf(`"target-%d-%s"`, calls, filepath.Base(target))), nil + }, + }) + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + go fw.Run(ctx) + time.Sleep(50 * time.Millisecond) + + os.Remove(link) + os.Symlink(targetB, link) + + deadline := time.After(2 * time.Second) + for { + got := tr.Get("sym/replace") + if got != nil && strings.Contains(string(got), "zone-b") { + break + } + select { + case <-deadline: + t.Fatalf("timed out waiting for symlink replace event; tree = %s", tr.Get("sym/replace")) + default: + time.Sleep(10 * time.Millisecond) + } + } + + cancel() +} + +// fw_setenv (U-Boot) and grub-editenv rewrite the env via a temp file + +// atomic rename, so the env gets a new inode. A direct file watch misses +// that; the parent-directory watch used for boot-order must catch it, +// otherwise operational boot-order stays stale until the next reboot. 
+func TestWatchSymlinkAtomicRename(t *testing.T) {
+	fw, tr := newTestFSWatcher(t)
+
+	// Seed the env file before the watch is registered.
+	tmp := t.TempDir()
+	env := filepath.Join(tmp, "uboot.env")
+	os.WriteFile(env, []byte("BOOT_ORDER=net\n"), 0644)
+
+	// The handler publishes the trimmed file content as a quoted string.
+	fw.WatchSymlink(env, WatchHandler{
+		TreeKey: "boot/env",
+		ReadFunc: func(p string) (json.RawMessage, error) {
+			data, _ := os.ReadFile(p)
+			return json.RawMessage(fmt.Sprintf("%q", strings.TrimSpace(string(data)))), nil
+		},
+	})
+
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+
+	go fw.Run(ctx)
+	time.Sleep(50 * time.Millisecond)
+
+	// Replace the file the way fw_setenv does: write a temp, rename over.
+	tmpEnv := env + ".tmp"
+	os.WriteFile(tmpEnv, []byte("BOOT_ORDER=primary net\n"), 0644)
+	if err := os.Rename(tmpEnv, env); err != nil {
+		t.Fatalf("rename: %v", err)
+	}
+
+	// Poll the tree until the new content shows up or two seconds pass.
+	deadline := time.After(2 * time.Second)
+	for {
+		if got := tr.Get("boot/env"); got != nil && strings.Contains(string(got), "primary net") {
+			break
+		}
+		select {
+		case <-deadline:
+			t.Fatalf("timed out waiting for atomic-rename event; tree = %s", tr.Get("boot/env"))
+		default:
+			time.Sleep(10 * time.Millisecond)
+		}
+	}
+
+	cancel()
+}
+
+// TestWatchDir registers a directory handler, checks that InitialRead
+// populates the tree with the pre-existing file, then creates a second file
+// while the watcher runs and waits for it to appear in the tree.
+func TestWatchDir(t *testing.T) {
+	fw, tr := newTestFSWatcher(t)
+
+	tmp := t.TempDir()
+	os.WriteFile(filepath.Join(tmp, "a.keys"), []byte("key-a"), 0644)
+
+	// The handler publishes the directory listing as {"files": [...]}.
+	fw.WatchDir(tmp, WatchHandler{
+		TreeKey: "dir/test",
+		ReadFunc: func(dir string) (json.RawMessage, error) {
+			entries, _ := os.ReadDir(dir)
+			names := make([]string, 0, len(entries))
+			for _, e := range entries {
+				names = append(names, e.Name())
+			}
+			return json.Marshal(map[string]interface{}{"files": names})
+		},
+		Debounce: 50 * time.Millisecond,
+		UseMerge: true,
+	})
+
+	fw.InitialRead()
+	got := tr.Get("dir/test")
+	if got == nil {
+		t.Fatal("tree not populated after InitialRead for dir handler")
+	}
+	if !strings.Contains(string(got), "a.keys") {
+		t.Fatalf("initial read missing a.keys: %s", got)
+	}
+
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+
+	go fw.Run(ctx)
+	time.Sleep(50 * time.Millisecond)
+
+	os.WriteFile(filepath.Join(tmp, "b.keys"), []byte("key-b"), 0644)
+
+	// Poll the tree until the new file is listed or two seconds pass.
+	deadline := time.After(2 * time.Second)
+	for {
+		got = tr.Get("dir/test")
+		if got != nil && strings.Contains(string(got), "b.keys") {
+			break
+		}
+		select {
+		case <-deadline:
+			t.Fatalf("timed out waiting for dir event; tree = %s", tr.Get("dir/test"))
+		default:
+			time.Sleep(10 * time.Millisecond)
+		}
+	}
+
+	cancel()
+}
+
+// TestWatchDirRemoveFile watches a directory holding x.keys and y.keys,
+// removes x.keys while the watcher runs and waits until the tree lists
+// y.keys but no longer x.keys.
+func TestWatchDirRemoveFile(t *testing.T) {
+	fw, tr := newTestFSWatcher(t)
+
+	tmp := t.TempDir()
+	os.WriteFile(filepath.Join(tmp, "x.keys"), []byte("data"), 0644)
+	os.WriteFile(filepath.Join(tmp, "y.keys"), []byte("data"), 0644)
+
+	fw.WatchDir(tmp, WatchHandler{
+		TreeKey: "dir/rm",
+		ReadFunc: func(dir string) (json.RawMessage, error) {
+			entries, _ := os.ReadDir(dir)
+			names := make([]string, 0, len(entries))
+			for _, e := range entries {
+				names = append(names, e.Name())
+			}
+			return json.Marshal(map[string]interface{}{"files": names})
+		},
+		UseMerge: true,
+	})
+
+	fw.InitialRead()
+
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+
+	go fw.Run(ctx)
+	time.Sleep(50 * time.Millisecond)
+
+	os.Remove(filepath.Join(tmp, "x.keys"))
+
+	// Poll the tree until x.keys is gone and y.keys is still listed.
+	deadline := time.After(2 * time.Second)
+	for {
+		got := tr.Get("dir/rm")
+		if got != nil && !strings.Contains(string(got), "x.keys") && strings.Contains(string(got), "y.keys") {
+			break
+		}
+		select {
+		case <-deadline:
+			t.Fatalf("timed out waiting for dir remove event; tree = %s", tr.Get("dir/rm"))
+		default:
+			time.Sleep(10 * time.Millisecond)
+		}
+	}
+
+	cancel()
+}
diff --git a/src/yangerd/internal/iface/iface.go b/src/yangerd/internal/iface/iface.go
new file mode 100644
index 000000000..6f74a16a4
--- /dev/null
+++ b/src/yangerd/internal/iface/iface.go
@@ -0,0 +1,952 @@
+// Package iface transforms raw `ip -json` data into YANG-shaped
+// ietf-interfaces JSON.
+package iface
+
+import (
+	"encoding/json"
+	"fmt"
+	"strconv"
+	"strings"
+)
+
+// FileChecker abstracts filesystem probes needed during interface transformation.
+type FileChecker interface {
+	// Exists reports whether path is present.
+	Exists(path string) bool
+	// ReadFile returns the content of path as a string.
+	ReadFile(path string) (string, error)
+}
+
+// Transform converts raw `ip -json` link/address/statistics arrays into
+// `{"interface":[...]}`. The caller (NLMonitor) stores this at tree key
+// "ietf-interfaces:interfaces"; the IPC server adds the module-qualified
+// wrapper when responding to clients.
+//
+// neighData is the output of `ip -json neigh show` — an array of objects
+// with keys: dst, dev, lladdr, state (array of strings like "REACHABLE",
+// "STALE", "PERMANENT", etc.). May be nil if unavailable.
+func Transform(linkData, addrData, statsData, neighData json.RawMessage, fc FileChecker) json.RawMessage {
+	links := dedup(decodeObjects(linkData))
+	addrs := decodeObjects(addrData)
+	stats := decodeObjects(statsData)
+	neighs := decodeObjects(neighData)
+
+	// Index address entries by interface name; entries without a name are
+	// dropped and a later entry with the same name replaces an earlier one.
+	addrByName := make(map[string]map[string]any, len(addrs))
+	for _, addr := range addrs {
+		ifname := getString(addr, "ifname")
+		if ifname == "" {
+			continue
+		}
+		addrByName[ifname] = addr
+	}
+
+	// Same indexing for the statistics entries.
+	statsByName := make(map[string]map[string]any, len(stats))
+	for _, st := range stats {
+		ifname := getString(st, "ifname")
+		if ifname == "" {
+			continue
+		}
+		statsByName[ifname] = st
+	}
+
+	// Neighbours are grouped per device; one device can have many entries.
+	neighByName := make(map[string][]map[string]any)
+	for _, n := range neighs {
+		dev := getString(n, "dev")
+		if dev == "" {
+			continue
+		}
+		neighByName[dev] = append(neighByName[dev], n)
+	}
+
+	interfaces := make([]map[string]any, 0, len(links))
+	for _, iplink := range links {
+		if skipInterface(iplink) {
+			continue
+		}
+
+		ifname := getString(iplink, "ifname")
+		ipaddr, ok := addrByName[ifname]
+		if !ok {
+			ipaddr = map[string]any{}
+		}
+
+		// Copy stats64 from the statistics entry onto the link object, where
+		// statistics() looks for it.
+		if st, ok := statsByName[ifname]; ok {
+			if stat64, ok := st["stats64"]; ok {
+				iplink["stats64"] = stat64
+			}
+		}
+
+		iface := interfaceCommon(iplink,
ipaddr, neighByName[ifname], fc) + yangType := getString(iface, "type") + + switch yangType { + case "infix-if-type:vlan": + if v := vlanAugment(iplink); len(v) > 0 { + iface["infix-interfaces:vlan"] = v + } + case "infix-if-type:veth": + if v := vethAugment(iplink); len(v) > 0 { + iface["infix-interfaces:veth"] = v + } + case "infix-if-type:gre", "infix-if-type:gretap": + if v := greAugment(iplink); len(v) > 0 { + iface["infix-interfaces:gre"] = v + } + case "infix-if-type:vxlan": + if v := vxlanAugment(iplink); len(v) > 0 { + iface["infix-interfaces:vxlan"] = v + } + case "infix-if-type:lag": + if v := lagAugment(iplink); len(v) > 0 { + iface["infix-interfaces:lag"] = v + } + } + + switch iplink2yangLower(iplink) { + case "infix-interfaces:bridge-port": + if lower := bridgePortLower(iplink); len(lower) > 0 { + iface["infix-interfaces:bridge-port"] = lower + } + case "infix-interfaces:lag-port": + if lower := lagPortLower(iplink); len(lower) > 0 { + iface["infix-interfaces:lag-port"] = lower + } + } + + interfaces = append(interfaces, iface) + } + + out := map[string]any{ + "interface": interfaces, + } + + raw, err := json.Marshal(out) + if err != nil { + return json.RawMessage(`{"interface":[]}`) + } + + return raw +} + +func decodeObjects(raw json.RawMessage) []map[string]any { + if len(raw) == 0 { + return nil + } + + var entries []any + if err := json.Unmarshal(raw, &entries); err != nil { + return nil + } + + out := make([]map[string]any, 0, len(entries)) + for _, entry := range entries { + obj, ok := asMap(entry) + if !ok { + continue + } + out = append(out, obj) + } + + return out +} + +func skipInterface(iplink map[string]any) bool { + if getString(iplink, "group") == "internal" { + return true + } + + switch getString(iplink, "link_type") { + case "can", "vcan": + return true + default: + return false + } +} + +// dedup removes duplicate link entries that share the same ifindex. +// When an interface is renamed (e.g. 
eth0 → e1), ip -json may report
+// both the old and new names with the same ifindex. The first entry seen
+// for an ifindex is kept, unless a later duplicate has operstate "UP" and
+// the kept one does not; the later entry then takes over the same position.
+// Entries without a usable ifindex (0) are never deduplicated.
+//
+// NOTE(review): this comment previously said the *last* entry wins when
+// neither is up, but the code keeps the first. Confirm which is intended.
+func dedup(links []map[string]any) []map[string]any {
+	// seen maps ifindex to the position of its entry in out.
+	seen := make(map[int]int, len(links))
+	out := make([]map[string]any, 0, len(links))
+	for _, link := range links {
+		idx := getIntOrZero(link, "ifindex")
+		if idx == 0 {
+			out = append(out, link)
+			continue
+		}
+		if prev, ok := seen[idx]; ok {
+			if getString(link, "operstate") == "UP" && getString(out[prev], "operstate") != "UP" {
+				out[prev] = link
+			}
+		} else {
+			seen[idx] = len(out)
+			out = append(out, link)
+		}
+	}
+	return out
+}
+
+// interfaceCommon builds the fields shared by every interface type: type,
+// name, if-index, admin and oper status, plus description, phys-address,
+// statistics and the ietf-ip ipv4/ipv6 containers when they have content.
+func interfaceCommon(iplink, ipaddr map[string]any, neighEntries []map[string]any, fc FileChecker) map[string]any {
+	flags := getStrings(iplink, "flags")
+
+	iface := map[string]any{
+		"type":         iplink2yangType(iplink, fc),
+		"name":         getString(iplink, "ifname"),
+		"if-index":     getIntOrZero(iplink, "ifindex"),
+		"admin-status": boolToStatus(contains(flags, "UP"), "up", "down"),
+		"oper-status":  iplink2yangOperstate(getString(iplink, "operstate")),
+	}
+
+	// The description is set whenever the ifalias key is present.
+	if _, ok := iplink["ifalias"]; ok {
+		iface["description"] = getString(iplink, "ifalias")
+	}
+
+	// Point-to-point links get no phys-address.
+	if !contains(flags, "POINTOPOINT") {
+		if address, ok := iplink["address"]; ok {
+			iface["phys-address"] = fmt.Sprintf("%v", address)
+		}
+	}
+
+	if stats := statistics(iplink); len(stats) > 0 {
+		iface["statistics"] = stats
+	}
+
+	if ipv4 := ipv4Data(ipaddr, neighEntries); len(ipv4) > 0 {
+		iface["ietf-ip:ipv4"] = ipv4
+	}
+
+	if ipv6 := ipv6Data(ipaddr, neighEntries, fc); len(ipv6) > 0 {
+		iface["ietf-ip:ipv6"] = ipv6
+	}
+
+	return iface
+}
+
+// iplink2yangType maps a link to an infix-if-type identity. link_type alone
+// decides loopback and gre/gre6; "ether" and "none" links are classified by
+// linkinfo.info_kind; every other link_type is "other". fc may be nil, in
+// which case the wireless probe is skipped.
+func iplink2yangType(iplink map[string]any, fc FileChecker) string {
+	ifname := getString(iplink, "ifname")
+
+	switch getString(iplink, "link_type") {
+	case "loopback":
+		return "infix-if-type:loopback"
+	case "gre", "gre6":
+		return "infix-if-type:gre"
+	case "ether":
+		if fc != nil {
+			if
fc.Exists(fmt.Sprintf("/sys/class/net/%s/wireless/", ifname)) {
+				return "infix-if-type:wifi"
+			}
+		}
+	case "none":
+		// Classified by linkinfo.info_kind below.
+	default:
+		return "infix-if-type:other"
+	}
+
+	// A missing linkinfo or unknown info_kind ends up as plain ethernet.
+	linkinfo, _ := asMap(iplink["linkinfo"])
+	switch getString(linkinfo, "info_kind") {
+	case "bond":
+		return "infix-if-type:lag"
+	case "bridge":
+		return "infix-if-type:bridge"
+	case "dummy":
+		return "infix-if-type:dummy"
+	case "gretap", "ip6gretap":
+		return "infix-if-type:gretap"
+	case "vxlan":
+		return "infix-if-type:vxlan"
+	case "veth":
+		return "infix-if-type:veth"
+	case "vlan":
+		return "infix-if-type:vlan"
+	case "wireguard":
+		return "infix-if-type:wireguard"
+	default:
+		return "infix-if-type:ethernet"
+	}
+}
+
+// iplink2yangLower returns the lower-layer container name for a link that is
+// a bridge or bond port (linkinfo.info_slave_kind), or "" otherwise.
+func iplink2yangLower(iplink map[string]any) string {
+	linkinfo, _ := asMap(iplink["linkinfo"])
+	switch getString(linkinfo, "info_slave_kind") {
+	case "bridge":
+		return "infix-interfaces:bridge-port"
+	case "bond":
+		return "infix-interfaces:lag-port"
+	default:
+		return ""
+	}
+}
+
+// iplink2yangOperstate translates an operstate string from `ip` into the
+// oper-status value; anything unrecognised becomes "unknown".
+func iplink2yangOperstate(oper string) string {
+	switch oper {
+	case "DOWN":
+		return "down"
+	case "UP":
+		return "up"
+	case "DORMANT":
+		return "dormant"
+	case "TESTING":
+		return "testing"
+	case "LOWERLAYERDOWN":
+		return "lower-layer-down"
+	case "NOTPRESENT":
+		return "not-present"
+	default:
+		return "unknown"
+	}
+}
+
+// statistics returns in-octets and out-octets, taken from stats64.rx.bytes
+// and stats64.tx.bytes, as decimal strings. A counter that is missing or
+// zero is left out, so the result may be empty.
+func statistics(iplink map[string]any) map[string]any {
+	out := map[string]any{}
+
+	stats64, _ := asMap(iplink["stats64"])
+	rx, _ := asMap(stats64["rx"])
+	tx, _ := asMap(stats64["tx"])
+
+	if octets, ok := rx["bytes"]; ok && isTruthy(octets) {
+		out["in-octets"] = toCounterString(octets)
+	}
+
+	if octets, ok := tx["bytes"]; ok && isTruthy(octets) {
+		out["out-octets"] = toCounterString(octets)
+	}
+
+	return out
+}
+
+// ipv4Data builds the ietf-ip ipv4 container from one address entry and the
+// interface's neighbour entries. It returns nil when both inputs are empty
+// and may return an empty map when nothing qualifies.
+func ipv4Data(ipaddr map[string]any, neighEntries []map[string]any) map[string]any {
+	if len(ipaddr) == 0 && len(neighEntries) == 0 {
+		return nil
+	}
+
+	out := map[string]any{}
+	if len(ipaddr) > 0 {
+		if mtu, ok := getInt(ipaddr, "mtu"); ok &&
mtu != 0 && getString(ipaddr, "ifname") != "lo" {
+			out["mtu"] = mtu
+		}
+
+		if addr := addresses(ipaddr, "inet"); len(addr) > 0 {
+			out["address"] = addr
+		}
+	}
+
+	if n := neighbors(neighEntries, 4); len(n) > 0 {
+		out["neighbor"] = n
+	}
+
+	return out
+}
+
+// ipv6Data builds the ietf-ip ipv6 container. The MTU is read through fc
+// from /proc/sys/net/ipv6/conf/<ifname>/mtu and is left out when fc is nil,
+// the read fails or the content is not an integer. It returns nil when both
+// inputs are empty.
+func ipv6Data(ipaddr map[string]any, neighEntries []map[string]any, fc FileChecker) map[string]any {
+	if len(ipaddr) == 0 && len(neighEntries) == 0 {
+		return nil
+	}
+
+	out := map[string]any{}
+	if len(ipaddr) > 0 {
+		ifname := getString(ipaddr, "ifname")
+		if ifname != "" && fc != nil {
+			path := fmt.Sprintf("/proc/sys/net/ipv6/conf/%s/mtu", ifname)
+			if raw, err := fc.ReadFile(path); err == nil {
+				trimmed := strings.TrimSpace(raw)
+				if mtu, err := strconv.Atoi(trimmed); err == nil {
+					out["mtu"] = mtu
+				}
+			}
+		}
+
+		if addr := addresses(ipaddr, "inet6"); len(addr) > 0 {
+			out["address"] = addr
+		}
+	}
+
+	if n := neighbors(neighEntries, 6); len(n) > 0 {
+		out["neighbor"] = n
+	}
+
+	return out
+}
+
+// addresses returns the addr_info entries of the given family ("inet" or
+// "inet6") as address list entries with ip, prefix-length and origin.
+// It returns nil when addr_info is missing or not an array.
+func addresses(ipaddr map[string]any, family string) []map[string]any {
+	addrInfo, ok := ipaddr["addr_info"]
+	if !ok {
+		return nil
+	}
+
+	arr, ok := asArray(addrInfo)
+	if !ok {
+		return nil
+	}
+
+	out := make([]map[string]any, 0, len(arr))
+	for _, entry := range arr {
+		inet, ok := asMap(entry)
+		if !ok {
+			continue
+		}
+
+		if getString(inet, "family") != family {
+			continue
+		}
+
+		// "ip" is passed through untouched, so a missing "local" key ends up
+		// as a nil value here.
+		address := map[string]any{
+			"ip":            inet["local"],
+			"prefix-length": getIntOrZero(inet, "prefixlen"),
+			"origin":        inet2yangOrigin(inet),
+		}
+		out = append(out, address)
+	}
+
+	return out
+}
+
+// neighbors converts the neighbour entries of one IP version (4 or 6) into
+// neighbor list entries. Entries without dst or lladdr are skipped; origin
+// is "static" for PERMANENT entries and "dynamic" otherwise. It returns nil
+// when nothing qualifies.
+func neighbors(entries []map[string]any, ipVersion int) []map[string]any {
+	out := make([]map[string]any, 0, len(entries))
+	for _, entry := range entries {
+		dst := getString(entry, "dst")
+		if dst == "" {
+			continue
+		}
+
+		if !neighMatchesFamily(dst, ipVersion) {
+			continue
+		}
+
+		lladdr := getString(entry, "lladdr")
+		if lladdr == "" {
+			continue
+		}
+
+		states := getStrings(entry, "state")
+		origin := "dynamic"
+		if
contains(states, "PERMANENT") {
+			origin = "static"
+		}
+
+		neigh := map[string]any{
+			"ip":                 dst,
+			"link-layer-address": lladdr,
+			"origin":             origin,
+		}
+
+		// state and is-router are only emitted for IPv6 neighbours.
+		if ipVersion == 6 {
+			if state := neighState(states); state != "" {
+				neigh["state"] = state
+			}
+			if getBool(entry, "router") {
+				// Marshals as [null]; presumably the JSON encoding of a YANG
+				// "empty" leaf — confirm against the ietf-ip model.
+				neigh["is-router"] = []any{nil}
+			}
+		}
+
+		out = append(out, neigh)
+	}
+
+	if len(out) == 0 {
+		return nil
+	}
+	return out
+}
+
+// neighMatchesFamily classifies dst by the first '.' or ':' it contains:
+// '.' means IPv4, ':' means IPv6. It reports whether that matches ipVersion
+// and returns false when dst contains neither character.
+func neighMatchesFamily(dst string, ipVersion int) bool {
+	for i := 0; i < len(dst); i++ {
+		if dst[i] == '.' {
+			return ipVersion == 4
+		}
+		if dst[i] == ':' {
+			return ipVersion == 6
+		}
+	}
+	return false
+}
+
+// neighState returns the lower-case name of the first recognised state in
+// states, or "" when none is recognised (for example PERMANENT only).
+func neighState(states []string) string {
+	xlate := map[string]string{
+		"REACHABLE":  "reachable",
+		"STALE":      "stale",
+		"DELAY":      "delay",
+		"PROBE":      "probe",
+		"INCOMPLETE": "incomplete",
+	}
+	for _, s := range states {
+		if v, ok := xlate[s]; ok {
+			return v
+		}
+	}
+	return ""
+}
+
+// inet2yangOrigin derives the address origin from the "protocol" field of an
+// addr_info entry. kernel_ll/kernel_ra addresses that carry a
+// "stable-privacy" key are "random", otherwise "link-layer"; unknown
+// protocols are "other".
+func inet2yangOrigin(inet map[string]any) string {
+	proto := getString(inet, "protocol")
+	if proto == "kernel_ll" || proto == "kernel_ra" {
+		if _, ok := inet["stable-privacy"]; ok {
+			return "random"
+		}
+	}
+
+	switch proto {
+	case "kernel_ll", "kernel_ra":
+		return "link-layer"
+	case "static":
+		return "static"
+	case "dhcp":
+		return "dhcp"
+	case "random":
+		return "random"
+	default:
+		return "other"
+	}
+}
+
+// vlanAugment returns the vlan container (tag-type, id and, when the link
+// has a "link" field, lower-layer-if), or nil without linkinfo.info_data.
+func vlanAugment(iplink map[string]any) map[string]any {
+	info := infoData(iplink)
+	if len(info) == 0 {
+		return nil
+	}
+
+	vlan := map[string]any{
+		"tag-type": proto2yang(getString(info, "protocol")),
+		"id":       getIntOrZero(info, "id"),
+	}
+
+	if lower := getString(iplink, "link"); lower != "" {
+		vlan["lower-layer-if"] = lower
+	}
+
+	return vlan
+}
+
+// vethAugment returns the veth container with the peer name taken from the
+// link's "link" field, or nil when that field is empty.
+func vethAugment(iplink map[string]any) map[string]any {
+	peer := getString(iplink, "link")
+	if peer == "" {
+		return nil
+	}
+
+	return map[string]any{"peer": peer}
+}
+
+// greAugment returns the tunnel endpoints from linkinfo.info_data, using
+// local6/remote6 when local/remote is nil or blank. It returns nil without
+// info_data.
+//
+// NOTE(review): an endpoint missing in both forms is emitted as a nil value
+// (JSON null) — confirm consumers accept that.
+func greAugment(iplink map[string]any) map[string]any {
+	info := infoData(iplink)
+	if len(info) == 0 {
+		return
nil
+	}
+
+	return map[string]any{
+		"local":  firstAny(info["local"], info["local6"]),
+		"remote": firstAny(info["remote"], info["remote6"]),
+	}
+}
+
+// vxlanAugment returns the gre-style local/remote endpoints plus, when
+// info_data has an "id", the VNI. It returns nil when greAugment does.
+func vxlanAugment(iplink map[string]any) map[string]any {
+	vxlan := greAugment(iplink)
+	if len(vxlan) == 0 {
+		return nil
+	}
+
+	info := infoData(iplink)
+	if vni, ok := info["id"]; ok {
+		vxlan["vni"] = vni
+	}
+
+	return vxlan
+}
+
+// lagAugment returns the lag container for a bond: the mode, the
+// link-monitor debounce delays and either an "lacp" container (802.3ad) or
+// a "static" container (every other mode). It returns nil without
+// linkinfo.info_data.
+func lagAugment(iplink map[string]any) map[string]any {
+	info := infoData(iplink)
+	if len(info) == 0 {
+		return nil
+	}
+
+	mode := lagMode(getString(info, "mode"))
+	bond := map[string]any{
+		"mode": mode,
+		"link-monitor": map[string]any{
+			"debounce": map[string]any{
+				"up":   getIntOrZero(info, "updelay"),
+				"down": getIntOrZero(info, "downdelay"),
+			},
+		},
+	}
+
+	if mode == "lacp" {
+		lacp := map[string]any{
+			"mode": boolToStatus(getString(info, "ad_lacp_active") == "on", "active", "passive"),
+			"rate": getString(info, "ad_lacp_rate"),
+			"hash": lagHash(getString(info, "xmit_hash_policy")),
+		}
+
+		// ad_info fields are copied through only when present.
+		adInfo, ok := asMap(info["ad_info"])
+		if ok {
+			if v, ok := adInfo["aggregator"]; ok {
+				lacp["aggregator-id"] = v
+			}
+			if v, ok := adInfo["actor_key"]; ok {
+				lacp["actor-key"] = v
+			}
+			if v, ok := adInfo["partner_key"]; ok {
+				lacp["partner-key"] = v
+			}
+			if v, ok := adInfo["partner_mac"]; ok {
+				lacp["partner-mac"] = v
+			}
+		}
+
+		if v, ok := info["ad_actor_sys_prio"]; ok {
+			lacp["system-priority"] = v
+		}
+
+		bond["lacp"] = lacp
+	} else {
+		// NOTE(review): unlike the lacp branch, mode and hash are passed
+		// through as reported, without lagHash translation — confirm this
+		// is intended.
+		bond["static"] = map[string]any{
+			"mode": getString(info, "mode"),
+			"hash": getString(info, "xmit_hash_policy"),
+		}
+	}
+
+	return bond
+}
+
+// bridgePortSTP returns {"cist": {"state": ...}} from the port's "state"
+// field, or an empty map when the state is missing.
+func bridgePortSTP(info map[string]any) map[string]any {
+	state := getString(info, "state")
+	if state == "" {
+		return map[string]any{}
+	}
+
+	return map[string]any{
+		"cist": map[string]any{
+			"state": state,
+		},
+	}
+}
+
+// bridgePortLower returns the bridge-port container for a link enslaved to
+// a bridge: the bridge name plus flood, multicast and stp settings from
+// linkinfo.info_slave_data. It returns nil without a master or slave data.
+func bridgePortLower(iplink map[string]any) map[string]any {
+	master := getString(iplink, "master")
+	if master == "" {
+		return nil
+	}
+
+	linkinfo, _ :=
asMap(iplink["linkinfo"])
+	info, _ := asMap(linkinfo["info_slave_data"])
+	if len(info) == 0 {
+		return nil
+	}
+
+	return map[string]any{
+		"bridge": master,
+		"flood": map[string]any{
+			"broadcast": getBool(info, "bcast_flood"),
+			"unicast":   getBool(info, "flood"),
+			"multicast": getBool(info, "mcast_flood"),
+		},
+		"multicast": map[string]any{
+			"fast-leave": getBool(info, "fastleave"),
+			"router":     bridgeRouterMode(getIntOrZero(info, "multicast_router")),
+		},
+		"stp": bridgePortSTP(info),
+	}
+}
+
+// lagPortLower returns the lag-port container for a link enslaved to a bond.
+// Without linkinfo.info_slave_data the port is reported as "backup" with
+// zero link failures. The lacp sub-container is added only when
+// ad_aggregator_id is present. It returns nil when the link has no master.
+func lagPortLower(iplink map[string]any) map[string]any {
+	master := getString(iplink, "master")
+	if master == "" {
+		return nil
+	}
+
+	port := map[string]any{"lag": master}
+
+	linkinfo, _ := asMap(iplink["linkinfo"])
+	info, _ := asMap(linkinfo["info_slave_data"])
+	if len(info) == 0 {
+		port["state"] = "backup"
+		port["link-failures"] = 0
+		return port
+	}
+
+	port["state"] = strings.ToLower(getString(info, "state"))
+	port["link-failures"] = getIntOrZero(info, "link_failure_count")
+
+	if _, ok := info["ad_aggregator_id"]; ok {
+		port["lacp"] = map[string]any{
+			"aggregator-id": info["ad_aggregator_id"],
+			"actor-state":   getString(info, "ad_actor_oper_port_state_str"),
+			"partner-state": getString(info, "ad_partner_oper_port_state_str"),
+		}
+	}
+
+	return port
+}
+
+// infoData returns linkinfo.info_data of a link, or nil when either level is
+// missing or not an object.
+func infoData(iplink map[string]any) map[string]any {
+	linkinfo, ok := asMap(iplink["linkinfo"])
+	if !ok {
+		return nil
+	}
+
+	data, ok := asMap(linkinfo["info_data"])
+	if !ok {
+		return nil
+	}
+
+	return data
+}
+
+// proto2yang maps a VLAN protocol name to its tag-type identity; anything
+// other than 802.1Q and 802.1ad becomes "other".
+func proto2yang(proto string) string {
+	switch proto {
+	case "802.1Q":
+		return "ieee802-dot1q-types:c-vlan"
+	case "802.1ad":
+		return "ieee802-dot1q-types:s-vlan"
+	default:
+		return "other"
+	}
+}
+
+// lagMode maps a bond mode to the lag mode: 802.3ad is "lacp", every other
+// mode is "static" (the balance-xor case is the same as the default).
+func lagMode(mode string) string {
+	switch mode {
+	case "802.3ad":
+		return "lacp"
+	case "balance-xor":
+		return "static"
+	default:
+		return "static"
+	}
+}
+
+// lagHash maps an xmit_hash_policy name to the hash value used in the
+// output, replacing '+' with '-'. Unknown policies fall back to "layer2".
+func lagHash(hash string) string {
+	switch hash {
+	case "layer2":
+		return "layer2"
+	case "layer3+4":
+		return "layer3-4"
+	case "layer2+3":
+		return "layer2-3"
+	case "encap2+3":
+		return "encap2-3"
+	case "encap3+4":
+		return "encap3-4"
+	case "vlan+srcmac":
+		return "vlan-srcmac"
+	default:
+		return "layer2"
+	}
+}
+
+// bridgeRouterMode maps the numeric multicast_router setting to its name.
+// Values other than 0, 1 and 2 yield the literal "UNKNOWN".
+func bridgeRouterMode(v int) string {
+	switch v {
+	case 0:
+		return "off"
+	case 1:
+		return "auto"
+	case 2:
+		return "permanent"
+	default:
+		return "UNKNOWN"
+	}
+}
+
+// getString returns obj[key] as a string. A missing or nil value yields "";
+// a non-string value is formatted with %v.
+func getString(obj map[string]any, key string) string {
+	v, ok := obj[key]
+	if !ok || v == nil {
+		return ""
+	}
+
+	s, ok := v.(string)
+	if ok {
+		return s
+	}
+
+	return fmt.Sprintf("%v", v)
+}
+
+// getInt returns obj[key] as an int and whether the conversion succeeded.
+// It accepts Go integer types, float64 (truncated), json.Number and decimal
+// strings; a missing, nil or unparsable value yields (0, false).
+func getInt(obj map[string]any, key string) (int, bool) {
+	v, ok := obj[key]
+	if !ok || v == nil {
+		return 0, false
+	}
+
+	switch n := v.(type) {
+	case int:
+		return n, true
+	case int8:
+		return int(n), true
+	case int16:
+		return int(n), true
+	case int32:
+		return int(n), true
+	case int64:
+		return int(n), true
+	case uint:
+		return int(n), true
+	case uint8:
+		return int(n), true
+	case uint16:
+		return int(n), true
+	case uint32:
+		return int(n), true
+	case uint64:
+		return int(n), true
+	case float64:
+		return int(n), true
+	case json.Number:
+		i, err := n.Int64()
+		if err != nil {
+			return 0, false
+		}
+		return int(i), true
+	case string:
+		i, err := strconv.Atoi(strings.TrimSpace(n))
+		if err != nil {
+			return 0, false
+		}
+		return i, true
+	default:
+		return 0, false
+	}
+}
+
+// getIntOrZero is getInt with failures mapped to 0.
+func getIntOrZero(obj map[string]any, key string) int {
+	v, ok := getInt(obj, key)
+	if !ok {
+		return 0
+	}
+	return v
+}
+
+// getBool returns obj[key] as a bool. Non-bool values are formatted with %v
+// and count as true when they read "1", "true", "on" or "yes" (case and
+// surrounding whitespace ignored). A missing or nil value is false.
+func getBool(obj map[string]any, key string) bool {
+	v, ok := obj[key]
+	if !ok || v == nil {
+		return false
+	}
+
+	b, ok := v.(bool)
+	if ok {
+		return b
+	}
+
+	s := strings.ToLower(strings.TrimSpace(fmt.Sprintf("%v", v)))
+	return s == "1" || s == "true" || s == "on" || s == "yes"
+}
+
+// getStrings returns obj[key] as a string slice. A []string is returned as
+// is; the elements of a generic array are formatted with %v. A missing, nil
+// or non-array value yields nil.
+func getStrings(obj map[string]any, key string) []string {
+	v, ok := obj[key]
+	if !ok || v == nil {
+		return nil
+	}
+
+	if direct, ok := v.([]string); ok {
+		return direct
+	}
+
+	arr,
ok := asArray(v) + if !ok { + return nil + } + + out := make([]string, 0, len(arr)) + for _, item := range arr { + out = append(out, fmt.Sprintf("%v", item)) + } + return out +} + +func asMap(v any) (map[string]any, bool) { + if v == nil { + return nil, false + } + + m, ok := v.(map[string]any) + if ok { + return m, true + } + + m2, ok := v.(map[string]interface{}) + if ok { + return map[string]any(m2), true + } + + return nil, false +} + +func asArray(v any) ([]any, bool) { + if v == nil { + return nil, false + } + + arr, ok := v.([]any) + if ok { + return arr, true + } + + arr2, ok := v.([]interface{}) + if ok { + return []any(arr2), true + } + + return nil, false +} + +func contains(values []string, needle string) bool { + for _, value := range values { + if value == needle { + return true + } + } + return false +} + +func isTruthy(v any) bool { + if v == nil { + return false + } + + s := strings.TrimSpace(fmt.Sprintf("%v", v)) + return s != "" && s != "0" +} + +func toCounterString(v any) string { + switch n := v.(type) { + case int: + return strconv.FormatInt(int64(n), 10) + case int8: + return strconv.FormatInt(int64(n), 10) + case int16: + return strconv.FormatInt(int64(n), 10) + case int32: + return strconv.FormatInt(int64(n), 10) + case int64: + return strconv.FormatInt(n, 10) + case uint: + return strconv.FormatUint(uint64(n), 10) + case uint8: + return strconv.FormatUint(uint64(n), 10) + case uint16: + return strconv.FormatUint(uint64(n), 10) + case uint32: + return strconv.FormatUint(uint64(n), 10) + case uint64: + return strconv.FormatUint(n, 10) + case float64: + return strconv.FormatInt(int64(n), 10) + case json.Number: + return n.String() + case string: + return strings.TrimSpace(n) + default: + return fmt.Sprintf("%v", v) + } +} + +func firstAny(a, b any) any { + if a != nil { + s := strings.TrimSpace(fmt.Sprintf("%v", a)) + if s != "" { + return a + } + } + return b +} + +func boolToStatus(cond bool, yes, no string) string { + if cond { + return 
yes
+	}
+	return no
+}
diff --git a/src/yangerd/internal/iface/iface_test.go b/src/yangerd/internal/iface/iface_test.go
new file mode 100644
index 000000000..e2b436f42
--- /dev/null
+++ b/src/yangerd/internal/iface/iface_test.go
@@ -0,0 +1,920 @@
+package iface
+
+import (
+	"encoding/json"
+	"errors"
+	"testing"
+)
+
+// mockFileChecker is an in-memory FileChecker for tests.
+type mockFileChecker struct {
+	exists  map[string]bool   // paths reported as present by Exists
+	files   map[string]string // path -> content returned by ReadFile
+	readErr map[string]error  // path -> error forced from ReadFile
+}
+
+// Exists reports the value stored for path; a nil receiver or nil map
+// reports false.
+func (m *mockFileChecker) Exists(path string) bool {
+	if m == nil || m.exists == nil {
+		return false
+	}
+	return m.exists[path]
+}
+
+// ReadFile returns the forced error for path if there is one, else the
+// stored content, else a "not found" error.
+func (m *mockFileChecker) ReadFile(path string) (string, error) {
+	if m == nil {
+		return "", errors.New("nil file checker")
+	}
+	if err, ok := m.readErr[path]; ok {
+		return "", err
+	}
+	if v, ok := m.files[path]; ok {
+		return v, nil
+	}
+	return "", errors.New("not found")
+}
+
+// mustRaw marshals v to JSON and fails the test on error.
+func mustRaw(t *testing.T, v any) json.RawMessage {
+	t.Helper()
+	b, err := json.Marshal(v)
+	if err != nil {
+		t.Fatalf("marshal: %v", err)
+	}
+	return b
+}
+
+// mustInterfaces decodes Transform output and returns the entries of its
+// "interface" list, failing the test if the shape is not as expected.
+func mustInterfaces(t *testing.T, raw json.RawMessage) []map[string]any {
+	t.Helper()
+
+	var root map[string]any
+	if err := json.Unmarshal(raw, &root); err != nil {
+		t.Fatalf("unmarshal transform output: %v", err)
+	}
+
+	arr, ok := root["interface"].([]any)
+	if !ok {
+		t.Fatalf("missing interface list: %#v", root)
+	}
+
+	out := make([]map[string]any, 0, len(arr))
+	for _, v := range arr {
+		m, ok := v.(map[string]any)
+		if !ok {
+			t.Fatalf("interface entry not object: %T", v)
+		}
+		out = append(out, m)
+	}
+
+	return out
+}
+
+// mustIfaceByName returns the interface entry called name and fails the
+// test when there is none.
+func mustIfaceByName(t *testing.T, ifaces []map[string]any, name string) map[string]any {
+	t.Helper()
+	for _, iface := range ifaces {
+		if iface["name"] == name {
+			return iface
+		}
+	}
+	t.Fatalf("interface %q not found", name)
+	return nil
+}
+
+// TestTransformEmptyInputs checks that nil and empty-array inputs both
+// produce an empty interface list.
+func TestTransformEmptyInputs(t *testing.T) {
+	tests := []struct {
+		name     string
+		linkData json.RawMessage
+		addrData json.RawMessage
+		stats    json.RawMessage
+	}{
+		{name: "nil raw
messages"},
+		{
+			name:     "empty arrays",
+			linkData: mustRaw(t, []map[string]any{}),
+			addrData: mustRaw(t, []map[string]any{}),
+			stats:    mustRaw(t, []map[string]any{}),
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			ifaces := mustInterfaces(t, Transform(tt.linkData, tt.addrData, tt.stats, nil, nil))
+			if len(ifaces) != 0 {
+				t.Fatalf("expected empty interface list, got %d", len(ifaces))
+			}
+		})
+	}
+}
+
+// TestTransformSingleLoopback checks name, type and admin/oper status of a
+// lone loopback link.
+func TestTransformSingleLoopback(t *testing.T) {
+	link := []map[string]any{{
+		"ifindex":    1,
+		"ifname":     "lo",
+		"flags":      []any{"LOOPBACK", "UP"},
+		"link_type":  "loopback",
+		"operstate":  "UNKNOWN",
+		"address":    "00:00:00:00:00:00",
+		"statistics": map[string]any{},
+	}}
+
+	ifaces := mustInterfaces(t, Transform(mustRaw(t, link), nil, nil, nil, nil))
+	if len(ifaces) != 1 {
+		t.Fatalf("expected 1 interface, got %d", len(ifaces))
+	}
+
+	lo := ifaces[0]
+	if lo["name"] != "lo" {
+		t.Fatalf("name = %v", lo["name"])
+	}
+	if lo["type"] != "infix-if-type:loopback" {
+		t.Fatalf("type = %v", lo["type"])
+	}
+	if lo["admin-status"] != "up" || lo["oper-status"] != "unknown" {
+		t.Fatalf("admin/oper mismatch: %v/%v", lo["admin-status"], lo["oper-status"])
+	}
+}
+
+// TestTransformSingleEthernetWithIPv4IPv6 checks the ipv4 and ipv6
+// containers of an ethernet link: the IPv4 MTU comes from the address data,
+// the IPv6 MTU from the mocked /proc file.
+func TestTransformSingleEthernetWithIPv4IPv6(t *testing.T) {
+	link := []map[string]any{{
+		"ifindex":   2,
+		"ifname":    "eth0",
+		"flags":     []any{"UP"},
+		"link_type": "ether",
+		"operstate": "UP",
+		"address":   "52:54:00:12:34:56",
+	}}
+
+	addr := []map[string]any{{
+		"ifname": "eth0",
+		"mtu":    1500,
+		"addr_info": []map[string]any{
+			{"family": "inet", "local": "192.0.2.10", "prefixlen": 24, "protocol": "static"},
+			{"family": "inet6", "local": "2001:db8::10", "prefixlen": 64, "protocol": "kernel_ra"},
+		},
+	}}
+
+	fc := &mockFileChecker{files: map[string]string{"/proc/sys/net/ipv6/conf/eth0/mtu": "1400\n"}}
+
+	ifaces := mustInterfaces(t, Transform(mustRaw(t, link), mustRaw(t, addr), nil, nil, fc))
+	eth0 := mustIfaceByName(t, ifaces, "eth0")
+
+	if eth0["type"] != "infix-if-type:ethernet" {
+		t.Fatalf("unexpected type: %v", eth0["type"])
+	}
+
+	// Numbers come back as float64 because the output is re-decoded with
+	// encoding/json into map[string]any.
+	ipv4, ok := eth0["ietf-ip:ipv4"].(map[string]any)
+	if !ok {
+		t.Fatalf("missing ipv4 container: %#v", eth0)
+	}
+	if ipv4["mtu"] != float64(1500) {
+		t.Fatalf("ipv4 mtu = %v", ipv4["mtu"])
+	}
+	v4addrs := ipv4["address"].([]any)
+	v4 := v4addrs[0].(map[string]any)
+	if v4["ip"] != "192.0.2.10" || v4["prefix-length"] != float64(24) || v4["origin"] != "static" {
+		t.Fatalf("unexpected ipv4 address entry: %#v", v4)
+	}
+
+	ipv6, ok := eth0["ietf-ip:ipv6"].(map[string]any)
+	if !ok {
+		t.Fatalf("missing ipv6 container: %#v", eth0)
+	}
+	if ipv6["mtu"] != float64(1400) {
+		t.Fatalf("ipv6 mtu = %v", ipv6["mtu"])
+	}
+	v6addrs := ipv6["address"].([]any)
+	v6 := v6addrs[0].(map[string]any)
+	if v6["ip"] != "2001:db8::10" || v6["prefix-length"] != float64(64) || v6["origin"] != "link-layer" {
+		t.Fatalf("unexpected ipv6 address entry: %#v", v6)
+	}
+}
+
+// TestTransformStatisticsCountersAsStrings checks that in-octets and
+// out-octets are emitted as JSON strings.
+func TestTransformStatisticsCountersAsStrings(t *testing.T) {
+	link := []map[string]any{{
+		"ifindex":   3,
+		"ifname":    "eth1",
+		"flags":     []any{"UP"},
+		"link_type": "ether",
+		"operstate": "UP",
+	}}
+
+	stats := []map[string]any{{
+		"ifname": "eth1",
+		"stats64": map[string]any{
+			"rx": map[string]any{"bytes": uint64(1234567890)},
+			"tx": map[string]any{"bytes": uint64(9876543210)},
+		},
+	}}
+
+	ifaces := mustInterfaces(t, Transform(mustRaw(t, link), nil, mustRaw(t, stats), nil, nil))
+	eth1 := mustIfaceByName(t, ifaces, "eth1")
+	st, ok := eth1["statistics"].(map[string]any)
+	if !ok {
+		t.Fatalf("missing statistics: %#v", eth1)
+	}
+
+	if _, ok := st["in-octets"].(string); !ok {
+		t.Fatalf("in-octets must be string, got %T", st["in-octets"])
+	}
+	if _, ok := st["out-octets"].(string); !ok {
+		t.Fatalf("out-octets must be string, got %T", st["out-octets"])
+	}
+}
+
+// TestTransformVLANAugment checks type, tag-type, id and lower-layer-if of
+// a VLAN link.
+func TestTransformVLANAugment(t *testing.T) {
+	link := []map[string]any{{
+		"ifindex":   10,
+		"ifname":    "eth0.100",
+		"flags":     []any{"UP"},
+		"link_type": "none",
+		"operstate": "UP",
+		"link":      "eth0",
+		"linkinfo": map[string]any{
+			"info_kind": "vlan",
+			"info_data": map[string]any{"protocol": "802.1Q", "id": 100},
+		},
+	}}
+
+	ifaces := mustInterfaces(t, Transform(mustRaw(t, link), nil, nil, nil, nil))
+	vlan := mustIfaceByName(t, ifaces, "eth0.100")
+
+	if vlan["type"] != "infix-if-type:vlan" {
+		t.Fatalf("type = %v", vlan["type"])
+	}
+	v, ok := vlan["infix-interfaces:vlan"].(map[string]any)
+	if !ok {
+		t.Fatalf("missing vlan augment: %#v", vlan)
+	}
+	if v["tag-type"] != "ieee802-dot1q-types:c-vlan" || v["id"] != float64(100) || v["lower-layer-if"] != "eth0" {
+		t.Fatalf("unexpected vlan augment: %#v", v)
+	}
+}
+
+// TestTransformVethAugment checks that the veth peer is taken from the
+// link's "link" field.
+func TestTransformVethAugment(t *testing.T) {
+	link := []map[string]any{{
+		"ifname":    "veth0",
+		"ifindex":   11,
+		"flags":     []any{"UP"},
+		"link_type": "none",
+		"operstate": "UP",
+		"link":      "veth1",
+		"linkinfo":  map[string]any{"info_kind": "veth"},
+	}}
+
+	ifaces := mustInterfaces(t, Transform(mustRaw(t, link), nil, nil, nil, nil))
+	veth := mustIfaceByName(t, ifaces, "veth0")
+	v, ok := veth["infix-interfaces:veth"].(map[string]any)
+	if !ok || v["peer"] != "veth1" {
+		t.Fatalf("unexpected veth augment: %#v", veth)
+	}
+}
+
+// TestTransformGREAndVXLANAugments checks type and tunnel endpoints of a
+// GRE link (typed by link_type) and a VXLAN link (typed by info_kind),
+// including the VXLAN VNI.
+func TestTransformGREAndVXLANAugments(t *testing.T) {
+	link := []map[string]any{
+		{
+			"ifname":    "gre1",
+			"ifindex":   12,
+			"flags":     []any{"UP"},
+			"link_type": "gre",
+			"operstate": "UP",
+			"linkinfo": map[string]any{
+				"info_data": map[string]any{"local": "192.0.2.1", "remote": "198.51.100.1"},
+			},
+		},
+		{
+			"ifname":    "vxlan10",
+			"ifindex":   13,
+			"flags":     []any{"UP"},
+			"link_type": "none",
+			"operstate": "UP",
+			"linkinfo": map[string]any{
+				"info_kind": "vxlan",
+				"info_data": map[string]any{"local": "10.0.0.1", "remote": "10.0.0.2", "id": 10},
+			},
+		},
+	}
+
+	ifaces := mustInterfaces(t, Transform(mustRaw(t, link), nil, nil, nil, nil))
+
+	gre := mustIfaceByName(t, ifaces, "gre1")
+	if gre["type"] != "infix-if-type:gre" {
+		t.Fatalf("gre type = %v", gre["type"])
+	}
+	g, ok
:= gre["infix-interfaces:gre"].(map[string]any)
+	if !ok || g["local"] != "192.0.2.1" || g["remote"] != "198.51.100.1" {
+		t.Fatalf("unexpected gre augment: %#v", g)
+	}
+
+	vx := mustIfaceByName(t, ifaces, "vxlan10")
+	if vx["type"] != "infix-if-type:vxlan" {
+		t.Fatalf("vxlan type = %v", vx["type"])
+	}
+	v, ok := vx["infix-interfaces:vxlan"].(map[string]any)
+	if !ok || v["local"] != "10.0.0.1" || v["remote"] != "10.0.0.2" || v["vni"] != float64(10) {
+		t.Fatalf("unexpected vxlan augment: %#v", v)
+	}
+}
+
+// TestTransformLAGAugmentModes checks the lag container of an 802.3ad bond
+// (mode "lacp" with translated hash) and of a balance-xor bond (mode
+// "static" with mode and hash passed through).
+func TestTransformLAGAugmentModes(t *testing.T) {
+	link := []map[string]any{
+		{
+			"ifname":    "bond0",
+			"ifindex":   20,
+			"flags":     []any{"UP"},
+			"link_type": "none",
+			"operstate": "UP",
+			"linkinfo": map[string]any{
+				"info_kind": "bond",
+				"info_data": map[string]any{
+					"mode":              "802.3ad",
+					"updelay":           10,
+					"downdelay":         20,
+					"ad_lacp_active":    "on",
+					"ad_lacp_rate":      "fast",
+					"xmit_hash_policy":  "layer3+4",
+					"ad_actor_sys_prio": 100,
+					"ad_info": map[string]any{
+						"aggregator":  7,
+						"actor_key":   1000,
+						"partner_key": 2000,
+						"partner_mac": "02:00:00:00:00:01",
+					},
+				},
+			},
+		},
+		{
+			"ifname":    "bond1",
+			"ifindex":   21,
+			"flags":     []any{"UP"},
+			"link_type": "none",
+			"operstate": "UP",
+			"linkinfo": map[string]any{
+				"info_kind": "bond",
+				"info_data": map[string]any{
+					"mode":             "balance-xor",
+					"xmit_hash_policy": "layer2",
+				},
+			},
+		},
+	}
+
+	ifaces := mustInterfaces(t, Transform(mustRaw(t, link), nil, nil, nil, nil))
+
+	bond0 := mustIfaceByName(t, ifaces, "bond0")
+	b0 := bond0["infix-interfaces:lag"].(map[string]any)
+	if b0["mode"] != "lacp" {
+		t.Fatalf("bond0 mode = %v", b0["mode"])
+	}
+	lacp := b0["lacp"].(map[string]any)
+	if lacp["mode"] != "active" || lacp["rate"] != "fast" || lacp["hash"] != "layer3-4" {
+		t.Fatalf("unexpected bond0 lacp: %#v", lacp)
+	}
+
+	bond1 := mustIfaceByName(t, ifaces, "bond1")
+	b1 := bond1["infix-interfaces:lag"].(map[string]any)
+	if b1["mode"] != "static" {
+		t.Fatalf("bond1 mode = %v", b1["mode"])
+	}
+	static := b1["static"].(map[string]any)
+	if static["mode"] != "balance-xor" || static["hash"] != "layer2" {
+		t.Fatalf("unexpected bond1 static: %#v", static)
+	}
+}
+
+// TestTransformBridgePortLowerLayer checks the bridge name and the
+// multicast router mode of a bridge port.
+func TestTransformBridgePortLowerLayer(t *testing.T) {
+	link := []map[string]any{{
+		"ifname":    "eth2",
+		"ifindex":   30,
+		"flags":     []any{"UP"},
+		"link_type": "ether",
+		"operstate": "UP",
+		"master":    "br0",
+		"linkinfo": map[string]any{
+			"info_slave_kind": "bridge",
+			"info_slave_data": map[string]any{
+				"bcast_flood":      true,
+				"flood":            false,
+				"mcast_flood":      true,
+				"fastleave":        true,
+				"multicast_router": 2,
+			},
+		},
+	}}
+
+	ifaces := mustInterfaces(t, Transform(mustRaw(t, link), nil, nil, nil, nil))
+	eth2 := mustIfaceByName(t, ifaces, "eth2")
+	lower := eth2["infix-interfaces:bridge-port"].(map[string]any)
+	if lower["bridge"] != "br0" {
+		t.Fatalf("bridge lower bridge = %v", lower["bridge"])
+	}
+	mcast := lower["multicast"].(map[string]any)
+	if mcast["router"] != "permanent" {
+		t.Fatalf("bridge router mode = %v", mcast["router"])
+	}
+}
+
+// TestTransformLagPortLowerLayer checks lag name, lower-cased state, link
+// failure count and the LACP aggregator id of a bond port.
+func TestTransformLagPortLowerLayer(t *testing.T) {
+	link := []map[string]any{{
+		"ifname":    "eth3",
+		"ifindex":   31,
+		"flags":     []any{"UP"},
+		"link_type": "ether",
+		"operstate": "UP",
+		"master":    "bond0",
+		"linkinfo": map[string]any{
+			"info_slave_kind": "bond",
+			"info_slave_data": map[string]any{
+				"state":                          "ACTIVE",
+				"link_failure_count":             5,
+				"ad_aggregator_id":               42,
+				"ad_actor_oper_port_state_str":   "collecting_distributing",
+				"ad_partner_oper_port_state_str": "collecting_distributing",
+			},
+		},
+	}}
+
+	ifaces := mustInterfaces(t, Transform(mustRaw(t, link), nil, nil, nil, nil))
+	eth3 := mustIfaceByName(t, ifaces, "eth3")
+	lower := eth3["infix-interfaces:lag-port"].(map[string]any)
+	if lower["lag"] != "bond0" || lower["state"] != "active" || lower["link-failures"] != float64(5) {
+		t.Fatalf("unexpected lag-port lower-layer: %#v", lower)
+	}
+	lacp := lower["lacp"].(map[string]any)
+	if lacp["aggregator-id"] != float64(42) {
+		t.Fatalf("lag-port lacp aggregator-id = %v", lacp["aggregator-id"])
+	}
+}
+
+// TestTransformFilteredInterfaces checks that links in group "internal" and
+// can/vcan links are dropped, leaving only eth9.
+func TestTransformFilteredInterfaces(t *testing.T) {
+	link := []map[string]any{
+		{"ifname": "dummy0", "group": "internal", "link_type": "none"},
+		{"ifname": "can0", "link_type": "can"},
+		{"ifname": "vcan0", "link_type": "vcan"},
+		{"ifname": "eth9", "ifindex": 99, "flags": []any{"UP"}, "link_type": "ether", "operstate": "UP"},
+	}
+
+	ifaces := mustInterfaces(t, Transform(mustRaw(t, link), nil, nil, nil, nil))
+	if len(ifaces) != 1 {
+		t.Fatalf("expected only one surviving interface, got %d", len(ifaces))
+	}
+	if ifaces[0]["name"] != "eth9" {
+		t.Fatalf("surviving interface = %v", ifaces[0]["name"])
+	}
+}
+
+// TestTransformWiFiType checks that an ether link whose wireless sysfs
+// directory exists (per the mock) is typed as wifi.
+func TestTransformWiFiType(t *testing.T) {
+	link := []map[string]any{{
+		"ifname":    "wlan0",
+		"ifindex":   40,
+		"flags":     []any{"UP"},
+		"link_type": "ether",
+		"operstate": "UP",
+	}}
+
+	fc := &mockFileChecker{exists: map[string]bool{"/sys/class/net/wlan0/wireless/": true}}
+	ifaces := mustInterfaces(t, Transform(mustRaw(t, link), nil, nil, nil, fc))
+	wlan0 := mustIfaceByName(t, ifaces, "wlan0")
+	if wlan0["type"] != "infix-if-type:wifi" {
+		t.Fatalf("wlan0 type = %v", wlan0["type"])
+	}
+}
+
+func TestIplink2yangTypeMappings(t *testing.T) {
+	fc := &mockFileChecker{exists: map[string]bool{"/sys/class/net/wlan0/wireless/": true}}
+
+	tests := []struct {
+		name   string
+		iplink map[string]any
+		want   string
+	}{
+		{name: "loopback", iplink: map[string]any{"ifname": "lo", "link_type": "loopback"}, want: "infix-if-type:loopback"},
+		{name: "gre", iplink: map[string]any{"ifname": "gre0", "link_type": "gre"}, want: "infix-if-type:gre"},
+		{name: "gre6", iplink: map[string]any{"ifname": "gre6", "link_type": "gre6"}, want: "infix-if-type:gre"},
+		{name: "wifi via ether", iplink: map[string]any{"ifname": "wlan0", "link_type": "ether"}, want: "infix-if-type:wifi"},
+		{name: "bond", iplink: map[string]any{"ifname": "bond0", "link_type": "none", "linkinfo": map[string]any{"info_kind":
"bond"}}, want: "infix-if-type:lag"}, + {name: "bridge", iplink: map[string]any{"ifname": "br0", "link_type": "none", "linkinfo": map[string]any{"info_kind": "bridge"}}, want: "infix-if-type:bridge"}, + {name: "dummy", iplink: map[string]any{"ifname": "dummy0", "link_type": "none", "linkinfo": map[string]any{"info_kind": "dummy"}}, want: "infix-if-type:dummy"}, + {name: "gretap", iplink: map[string]any{"ifname": "gretap0", "link_type": "none", "linkinfo": map[string]any{"info_kind": "gretap"}}, want: "infix-if-type:gretap"}, + {name: "vxlan", iplink: map[string]any{"ifname": "vxlan10", "link_type": "none", "linkinfo": map[string]any{"info_kind": "vxlan"}}, want: "infix-if-type:vxlan"}, + {name: "veth", iplink: map[string]any{"ifname": "veth0", "link_type": "none", "linkinfo": map[string]any{"info_kind": "veth"}}, want: "infix-if-type:veth"}, + {name: "vlan", iplink: map[string]any{"ifname": "eth0.10", "link_type": "none", "linkinfo": map[string]any{"info_kind": "vlan"}}, want: "infix-if-type:vlan"}, + {name: "wireguard", iplink: map[string]any{"ifname": "wg0", "link_type": "none", "linkinfo": map[string]any{"info_kind": "wireguard"}}, want: "infix-if-type:wireguard"}, + {name: "default ethernet", iplink: map[string]any{"ifname": "eth0", "link_type": "none", "linkinfo": map[string]any{"info_kind": "unknown"}}, want: "infix-if-type:ethernet"}, + {name: "unknown link type", iplink: map[string]any{"ifname": "x", "link_type": "strange"}, want: "infix-if-type:other"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := iplink2yangType(tt.iplink, fc) + if got != tt.want { + t.Fatalf("iplink2yangType() = %q, want %q", got, tt.want) + } + }) + } +} + +func TestIplink2yangOperstateMappings(t *testing.T) { + tests := []struct { + in string + want string + }{ + {"DOWN", "down"}, + {"UP", "up"}, + {"DORMANT", "dormant"}, + {"TESTING", "testing"}, + {"LOWERLAYERDOWN", "lower-layer-down"}, + {"NOTPRESENT", "not-present"}, + {"WHATEVER", "unknown"}, + 
} + + for _, tt := range tests { + if got := iplink2yangOperstate(tt.in); got != tt.want { + t.Fatalf("iplink2yangOperstate(%q) = %q, want %q", tt.in, got, tt.want) + } + } +} + +func TestSkipInterface(t *testing.T) { + tests := []struct { + name string + iplink map[string]any + want bool + }{ + {name: "internal group", iplink: map[string]any{"group": "internal"}, want: true}, + {name: "can", iplink: map[string]any{"link_type": "can"}, want: true}, + {name: "vcan", iplink: map[string]any{"link_type": "vcan"}, want: true}, + {name: "normal", iplink: map[string]any{"link_type": "ether"}, want: false}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := skipInterface(tt.iplink); got != tt.want { + t.Fatalf("skipInterface() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestInet2yangOrigin(t *testing.T) { + tests := []struct { + name string + inet map[string]any + want string + }{ + {name: "kernel_ll", inet: map[string]any{"protocol": "kernel_ll"}, want: "link-layer"}, + {name: "kernel_ra", inet: map[string]any{"protocol": "kernel_ra"}, want: "link-layer"}, + {name: "stable privacy kernel_ll", inet: map[string]any{"protocol": "kernel_ll", "stable-privacy": true}, want: "random"}, + {name: "static", inet: map[string]any{"protocol": "static"}, want: "static"}, + {name: "dhcp", inet: map[string]any{"protocol": "dhcp"}, want: "dhcp"}, + {name: "random", inet: map[string]any{"protocol": "random"}, want: "random"}, + {name: "other", inet: map[string]any{"protocol": "kernel_lo"}, want: "other"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := inet2yangOrigin(tt.inet); got != tt.want { + t.Fatalf("inet2yangOrigin() = %q, want %q", got, tt.want) + } + }) + } +} + +func TestProto2yang(t *testing.T) { + tests := []struct { + in string + want string + }{ + {"802.1Q", "ieee802-dot1q-types:c-vlan"}, + {"802.1ad", "ieee802-dot1q-types:s-vlan"}, + {"something", "other"}, + } + + for _, tt := range tests { + 
if got := proto2yang(tt.in); got != tt.want { + t.Fatalf("proto2yang(%q) = %q, want %q", tt.in, got, tt.want) + } + } +} + +func TestLagMode(t *testing.T) { + tests := []struct { + in string + want string + }{ + {"802.3ad", "lacp"}, + {"balance-xor", "static"}, + {"active-backup", "static"}, + } + + for _, tt := range tests { + if got := lagMode(tt.in); got != tt.want { + t.Fatalf("lagMode(%q) = %q, want %q", tt.in, got, tt.want) + } + } +} + +func TestLagHash(t *testing.T) { + tests := []struct { + in string + want string + }{ + {"layer2", "layer2"}, + {"layer3+4", "layer3-4"}, + {"layer2+3", "layer2-3"}, + {"encap2+3", "encap2-3"}, + {"encap3+4", "encap3-4"}, + {"vlan+srcmac", "vlan-srcmac"}, + {"something-else", "layer2"}, + } + + for _, tt := range tests { + if got := lagHash(tt.in); got != tt.want { + t.Fatalf("lagHash(%q) = %q, want %q", tt.in, got, tt.want) + } + } +} + +func TestBridgeRouterMode(t *testing.T) { + tests := []struct { + in int + want string + }{ + {0, "off"}, + {1, "auto"}, + {2, "permanent"}, + {9, "UNKNOWN"}, + } + + for _, tt := range tests { + if got := bridgeRouterMode(tt.in); got != tt.want { + t.Fatalf("bridgeRouterMode(%d) = %q, want %q", tt.in, got, tt.want) + } + } +} + +func TestStatistics(t *testing.T) { + t.Run("with stats64", func(t *testing.T) { + st := statistics(map[string]any{ + "stats64": map[string]any{ + "rx": map[string]any{"bytes": json.Number("123")}, + "tx": map[string]any{"bytes": uint64(456)}, + }, + }) + if st["in-octets"] != "123" || st["out-octets"] != "456" { + t.Fatalf("unexpected statistics map: %#v", st) + } + }) + + t.Run("without stats64", func(t *testing.T) { + st := statistics(map[string]any{}) + if len(st) != 0 { + t.Fatalf("expected empty stats, got %#v", st) + } + }) +} + +func TestToCounterString(t *testing.T) { + tests := []struct { + name string + in any + want string + }{ + {name: "int", in: int(7), want: "7"}, + {name: "int64", in: int64(8), want: "8"}, + {name: "uint64", in: uint64(9), want: 
"9"}, + {name: "float64", in: float64(10.9), want: "10"}, + {name: "json number", in: json.Number("11"), want: "11"}, + {name: "string", in: " 12 ", want: "12"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := toCounterString(tt.in); got != tt.want { + t.Fatalf("toCounterString(%v) = %q, want %q", tt.in, got, tt.want) + } + }) + } +} + +func TestAddressesFamilyFilter(t *testing.T) { + ipaddr := map[string]any{ + "addr_info": []any{ + map[string]any{"family": "inet", "local": "192.0.2.1", "prefixlen": 24, "protocol": "dhcp"}, + map[string]any{"family": "inet6", "local": "2001:db8::1", "prefixlen": 64, "protocol": "kernel_ra"}, + }, + } + + v4 := addresses(ipaddr, "inet") + if len(v4) != 1 || v4[0]["ip"] != "192.0.2.1" || v4[0]["prefix-length"] != 24 || v4[0]["origin"] != "dhcp" { + t.Fatalf("unexpected inet addresses: %#v", v4) + } + + v6 := addresses(ipaddr, "inet6") + if len(v6) != 1 || v6[0]["ip"] != "2001:db8::1" || v6[0]["prefix-length"] != 64 || v6[0]["origin"] != "link-layer" { + t.Fatalf("unexpected inet6 addresses: %#v", v6) + } +} + +func TestIPv4Data(t *testing.T) { + t.Run("with mtu and addresses", func(t *testing.T) { + in := map[string]any{ + "ifname": "eth0", + "mtu": 1500, + "addr_info": []any{ + map[string]any{"family": "inet", "local": "10.0.0.1", "prefixlen": 24, "protocol": "static"}, + }, + } + out := ipv4Data(in, nil) + if out["mtu"] != 1500 { + t.Fatalf("unexpected mtu: %#v", out) + } + if _, ok := out["address"]; !ok { + t.Fatalf("missing address list: %#v", out) + } + }) + + t.Run("without mtu", func(t *testing.T) { + in := map[string]any{ + "ifname": "eth0", + "addr_info": []any{ + map[string]any{"family": "inet", "local": "10.0.0.2", "prefixlen": 24, "protocol": "static"}, + }, + } + out := ipv4Data(in, nil) + if _, ok := out["mtu"]; ok { + t.Fatalf("did not expect mtu in %#v", out) + } + }) + + t.Run("loopback omits mtu", func(t *testing.T) { + in := map[string]any{"ifname": "lo", "mtu": 65536} + out 
:= ipv4Data(in, nil) + if _, ok := out["mtu"]; ok { + t.Fatalf("loopback must not include mtu: %#v", out) + } + }) +} + +func TestIPv6Data(t *testing.T) { + t.Run("with mtu and addresses", func(t *testing.T) { + in := map[string]any{ + "ifname": "eth0", + "addr_info": []any{ + map[string]any{"family": "inet6", "local": "2001:db8::1", "prefixlen": 64, "protocol": "static"}, + }, + } + fc := &mockFileChecker{files: map[string]string{"/proc/sys/net/ipv6/conf/eth0/mtu": "1280\n"}} + out := ipv6Data(in, nil, fc) + if out["mtu"] != 1280 { + t.Fatalf("unexpected mtu: %#v", out) + } + if _, ok := out["address"]; !ok { + t.Fatalf("missing address list: %#v", out) + } + }) + + t.Run("without mtu from filechecker", func(t *testing.T) { + in := map[string]any{"ifname": "eth1"} + fc := &mockFileChecker{readErr: map[string]error{"/proc/sys/net/ipv6/conf/eth1/mtu": errors.New("no file")}} + out := ipv6Data(in, nil, fc) + if _, ok := out["mtu"]; ok { + t.Fatalf("did not expect mtu in %#v", out) + } + }) + + t.Run("without addresses", func(t *testing.T) { + in := map[string]any{"ifname": "eth2"} + out := ipv6Data(in, nil, nil) + if len(out) != 0 { + t.Fatalf("expected empty ipv6 map, got %#v", out) + } + }) +} + +func TestNeighbors(t *testing.T) { + t.Run("ipv4 static and dynamic", func(t *testing.T) { + link := []map[string]any{ + {"ifindex": 2, "ifname": "eth0", "flags": []any{"UP"}, "link_type": "ether", "operstate": "UP", "address": "02:00:00:00:00:01"}, + } + neighs := []map[string]any{ + {"dst": "192.168.1.1", "dev": "eth0", "lladdr": "aa:bb:cc:dd:ee:ff", "state": []any{"REACHABLE"}}, + {"dst": "192.168.1.2", "dev": "eth0", "lladdr": "11:22:33:44:55:66", "state": []any{"PERMANENT"}}, + {"dst": "2001:db8::1", "dev": "eth0", "lladdr": "aa:bb:cc:dd:ee:01", "state": []any{"STALE"}}, + } + + ifaces := mustInterfaces(t, Transform(mustRaw(t, link), nil, nil, mustRaw(t, neighs), nil)) + eth0 := mustIfaceByName(t, ifaces, "eth0") + + ipv4, ok := eth0["ietf-ip:ipv4"].(map[string]any) + 
if !ok { + t.Fatalf("missing ipv4: %#v", eth0) + } + v4neighs, ok := ipv4["neighbor"].([]any) + if !ok || len(v4neighs) != 2 { + t.Fatalf("expected 2 ipv4 neighbors, got %#v", ipv4["neighbor"]) + } + + n0 := v4neighs[0].(map[string]any) + if n0["ip"] != "192.168.1.1" || n0["link-layer-address"] != "aa:bb:cc:dd:ee:ff" || n0["origin"] != "dynamic" { + t.Fatalf("unexpected neighbor[0]: %#v", n0) + } + n1 := v4neighs[1].(map[string]any) + if n1["origin"] != "static" { + t.Fatalf("expected static origin: %#v", n1) + } + }) + + t.Run("ipv6 with state and is-router", func(t *testing.T) { + link := []map[string]any{ + {"ifindex": 2, "ifname": "eth0", "flags": []any{"UP"}, "link_type": "ether", "operstate": "UP", "address": "02:00:00:00:00:01"}, + } + neighs := []map[string]any{ + {"dst": "2001:db8::1", "dev": "eth0", "lladdr": "aa:bb:cc:dd:ee:01", "state": []any{"STALE"}, "router": true}, + } + + ifaces := mustInterfaces(t, Transform(mustRaw(t, link), nil, nil, mustRaw(t, neighs), nil)) + eth0 := mustIfaceByName(t, ifaces, "eth0") + + ipv6, ok := eth0["ietf-ip:ipv6"].(map[string]any) + if !ok { + t.Fatalf("missing ipv6: %#v", eth0) + } + v6neighs, ok := ipv6["neighbor"].([]any) + if !ok || len(v6neighs) != 1 { + t.Fatalf("expected 1 ipv6 neighbor, got %#v", ipv6["neighbor"]) + } + + n := v6neighs[0].(map[string]any) + if n["state"] != "stale" { + t.Fatalf("expected stale state: %#v", n) + } + if _, ok := n["is-router"]; !ok { + t.Fatalf("expected is-router: %#v", n) + } + }) + + t.Run("skips entries without lladdr", func(t *testing.T) { + link := []map[string]any{ + {"ifindex": 2, "ifname": "eth0", "flags": []any{"UP"}, "link_type": "ether", "operstate": "UP", "address": "02:00:00:00:00:01"}, + } + neighs := []map[string]any{ + {"dst": "192.168.1.1", "dev": "eth0", "state": []any{"INCOMPLETE"}}, + } + + ifaces := mustInterfaces(t, Transform(mustRaw(t, link), nil, nil, mustRaw(t, neighs), nil)) + eth0 := mustIfaceByName(t, ifaces, "eth0") + + if _, ok := 
eth0["ietf-ip:ipv4"]; ok { + t.Fatalf("should not have ipv4 with no valid neighbors: %#v", eth0) + } + }) +} + +func TestDedupByIfindex(t *testing.T) { + t.Run("keeps UP over DOWN for same ifindex", func(t *testing.T) { + link := []map[string]any{ + {"ifindex": 2, "ifname": "eth0", "flags": []any{}, "link_type": "ether", "operstate": "DOWN", "address": "02:00:00:00:00:01"}, + {"ifindex": 2, "ifname": "e1", "flags": []any{"UP"}, "link_type": "ether", "operstate": "UP", "address": "02:00:00:00:00:01"}, + } + ifaces := mustInterfaces(t, Transform(mustRaw(t, link), nil, nil, nil, nil)) + if len(ifaces) != 1 { + t.Fatalf("expected 1 interface after dedup, got %d", len(ifaces)) + } + if ifaces[0]["name"] != "e1" { + t.Fatalf("expected e1 to survive dedup, got %v", ifaces[0]["name"]) + } + }) + + t.Run("keeps first when both DOWN", func(t *testing.T) { + link := []map[string]any{ + {"ifindex": 3, "ifname": "a0", "flags": []any{}, "link_type": "ether", "operstate": "DOWN"}, + {"ifindex": 3, "ifname": "a1", "flags": []any{}, "link_type": "ether", "operstate": "DOWN"}, + } + ifaces := mustInterfaces(t, Transform(mustRaw(t, link), nil, nil, nil, nil)) + if len(ifaces) != 1 { + t.Fatalf("expected 1 interface after dedup, got %d", len(ifaces)) + } + if ifaces[0]["name"] != "a0" { + t.Fatalf("expected a0 to survive dedup, got %v", ifaces[0]["name"]) + } + }) + + t.Run("different ifindex not deduped", func(t *testing.T) { + link := []map[string]any{ + {"ifindex": 1, "ifname": "lo", "flags": []any{"LOOPBACK", "UP"}, "link_type": "loopback", "operstate": "UNKNOWN"}, + {"ifindex": 2, "ifname": "e1", "flags": []any{"UP"}, "link_type": "ether", "operstate": "UP"}, + } + ifaces := mustInterfaces(t, Transform(mustRaw(t, link), nil, nil, nil, nil)) + if len(ifaces) != 2 { + t.Fatalf("expected 2 interfaces, got %d", len(ifaces)) + } + }) + + t.Run("zero ifindex entries kept as-is", func(t *testing.T) { + link := []map[string]any{ + {"ifname": "x0", "flags": []any{"UP"}, "link_type": 
"ether", "operstate": "UP"}, + {"ifname": "x1", "flags": []any{"UP"}, "link_type": "ether", "operstate": "UP"}, + } + ifaces := mustInterfaces(t, Transform(mustRaw(t, link), nil, nil, nil, nil)) + if len(ifaces) != 2 { + t.Fatalf("expected 2 interfaces (zero ifindex not deduped), got %d", len(ifaces)) + } + }) +} diff --git a/src/yangerd/internal/ipbatch/ipbatch.go b/src/yangerd/internal/ipbatch/ipbatch.go new file mode 100644 index 000000000..eb36635e6 --- /dev/null +++ b/src/yangerd/internal/ipbatch/ipbatch.go @@ -0,0 +1,265 @@ +// Package ipbatch manages a persistent `ip -json [-s] [-d] -force -batch -` +// subprocess. Commands sent via Query are serialized by a mutex and +// paired with the single JSON-array line the subprocess writes to +// stdout. The caller chooses global flags via functional options: +// WithStats adds -s (statistics) and WithDetails adds -d (details). +// +// IMPORTANT: When -s is present, `link show` commands produce multiple +// lines of output — breaking the one-command-one-line protocol used by +// Query. Address queries must therefore use a separate IPBatch instance +// that omits -s (use WithDetails only). +// +// IMPORTANT: `ip -force -batch -` produces NO stdout for commands that +// fail (e.g. "link show dev "). Query uses a read timeout +// to detect this and kills the subprocess so restartLoop can recover. +// +// On subprocess death the manager enters a dead state and attempts +// automatic restart with exponential backoff. +package ipbatch + +import ( + "bufio" + "context" + "encoding/json" + "errors" + "fmt" + "io" + "log/slog" + "math" + "os/exec" + "sync" + "sync/atomic" + "time" +) + +// ErrBatchDead is returned by Query when the subprocess is not running. +// Callers should treat it as transient and retry on the next event. 
+var ErrBatchDead = errors.New("ip batch process is dead") + +const ( + canaryCommand = "link show lo" + + queryTimeout = 5 * time.Second + reconnectInitial = 100 * time.Millisecond + reconnectMax = 30 * time.Second + reconnectFactor = 2.0 +) + +// Option configures an IPBatch instance. +type Option func(*IPBatch) + +// WithStats adds -s (statistics) to the ip command. +func WithStats() Option { return func(b *IPBatch) { b.stats = true } } + +// WithDetails adds -d (details) to the ip command. +func WithDetails() Option { return func(b *IPBatch) { b.details = true } } + +// IPBatch wraps a persistent `ip -json -force -batch -` subprocess. +type IPBatch struct { + cmd *exec.Cmd + stdin io.WriteCloser + lines chan []byte + stderr io.ReadCloser + mu sync.Mutex // serializes queries + alive atomic.Bool + log *slog.Logger + ctx context.Context + cancel context.CancelFunc + + stats bool + details bool +} + +// New spawns the ip batch subprocess. The returned IPBatch is ready +// for Query calls. A background goroutine drains stderr. +func New(ctx context.Context, log *slog.Logger, opts ...Option) (*IPBatch, error) { + ctx, cancel := context.WithCancel(ctx) + b := &IPBatch{ + log: log, + ctx: ctx, + cancel: cancel, + } + for _, o := range opts { + o(b) + } + if err := b.start(); err != nil { + cancel() + return nil, err + } + go b.restartLoop() + return b, nil +} + +func (b *IPBatch) start() error { + args := []string{"-json"} + if b.stats { + args = append(args, "-s") + } + if b.details { + args = append(args, "-d") + } + args = append(args, "-force", "-batch", "-") + + cmd := exec.CommandContext(b.ctx, "ip", args...) 
+ stdin, err := cmd.StdinPipe() + if err != nil { + return fmt.Errorf("stdin pipe: %w", err) + } + stdout, err := cmd.StdoutPipe() + if err != nil { + return fmt.Errorf("stdout pipe: %w", err) + } + stderr, err := cmd.StderrPipe() + if err != nil { + return fmt.Errorf("stderr pipe: %w", err) + } + if err := cmd.Start(); err != nil { + return fmt.Errorf("start ip batch: %w", err) + } + b.mu.Lock() + b.cmd = cmd + b.stdin = stdin + b.lines = make(chan []byte, 8) + b.stderr = stderr + b.alive.Store(true) + b.mu.Unlock() + go b.readLines(stdout) + go b.drainStderr() + return nil +} + +func (b *IPBatch) readLines(r io.Reader) { + scanner := bufio.NewScanner(r) + scanner.Buffer(make([]byte, 0, 4*1024*1024), 4*1024*1024) + for scanner.Scan() { + line := make([]byte, len(scanner.Bytes())) + copy(line, scanner.Bytes()) + b.lines <- line + } + b.alive.Store(false) +} + +// Query sends a command to the ip batch process and returns the JSON +// response. Commands are newline-terminated (e.g. "link show dev eth0"). +// Each command produces exactly one line of JSON array output. If the +// subprocess produces no output (e.g. querying a non-existent device), +// Query times out and kills the subprocess for recovery. 
+func (b *IPBatch) Query(command string) (json.RawMessage, error) { + if !b.alive.Load() { + return nil, ErrBatchDead + } + b.mu.Lock() + defer b.mu.Unlock() + + if !b.alive.Load() { + return nil, ErrBatchDead + } + + if _, err := fmt.Fprintf(b.stdin, "%s\n", command); err != nil { + b.alive.Store(false) + return nil, fmt.Errorf("write command: %w", err) + } + + select { + case line, ok := <-b.lines: + if !ok { + b.alive.Store(false) + return nil, fmt.Errorf("ip batch process exited") + } + b.log.Debug("ipbatch query", "cmd", command, "respLen", len(line)) + return json.RawMessage(line), nil + case <-time.After(queryTimeout): + b.log.Warn("ip batch query timeout, killing subprocess", "cmd", command) + b.alive.Store(false) + if b.cmd != nil && b.cmd.Process != nil { + b.cmd.Process.Kill() + } + return nil, fmt.Errorf("timeout waiting for response to: %s", command) + } +} + +// Close terminates the subprocess and cancels the restart loop. +func (b *IPBatch) Close() { + b.cancel() + b.mu.Lock() + if b.stdin != nil { + b.stdin.Close() + } + if b.cmd != nil && b.cmd.Process != nil { + b.cmd.Process.Kill() + } + b.alive.Store(false) + b.mu.Unlock() +} + +// Status returns "running", "restarting", or "failed". +func (b *IPBatch) Status() string { + if b.alive.Load() { + return "running" + } + return "restarting" +} + +func (b *IPBatch) drainStderr() { + scanner := bufio.NewScanner(b.stderr) + for scanner.Scan() { + b.log.Warn("ip batch stderr", "line", scanner.Text()) + } +} + +// restartLoop runs in the background and respawns the subprocess when +// it dies. Uses exponential backoff: 100ms initial, 30s max, 2x factor. +// After a successful restart, a canary query validates the new process. 
+func (b *IPBatch) restartLoop() { + delay := reconnectInitial + for { + select { + case <-b.ctx.Done(): + return + default: + } + + if b.alive.Load() { + select { + case <-b.ctx.Done(): + return + case <-time.After(200 * time.Millisecond): + continue + } + } + + b.log.Info("ip batch: subprocess died, restarting", "delay", delay) + select { + case <-b.ctx.Done(): + return + case <-time.After(delay): + } + + b.mu.Lock() + if b.cmd != nil && b.cmd.Process != nil { + b.cmd.Process.Kill() + b.cmd.Wait() + } + b.mu.Unlock() + + if err := b.start(); err != nil { + b.log.Warn("ip batch: restart failed", "err", err) + delay = time.Duration(math.Min( + float64(delay)*reconnectFactor, + float64(reconnectMax))) + continue + } + + if _, err := b.Query(canaryCommand); err != nil { + b.log.Warn("ip batch: canary query failed", "err", err) + b.alive.Store(false) + delay = time.Duration(math.Min( + float64(delay)*reconnectFactor, + float64(reconnectMax))) + continue + } + + b.log.Info("ip batch: restarted successfully") + delay = reconnectInitial + } +} diff --git a/src/yangerd/internal/ipc/client.go b/src/yangerd/internal/ipc/client.go new file mode 100644 index 000000000..0a9de57dc --- /dev/null +++ b/src/yangerd/internal/ipc/client.go @@ -0,0 +1,56 @@ +package ipc + +import ( + "encoding/json" + "fmt" + "net" + "time" +) + +// Client connects to a yangerd Unix socket and issues IPC requests. +type Client struct { + addr string + timeout time.Duration +} + +// NewClient returns a Client that connects to the given socket path +// with per-request timeout. +func NewClient(socketPath string, timeout time.Duration) *Client { + return &Client{ + addr: socketPath, + timeout: timeout, + } +} + +// Get queries a YANG subtree by path. Path "/" returns all models. +func (c *Client) Get(path string) (*Response, error) { + return c.call(&Request{Method: "get", Path: path}) +} + +// Health returns per-model freshness metadata. 
+func (c *Client) Health() (*Response, error) { + return c.call(&Request{Method: "health"}) +} + +func (c *Client) call(req *Request) (*Response, error) { + conn, err := net.DialTimeout("unix", c.addr, c.timeout) + if err != nil { + return nil, fmt.Errorf("connect %s: %w", c.addr, err) + } + defer conn.Close() + + conn.SetDeadline(time.Now().Add(c.timeout)) + + payload, err := json.Marshal(req) + if err != nil { + return nil, err + } + if err := WriteFrame(conn, payload); err != nil { + return nil, fmt.Errorf("write request: %w", err) + } + resp, err := ReadResponse(conn) + if err != nil { + return nil, fmt.Errorf("read response: %w", err) + } + return resp, nil +} diff --git a/src/yangerd/internal/ipc/protocol.go b/src/yangerd/internal/ipc/protocol.go new file mode 100644 index 000000000..b6da032a8 --- /dev/null +++ b/src/yangerd/internal/ipc/protocol.go @@ -0,0 +1,116 @@ +// Package ipc implements the yangerd IPC protocol: a versioned, +// length-prefixed JSON framing over AF_UNIX SOCK_STREAM. +// +// Wire format: +// +// +--------+--------+--------+--------+--------+--- ... ---+ +// | ver(1) | length (uint32 big-endian, bytes) | JSON body | +// +--------+--------+--------+--------+--------+--- ... ---+ +package ipc + +import ( + "encoding/binary" + "encoding/json" + "fmt" + "io" +) + +const ( + // Version is the current protocol version. + Version byte = 1 + + // MaxPayload is the maximum JSON body size (4 MiB). + MaxPayload = 4 << 20 + + headerSize = 5 // 1 byte version + 4 bytes length +) + +// Request is the IPC request from a client. +type Request struct { + Method string `json:"method"` + Path string `json:"path,omitempty"` + Filter map[string]string `json:"filter,omitempty"` +} + +// Response is the IPC response to a client. +type Response struct { + Status string `json:"status"` + Code int `json:"code,omitempty"` + Message string `json:"message,omitempty"` + + // Used by "get" responses. 
+ Data json.RawMessage `json:"data,omitempty"` + + // Used by "health" responses. + Subsystems map[string]json.RawMessage `json:"subsystems,omitempty"` + Models map[string]json.RawMessage `json:"models,omitempty"` +} + +// WriteFrame writes a versioned, length-prefixed frame to w. +func WriteFrame(w io.Writer, payload []byte) error { + if len(payload) > MaxPayload { + return fmt.Errorf("payload size %d exceeds maximum %d", len(payload), MaxPayload) + } + hdr := [headerSize]byte{Version} + binary.BigEndian.PutUint32(hdr[1:], uint32(len(payload))) + if _, err := w.Write(hdr[:]); err != nil { + return err + } + _, err := w.Write(payload) + return err +} + +// ReadFrame reads a versioned, length-prefixed frame from r. +func ReadFrame(r io.Reader) ([]byte, error) { + var hdr [headerSize]byte + if _, err := io.ReadFull(r, hdr[:]); err != nil { + return nil, err + } + if hdr[0] != Version { + return nil, fmt.Errorf("protocol version mismatch: got %d, want %d", hdr[0], Version) + } + length := binary.BigEndian.Uint32(hdr[1:]) + if length > MaxPayload { + return nil, fmt.Errorf("payload size %d exceeds maximum %d", length, MaxPayload) + } + buf := make([]byte, length) + if _, err := io.ReadFull(r, buf); err != nil { + return nil, err + } + return buf, nil +} + +// WriteResponse marshals a Response and writes it as a framed message. +func WriteResponse(w io.Writer, resp *Response) error { + data, err := json.Marshal(resp) + if err != nil { + return err + } + return WriteFrame(w, data) +} + +// ReadRequest reads and unmarshals a framed Request. +func ReadRequest(r io.Reader) (*Request, error) { + data, err := ReadFrame(r) + if err != nil { + return nil, err + } + var req Request + if err := json.Unmarshal(data, &req); err != nil { + return nil, fmt.Errorf("invalid request JSON: %w", err) + } + return &req, nil +} + +// ReadResponse reads and unmarshals a framed Response. 
+func ReadResponse(r io.Reader) (*Response, error) { + data, err := ReadFrame(r) + if err != nil { + return nil, err + } + var resp Response + if err := json.Unmarshal(data, &resp); err != nil { + return nil, fmt.Errorf("invalid response JSON: %w", err) + } + return &resp, nil +} diff --git a/src/yangerd/internal/ipc/protocol_test.go b/src/yangerd/internal/ipc/protocol_test.go new file mode 100644 index 000000000..5ce47b7c1 --- /dev/null +++ b/src/yangerd/internal/ipc/protocol_test.go @@ -0,0 +1,89 @@ +package ipc + +import ( + "bytes" + "encoding/json" + "testing" +) + +func TestFrameRoundTrip(t *testing.T) { + payload := []byte(`{"method":"get","path":"/test"}`) + var buf bytes.Buffer + + if err := WriteFrame(&buf, payload); err != nil { + t.Fatal(err) + } + + got, err := ReadFrame(&buf) + if err != nil { + t.Fatal(err) + } + if !bytes.Equal(got, payload) { + t.Fatalf("mismatch: %s vs %s", got, payload) + } +} + +func TestFrameVersionMismatch(t *testing.T) { + var buf bytes.Buffer + buf.Write([]byte{99, 0, 0, 0, 2, '{', '}'}) + + _, err := ReadFrame(&buf) + if err == nil { + t.Fatal("expected version mismatch error") + } +} + +func TestFrameOversized(t *testing.T) { + var buf bytes.Buffer + huge := make([]byte, MaxPayload+1) + if err := WriteFrame(&buf, huge); err == nil { + t.Fatal("expected oversized payload error") + } +} + +func TestRequestResponseRoundTrip(t *testing.T) { + var buf bytes.Buffer + + req := &Request{Method: "get", Path: "/ietf-system:system-state"} + data, _ := json.Marshal(req) + WriteFrame(&buf, data) + + got, err := ReadRequest(&buf) + if err != nil { + t.Fatal(err) + } + if got.Method != "get" || got.Path != "/ietf-system:system-state" { + t.Fatalf("unexpected request: %+v", got) + } +} + +func TestResponseRoundTrip(t *testing.T) { + var buf bytes.Buffer + + resp := &Response{ + Status: "ok", + Data: json.RawMessage(`{"hostname":"r1"}`), + } + WriteResponse(&buf, resp) + + got, err := ReadResponse(&buf) + if err != nil { + t.Fatal(err) + } 
+ if got.Status != "ok" || string(got.Data) != `{"hostname":"r1"}` { + t.Fatalf("unexpected response: %+v", got) + } +} + +func TestEmptyFrame(t *testing.T) { + var buf bytes.Buffer + WriteFrame(&buf, []byte{}) + + got, err := ReadFrame(&buf) + if err != nil { + t.Fatal(err) + } + if len(got) != 0 { + t.Fatalf("expected empty, got %d bytes", len(got)) + } +} diff --git a/src/yangerd/internal/ipc/server.go b/src/yangerd/internal/ipc/server.go new file mode 100644 index 000000000..b4c9b6465 --- /dev/null +++ b/src/yangerd/internal/ipc/server.go @@ -0,0 +1,215 @@ +package ipc + +import ( + "context" + "encoding/json" + "log" + "net" + "os" + "sync" + "sync/atomic" + "time" + + "github.com/kernelkit/infix/src/yangerd/internal/tree" +) + +// Server listens on an AF_UNIX SOCK_STREAM socket and serves +// YANG operational data from an in-memory Tree. +type Server struct { + tree *tree.Tree + listener net.Listener + ready *atomic.Bool + wg sync.WaitGroup +} + +// NewServer creates a Server that serves data from the given Tree. +// While ready is false, all requests receive a 503 "starting" response. +func NewServer(t *tree.Tree, ready *atomic.Bool) *Server { + return &Server{ + tree: t, + ready: ready, + } +} + +// Listen creates and binds a Unix domain socket at path. +// A stale socket file is removed before binding. +func (s *Server) Listen(path string) error { + if err := os.Remove(path); err != nil && !os.IsNotExist(err) { + return err + } + ln, err := net.Listen("unix", path) + if err != nil { + return err + } + if err := os.Chmod(path, 0660); err != nil { + ln.Close() + return err + } + s.listener = ln + return nil +} + +// Serve accepts connections until ctx is cancelled. Each connection +// is handled in its own goroutine. +func (s *Server) Serve(ctx context.Context) error { + go func() { + <-ctx.Done() + s.listener.Close() + }() + + for { + conn, err := s.listener.Accept() + if err != nil { + // Listener closed by context cancellation — normal shutdown. 
+ select { + case <-ctx.Done(): + s.wg.Wait() + return nil + default: + return err + } + } + s.wg.Add(1) + go func() { + defer s.wg.Done() + s.handleConn(conn) + }() + } +} + +// Addr returns the listener address, or empty string if not listening. +func (s *Server) Addr() string { + if s.listener == nil { + return "" + } + return s.listener.Addr().String() +} + +func (s *Server) handleConn(conn net.Conn) { + defer conn.Close() + + req, err := ReadRequest(conn) + if err != nil { + log.Printf("ipc: read request: %v", err) + return + } + + if !s.ready.Load() { + WriteResponse(conn, &Response{ + Status: "starting", + Code: 503, + Message: "yangerd is starting up", + }) + return + } + + switch req.Method { + case "get": + s.handleGet(conn, req) + case "health": + s.handleHealth(conn) + default: + WriteResponse(conn, &Response{ + Status: "error", + Code: 400, + Message: "unknown method: " + req.Method, + }) + } +} + +func (s *Server) handleGet(conn net.Conn, req *Request) { + path := req.Path + if path == "" || path == "/" { + s.handleDump(conn) + return + } + + key := path + if key[0] == '/' { + key = key[1:] + } + + data := s.tree.Get(key) + if data == nil { + // An absent subtree is a normal answer for operational data -- + // the feature is simply not active (e.g. NTP unconfigured). + // Answer ok with an empty object rather than an error, so every + // client gets "no data" without special-casing. Deliberately + // NOT {"": {}}: that would make libyang instantiate the + // container, which for presence containers is real data. 
+ WriteResponse(conn, &Response{ + Status: "ok", + Data: json.RawMessage(`{}`), + }) + return + } + + envelope := map[string]json.RawMessage{key: data} + body, err := json.Marshal(envelope) + if err != nil { + WriteResponse(conn, &Response{ + Status: "error", + Code: 500, + Message: "marshal error: " + err.Error(), + }) + return + } + + WriteResponse(conn, &Response{ + Status: "ok", + Data: body, + }) +} + +func (s *Server) handleDump(conn net.Conn) { + keys := s.tree.Keys() + blobs := s.tree.GetMulti(keys) + + all := make(map[string]json.RawMessage, len(keys)) + for i, k := range keys { + if i < len(blobs) { + all[k] = blobs[i] + } + } + + body, err := json.Marshal(all) + if err != nil { + WriteResponse(conn, &Response{ + Status: "error", + Code: 500, + Message: "marshal error: " + err.Error(), + }) + return + } + + WriteResponse(conn, &Response{ + Status: "ok", + Data: body, + }) +} + +func (s *Server) handleHealth(conn net.Conn) { + keys := s.tree.Keys() + models := make(map[string]json.RawMessage, len(keys)) + + for _, k := range keys { + info, ok := s.tree.Info(k) + if !ok { + continue + } + entry := struct { + LastUpdated string `json:"last_updated"` + SizeBytes int `json:"size_bytes"` + }{ + LastUpdated: info.LastUpdated.UTC().Format(time.RFC3339), + SizeBytes: info.SizeBytes, + } + b, _ := json.Marshal(entry) + models[k] = b + } + + WriteResponse(conn, &Response{ + Status: "ok", + Models: models, + }) +} diff --git a/src/yangerd/internal/ipc/server_test.go b/src/yangerd/internal/ipc/server_test.go new file mode 100644 index 000000000..f329308b3 --- /dev/null +++ b/src/yangerd/internal/ipc/server_test.go @@ -0,0 +1,140 @@ +package ipc + +import ( + "context" + "encoding/json" + "net" + "os" + "path/filepath" + "sync/atomic" + "testing" + "time" + + "github.com/kernelkit/infix/src/yangerd/internal/tree" +) + +func TestServerGetSingle(t *testing.T) { + tr := tree.New() + tr.Set("ietf-system:system-state", json.RawMessage(`{"platform":{"os-name":"Infix"}}`)) + 
+ resp := serverRoundTrip(t, tr, true, &Request{Method: "get", Path: "/ietf-system:system-state"}) + + if resp.Status != "ok" { + t.Fatalf("expected ok, got %s: %s", resp.Status, resp.Message) + } + var data map[string]json.RawMessage + json.Unmarshal(resp.Data, &data) + if _, ok := data["ietf-system:system-state"]; !ok { + t.Fatalf("missing key in response data: %s", resp.Data) + } +} + +func TestServerGetNotFound(t *testing.T) { + tr := tree.New() + resp := serverRoundTrip(t, tr, true, &Request{Method: "get", Path: "/nonexistent"}) + + // An absent subtree is "no data", not an error: ok + empty object, + // so clients (statd, yangerctl) need no special-casing. + if resp.Status != "ok" { + t.Fatalf("expected ok, got %+v", resp) + } + if string(resp.Data) != "{}" { + t.Fatalf("expected empty object data, got %s", resp.Data) + } +} + +func TestServerDump(t *testing.T) { + tr := tree.New() + tr.Set("a", json.RawMessage(`1`)) + tr.Set("b", json.RawMessage(`2`)) + + resp := serverRoundTrip(t, tr, true, &Request{Method: "get", Path: "/"}) + + if resp.Status != "ok" { + t.Fatalf("expected ok, got %s: %s", resp.Status, resp.Message) + } + var data map[string]json.RawMessage + json.Unmarshal(resp.Data, &data) + if len(data) != 2 { + t.Fatalf("expected 2 models in dump, got %d", len(data)) + } +} + +func TestServerHealth(t *testing.T) { + tr := tree.New() + tr.Set("model-a", json.RawMessage(`{}`)) + + resp := serverRoundTrip(t, tr, true, &Request{Method: "health"}) + + if resp.Status != "ok" { + t.Fatalf("expected ok, got %s", resp.Status) + } + if _, ok := resp.Models["model-a"]; !ok { + t.Fatalf("expected model-a in health models, got %v", resp.Models) + } +} + +func TestServerNotReady(t *testing.T) { + tr := tree.New() + resp := serverRoundTrip(t, tr, false, &Request{Method: "get", Path: "/"}) + + if resp.Status != "starting" || resp.Code != 503 { + t.Fatalf("expected 503 starting, got %+v", resp) + } +} + +func TestServerUnknownMethod(t *testing.T) { + tr := tree.New() 
+ resp := serverRoundTrip(t, tr, true, &Request{Method: "invalid"}) + + if resp.Status != "error" || resp.Code != 400 { + t.Fatalf("expected 400 error, got %+v", resp) + } +} + +func serverRoundTrip(t *testing.T, tr *tree.Tree, ready bool, req *Request) *Response { + t.Helper() + + sockPath := filepath.Join(t.TempDir(), "test.sock") + readyFlag := &atomic.Bool{} + readyFlag.Store(ready) + + srv := NewServer(tr, readyFlag) + if err := srv.Listen(sockPath); err != nil { + t.Fatal(err) + } + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + done := make(chan error, 1) + go func() { + done <- srv.Serve(ctx) + }() + + time.Sleep(10 * time.Millisecond) + + conn, err := net.Dial("unix", sockPath) + if err != nil { + t.Fatal(err) + } + defer conn.Close() + + payload, _ := json.Marshal(req) + if err := WriteFrame(conn, payload); err != nil { + t.Fatal(err) + } + + resp, err := ReadResponse(conn) + if err != nil { + t.Fatal(err) + } + + cancel() + + if _, err := os.Stat(sockPath); err == nil { + os.Remove(sockPath) + } + + return resp +} diff --git a/src/yangerd/internal/iwmonitor/ap_test.go b/src/yangerd/internal/iwmonitor/ap_test.go new file mode 100644 index 000000000..ccdd3238d --- /dev/null +++ b/src/yangerd/internal/iwmonitor/ap_test.go @@ -0,0 +1,126 @@ +package iwmonitor + +import ( + "encoding/json" + "testing" + + "github.com/kernelkit/infix/src/yangerd/internal/wpactrl" +) + +func TestFormatStations(t *testing.T) { + m := &IWMonitor{} + stas := []map[string]string{ + { + "addr": "02:00:00:00:00:01", + "signal": "-57", + "connected_time": "120", + "rx_packets": "1500", + "tx_packets": "2500", + "rx_bytes": "4825331939", + "tx_bytes": "216392802676", + "rx_rate_info": "1560 vhtmcs 8 vhtnss 2", + "tx_rate_info": "1733 vhtmcs 9 vhtnss 2", + }, + } + + raw, err := json.Marshal(m.formatStations(stas)) + if err != nil { + t.Fatalf("marshal: %v", err) + } + + var parsed struct { + Station []struct { + MAC string `json:"mac-address"` + Signal 
int16 `json:"signal-strength"` + ConnectedTime uint32 `json:"connected-time"` + RxPackets string `json:"rx-packets"` + TxPackets string `json:"tx-packets"` + RxBytes string `json:"rx-bytes"` + TxBytes string `json:"tx-bytes"` + RxSpeed uint32 `json:"rx-speed"` + TxSpeed uint32 `json:"tx-speed"` + } `json:"station"` + } + if err := json.Unmarshal(raw, &parsed); err != nil { + t.Fatalf("unmarshal: %v", err) + } + if len(parsed.Station) != 1 { + t.Fatalf("got %d stations, want 1", len(parsed.Station)) + } + + s := parsed.Station[0] + if s.MAC != "02:00:00:00:00:01" { + t.Errorf("mac-address = %q", s.MAC) + } + if s.Signal != -57 { + t.Errorf("signal-strength = %d, want -57", s.Signal) + } + if s.ConnectedTime != 120 { + t.Errorf("connected-time = %d, want 120", s.ConnectedTime) + } + if s.RxBytes != "4825331939" || s.TxBytes != "216392802676" { + t.Errorf("bytes = %q/%q", s.RxBytes, s.TxBytes) + } + if s.RxPackets != "1500" || s.TxPackets != "2500" { + t.Errorf("packets = %q/%q", s.RxPackets, s.TxPackets) + } + // hostapd rate info is already in 100kbps units + if s.RxSpeed != 1560 || s.TxSpeed != 1733 { + t.Errorf("speed = %d/%d, want 1560/1733", s.RxSpeed, s.TxSpeed) + } +} + +func TestFilterAuthorized(t *testing.T) { + stas := []map[string]string{ + {"addr": "02:00:00:00:00:01", "flags": "[AUTH][ASSOC][AUTHORIZED]"}, + {"addr": "02:00:00:00:00:02", "flags": "[AUTH][ASSOC]"}, // mid-handshake + {"addr": "02:00:00:00:00:03", "flags": "[AUTH][ASSOC][AUTHORIZED][SHORT_PREAMBLE]"}, + } + + out := filterAuthorized(stas) + if len(out) != 2 { + t.Fatalf("got %d stations, want 2", len(out)) + } + if out[0]["addr"] != "02:00:00:00:00:01" || out[1]["addr"] != "02:00:00:00:00:03" { + t.Errorf("addrs = %q, %q", out[0]["addr"], out[1]["addr"]) + } +} + +func TestResolveSSIDHostapd(t *testing.T) { + // hostapd STATUS reports bss[N]= / ssid[N]= pairs; + // multi-BSS setups must resolve by interface name. 
+ status := map[string]string{ + "state": "ENABLED", + "bss[0]": "wlan0", + "ssid[0]": "Lobby", + "bss[1]": "wlan0_1", + "ssid[1]": "Office", + } + + si := wpactrl.SocketInfo{Iface: "wlan0_1", Daemon: "hostapd"} + if got := resolveSSID("wlan0_1", si, status); got != "Office" { + t.Errorf("resolveSSID(wlan0_1) = %q, want Office", got) + } + si = wpactrl.SocketInfo{Iface: "wlan0", Daemon: "hostapd"} + if got := resolveSSID("wlan0", si, status); got != "Lobby" { + t.Errorf("resolveSSID(wlan0) = %q, want Lobby", got) + } + if got := resolveSSID("wlan9", si, status); got != "" { + t.Errorf("resolveSSID(wlan9) = %q, want empty", got) + } +} + +func TestParseBitrate(t *testing.T) { + cases := map[string]uint32{ + "1560 vhtmcs 8 vhtnss 2": 1560, // hostapd: 100kbps units + "866.7 MBit/s VHT-MCS 9": 8667, // iw: MBit/s -> 100kbps + "54.0 MBit/s": 540, + "": 0, + "garbage rate": 0, + } + for in, want := range cases { + if got := parseBitrate(in); got != want { + t.Errorf("parseBitrate(%q) = %d, want %d", in, got, want) + } + } +} diff --git a/src/yangerd/internal/iwmonitor/data.go b/src/yangerd/internal/iwmonitor/data.go new file mode 100644 index 000000000..47709a913 --- /dev/null +++ b/src/yangerd/internal/iwmonitor/data.go @@ -0,0 +1,271 @@ +package iwmonitor + +import ( + "encoding/json" + "strconv" + "strings" + + "github.com/kernelkit/infix/src/yangerd/internal/wpactrl" +) + +func parseIWInfo(output string) json.RawMessage { + info := make(map[string]string) + for _, line := range strings.Split(output, "\n") { + if k, v, ok := parseKV(strings.TrimSpace(line)); ok { + switch k { + case "ssid": + info["ssid"] = v + case "type": + info["type"] = v + case "channel": + info["channel"] = v + case "txpower": + info["tx-power"] = v + } + } + } + data, _ := json.Marshal(info) + return json.RawMessage(data) +} + +func parseIWDevList(output string) []string { + var ifaces []string + for _, line := range strings.Split(output, "\n") { + line = strings.TrimSpace(line) + if 
// parseKV splits line at its first ':' into a whitespace-trimmed key
// and value.
//
// Without a ':' it returns ("", "", false).  With one, the third result
// is true only when the key is non-empty; the trimmed value is returned
// either way.
func parseKV(line string) (string, string, bool) {
	rawKey, rawVal, found := strings.Cut(line, ":")
	if !found {
		return "", "", false
	}

	key := strings.TrimSpace(rawKey)
	return key, strings.TrimSpace(rawVal), key != ""
}
// parseBitrate extracts the speed in 100kbps units from iw/hostapd rate info.
// iw link: "866.7 MBit/s VHT-MCS 9 ..."
// hostapd: "1560 vhtmcs 8 vhtnss 2" (value in 100kbps)
//
// Only the first whitespace-separated field is parsed.  Input containing
// "MBit/s" is read as a decimal number of MBit/s and scaled by ten,
// rounded to the nearest unit; anything else is read as an unsigned
// integer already in 100kbps.  Empty, unparsable, negative, non-finite
// or out-of-range input yields 0.
func parseBitrate(s string) uint32 {
	fields := strings.Fields(s)
	if len(fields) == 0 {
		return 0
	}

	if !strings.Contains(s, "MBit/s") {
		val, err := strconv.ParseUint(fields[0], 10, 32)
		if err != nil {
			return 0
		}
		return uint32(val)
	}

	mbit, err := strconv.ParseFloat(fields[0], 64)
	if err != nil {
		return 0
	}
	// Round to nearest rather than truncate, so binary floating-point
	// error in the product cannot cost a unit.
	scaled := mbit*10 + 0.5
	// Converting a negative, NaN or too-large float to uint32 is
	// implementation-defined in Go; reject those explicitly.  The
	// negated form also catches NaN, for which every comparison is false.
	if !(scaled >= 0 && scaled < 1<<32) {
		return 0
	}
	return uint32(scaled)
}
+// Retained for tests; no longer used in the main event loop. +func ParseIWEvent(line string) (IWEvent, bool) { + parts := strings.SplitN(line, ": ", 3) + if len(parts) < 3 { + return IWEvent{}, false + } + + ts, err := strconv.ParseFloat(parts[0], 64) + if err != nil { + return IWEvent{}, false + } + + ifacePhy := parts[1] + parenIdx := strings.Index(ifacePhy, " (") + if parenIdx < 0 { + return IWEvent{}, false + } + iface := ifacePhy[:parenIdx] + phy := strings.Trim(ifacePhy[parenIdx+2:], ")") + + eventStr := parts[2] + ev := IWEvent{Timestamp: ts, Interface: iface, Phy: phy} + + switch { + case strings.HasPrefix(eventStr, "new station "): + ev.Type = "new station" + ev.Addr = strings.TrimPrefix(eventStr, "new station ") + case strings.HasPrefix(eventStr, "del station "): + ev.Type = "del station" + ev.Addr = strings.TrimPrefix(eventStr, "del station ") + case strings.HasPrefix(eventStr, "connected to "): + ev.Type = "connected" + ev.Addr = strings.TrimPrefix(eventStr, "connected to ") + case eventStr == "disconnected": + ev.Type = "disconnected" + case strings.HasPrefix(eventStr, "ch_switch_started_notify"): + ev.Type = "ch_switch_started_notify" + case eventStr == "scan started": + ev.Type = "scan started" + case eventStr == "scan aborted": + ev.Type = "scan aborted" + case strings.HasPrefix(eventStr, "reg_change"): + ev.Type = "reg_change" + case strings.HasPrefix(eventStr, "auth"): + ev.Type = "auth" + default: + ev.Type = eventStr + } + + return ev, true +} + +// resolveSSID extracts the SSID for an interface. +// wpa_supplicant STATUS has "ssid=". +// hostapd STATUS has "bss[N]=" / "ssid[N]=" pairs. 
+func resolveSSID(iface string, si wpactrl.SocketInfo, status map[string]string) string { + if v := status["ssid"]; v != "" { + return v + } + for i := 0; i < 16; i++ { + idx := strconv.Itoa(i) + if status["bss["+idx+"]"] == iface { + if v := status["ssid["+idx+"]"]; v != "" { + return v + } + break + } + } + return "" +} diff --git a/src/yangerd/internal/iwmonitor/iwmonitor.go b/src/yangerd/internal/iwmonitor/iwmonitor.go new file mode 100644 index 000000000..54a972af2 --- /dev/null +++ b/src/yangerd/internal/iwmonitor/iwmonitor.go @@ -0,0 +1,471 @@ +package iwmonitor + +import ( + "context" + "encoding/json" + "fmt" + "log/slog" + "math" + "net" + "strconv" + "strings" + "sync" + "time" + + "github.com/kernelkit/infix/src/yangerd/internal/wpactrl" + "github.com/mdlayher/genetlink" + "github.com/mdlayher/netlink" + "golang.org/x/sys/unix" +) + +const ( + reconnectInitial = 500 * time.Millisecond + reconnectMax = 30 * time.Second + reconnectFactor = 2.0 + queryTimeout = 5 * time.Second +) + +// IWEvent is retained for ParseIWEvent compatibility (used in tests). 
+type IWEvent struct { + Timestamp float64 + Interface string + Phy string + Type string + Addr string +} + +type IWMonitor struct { + log *slog.Logger + onUpdate func(ifname string, data json.RawMessage) + onPhyChange func() + + mu sync.Mutex + attached map[string]context.CancelFunc +} + +func New(log *slog.Logger) *IWMonitor { + return &IWMonitor{ + log: log, + attached: make(map[string]context.CancelFunc), + } +} + +func (m *IWMonitor) SetOnUpdate(fn func(string, json.RawMessage)) { + m.onUpdate = fn +} + +func (m *IWMonitor) SetOnPhyChange(fn func()) { + m.onPhyChange = fn +} + +func (m *IWMonitor) Run(ctx context.Context) error { + conn, family, err := m.dialNL80211() + if err != nil { + return fmt.Errorf("nl80211 setup: %w", err) + } + defer conn.Close() + + m.refreshAllInterfaces(ctx) + + go func() { + <-ctx.Done() + conn.Close() + }() + + for { + msgs, _, err := conn.Receive() + if err != nil { + if ctx.Err() != nil { + return ctx.Err() + } + return fmt.Errorf("nl80211 receive: %w", err) + } + + for _, msg := range msgs { + m.handleNL80211(ctx, msg, family) + } + } +} + +func (m *IWMonitor) dialNL80211() (*genetlink.Conn, genetlink.Family, error) { + conn, err := genetlink.Dial(nil) + if err != nil { + return nil, genetlink.Family{}, fmt.Errorf("dial genetlink: %w", err) + } + + family, err := conn.GetFamily(unix.NL80211_GENL_NAME) + if err != nil { + conn.Close() + return nil, genetlink.Family{}, fmt.Errorf("resolve nl80211: %w", err) + } + + groups := map[string]bool{ + unix.NL80211_MULTICAST_GROUP_MLME: true, + unix.NL80211_MULTICAST_GROUP_REG: true, + unix.NL80211_MULTICAST_GROUP_CONFIG: true, + } + for _, g := range family.Groups { + if groups[g.Name] { + if err := conn.JoinGroup(g.ID); err != nil { + conn.Close() + return nil, genetlink.Family{}, fmt.Errorf("join %s: %w", g.Name, err) + } + m.log.Info("nl80211: joined multicast group", "name", g.Name, "id", g.ID) + } + } + + return conn, family, nil +} + +func (m *IWMonitor) handleNL80211(ctx 
context.Context, msg genetlink.Message, family genetlink.Family) { + ifname := m.extractIfname(msg.Data) + cmd := msg.Header.Command + + switch cmd { + case unix.NL80211_CMD_NEW_STATION, unix.NL80211_CMD_DEL_STATION: + if ifname != "" { + m.log.Debug("nl80211: station event", "cmd", cmd, "iface", ifname) + m.refreshInterface(ctx, ifname) + } + case unix.NL80211_CMD_CONNECT: + if ifname != "" { + m.log.Debug("nl80211: connect", "iface", ifname) + m.refreshInterface(ctx, ifname) + } + case unix.NL80211_CMD_DISCONNECT: + if ifname != "" { + m.log.Debug("nl80211: disconnect", "iface", ifname) + m.publishWifi(ifname, nil) + } + case unix.NL80211_CMD_REG_CHANGE: + m.log.Debug("nl80211: reg_change") + m.refreshAllInterfaces(ctx) + case unix.NL80211_CMD_NEW_INTERFACE: + if ifname != "" { + m.log.Info("nl80211: new interface", "iface", ifname) + m.startAttach(ctx, ifname) + m.refreshInterface(ctx, ifname) + } + case unix.NL80211_CMD_DEL_INTERFACE: + if ifname != "" { + m.log.Info("nl80211: del interface", "iface", ifname) + m.stopAttach(ifname) + m.publishWifi(ifname, nil) + } + case unix.NL80211_CMD_NEW_WIPHY, unix.NL80211_CMD_DEL_WIPHY: + m.log.Info("nl80211: phy change", "cmd", cmd) + if m.onPhyChange != nil { + m.onPhyChange() + } + } +} + +func (m *IWMonitor) extractIfname(data []byte) string { + ad, err := netlink.NewAttributeDecoder(data) + if err != nil { + return "" + } + for ad.Next() { + if ad.Type() == unix.NL80211_ATTR_IFINDEX { + iface, err := net.InterfaceByIndex(int(ad.Uint32())) + if err != nil { + return "" + } + return iface.Name + } + } + return "" +} + +func (m *IWMonitor) startAttach(ctx context.Context, ifname string) { + m.mu.Lock() + if _, exists := m.attached[ifname]; exists { + m.mu.Unlock() + return + } + attachCtx, cancel := context.WithCancel(ctx) + m.attached[ifname] = cancel + m.mu.Unlock() + + go m.attachLoop(attachCtx, ifname) +} + +func (m *IWMonitor) stopAttach(ifname string) { + m.mu.Lock() + if cancel, ok := m.attached[ifname]; ok { + 
cancel() + delete(m.attached, ifname) + } + m.mu.Unlock() +} + +func (m *IWMonitor) attachLoop(ctx context.Context, ifname string) { + delay := reconnectInitial + + for { + if ctx.Err() != nil { + return + } + + socks := wpactrl.ScanSockets() + si, ok := socks[ifname] + if !ok { + select { + case <-ctx.Done(): + return + case <-time.After(delay): + } + delay = nextDelay(delay) + continue + } + + ac, err := wpactrl.Attach(si.Path) + if err != nil { + m.log.Debug("attach failed", "iface", ifname, "err", err) + select { + case <-ctx.Done(): + return + case <-time.After(delay): + } + delay = nextDelay(delay) + continue + } + + delay = reconnectInitial + m.log.Info("attached to control socket", "iface", ifname, "daemon", si.Daemon) + m.refreshInterface(ctx, ifname) + + ac.SetHandler(func(ev wpactrl.Event) { + m.handleAttachEvent(ctx, ifname, ev) + }) + + err = ac.Run(ctx) + ac.Close() + + if ctx.Err() != nil { + return + } + + m.log.Warn("control socket lost", "iface", ifname, "err", err) + m.publishWifi(ifname, nil) + } +} + +func (m *IWMonitor) handleAttachEvent(ctx context.Context, ifname string, ev wpactrl.Event) { + switch { + case ev.Name == "AP-STA-CONNECTED" || ev.Name == "AP-STA-DISCONNECTED": + m.refreshInterface(ctx, ifname) + case ev.Name == "CTRL-EVENT-CONNECTED": + m.refreshInterface(ctx, ifname) + case ev.Name == "CTRL-EVENT-DISCONNECTED": + m.publishWifi(ifname, nil) + case ev.Name == "CTRL-EVENT-SCAN-RESULTS": + m.refreshInterface(ctx, ifname) + case ev.Name == "CTRL-EVENT-SIGNAL-CHANGE": + m.handleSignalChange(ifname, ev.Data) + case ev.Name == "CTRL-EVENT-TERMINATING": + // Daemon is shutting down; the read loop will get an error next + } +} + +func (m *IWMonitor) handleSignalChange(ifname string, data string) { + // TODO(lazzer): update signal in-place without full rebuild + // For now, this is a no-op; signal is read during refreshInterface. 
+ _ = ifname + _ = data +} + +func (m *IWMonitor) refreshInterface(ctx context.Context, iface string) { + wifi := m.buildWifiData(ctx, iface) + m.publishWifi(iface, wifi) +} + +func (m *IWMonitor) buildWifiData(ctx context.Context, iface string) map[string]any { + socks := wpactrl.ScanSockets() + si, ok := socks[iface] + + var status map[string]string + if ok { + conn, err := wpactrl.Dial(si.Path) + if err == nil { + status, _ = conn.Status() + conn.Close() + } + } + + mode := m.detectMode(si, status) + result := make(map[string]any) + + if mode == "ap" { + result["access-point"] = m.buildAPData(ctx, iface, si, status) + } else { + result["station"] = m.buildStationData(ctx, iface, si, status) + } + + return result +} + +func (m *IWMonitor) detectMode(si wpactrl.SocketInfo, status map[string]string) string { + if si.Daemon == "hostapd" { + return "ap" + } + return "station" +} + +func (m *IWMonitor) buildAPData(ctx context.Context, iface string, si wpactrl.SocketInfo, status map[string]string) map[string]any { + ap := make(map[string]any) + + ssid := resolveSSID(iface, si, status) + if ssid != "" { + ap["ssid"] = ssid + } + + if si.Daemon == "hostapd" { + conn, err := wpactrl.Dial(si.Path) + if err != nil { + m.log.Warn("hostapd dial for stations", "iface", iface, "err", err) + } else { + stas, err := conn.AllStations() + conn.Close() + if err != nil { + m.log.Warn("hostapd AllStations", "iface", iface, "err", err) + } + if err == nil { + stas = filterAuthorized(stas) + if len(stas) > 0 { + ap["stations"] = m.formatStations(stas) + } + } + } + } + + return ap +} + +func (m *IWMonitor) buildStationData(ctx context.Context, iface string, si wpactrl.SocketInfo, status map[string]string) map[string]any { + sta := make(map[string]any) + + ssid := resolveSSID(iface, si, status) + if ssid != "" { + sta["ssid"] = ssid + } + + if si.Daemon == "wpa_supplicant" { + conn, err := wpactrl.Dial(si.Path) + if err == nil { + poll, err := conn.SignalPoll() + if err == nil { + if v, 
ok := poll["RSSI"]; ok { + if sig, err := strconv.Atoi(v); err == nil { + sta["signal-strength"] = sig + } + } + if v, ok := poll["LINKSPEED"]; ok { + if speed, err := strconv.ParseUint(v, 10, 32); err == nil { + sta["tx-speed"] = uint32(speed * 10) + } + } + } + results, err := conn.ScanResults() + conn.Close() + if err == nil && len(results) > 0 { + sta["scan-results"] = formatScanResults(results) + } + } + } + + return sta +} + +func (m *IWMonitor) formatStations(stas []map[string]string) map[string]any { + type stationEntry struct { + MAC string `json:"mac-address"` + Signal int16 `json:"signal-strength,omitempty"` + ConnectedTime uint32 `json:"connected-time,omitempty"` + RxPackets string `json:"rx-packets,omitempty"` + TxPackets string `json:"tx-packets,omitempty"` + RxBytes string `json:"rx-bytes,omitempty"` + TxBytes string `json:"tx-bytes,omitempty"` + RxSpeed uint32 `json:"rx-speed,omitempty"` + TxSpeed uint32 `json:"tx-speed,omitempty"` + } + + var out []stationEntry + for _, st := range stas { + s := stationEntry{MAC: st["addr"]} + if v := st["signal"]; v != "" { + if sig, err := strconv.ParseInt(v, 10, 16); err == nil { + s.Signal = int16(sig) + } + } + if v := st["connected_time"]; v != "" { + if ct, err := strconv.ParseUint(v, 10, 32); err == nil { + s.ConnectedTime = uint32(ct) + } + } + if v := st["rx_packets"]; v != "" { + s.RxPackets = v + } + if v := st["tx_packets"]; v != "" { + s.TxPackets = v + } + if v := st["rx_bytes"]; v != "" { + s.RxBytes = v + } + if v := st["tx_bytes"]; v != "" { + s.TxBytes = v + } + if v := st["rx_rate_info"]; v != "" { + if speed := parseBitrate(v); speed > 0 { + s.RxSpeed = speed + } + } + if v := st["tx_rate_info"]; v != "" { + if speed := parseBitrate(v); speed > 0 { + s.TxSpeed = speed + } + } + out = append(out, s) + } + return map[string]any{"station": out} +} + +func (m *IWMonitor) publishWifi(iface string, data map[string]any) { + if m.onUpdate == nil { + return + } + + if data == nil { + m.onUpdate(iface, 
// filterAuthorized returns, in input order, the station records whose
// "flags" value contains the substring "AUTHORIZED".  The result is nil
// when no record qualifies.
func filterAuthorized(stas []map[string]string) []map[string]string {
	var authorized []map[string]string
	for i := range stas {
		if flags := stas[i]["flags"]; !strings.Contains(flags, "AUTHORIZED") {
			continue
		}
		authorized = append(authorized, stas[i])
	}
	return authorized
}
"1234567890.123456: wlan0 (phy#0): disconnected", + wantOK: true, + wantType: "disconnected", + wantIface: "wlan0", + wantPhy: "phy#0", + }, + { + name: "channel switch", + line: "1234567890.123456: wlan0 (phy#0): ch_switch_started_notify", + wantOK: true, + wantType: "ch_switch_started_notify", + wantIface: "wlan0", + wantPhy: "phy#0", + }, + { + name: "scan started", + line: "1234567890.123456: wlan0 (phy#0): scan started", + wantOK: true, + wantType: "scan started", + wantIface: "wlan0", + wantPhy: "phy#0", + }, + { + name: "reg change", + line: "1234567890.123456: wlan0 (phy#0): reg_change", + wantOK: true, + wantType: "reg_change", + wantIface: "wlan0", + wantPhy: "phy#0", + }, + { + name: "malformed missing separators", + line: "1234567890.123456 wlan0 (phy#0) new station aa:bb:cc:dd:ee:ff", + wantOK: false, + }, + { + name: "malformed bad timestamp", + line: "not-a-float: wlan0 (phy#0): disconnected", + wantOK: false, + }, + { + name: "malformed missing phy", + line: "1234567890.123456: wlan0 phy#0: disconnected", + wantOK: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, ok := ParseIWEvent(tt.line) + if ok != tt.wantOK { + t.Fatalf("ok = %v, want %v", ok, tt.wantOK) + } + if !tt.wantOK { + return + } + + if got.Type != tt.wantType { + t.Fatalf("Type = %q, want %q", got.Type, tt.wantType) + } + if got.Interface != tt.wantIface { + t.Fatalf("Interface = %q, want %q", got.Interface, tt.wantIface) + } + if got.Phy != tt.wantPhy { + t.Fatalf("Phy = %q, want %q", got.Phy, tt.wantPhy) + } + if got.Addr != tt.wantAddr { + t.Fatalf("Addr = %q, want %q", got.Addr, tt.wantAddr) + } + }) + } +} + +func TestParseStationDump(t *testing.T) { + input := `Station aa:bb:cc:dd:ee:ff (on wlan0) + inactive time: 10 ms + rx bytes: 1234 + tx bytes: 5678 + connected time: 42 seconds + signal: -40 dBm + rx bitrate: 6.5 MBit/s + tx bitrate: 130.0 MBit/s + authorized: yes + +Station 11:22:33:44:55:66 (on wlan0) + inactive time: 20 ms + rx 
bytes: 9876 + tx bytes: 5432 + connected time: 84 seconds + signal: -55 dBm + authorized: no` + + got := parseStationDump(input) + + var gotDecoded []map[string]string + if err := json.Unmarshal(got, &gotDecoded); err != nil { + t.Fatalf("unmarshal got: %v", err) + } + + want := []map[string]string{ + { + "mac": "aa:bb:cc:dd:ee:ff", + "inactive-time": "10 ms", + "rx-bytes": "1234", + "tx-bytes": "5678", + "connected-time": "42 seconds", + "signal": "-40 dBm", + "rx-bitrate": "6.5 MBit/s", + "tx-bitrate": "130.0 MBit/s", + "authorized": "yes", + }, + { + "mac": "11:22:33:44:55:66", + "inactive-time": "20 ms", + "rx-bytes": "9876", + "tx-bytes": "5432", + "connected-time": "84 seconds", + "signal": "-55 dBm", + "authorized": "no", + }, + } + + if !reflect.DeepEqual(gotDecoded, want) { + t.Fatalf("parseStationDump mismatch\n got: %#v\nwant: %#v", gotDecoded, want) + } +} + +func TestParseIWInfo(t *testing.T) { + input := `Interface wlan0 + ifindex: 4 + wdev: 0x1 + addr: 12:34:56:78:9a:bc + ssid: MyWiFi + type: managed + channel: 11 (2462 MHz), width: 20 MHz, center1: 2462 MHz + txpower: 20.00 dBm` + + got := parseIWInfo(input) + + var gotDecoded map[string]string + if err := json.Unmarshal(got, &gotDecoded); err != nil { + t.Fatalf("unmarshal got: %v", err) + } + + want := map[string]string{ + "ssid": "MyWiFi", + "type": "managed", + "channel": "11 (2462 MHz), width: 20 MHz, center1: 2462 MHz", + "tx-power": "20.00 dBm", + } + + if !reflect.DeepEqual(gotDecoded, want) { + t.Fatalf("parseIWInfo mismatch\n got: %#v\nwant: %#v", gotDecoded, want) + } +} + +func TestParseIWDevList(t *testing.T) { + input := `phy#0 + Interface wlan0 + ifindex 4 + wdev 0x1 + +phy#1 + Interface wlan1 + ifindex 5 + wdev 0x2` + + got := parseIWDevList(input) + want := []string{"wlan0", "wlan1"} + + if !reflect.DeepEqual(got, want) { + t.Fatalf("parseIWDevList mismatch\n got: %#v\nwant: %#v", got, want) + } +} diff --git a/src/yangerd/internal/lldpmonitor/lldpmonitor.go 
b/src/yangerd/internal/lldpmonitor/lldpmonitor.go new file mode 100644 index 000000000..1214db5dd --- /dev/null +++ b/src/yangerd/internal/lldpmonitor/lldpmonitor.go @@ -0,0 +1,460 @@ +// Package lldpmonitor keeps the LLDP neighbor table in the tree in sync +// with lldpd. A persistent `lldpcli -f json0 watch` subprocess is used +// purely as a change trigger -- its events carry only the changed +// neighbor, so they cannot be used to rebuild state (a delete event +// would re-add the neighbor, and an event for one port would wipe the +// others). On every event the full table is re-read with +// `lldpcli -f json0 show neighbors` and the subtree replaced, so removed +// neighbors disappear and neighbors present before yangerd started are +// picked up. +package lldpmonitor + +import ( + "bufio" + "bytes" + "context" + "encoding/json" + "fmt" + "log/slog" + "os/exec" + "regexp" + "strconv" + "strings" + "time" + + "github.com/kernelkit/infix/src/yangerd/internal/backoff" + "github.com/kernelkit/infix/src/yangerd/internal/tree" +) + +const ( + lldpMulticastMAC = "01:80:C2:00:00:0E" + treeKey = "ieee802-dot1ab-lldp:lldp" + + // debounceDelay coalesces bursts of watch events into one re-read. + debounceDelay = 200 * time.Millisecond + queryTimeout = 5 * time.Second +) + +// LLDPMonitor subscribes to LLDP neighbor events via a persistent +// lldpcli watch subprocess and re-reads the full neighbor table on +// every event. +type LLDPMonitor struct { + tree *tree.Tree + log *slog.Logger + refresh chan struct{} + + // query returns the current full neighbor table; overridable in tests. + query func(ctx context.Context) ([]byte, error) +} + +// New creates an LLDPMonitor. 
+func New(t *tree.Tree, log *slog.Logger) *LLDPMonitor { + if log == nil { + log = slog.Default() + } + return &LLDPMonitor{ + tree: t, + log: log, + refresh: make(chan struct{}, 1), + query: queryNeighbors, + } +} + +func queryNeighbors(ctx context.Context) ([]byte, error) { + ctx, cancel := context.WithTimeout(ctx, queryTimeout) + defer cancel() + return exec.CommandContext(ctx, "lldpcli", "-f", "json0", "show", "neighbors").Output() +} + +// Run starts the LLDP monitor. It blocks until ctx is cancelled. +func (m *LLDPMonitor) Run(ctx context.Context) error { + go m.refreshLoop(ctx) + + bo := backoff.Default() + delay := bo.Initial + + for { + err := m.runOnce(ctx) + if ctx.Err() != nil { + return ctx.Err() + } + + m.log.Warn("lldp monitor: subprocess exited, restarting", + "err", err, "delay", delay) + if err := backoff.Sleep(ctx, delay); err != nil { + return err + } + delay = bo.Next(delay) + } +} + +func (m *LLDPMonitor) runOnce(ctx context.Context) error { + cmd := exec.CommandContext(ctx, "lldpcli", "-f", "json0", "watch") + stdout, err := cmd.StdoutPipe() + if err != nil { + return fmt.Errorf("stdout pipe: %w", err) + } + if err := cmd.Start(); err != nil { + return fmt.Errorf("start lldpcli watch: %w", err) + } + defer cmd.Wait() + + // Pick up neighbors that existed before we attached. + m.triggerRefresh() + + scanner := bufio.NewScanner(stdout) + scanner.Buffer(make([]byte, 0, 1*1024*1024), 1*1024*1024) + + var buf bytes.Buffer + braceDepth := 0 + + for scanner.Scan() { + line := scanner.Text() + + // Blank-line framing: object separator. 
+ if strings.TrimSpace(line) == "" && braceDepth == 0 { + if buf.Len() > 0 { + m.processEvent(buf.Bytes()) + buf.Reset() + } + continue + } + + buf.WriteString(line) + buf.WriteByte('\n') + + for _, ch := range line { + switch ch { + case '{': + braceDepth++ + case '}': + braceDepth-- + } + } + + if braceDepth == 0 && buf.Len() > 0 { + m.processEvent(buf.Bytes()) + buf.Reset() + } + } + if err := scanner.Err(); err != nil { + return fmt.Errorf("read lldpcli: %w", err) + } + return fmt.Errorf("lldpcli watch process exited") +} + +// processEvent inspects a watch event and triggers a full table re-read. +// The event payload itself is never used to build state. +func (m *LLDPMonitor) processEvent(data []byte) { + var raw map[string]json.RawMessage + if err := json.Unmarshal(data, &raw); err != nil { + m.log.Warn("lldp monitor: parse event", "err", err) + return + } + + for key := range raw { + switch key { + case "lldp-added", "lldp-updated", "lldp-deleted": + m.log.Debug("lldp monitor: neighbor change", "event", key) + m.triggerRefresh() + return + } + } + m.log.Debug("lldp monitor: unknown event keys", "keys", keysOf(raw)) +} + +// triggerRefresh requests a table re-read; the buffered channel collapses +// pending requests into one. +func (m *LLDPMonitor) triggerRefresh() { + select { + case m.refresh <- struct{}{}: + default: + } +} + +func (m *LLDPMonitor) refreshLoop(ctx context.Context) { + for { + select { + case <-ctx.Done(): + return + case <-m.refresh: + } + + // Let a burst of events settle before reading. + select { + case <-ctx.Done(): + return + case <-time.After(debounceDelay): + } + select { + case <-m.refresh: + default: + } + + m.updateTree(ctx) + } +} + +// updateTree reads the full neighbor table and replaces the subtree. +// On a query error the previous data is left untouched. 
+func (m *LLDPMonitor) updateTree(ctx context.Context) { + out, err := m.query(ctx) + if err != nil { + m.log.Warn("lldp monitor: show neighbors", "err", err) + return + } + + m.tree.Set(treeKey, transformNeighbors(out)) + m.log.Debug("lldp monitor: tree updated") +} + +// j0ID is a chassis/port id element: {"type": "mac", "value": "..."}. +type j0ID struct { + Type string `json:"type"` + Value string `json:"value"` +} + +// j0Iface is one neighbor entry on an interface. In json0 format the +// interface name, rid and age are plain string fields and chassis/port +// are arrays. In the older keyed json format chassis/port are objects; +// the custom unmarshallers accept both. +type j0Iface struct { + Name string `json:"name"` + RID interface{} `json:"rid"` + Age string `json:"age"` + Chassis j0IDHolder `json:"chassis"` + Port j0IDHolder `json:"port"` +} + +// j0IDHolder extracts the first id from a chassis/port node in either +// json0 (array) or json (object) form. +type j0IDHolder struct { + ID j0ID +} + +func (h *j0IDHolder) UnmarshalJSON(data []byte) error { + var asArray []struct { + ID json.RawMessage `json:"id"` + } + if err := json.Unmarshal(data, &asArray); err == nil { + for _, e := range asArray { + if id, ok := parseID(e.ID); ok { + h.ID = id + return nil + } + } + return nil + } + + var asObject struct { + ID json.RawMessage `json:"id"` + } + if err := json.Unmarshal(data, &asObject); err != nil { + return nil // tolerate unknown shapes + } + if id, ok := parseID(asObject.ID); ok { + h.ID = id + } + return nil +} + +// parseID accepts an id as object {"type","value"} or array of such. 
+func parseID(raw json.RawMessage) (j0ID, bool) { + if len(raw) == 0 { + return j0ID{}, false + } + var one j0ID + if err := json.Unmarshal(raw, &one); err == nil && (one.Type != "" || one.Value != "") { + return one, true + } + var many []j0ID + if err := json.Unmarshal(raw, &many); err == nil && len(many) > 0 { + return many[0], true + } + return j0ID{}, false +} + +// collectIfaces extracts all neighbor interface entries from a show +// neighbors document, accepting both json0 ("lldp" is an array, entries +// carry a "name" field) and json ("lldp" is an object, entries are keyed +// by interface name) output formats. +func collectIfaces(data []byte) []j0Iface { + var ifaces []j0Iface + + addRaw := func(name string, raw json.RawMessage) { + var iface j0Iface + if err := json.Unmarshal(raw, &iface); err != nil { + return + } + if iface.Name == "" { + iface.Name = name + } + if iface.Name != "" { + ifaces = append(ifaces, iface) + } + } + + collectInterfaceNode := func(raw json.RawMessage) { + // json0: array of {"name": "eth0", ...} + var asArray []json.RawMessage + if err := json.Unmarshal(raw, &asArray); err == nil { + for _, e := range asArray { + // Either a direct entry with "name", or a keyed map + // {"eth0": {...}} from the older json format. 
+ var iface j0Iface + if err := json.Unmarshal(e, &iface); err == nil && iface.Name != "" { + ifaces = append(ifaces, iface) + continue + } + var keyed map[string]json.RawMessage + if err := json.Unmarshal(e, &keyed); err == nil { + for name, v := range keyed { + addRaw(name, v) + } + } + } + return + } + // json: single keyed map {"eth0": {...}} + var keyed map[string]json.RawMessage + if err := json.Unmarshal(raw, &keyed); err == nil { + for name, v := range keyed { + addRaw(name, v) + } + } + } + + var doc map[string]json.RawMessage + if err := json.Unmarshal(data, &doc); err != nil { + return nil + } + lldpRaw, ok := doc["lldp"] + if !ok { + return nil + } + + // json0: "lldp" is an array of {"interface": [...]}; json: an object. + var lldpArray []map[string]json.RawMessage + if err := json.Unmarshal(lldpRaw, &lldpArray); err == nil { + for _, entry := range lldpArray { + if ifRaw, ok := entry["interface"]; ok { + collectInterfaceNode(ifRaw) + } + } + return ifaces + } + var lldpObject map[string]json.RawMessage + if err := json.Unmarshal(lldpRaw, &lldpObject); err == nil { + if ifRaw, ok := lldpObject["interface"]; ok { + collectInterfaceNode(ifRaw) + } + } + return ifaces +} + +// transformNeighbors converts `lldpcli show neighbors` output to the +// YANG ieee802-dot1ab-lldp subtree (unwrapped -- the IPC layer adds the +// module envelope when serving the key). 
+func transformNeighbors(data []byte) json.RawMessage { + type remoteEntry struct { + TimeMark int `json:"time-mark"` + RemoteIndex int `json:"remote-index"` + ChassisIDSubtype string `json:"chassis-id-subtype"` + ChassisID string `json:"chassis-id"` + PortIDSubtype string `json:"port-id-subtype"` + PortID string `json:"port-id"` + } + type portEntry struct { + Name string `json:"name"` + DestMACAddress string `json:"dest-mac-address"` + RemoteSystems []remoteEntry `json:"remote-systems-data"` + } + + portMap := make(map[string]*portEntry) + var order []string + + for _, iface := range collectIfaces(data) { + port, ok := portMap[iface.Name] + if !ok { + port = &portEntry{ + Name: iface.Name, + DestMACAddress: lldpMulticastMAC, + } + portMap[iface.Name] = port + order = append(order, iface.Name) + } + + rid := 0 + switch v := iface.RID.(type) { + case float64: + rid = int(v) + case string: + rid, _ = strconv.Atoi(v) + } + + port.RemoteSystems = append(port.RemoteSystems, remoteEntry{ + TimeMark: parseAge(iface.Age), + RemoteIndex: rid, + ChassisIDSubtype: chassisIDSubtype(iface.Chassis.ID.Type), + ChassisID: iface.Chassis.ID.Value, + PortIDSubtype: portIDSubtype(iface.Port.ID.Type), + PortID: iface.Port.ID.Value, + }) + } + + ports := make([]portEntry, 0, len(order)) + for _, name := range order { + ports = append(ports, *portMap[name]) + } + + if len(ports) == 0 { + return json.RawMessage(`{}`) + } + + out, _ := json.Marshal(map[string]interface{}{"port": ports}) + return json.RawMessage(out) +} + +var idSubtypeMap = map[string]string{ + "ifalias": "interface-alias", + "mac": "mac-address", + "ip": "network-address", + "ifname": "interface-name", + "local": "local", +} + +func chassisIDSubtype(t string) string { + if v, ok := idSubtypeMap[t]; ok { + return v + } + return "unknown" +} + +func portIDSubtype(t string) string { + if v, ok := idSubtypeMap[t]; ok { + return v + } + return "unknown" +} + +var ageRe = 
regexp.MustCompile(`(\d+)\s*day[s]*,\s*(\d+):(\d+):(\d+)`) + +func parseAge(s string) int { + m := ageRe.FindStringSubmatch(s) + if m == nil { + return 0 + } + days, _ := strconv.Atoi(m[1]) + hours, _ := strconv.Atoi(m[2]) + mins, _ := strconv.Atoi(m[3]) + secs, _ := strconv.Atoi(m[4]) + return days*86400 + hours*3600 + mins*60 + secs +} + +func keysOf(m map[string]json.RawMessage) []string { + keys := make([]string, 0, len(m)) + for k := range m { + keys = append(keys, k) + } + return keys +} diff --git a/src/yangerd/internal/lldpmonitor/lldpmonitor_test.go b/src/yangerd/internal/lldpmonitor/lldpmonitor_test.go new file mode 100644 index 000000000..13d906c3a --- /dev/null +++ b/src/yangerd/internal/lldpmonitor/lldpmonitor_test.go @@ -0,0 +1,292 @@ +package lldpmonitor + +import ( + "context" + "encoding/json" + "errors" + "reflect" + "testing" + + "github.com/kernelkit/infix/src/yangerd/internal/tree" +) + +type remote struct { + TimeMark int `json:"time-mark"` + RemoteIndex int `json:"remote-index"` + ChassisIDSubtype string `json:"chassis-id-subtype"` + ChassisID string `json:"chassis-id"` + PortIDSubtype string `json:"port-id-subtype"` + PortID string `json:"port-id"` +} +type port struct { + Name string `json:"name"` + DestMAC string `json:"dest-mac-address"` + RemoteSystems []remote `json:"remote-systems-data"` +} + +// outShape is the stored (unwrapped) subtree: the IPC layer adds the +// module envelope when serving the key. +type outShape struct { + Port []port `json:"port"` +} + +// json0 format: "lldp" is an array, interface entries carry a "name" +// field, rid/age are string attributes, chassis/port/id are arrays. 
+const showNeighborsJSON0 = `{ + "lldp": [{ + "interface": [ + { + "name": "eth0", + "via": "LLDP", + "rid": "7", + "age": "0 day, 00:05:30", + "chassis": [{ + "id": [{"type": "mac", "value": "aa:bb:cc:dd:ee:ff"}], + "name": [{"value": "switch1"}] + }], + "port": [{ + "id": [{"type": "ifname", "value": "swp1"}] + }] + }, + { + "name": "eth1", + "via": "LLDP", + "rid": "9", + "age": "1 day, 02:30:15", + "chassis": [{ + "id": [{"type": "local", "value": "Chassis ID 007"}] + }], + "port": [{ + "id": [{"type": "mac", "value": "02:01:02:03:04:05"}] + }] + } + ] + }] +}` + +// Older keyed json format: "lldp" is an object, interfaces are keyed by +// name, chassis/port are objects. +const showNeighborsJSON = `{ + "lldp": { + "interface": [ + { + "eth0": { + "rid": 7, + "age": "0 day, 00:05:30", + "chassis": {"id": {"type": "mac", "value": "aa:bb:cc:dd:ee:ff"}}, + "port": {"id": {"type": "ifname", "value": "swp1"}} + } + } + ] + } +}` + +func decode(t *testing.T, raw json.RawMessage) outShape { + t.Helper() + var out outShape + if err := json.Unmarshal(raw, &out); err != nil { + t.Fatalf("unmarshal output: %v", err) + } + return out +} + +func TestTransformNeighborsJSON0(t *testing.T) { + out := decode(t, transformNeighbors([]byte(showNeighborsJSON0))) + + if len(out.Port) != 2 { + t.Fatalf("port count = %d, want 2", len(out.Port)) + } + + byIf := make(map[string]port) + for _, p := range out.Port { + if p.DestMAC != lldpMulticastMAC { + t.Fatalf("dest-mac-address = %q, want %q", p.DestMAC, lldpMulticastMAC) + } + byIf[p.Name] = p + } + + eth0, ok := byIf["eth0"] + if !ok || len(eth0.RemoteSystems) != 1 { + t.Fatalf("eth0 missing or wrong neighbor count: %#v", byIf) + } + want := remote{ + TimeMark: 330, + RemoteIndex: 7, + ChassisIDSubtype: "mac-address", + ChassisID: "aa:bb:cc:dd:ee:ff", + PortIDSubtype: "interface-name", + PortID: "swp1", + } + if !reflect.DeepEqual(eth0.RemoteSystems[0], want) { + t.Fatalf("eth0 remote\n got: %#v\nwant: %#v", eth0.RemoteSystems[0], 
want) + } + + eth1 := byIf["eth1"] + if len(eth1.RemoteSystems) != 1 { + t.Fatalf("eth1 neighbor count = %d", len(eth1.RemoteSystems)) + } + r := eth1.RemoteSystems[0] + if r.ChassisIDSubtype != "local" || r.ChassisID != "Chassis ID 007" { + t.Errorf("eth1 chassis = %s/%s", r.ChassisIDSubtype, r.ChassisID) + } + if r.PortIDSubtype != "mac-address" || r.PortID != "02:01:02:03:04:05" { + t.Errorf("eth1 port = %s/%s", r.PortIDSubtype, r.PortID) + } + if r.RemoteIndex != 9 || r.TimeMark != 95415 { + t.Errorf("eth1 rid/age = %d/%d", r.RemoteIndex, r.TimeMark) + } +} + +func TestTransformNeighborsKeyedJSON(t *testing.T) { + out := decode(t, transformNeighbors([]byte(showNeighborsJSON))) + + if len(out.Port) != 1 { + t.Fatalf("port count = %d, want 1", len(out.Port)) + } + p := out.Port[0] + if p.Name != "eth0" || len(p.RemoteSystems) != 1 { + t.Fatalf("unexpected port: %#v", p) + } + r := p.RemoteSystems[0] + if r.ChassisID != "aa:bb:cc:dd:ee:ff" || r.PortID != "swp1" || r.RemoteIndex != 7 { + t.Fatalf("unexpected remote: %#v", r) + } +} + +func TestTransformNeighborsEmpty(t *testing.T) { + for name, in := range map[string]string{ + "empty table json0": `{"lldp": [{}]}`, + "empty object": `{}`, + "malformed": `{not-json`, + } { + raw := transformNeighbors([]byte(in)) + if string(raw) != "{}" { + t.Errorf("%s: got %s, want {}", name, raw) + } + } +} + +// A neighbor that disappears between reads must vanish from the tree: +// every update is a full-table replace. 
+func TestUpdateTreeClearsRemovedNeighbors(t *testing.T) { + tr := tree.New() + m := New(tr, nil) + + m.query = func(context.Context) ([]byte, error) { + return []byte(showNeighborsJSON0), nil + } + m.updateTree(context.Background()) + if out := decode(t, tr.Get(treeKey)); len(out.Port) != 2 { + t.Fatalf("expected 2 ports after first read, got %d", len(out.Port)) + } + + m.query = func(context.Context) ([]byte, error) { + return []byte(`{"lldp": [{}]}`), nil + } + m.updateTree(context.Background()) + if out := decode(t, tr.Get(treeKey)); len(out.Port) != 0 { + t.Fatalf("stale neighbors not cleared: %d ports remain", len(out.Port)) + } +} + +// A failing query must leave the previous data untouched. +func TestUpdateTreeQueryErrorKeepsData(t *testing.T) { + tr := tree.New() + m := New(tr, nil) + + m.query = func(context.Context) ([]byte, error) { + return []byte(showNeighborsJSON0), nil + } + m.updateTree(context.Background()) + before := string(tr.Get(treeKey)) + + m.query = func(context.Context) ([]byte, error) { + return nil, errors.New("lldpcli gone") + } + m.updateTree(context.Background()) + + if after := string(tr.Get(treeKey)); after != before { + t.Fatal("query error overwrote existing lldp data") + } +} + +// Watch events are triggers only: added/updated/deleted all request a +// refresh, unknown events do not. 
+func TestProcessEventTriggers(t *testing.T) { + m := New(tree.New(), nil) + + drain := func() { + select { + case <-m.refresh: + default: + } + } + + for _, ev := range []string{"lldp-added", "lldp-updated", "lldp-deleted"} { + drain() + m.processEvent([]byte(`{"` + ev + `": {"lldp": {}}}`)) + select { + case <-m.refresh: + default: + t.Errorf("%s did not trigger refresh", ev) + } + } + + drain() + m.processEvent([]byte(`{"lldp-unknown": {}}`)) + select { + case <-m.refresh: + t.Error("unknown event triggered refresh") + default: + } +} + +func TestParseAge(t *testing.T) { + tests := []struct { + name string + in string + want int + }{ + {name: "zero day", in: "0 day, 00:05:30", want: 330}, + {name: "one day", in: "1 day, 02:30:15", want: 95415}, + {name: "ten days plural", in: "10 days, 00:00:00", want: 864000}, + {name: "empty", in: "", want: 0}, + {name: "invalid", in: "n/a", want: 0}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := parseAge(tt.in); got != tt.want { + t.Fatalf("parseAge(%q) = %d, want %d", tt.in, got, tt.want) + } + }) + } +} + +func TestSubtypeMappings(t *testing.T) { + tests := []struct { + name string + in string + want string + }{ + {name: "ifalias", in: "ifalias", want: "interface-alias"}, + {name: "mac", in: "mac", want: "mac-address"}, + {name: "ip", in: "ip", want: "network-address"}, + {name: "ifname", in: "ifname", want: "interface-name"}, + {name: "local", in: "local", want: "local"}, + {name: "unknown", in: "foo", want: "unknown"}, + } + + for _, tt := range tests { + t.Run("chassis_"+tt.name, func(t *testing.T) { + if got := chassisIDSubtype(tt.in); got != tt.want { + t.Fatalf("chassisIDSubtype(%q) = %q, want %q", tt.in, got, tt.want) + } + }) + t.Run("port_"+tt.name, func(t *testing.T) { + if got := portIDSubtype(tt.in); got != tt.want { + t.Fatalf("portIDSubtype(%q) = %q, want %q", tt.in, got, tt.want) + } + }) + } +} diff --git a/src/yangerd/internal/monitor/monitor.go 
b/src/yangerd/internal/monitor/monitor.go new file mode 100644 index 000000000..2fdd575df --- /dev/null +++ b/src/yangerd/internal/monitor/monitor.go @@ -0,0 +1,1171 @@ +package monitor + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "log/slog" + "net" + "sort" + "strings" + "sync" + "syscall" + "time" + + "github.com/kernelkit/infix/src/yangerd/internal/bridgebatch" + "github.com/kernelkit/infix/src/yangerd/internal/iface" + "github.com/kernelkit/infix/src/yangerd/internal/ipbatch" + "github.com/kernelkit/infix/src/yangerd/internal/stpquery" + "github.com/kernelkit/infix/src/yangerd/internal/tree" + "github.com/vishvananda/netlink" + "github.com/vishvananda/netlink/nl" +) + +// treeKey is the single YANG module key where the complete +// ietf-interfaces document is stored. +const treeKey = "ietf-interfaces:interfaces" + +// NLMonitor subscribes to netlink link/address/neighbor events and +// keeps interface operational data in the in-memory tree up to date. +// It is the central coordinator for all interface data — raw ip-json +// staging data is transformed via iface.Transform() and augmented +// with ethernet/wifi/bridge data before being stored as a single +// complete YANG document. +type NLMonitor struct { + linkBatch *ipbatch.IPBatch + addrBatch *ipbatch.IPBatch + neighBatch *ipbatch.IPBatch + brBatch *bridgebatch.BridgeBatch + tree *tree.Tree + ethRefresh func(string) + log *slog.Logger + fc iface.FileChecker + + // initDone is closed after the first initialDump completes. + // initDoneOnce ensures it is only closed once across restarts. + initDone chan struct{} + initDoneOnce sync.Once + + // redumpCh is written by the errorCallback goroutine to ask the + // event loop goroutine to run initialDump. Buffer of 1 so signals + // are coalesced — if a re-dump is already pending there is no point + // queuing another one. + redumpCh chan struct{} + + // staging holds raw ip-json data used as input to iface.Transform(). + // Protected by mu. 
+ mu sync.Mutex + links json.RawMessage // ip -json -s -d link show (includes stats+details) + addrs json.RawMessage // ip -json -d addr show (details only, no stats) + neighs json.RawMessage // ip -json neigh show + fdb map[string]json.RawMessage + mdb map[string]json.RawMessage + ethernet map[string]json.RawMessage // ifname → ethtool JSON + wifi map[string]json.RawMessage // ifname → wifi JSON + wireguard map[string]json.RawMessage // ifname → WireGuard peer-status JSON + + lastOperStatus map[string]string + + // stpMu guards lastSTP, a fingerprint of the most recent mstpd STP + // query, letting the periodic poll rebuild only on actual change. + stpMu sync.Mutex + lastSTP string +} + +// New creates a netlink monitor backed by ip/bridge batch query workers. +// linkBatch should include -s -d flags; addrBatch should include -d only +// (no -s, which causes multi-line output for link commands). +func New(linkBatch, addrBatch, neighBatch *ipbatch.IPBatch, brBatch *bridgebatch.BridgeBatch, t *tree.Tree, fc iface.FileChecker, log *slog.Logger) *NLMonitor { + return &NLMonitor{ + linkBatch: linkBatch, + addrBatch: addrBatch, + neighBatch: neighBatch, + brBatch: brBatch, + tree: t, + fc: fc, + log: log, + initDone: make(chan struct{}), + redumpCh: make(chan struct{}, 1), + fdb: make(map[string]json.RawMessage), + mdb: make(map[string]json.RawMessage), + ethernet: make(map[string]json.RawMessage), + wifi: make(map[string]json.RawMessage), + wireguard: make(map[string]json.RawMessage), + lastOperStatus: make(map[string]string), + } +} + +// SetEthRefresh sets an optional callback used to refresh ethtool data +// when interface link events are received. +func (m *NLMonitor) SetEthRefresh(fn func(string)) { + m.ethRefresh = fn +} + +// WaitReady returns a channel that is closed after initialDump completes. 
+func (m *NLMonitor) WaitReady() <-chan struct{} { + return m.initDone +} + +// SetEthernetData updates the staged ethernet data for an interface +// and triggers a full rebuild of the YANG document. +func (m *NLMonitor) SetEthernetData(ifname string, data json.RawMessage) { + m.mu.Lock() + m.ethernet[ifname] = data + m.mu.Unlock() + m.rebuild() +} + +// SetWifiData updates the staged wifi data for an interface +// and triggers a full rebuild of the YANG document. +func (m *NLMonitor) SetWifiData(ifname string, data json.RawMessage) { + m.mu.Lock() + m.wifi[ifname] = data + m.mu.Unlock() + m.rebuild() +} + +// SetWireguardData updates the staged WireGuard peer-status data for an +// interface and triggers a full rebuild of the YANG document. +func (m *NLMonitor) SetWireguardData(ifname string, data json.RawMessage) { + m.mu.Lock() + m.wireguard[ifname] = data + m.mu.Unlock() + m.rebuild() +} + +// Links returns a copy of the current staged links data. +func (m *NLMonitor) Links() json.RawMessage { + m.mu.Lock() + cp := append(json.RawMessage{}, m.links...) + m.mu.Unlock() + return cp +} + +// Run starts the netlink monitor loop and returns on context cancellation, +// channel closure, or subscription errors. +func (m *NLMonitor) Run(ctx context.Context) error { + runCtx, cancel := context.WithCancel(ctx) + defer cancel() + + done := make(chan struct{}) + defer close(done) + + errorCallback := func(err error) { + if err == nil { + return + } + if errors.Is(err, syscall.ENOBUFS) { + // Kernel dropped events due to socket buffer overflow. + // Signal the event loop goroutine to do a full re-dump so + // that the snapshot is taken in the same goroutine as event + // processing — avoiding a race with concurrent refreshInterface + // calls that could otherwise be overwritten by the re-dump. 
+ select { + case m.redumpCh <- struct{}{}: + default: // re-dump already pending + } + return + } + m.log.Error("netlink subscription error", "err", err) + cancel() + } + + linkCh := make(chan netlink.LinkUpdate, 64) + addrCh := make(chan netlink.AddrUpdate, 64) + neighCh := make(chan netlink.NeighUpdate, 64) + mdbCh := make(chan struct{}, 32) + + if err := netlink.LinkSubscribeWithOptions(linkCh, done, netlink.LinkSubscribeOptions{ + ErrorCallback: errorCallback, + ReceiveBufferSize: 32 * 1024 * 1024, + ReceiveBufferForceSize: true, + }); err != nil { + return fmt.Errorf("subscribe link updates: %w", err) + } + if err := netlink.AddrSubscribeWithOptions(addrCh, done, netlink.AddrSubscribeOptions{ + ErrorCallback: errorCallback, + ReceiveBufferSize: 32 * 1024 * 1024, + ReceiveBufferForceSize: true, + }); err != nil { + return fmt.Errorf("subscribe addr updates: %w", err) + } + if err := netlink.NeighSubscribeWithOptions(neighCh, done, netlink.NeighSubscribeOptions{ + ErrorCallback: errorCallback, + ReceiveBufferSize: 32 * 1024 * 1024, + ReceiveBufferForceSize: true, + }); err != nil { + return fmt.Errorf("subscribe neigh updates: %w", err) + } + if err := m.subscribeBridgeMDB(runCtx, mdbCh, errorCallback); err != nil { + return fmt.Errorf("subscribe bridge mdb updates: %w", err) + } + + if err := m.initialDump(); err != nil { + m.log.Error("initial dump failed", "err", err) + } + m.initDoneOnce.Do(func() { close(m.initDone) }) + + for { + select { + case <-runCtx.Done(): + if ctx.Err() != nil { + return ctx.Err() + } + return runCtx.Err() + case lu, ok := <-linkCh: + if !ok { + return fmt.Errorf("link update channel closed") + } + m.handleLinkUpdate(lu) + case au, ok := <-addrCh: + if !ok { + return fmt.Errorf("addr update channel closed") + } + m.handleAddrUpdate(au) + case nu, ok := <-neighCh: + if !ok { + return fmt.Errorf("neigh update channel closed") + } + m.handleNeighUpdate(nu) + case _, ok := <-mdbCh: + if !ok { + return fmt.Errorf("bridge mdb update 
channel closed") + } + m.handleMDBUpdate() + case <-m.redumpCh: + m.log.Warn("re-dumping all interfaces") + if err := m.initialDump(); err != nil { + m.log.Error("re-dump failed, will retry in 5s", "err", err) + time.AfterFunc(5*time.Second, m.requestRedump) + } + } + } +} + +func (m *NLMonitor) initialDump() error { + linkRaw, err := m.queryLink("link show") + if err != nil { + return err + } + addrRaw, err := m.queryAddr("addr show") + if err != nil { + return err + } + neighRaw, err := m.queryNeigh("neigh show") + if err != nil { + neighRaw = json.RawMessage(`[]`) + } + mdbRaw, err := m.queryBridge("mdb show") + if err != nil { + mdbRaw = json.RawMessage(`[]`) + } + + m.log.Debug("initialDump", "linkBytes", len(linkRaw), "addrBytes", len(addrRaw), "neighBytes", len(neighRaw)) + m.validateAddrData("initialDump", addrRaw) + + m.mu.Lock() + m.links = linkRaw + m.addrs = addrRaw + m.neighs = neighRaw + for _, bridgeName := range mdbBridgeNames(mdbRaw) { + m.mdb[bridgeName] = filterByMDBBridge(mdbRaw, bridgeName) + } + for _, name := range interfaceNames(linkRaw) { + if st, ok := extractOperStatus(filterByIfName(linkRaw, name)); ok { + m.lastOperStatus[name] = st + } + } + m.mu.Unlock() + + m.rebuild() + return nil +} + +func (m *NLMonitor) handleLinkUpdate(update netlink.LinkUpdate) { + name, ok := linkNameFromUpdate(update) + if !ok || name == "" { + m.log.Warn("link update without interface name", "index", int(update.Index)) + return + } + + if update.Header.Type == syscall.RTM_DELLINK { + m.removeInterface(name) + return + } + + m.refreshInterface(name) + if m.ethRefresh != nil { + m.ethRefresh(name) + } +} + +func (m *NLMonitor) handleAddrUpdate(update netlink.AddrUpdate) { + ifname, err := ifNameByIndex(update.LinkIndex) + if err != nil { + m.log.Warn("addr update: resolve interface", "index", update.LinkIndex, "err", err) + return + } + + m.log.Debug("handleAddrUpdate", "ifname", ifname) + raw, err := m.queryAddr("addr show dev " + ifname) + if err != nil { + 
if errors.Is(err, ipbatch.ErrBatchDead) { + m.requestRedump() + } + m.log.Error("handleAddrUpdate queryAddr failed", "ifname", ifname, "err", err) + return + } + + if !m.validateAddrData("handleAddrUpdate/"+ifname, raw) { + m.log.Error("handleAddrUpdate: REFUSING to store invalid addr data", "ifname", ifname) + return + } + + m.mu.Lock() + m.addrs = replaceByIfName(m.addrs, ifname, raw) + m.mu.Unlock() + + m.rebuild() +} + +func (m *NLMonitor) handleNeighUpdate(update netlink.NeighUpdate) { + if isBridgeFDB(update) { + bridgeName, ok := bridgeNameFromNeigh(update) + if !ok { + m.log.Warn("fdb update: bridge name not found", "link-index", update.LinkIndex) + return + } + + raw, err := m.queryBridge("fdb show br " + bridgeName) + if err != nil { + if errors.Is(err, bridgebatch.ErrBatchDead) { + m.requestRedump() + } + return + } + + m.mu.Lock() + m.fdb[bridgeName] = raw + m.mu.Unlock() + + m.rebuild() + return + } + + raw, err := m.queryNeigh("neigh show") + if err != nil { + if errors.Is(err, ipbatch.ErrBatchDead) { + m.requestRedump() + } + return + } + + m.mu.Lock() + m.neighs = raw + m.mu.Unlock() + + m.rebuild() +} + +func (m *NLMonitor) handleMDBUpdate() { + raw, err := m.queryBridge("mdb show") + if err != nil { + return + } + + m.mu.Lock() + for _, bridgeName := range mdbBridgeNames(raw) { + m.mdb[bridgeName] = filterByMDBBridge(raw, bridgeName) + } + m.mu.Unlock() + + m.rebuild() +} + +// removeInterface purges all staged data for the named interface and +// triggers a rebuild. Used for RTM_DELLINK events where querying the +// device via ip-batch would produce no output and hang the batch process. 
+func (m *NLMonitor) removeInterface(name string) { + m.mu.Lock() + m.links = replaceByIfName(m.links, name, json.RawMessage(`[]`)) + m.addrs = replaceByIfName(m.addrs, name, json.RawMessage(`[]`)) + delete(m.fdb, name) + delete(m.mdb, name) + delete(m.ethernet, name) + delete(m.wifi, name) + delete(m.wireguard, name) + delete(m.lastOperStatus, name) + m.mu.Unlock() + + m.log.Debug("removeInterface", "ifname", name) + m.rebuild() +} + +func (m *NLMonitor) requestRedump() { + select { + case m.redumpCh <- struct{}{}: + default: + } +} + +func (m *NLMonitor) refreshInterface(name string) { + linkRaw, err := m.queryLink("link show dev " + name) + if err != nil { + if errors.Is(err, ipbatch.ErrBatchDead) { + m.requestRedump() + } + return + } + + addrRaw, err := m.queryAddr("addr show dev " + name) + if err != nil { + addrRaw = nil + } + if addrRaw != nil && !m.validateAddrData("refreshInterface/"+name, addrRaw) { + m.log.Error("refreshInterface: REFUSING to store invalid addr data", "ifname", name) + addrRaw = nil + } + + m.mu.Lock() + m.updateOperStatus(name, linkRaw) + m.links = replaceByIfName(m.links, name, linkRaw) + if addrRaw != nil { + m.addrs = replaceByIfName(m.addrs, name, addrRaw) + } + m.mu.Unlock() + + m.rebuild() +} + +// rebuild runs iface.Transform on all staged data, merges augments +// (ethernet, wifi, bridge fdb/mdb), and stores the result. +// Caller must NOT hold m.mu. +func (m *NLMonitor) rebuild() { + m.mu.Lock() + linksCopy := append(json.RawMessage{}, m.links...) + addrsCopy := append(json.RawMessage{}, m.addrs...) + neighsCopy := append(json.RawMessage{}, m.neighs...) 
+ doc := iface.Transform(linksCopy, addrsCopy, linksCopy, neighsCopy, m.fc) + eth := copyStringMap(m.ethernet) + wfi := copyStringMap(m.wifi) + fdb := copyStringMap(m.fdb) + mdb := copyStringMap(m.mdb) + wg := copyStringMap(m.wireguard) + m.mu.Unlock() + + var brSTP, ptSTP map[string]json.RawMessage + resolver := stpquery.NewLinksIfIndexResolver(linksCopy) + brSTP, ptSTP = stpquery.Query(linksCopy, resolver) + + doc = mergeAugments(doc, eth, wfi, fdb, mdb, brSTP, ptSTP, wg) + m.tree.Set(treeKey, doc) +} + +// RefreshSTP re-queries mstpd and rebuilds only when STP data changed. +// mstpd's control socket is request/response with no event channel, and +// the bridge-level root-id settles via BPDU exchange without any netlink +// event, so STP state must be polled to stay current. No bridges or an +// unchanged result costs one cheap mstpd query and no document re-marshal. +func (m *NLMonitor) RefreshSTP() { + links := m.Links() + resolver := stpquery.NewLinksIfIndexResolver(links) + brSTP, ptSTP := stpquery.Query(links, resolver) + if len(brSTP) == 0 && len(ptSTP) == 0 { + return + } + + fp := stpFingerprint(brSTP, ptSTP) + m.stpMu.Lock() + changed := fp != m.lastSTP + m.lastSTP = fp + m.stpMu.Unlock() + + if changed { + m.rebuild() + } +} + +func stpFingerprint(brSTP, ptSTP map[string]json.RawMessage) string { + var b strings.Builder + writeSortedRaw(&b, "b", brSTP) + writeSortedRaw(&b, "p", ptSTP) + return b.String() +} + +func writeSortedRaw(b *strings.Builder, prefix string, m map[string]json.RawMessage) { + keys := make([]string, 0, len(m)) + for k := range m { + keys = append(keys, k) + } + sort.Strings(keys) + for _, k := range keys { + b.WriteString(prefix) + b.WriteByte(':') + b.WriteString(k) + b.WriteByte('=') + b.Write(m[k]) + b.WriteByte('\n') + } +} + +// mergeAugments adds ethernet, wifi, and bridge data into the +// complete ietf-interfaces document produced by iface.Transform(). 
+func mergeAugments(doc json.RawMessage, ethernet, wifi, fdb, mdb, bridgeSTP, portSTP, wireguard map[string]json.RawMessage) json.RawMessage { + if len(ethernet) == 0 && len(wifi) == 0 && len(fdb) == 0 && len(mdb) == 0 && len(bridgeSTP) == 0 && len(portSTP) == 0 && len(wireguard) == 0 { + return doc + } + + var root map[string]any + if err := json.Unmarshal(doc, &root); err != nil { + return doc + } + + ifaceList, ok := root["interface"] + if !ok { + return doc + } + ifaceArr, ok := ifaceList.([]any) + if !ok { + return doc + } + + for i, entry := range ifaceArr { + ifaceObj, ok := entry.(map[string]any) + if !ok { + continue + } + name, _ := ifaceObj["name"].(string) + if name == "" { + continue + } + + if ethData, ok := ethernet[name]; ok { + var wrapper map[string]json.RawMessage + if err := json.Unmarshal(ethData, &wrapper); err == nil { + if ethRaw, ok := wrapper["ethernet"]; ok { + var ethObj any + if err := json.Unmarshal(ethRaw, ðObj); err == nil { + ifaceObj["ieee802-ethernet-interface:ethernet"] = ethObj + } + } + if speedRaw, ok := wrapper["speed"]; ok { + var speed string + if err := json.Unmarshal(speedRaw, &speed); err == nil { + ifaceObj["speed"] = speed + } + } + } + } + + if wifiData, ok := wifi[name]; ok { + var wifiObj any + if err := json.Unmarshal(wifiData, &wifiObj); err == nil { + ifaceObj["infix-interfaces:wifi"] = wifiObj + } + } + + if fdbData, ok := fdb[name]; ok { + bridgeObj := ensureBridgeAugment(ifaceObj) + var fdbObj any + if err := json.Unmarshal(fdbData, &fdbObj); err == nil { + bridgeObj["fdb"] = fdbObj + } + } + + if mdbData, ok := mdb[name]; ok { + bridgeObj := ensureBridgeAugment(ifaceObj) + if mf := transformMDB(mdbData); mf != nil { + bridgeObj["multicast-filters"] = mf + } + } + + if stpData, ok := bridgeSTP[name]; ok { + bridgeObj := ensureBridgeAugment(ifaceObj) + var stpObj any + if err := json.Unmarshal(stpData, &stpObj); err == nil { + bridgeObj["stp"] = stpObj + } + } + + if stpData, ok := portSTP[name]; ok { + bpObj 
:= ensureBridgePortAugment(ifaceObj) + var stpObj any + if err := json.Unmarshal(stpData, &stpObj); err == nil { + deepMergeSTP(bpObj, stpObj) + } + } + + if wgData, ok := wireguard[name]; ok { + var wgObj any + if err := json.Unmarshal(wgData, &wgObj); err == nil { + ifaceObj["infix-interfaces:wireguard"] = wgObj + } + } + + ifaceArr[i] = ifaceObj + } + + out, err := json.Marshal(root) + if err != nil { + return doc + } + return json.RawMessage(out) +} + +// ensureBridgeAugment returns the bridge augment object within an +// interface, creating it if necessary. +func ensureBridgeAugment(ifaceObj map[string]any) map[string]any { + key := "infix-interfaces:bridge" + if existing, ok := ifaceObj[key]; ok { + if m, ok := existing.(map[string]any); ok { + return m + } + } + bridgeObj := map[string]any{} + ifaceObj[key] = bridgeObj + return bridgeObj +} + +func ensureBridgePortAugment(ifaceObj map[string]any) map[string]any { + key := "infix-interfaces:bridge-port" + if existing, ok := ifaceObj[key]; ok { + if m, ok := existing.(map[string]any); ok { + return m + } + } + obj := map[string]any{} + ifaceObj[key] = obj + return obj +} + +// deepMergeSTP merges mstpd STP data into the bridge-port augment. +// The kernel already provides stp.cist.state via iface.Transform; +// mstpd adds role, port-id, designated, etc. We deep-merge to +// preserve the kernel state field while adding mstpd fields. 
+func deepMergeSTP(bpObj map[string]any, stpData any) { + stpMap, ok := stpData.(map[string]any) + if !ok { + return + } + + existing, _ := bpObj["stp"].(map[string]any) + if existing == nil { + bpObj["stp"] = stpMap + return + } + + if newCist, ok := stpMap["cist"].(map[string]any); ok { + existingCist, _ := existing["cist"].(map[string]any) + if existingCist == nil { + existing["cist"] = newCist + } else { + for k, v := range newCist { + existingCist[k] = v + } + } + } + + for k, v := range stpMap { + if k != "cist" { + existing[k] = v + } + } +} + +func copyStringMap(m map[string]json.RawMessage) map[string]json.RawMessage { + if len(m) == 0 { + return nil + } + cp := make(map[string]json.RawMessage, len(m)) + for k, v := range m { + cp[k] = v + } + return cp +} + +func (m *NLMonitor) updateOperStatus(ifname string, raw json.RawMessage) { + status, ok := extractOperStatus(raw) + if !ok { + return + } + + prev, had := m.lastOperStatus[ifname] + m.lastOperStatus[ifname] = status + if had && prev != status { + m.log.Info("oper-status transition", "ifname", ifname, "from", prev, "to", status) + } +} + +func (m *NLMonitor) queryLink(command string) (json.RawMessage, error) { + raw, err := m.linkBatch.Query(command) + if err != nil { + if errors.Is(err, ipbatch.ErrBatchDead) { + m.log.Warn("link batch dead", "command", command, "err", err) + return nil, err + } + m.log.Error("link batch query failed", "command", command, "err", err) + return nil, err + } + return raw, nil +} + +func (m *NLMonitor) queryAddr(command string) (json.RawMessage, error) { + raw, err := m.addrBatch.Query(command) + if err != nil { + if errors.Is(err, ipbatch.ErrBatchDead) { + m.log.Warn("addr batch dead", "command", command, "err", err) + return nil, err + } + m.log.Error("addr batch query failed", "command", command, "err", err) + return nil, err + } + return raw, nil +} + +func (m *NLMonitor) queryNeigh(command string) (json.RawMessage, error) { + raw, err := m.neighBatch.Query(command) + 
if err != nil { + if errors.Is(err, ipbatch.ErrBatchDead) { + m.log.Warn("neigh batch dead", "command", command, "err", err) + return nil, err + } + m.log.Error("neigh batch query failed", "command", command, "err", err) + return nil, err + } + return raw, nil +} + +func (m *NLMonitor) queryBridge(command string) (json.RawMessage, error) { + raw, err := m.brBatch.Query(command) + if err != nil { + if errors.Is(err, bridgebatch.ErrBatchDead) { + m.log.Warn("bridge batch dead", "command", command, "err", err) + return nil, err + } + m.log.Error("bridge batch query failed", "command", command, "err", err) + return nil, err + } + return raw, nil +} + +func (m *NLMonitor) subscribeBridgeMDB(ctx context.Context, ch chan<- struct{}, errorCallback func(error)) error { + sock, err := nl.Subscribe(syscall.NETLINK_ROUTE, 26) + if err != nil { + return err + } + + go func() { + defer close(ch) + defer sock.Close() + + for { + select { + case <-ctx.Done(): + return + default: + } + + msgs, _, err := sock.Receive() + if err != nil { + if ctx.Err() != nil { + return + } + errorCallback(err) + return + } + if len(msgs) == 0 { + continue + } + + select { + case ch <- struct{}{}: + default: + } + } + }() + + return nil +} + +func ifNameByIndex(index int) (string, error) { + iface, err := net.InterfaceByIndex(index) + if err != nil { + return "", err + } + return iface.Name, nil +} + +func linkNameFromUpdate(update netlink.LinkUpdate) (string, bool) { + if update.Link != nil && update.Link.Attrs() != nil && update.Link.Attrs().Name != "" { + return update.Link.Attrs().Name, true + } + if update.Index <= 0 { + return "", false + } + name, err := ifNameByIndex(int(update.Index)) + if err != nil { + return "", false + } + return name, true +} + +func isBridgeFDB(update netlink.NeighUpdate) bool { + if update.Family == syscall.AF_BRIDGE { + return true + } + if update.MasterIndex > 0 { + return true + } + if update.Flags&netlink.NTF_MASTER != 0 { + return true + } + return false +} + 
+func bridgeNameFromNeigh(update netlink.NeighUpdate) (string, bool) { + if update.MasterIndex > 0 { + name, err := ifNameByIndex(update.MasterIndex) + if err == nil { + return name, true + } + } + + if update.LinkIndex <= 0 { + return "", false + } + link, err := netlink.LinkByIndex(update.LinkIndex) + if err == nil && link != nil && link.Attrs() != nil && link.Attrs().MasterIndex > 0 { + name, err := ifNameByIndex(link.Attrs().MasterIndex) + if err == nil { + return name, true + } + } + return "", false +} + +// validateAddrData checks whether a JSON response from "addr show" contains +// addr_info entries. "ip -json addr show" always includes an "addr_info" +// array for every interface object; its absence means we got link-format +// data instead. Returns true if the data looks valid (has addr_info). +func (m *NLMonitor) validateAddrData(caller string, raw json.RawMessage) bool { + if len(raw) == 0 { + m.log.Error("addr data is EMPTY", "caller", caller) + return false + } + + var rows []map[string]json.RawMessage + if err := json.Unmarshal(raw, &rows); err != nil { + m.log.Error("addr data unmarshal failed", "caller", caller, "err", err, "raw", string(raw)) + return false + } + + if len(rows) == 0 { + // Empty array is valid — interface exists but has no addresses. + return true + } + + for _, row := range rows { + ifnRaw, _ := row["ifname"] + var ifn string + json.Unmarshal(ifnRaw, &ifn) + + if _, ok := row["addr_info"]; !ok { + m.log.Error("addr data MISSING addr_info — got link-format data", + "caller", caller, + "ifname", ifn, + "keys", mapKeys(row), + "raw", string(raw), + ) + return false + } + } + return true +} + +// mapKeys returns the JSON object keys from a map for diagnostic logging. 
+func mapKeys(m map[string]json.RawMessage) []string { + keys := make([]string, 0, len(m)) + for k := range m { + keys = append(keys, k) + } + return keys +} + +func extractOperStatus(raw json.RawMessage) (string, bool) { + var rows []map[string]json.RawMessage + if err := json.Unmarshal(raw, &rows); err != nil || len(rows) == 0 { + return "", false + } + stateRaw, ok := rows[0]["operstate"] + if !ok { + return "", false + } + var state string + if err := json.Unmarshal(stateRaw, &state); err != nil || state == "" { + return "", false + } + return state, true +} + +func interfaceNames(raw json.RawMessage) []string { + var rows []map[string]json.RawMessage + if err := json.Unmarshal(raw, &rows); err != nil { + return nil + } + + names := make([]string, 0, len(rows)) + seen := make(map[string]struct{}, len(rows)) + for _, row := range rows { + ifnRaw, ok := row["ifname"] + if !ok { + continue + } + var ifname string + if err := json.Unmarshal(ifnRaw, &ifname); err != nil || ifname == "" { + continue + } + if _, ok := seen[ifname]; ok { + continue + } + seen[ifname] = struct{}{} + names = append(names, ifname) + } + return names +} + +func filterByIfName(raw json.RawMessage, ifname string) json.RawMessage { + var rows []map[string]json.RawMessage + if err := json.Unmarshal(raw, &rows); err != nil { + return json.RawMessage(`[]`) + } + + filtered := make([]map[string]json.RawMessage, 0, 1) + for _, row := range rows { + ifnRaw, ok := row["ifname"] + if !ok { + continue + } + var name string + if err := json.Unmarshal(ifnRaw, &name); err != nil { + continue + } + if name == ifname { + filtered = append(filtered, row) + } + } + + out, err := json.Marshal(filtered) + if err != nil { + return json.RawMessage(`[]`) + } + return json.RawMessage(out) +} + +// replaceByIfName replaces all entries for ifname in the bulk array +// with entries from perIface, and returns the updated full array. 
+func replaceByIfName(bulk json.RawMessage, ifname string, perIface json.RawMessage) json.RawMessage { + var bulkRows []json.RawMessage + if err := json.Unmarshal(bulk, &bulkRows); err != nil { + return perIface + } + + kept := make([]json.RawMessage, 0, len(bulkRows)) + for _, row := range bulkRows { + var obj map[string]json.RawMessage + if err := json.Unmarshal(row, &obj); err != nil { + kept = append(kept, row) + continue + } + ifnRaw, ok := obj["ifname"] + if !ok { + kept = append(kept, row) + continue + } + var name string + if err := json.Unmarshal(ifnRaw, &name); err != nil || name != ifname { + kept = append(kept, row) + } + } + + var newRows []json.RawMessage + if err := json.Unmarshal(perIface, &newRows); err == nil { + kept = append(kept, newRows...) + } + + out, err := json.Marshal(kept) + if err != nil { + return bulk + } + return json.RawMessage(out) +} + +func bridgeNames(raw json.RawMessage) []string { + var rows []map[string]json.RawMessage + if err := json.Unmarshal(raw, &rows); err != nil { + return nil + } + + names := make([]string, 0, len(rows)) + seen := make(map[string]struct{}, len(rows)) + for _, row := range rows { + brRaw, ok := row["br"] + if !ok { + continue + } + var name string + if err := json.Unmarshal(brRaw, &name); err != nil || name == "" { + continue + } + if _, ok := seen[name]; ok { + continue + } + seen[name] = struct{}{} + names = append(names, name) + } + return names +} + +func filterByBridge(raw json.RawMessage, bridgeName string) json.RawMessage { + var rows []map[string]json.RawMessage + if err := json.Unmarshal(raw, &rows); err != nil { + return json.RawMessage(`[]`) + } + + filtered := make([]map[string]json.RawMessage, 0, 1) + for _, row := range rows { + brRaw, ok := row["br"] + if !ok { + continue + } + var br string + if err := json.Unmarshal(brRaw, &br); err != nil { + continue + } + if br == bridgeName { + filtered = append(filtered, row) + } + } + + out, err := json.Marshal(filtered) + if err != nil { + 
return json.RawMessage(`[]`) + } + return json.RawMessage(out) +} + +// parseMDBEntries extracts the flat list of MDB entries from the +// bridge batch output format: [{"mdb":[{entries...}],"router":{}}] +func parseMDBEntries(raw json.RawMessage) []map[string]any { + var wrappers []map[string]json.RawMessage + if err := json.Unmarshal(raw, &wrappers); err != nil { + return nil + } + + var all []map[string]any + for _, w := range wrappers { + mdbRaw, ok := w["mdb"] + if !ok { + continue + } + var entries []map[string]any + if err := json.Unmarshal(mdbRaw, &entries); err != nil { + continue + } + all = append(all, entries...) + } + return all +} + +func mdbBridgeNames(raw json.RawMessage) []string { + entries := parseMDBEntries(raw) + seen := make(map[string]struct{}, len(entries)) + var names []string + for _, e := range entries { + name, _ := e["dev"].(string) + if name == "" { + continue + } + if _, ok := seen[name]; ok { + continue + } + seen[name] = struct{}{} + names = append(names, name) + } + return names +} + +func filterByMDBBridge(raw json.RawMessage, bridgeName string) json.RawMessage { + entries := parseMDBEntries(raw) + var filtered []map[string]any + for _, e := range entries { + if dev, _ := e["dev"].(string); dev == bridgeName { + filtered = append(filtered, e) + } + } + if len(filtered) == 0 { + return json.RawMessage(`[]`) + } + out, err := json.Marshal(filtered) + if err != nil { + return json.RawMessage(`[]`) + } + return json.RawMessage(out) +} + +func transformMDB(raw json.RawMessage) map[string]any { + var entries []map[string]any + if err := json.Unmarshal(raw, &entries); err != nil || len(entries) == 0 { + return nil + } + + type portEntry struct { + Port string `json:"port"` + State string `json:"state"` + } + + groups := make(map[string][]portEntry) + var order []string + + for _, e := range entries { + grp, _ := e["grp"].(string) + port, _ := e["port"].(string) + state, _ := e["state"].(string) + if grp == "" || port == "" { + continue + 
} + + if _, seen := groups[grp]; !seen { + order = append(order, grp) + } + groups[grp] = append(groups[grp], portEntry{ + Port: port, + State: mdbStateToYANG(state), + }) + } + + if len(groups) == 0 { + return nil + } + + filters := make([]map[string]any, 0, len(order)) + for _, grp := range order { + filters = append(filters, map[string]any{ + "group": grp, + "ports": groups[grp], + }) + } + + return map[string]any{"multicast-filter": filters} +} + +func mdbStateToYANG(state string) string { + switch state { + case "temp": + return "temporary" + case "permanent": + return "permanent" + default: + return state + } +} diff --git a/src/yangerd/internal/monitor/monitor_test.go b/src/yangerd/internal/monitor/monitor_test.go new file mode 100644 index 000000000..bd2de1ef5 --- /dev/null +++ b/src/yangerd/internal/monitor/monitor_test.go @@ -0,0 +1,458 @@ +package monitor + +import ( + "encoding/json" + "reflect" + "syscall" + "testing" + + "github.com/vishvananda/netlink" +) + +func TestExtractOperStatus(t *testing.T) { + tests := []struct { + name string + raw json.RawMessage + want string + wantOK bool + }{ + { + name: "valid single entry", + raw: json.RawMessage(`[{"operstate":"UP"}]`), + want: "UP", + wantOK: true, + }, + { + name: "multiple entries first wins", + raw: json.RawMessage(`[{"operstate":"DOWN"},{"operstate":"UP"}]`), + want: "DOWN", + wantOK: true, + }, + { + name: "missing operstate", + raw: json.RawMessage(`[{"ifname":"eth0"}]`), + want: "", + wantOK: false, + }, + { + name: "empty array", + raw: json.RawMessage(`[]`), + want: "", + wantOK: false, + }, + { + name: "invalid json", + raw: json.RawMessage(`{`), + want: "", + wantOK: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, ok := extractOperStatus(tt.raw) + if got != tt.want || ok != tt.wantOK { + t.Fatalf("extractOperStatus() = (%q, %v), want (%q, %v)", got, ok, tt.want, tt.wantOK) + } + }) + } +} + +func TestInterfaceNames(t *testing.T) { + tests := 
[]struct { + name string + raw json.RawMessage + want []string + }{ + { + name: "deduplicate and keep order", + raw: json.RawMessage(`[ + {"ifname":"eth0"}, + {"ifname":"eth1"}, + {"ifname":"eth0"}, + {"x":"y"} + ]`), + want: []string{"eth0", "eth1"}, + }, + { + name: "empty array", + raw: json.RawMessage(`[]`), + want: []string{}, + }, + { + name: "objects without ifname skipped", + raw: json.RawMessage(`[{"name":"eth0"},{"foo":"bar"}]`), + want: []string{}, + }, + { + name: "invalid json", + raw: json.RawMessage(`{`), + want: nil, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := interfaceNames(tt.raw) + if !reflect.DeepEqual(got, tt.want) { + t.Fatalf("interfaceNames() = %#v, want %#v", got, tt.want) + } + }) + } +} + +func TestFilterByIfName(t *testing.T) { + tests := []struct { + name string + raw json.RawMessage + ifname string + wantCount int + wantEmpty bool + }{ + { + name: "filters correctly", + raw: json.RawMessage(`[ + {"ifname":"eth0","x":1}, + {"ifname":"eth1","x":2}, + {"ifname":"eth0","x":3} + ]`), + ifname: "eth0", + wantCount: 2, + }, + { + name: "no matches returns empty array", + raw: json.RawMessage(`[{"ifname":"eth1"}]`), + ifname: "eth0", + wantCount: 0, + wantEmpty: true, + }, + { + name: "invalid json", + raw: json.RawMessage(`{`), + ifname: "eth0", + wantCount: 0, + wantEmpty: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := filterByIfName(tt.raw, tt.ifname) + + var rows []map[string]json.RawMessage + if err := json.Unmarshal(got, &rows); err != nil { + t.Fatalf("unmarshal filtered rows: %v", err) + } + if len(rows) != tt.wantCount { + t.Fatalf("row count = %d, want %d", len(rows), tt.wantCount) + } + if tt.wantEmpty && string(got) != "[]" { + t.Fatalf("expected [] got %s", string(got)) + } + }) + } +} + +func TestBridgeNames(t *testing.T) { + raw := json.RawMessage(`[ + {"br":"br0"}, + {"br":"br1"}, + {"br":"br0"}, + {"ifname":"eth0"} + ]`) + + got := 
bridgeNames(raw) + want := []string{"br0", "br1"} + + if !reflect.DeepEqual(got, want) { + t.Fatalf("bridgeNames() = %#v, want %#v", got, want) + } +} + +func TestFilterByBridge(t *testing.T) { + tests := []struct { + name string + raw json.RawMessage + bridge string + wantCount int + wantEmpty bool + }{ + { + name: "filters correctly", + raw: json.RawMessage(`[ + {"br":"br0","grp":"239.1.1.1"}, + {"br":"br1","grp":"239.1.1.2"}, + {"br":"br0","grp":"239.1.1.3"} + ]`), + bridge: "br0", + wantCount: 2, + }, + { + name: "no matches returns empty array", + raw: json.RawMessage(`[{"br":"br1"}]`), + bridge: "br0", + wantCount: 0, + wantEmpty: true, + }, + { + name: "invalid json", + raw: json.RawMessage(`{`), + bridge: "br0", + wantCount: 0, + wantEmpty: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := filterByBridge(tt.raw, tt.bridge) + + var rows []map[string]json.RawMessage + if err := json.Unmarshal(got, &rows); err != nil { + t.Fatalf("unmarshal filtered rows: %v", err) + } + if len(rows) != tt.wantCount { + t.Fatalf("row count = %d, want %d", len(rows), tt.wantCount) + } + if tt.wantEmpty && string(got) != "[]" { + t.Fatalf("expected [] got %s", string(got)) + } + }) + } +} + +func TestIsBridgeFDB(t *testing.T) { + tests := []struct { + name string + update netlink.NeighUpdate + want bool + }{ + { + name: "bridge family", + update: netlink.NeighUpdate{Neigh: netlink.Neigh{Family: syscall.AF_BRIDGE}}, + want: true, + }, + { + name: "master index set", + update: netlink.NeighUpdate{Neigh: netlink.Neigh{MasterIndex: 10}}, + want: true, + }, + { + name: "master flag set", + update: netlink.NeighUpdate{Neigh: netlink.Neigh{Flags: netlink.NTF_MASTER}}, + want: true, + }, + { + name: "non-bridge", + update: netlink.NeighUpdate{}, + want: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := isBridgeFDB(tt.update); got != tt.want { + t.Fatalf("isBridgeFDB() = %v, want %v", got, tt.want) 
+ } + }) + } +} + +func TestReplaceByIfName(t *testing.T) { + tests := []struct { + name string + bulk json.RawMessage + ifname string + perIface json.RawMessage + want int + }{ + { + name: "replace existing", + bulk: json.RawMessage(`[{"ifname":"eth0","x":1},{"ifname":"eth1","x":2}]`), + ifname: "eth0", + perIface: json.RawMessage(`[{"ifname":"eth0","x":99}]`), + want: 2, + }, + { + name: "add new", + bulk: json.RawMessage(`[{"ifname":"eth0","x":1}]`), + ifname: "eth1", + perIface: json.RawMessage(`[{"ifname":"eth1","x":2}]`), + want: 2, + }, + { + name: "empty bulk", + bulk: json.RawMessage(`[]`), + ifname: "eth0", + perIface: json.RawMessage(`[{"ifname":"eth0","x":1}]`), + want: 1, + }, + { + name: "invalid bulk", + bulk: json.RawMessage(`{`), + ifname: "eth0", + perIface: json.RawMessage(`[{"ifname":"eth0","x":1}]`), + want: 1, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := replaceByIfName(tt.bulk, tt.ifname, tt.perIface) + var rows []json.RawMessage + if err := json.Unmarshal(got, &rows); err != nil { + t.Fatalf("unmarshal: %v", err) + } + if len(rows) != tt.want { + t.Fatalf("row count = %d, want %d (raw: %s)", len(rows), tt.want, string(got)) + } + }) + } +} + +func TestReplaceByIfNamePreservesUpdatedData(t *testing.T) { + bulk := json.RawMessage(`[{"ifname":"eth0","x":1},{"ifname":"eth1","x":2}]`) + updated := replaceByIfName(bulk, "eth0", json.RawMessage(`[{"ifname":"eth0","x":99}]`)) + + var rows []map[string]json.RawMessage + if err := json.Unmarshal(updated, &rows); err != nil { + t.Fatalf("unmarshal: %v", err) + } + + for _, row := range rows { + var name string + json.Unmarshal(row["ifname"], &name) + if name == "eth0" { + var x int + json.Unmarshal(row["x"], &x) + if x != 99 { + t.Fatalf("eth0.x = %d, want 99", x) + } + return + } + } + t.Fatal("eth0 not found in result") +} + +func TestMergeAugments(t *testing.T) { + doc := 
json.RawMessage(`{"interface":[{"name":"eth0","type":"infix-if-type:ethernet"},{"name":"br0","type":"infix-if-type:bridge"}]}`) + + eth := map[string]json.RawMessage{ + "eth0": json.RawMessage(`{"ethernet":{"speed":"1.000","duplex":"full"},"speed":"1000000000"}`), + } + fdb := map[string]json.RawMessage{ + "br0": json.RawMessage(`[{"mac":"00:11:22:33:44:55"}]`), + } + + got := mergeAugments(doc, eth, nil, fdb, nil, nil, nil, nil) + + var root map[string]any + if err := json.Unmarshal(got, &root); err != nil { + t.Fatalf("unmarshal: %v", err) + } + + ifaces := root["interface"].([]any) + eth0 := ifaces[0].(map[string]any) + if _, ok := eth0["ieee802-ethernet-interface:ethernet"]; !ok { + t.Fatal("ethernet augment not merged into eth0") + } + + br0 := ifaces[1].(map[string]any) + bridge, ok := br0["infix-interfaces:bridge"] + if !ok { + t.Fatal("bridge augment not created for br0") + } + bridgeMap := bridge.(map[string]any) + if _, ok := bridgeMap["fdb"]; !ok { + t.Fatal("fdb not merged into bridge augment") + } +} + +func TestMergeAugmentsNoOp(t *testing.T) { + doc := json.RawMessage(`{"interface":[{"name":"lo"}]}`) + got := mergeAugments(doc, nil, nil, nil, nil, nil, nil, nil) + if string(got) != string(doc) { + t.Fatalf("expected no-op, got %s", string(got)) + } +} + +func TestMergeAugmentsInvalidDoc(t *testing.T) { + doc := json.RawMessage(`{invalid`) + eth := map[string]json.RawMessage{"eth0": json.RawMessage(`{}`)} + got := mergeAugments(doc, eth, nil, nil, nil, nil, nil, nil) + if string(got) != string(doc) { + t.Fatalf("expected passthrough on invalid doc, got %s", string(got)) + } +} + +func TestTreeKey(t *testing.T) { + if treeKey != "ietf-interfaces:interfaces" { + t.Fatalf("treeKey = %q, want %q", treeKey, "ietf-interfaces:interfaces") + } +} + +func TestTransformMDB(t *testing.T) { + // transformMDB receives the output of filterByMDBBridge: a flat array of entries + raw := 
json.RawMessage(`[{"dev":"br0","port":"e3","grp":"224.1.1.1","state":"temp"},{"dev":"br0","port":"e4","grp":"224.1.1.1","state":"permanent"},{"dev":"br0","port":"e3","grp":"ff02::6a","state":"temp"}]`) + + result := transformMDB(raw) + if result == nil { + t.Fatal("expected non-nil result") + } + + filters, ok := result["multicast-filter"].([]map[string]any) + if !ok { + t.Fatalf("unexpected type: %T", result["multicast-filter"]) + } + if len(filters) != 2 { + t.Fatalf("expected 2 filters, got %d", len(filters)) + } + + if filters[0]["group"] != "224.1.1.1" { + t.Fatalf("unexpected group: %v", filters[0]["group"]) + } + + out, _ := json.Marshal(result) + if !json.Valid(out) { + t.Fatalf("invalid JSON: %s", out) + } +} + +func TestTransformMDBEmpty(t *testing.T) { + if transformMDB(json.RawMessage(`[]`)) != nil { + t.Fatal("expected nil for empty") + } + if transformMDB(json.RawMessage(`[{"dev":"br0","port":"br0","grp":"ff02::6a","state":"temp"}]`)) == nil { + t.Fatal("expected non-nil for router-only entry") + } +} + +func TestMDBBridgeNames(t *testing.T) { + raw := json.RawMessage(`[{"mdb":[{"dev":"br0","port":"e3","grp":"224.1.1.1","state":"temp"}],"router":{}},{"mdb":[{"dev":"br1","port":"e5","grp":"ff02::1","state":"temp"}],"router":{}}]`) + names := mdbBridgeNames(raw) + if len(names) != 2 || names[0] != "br0" || names[1] != "br1" { + t.Fatalf("unexpected names: %v", names) + } +} + +func TestSTPFingerprintDeterministic(t *testing.T) { + br1 := map[string]json.RawMessage{ + "br0": json.RawMessage(`{"root-id":"1.000.00:a0:85:00:01:00"}`), + "br1": json.RawMessage(`{"root-id":"2.000.00:a0:85:00:02:00"}`), + } + br2 := map[string]json.RawMessage{ + "br1": json.RawMessage(`{"root-id":"2.000.00:a0:85:00:02:00"}`), + "br0": json.RawMessage(`{"root-id":"1.000.00:a0:85:00:01:00"}`), + } + pt1 := map[string]json.RawMessage{"e1": json.RawMessage(`{"state":"forwarding"}`)} + pt2 := map[string]json.RawMessage{"e1": json.RawMessage(`{"state":"forwarding"}`)} + + if 
stpFingerprint(br1, pt1) != stpFingerprint(br2, pt2) { + t.Fatal("fingerprint must be independent of map iteration order") + } + + br2["br0"] = json.RawMessage(`{"root-id":"8.000.00:a0:85:00:01:00"}`) + if stpFingerprint(br1, pt1) == stpFingerprint(br2, pt2) { + t.Fatal("fingerprint must change when STP data changes") + } +} diff --git a/src/yangerd/internal/nl80211/nl80211.go b/src/yangerd/internal/nl80211/nl80211.go new file mode 100644 index 000000000..18ca885bb --- /dev/null +++ b/src/yangerd/internal/nl80211/nl80211.go @@ -0,0 +1,655 @@ +package nl80211 + +import ( + "encoding/binary" + "fmt" + "os" + "path/filepath" + "sort" + "strconv" + "strings" + + "github.com/mdlayher/genetlink" + "github.com/mdlayher/netlink" + "golang.org/x/sys/unix" +) + +type Client struct { + conn *genetlink.Conn + family genetlink.Family +} + +func Dial() (*Client, error) { + conn, err := genetlink.Dial(nil) + if err != nil { + return nil, fmt.Errorf("dial genetlink: %w", err) + } + + family, err := conn.GetFamily("nl80211") + if err != nil { + _ = conn.Close() + return nil, fmt.Errorf("resolve nl80211 family: %w", err) + } + + return &Client{conn: conn, family: family}, nil +} + +func (c *Client) Close() error { + if c == nil || c.conn == nil { + return nil + } + return c.conn.Close() +} + +func (c *Client) ListPhys() ([]string, error) { + msgs, err := c.execute(unix.NL80211_CMD_GET_WIPHY, nil, netlink.Request|netlink.Dump) + if err != nil { + return nil, err + } + + set := make(map[string]bool) + for _, msg := range msgs { + attrs, err := netlink.NewAttributeDecoder(msg.Data) + if err != nil { + continue + } + for attrs.Next() { + if attrs.Type() != unix.NL80211_ATTR_WIPHY_NAME { + continue + } + name := attrs.String() + if name != "" { + set[name] = true + } + } + if err := attrs.Err(); err != nil { + continue + } + } + + out := make([]string, 0, len(set)) + for name := range set { + out = append(out, name) + } + sort.Strings(out) + + return out, nil +} + +func (c *Client) 
PhyInterfaces() (map[string][]string, error) { + msgs, err := c.execute(unix.NL80211_CMD_GET_INTERFACE, nil, netlink.Request|netlink.Dump) + if err != nil { + return nil, err + } + + out := make(map[string][]string) + for _, msg := range msgs { + ad, err := netlink.NewAttributeDecoder(msg.Data) + if err != nil { + continue + } + + phyIdx := -1 + ifname := "" + + for ad.Next() { + switch ad.Type() { + case unix.NL80211_ATTR_WIPHY: + phyIdx = int(ad.Uint32()) + case unix.NL80211_ATTR_IFNAME: + ifname = ad.String() + } + } + if err := ad.Err(); err != nil { + continue + } + if phyIdx < 0 || ifname == "" { + continue + } + + k := strconv.Itoa(phyIdx) + out[k] = append(out[k], ifname) + } + + for k := range out { + sort.Strings(out[k]) + } + + return out, nil +} + +func (c *Client) PhyInfo(phyName string) (map[string]interface{}, error) { + ae := netlink.NewAttributeEncoder() + ae.Flag(unix.NL80211_ATTR_SPLIT_WIPHY_DUMP, true) + req, _ := ae.Encode() + msgs, err := c.execute(unix.NL80211_CMD_GET_WIPHY, req, netlink.Request|netlink.Dump) + if err != nil { + return nil, err + } + + // Collect all messages belonging to the target PHY. The kernel + // identifies fragments by repeating NL80211_ATTR_WIPHY (index) + // or NL80211_ATTR_WIPHY_NAME in each fragment. 
+ targetIdx := -1 + var phyMsgs [][]byte + for _, msg := range msgs { + idx, name := parseWiphyIdent(msg.Data) + if name == phyName { + targetIdx = idx + phyMsgs = append(phyMsgs, msg.Data) + } else if targetIdx >= 0 && idx == targetIdx { + phyMsgs = append(phyMsgs, msg.Data) + } + } + if len(phyMsgs) == 0 { + return nil, fmt.Errorf("phy %q not found", phyName) + } + + info := map[string]interface{}{ + "bands": []interface{}{}, + "driver": readDriver(phyName), + "manufacturer": readManufacturer(phyName), + "interface_combinations": []interface{}{}, + "max_txpower": 0, + "num_virtual_interfaces": 0, + } + + phyIdx := -1 + bandMap := make(map[uint16]*bandInfo) + + for _, data := range phyMsgs { + ad, err := netlink.NewAttributeDecoder(data) + if err != nil { + continue + } + for ad.Next() { + switch ad.Type() { + case unix.NL80211_ATTR_WIPHY: + phyIdx = int(ad.Uint32()) + case unix.NL80211_ATTR_WIPHY_BANDS: + mergeBands(bandMap, ad.Bytes()) + case unix.NL80211_ATTR_INTERFACE_COMBINATIONS: + if combs := parseInterfaceCombinations(ad.Bytes()); len(combs) > 0 { + info["interface_combinations"] = combs + } + case unix.NL80211_ATTR_WIPHY_TX_POWER_LEVEL: + info["max_txpower"] = int(ad.Uint32() / 100) + } + } + } + if bands := finalizeBands(bandMap); len(bands) > 0 { + info["bands"] = bands + } + + if phyIdx >= 0 { + ifs, err := c.PhyInterfaces() + if err == nil { + info["num_virtual_interfaces"] = len(ifs[strconv.Itoa(phyIdx)]) + } + } + + return info, nil +} + +func (c *Client) Survey(ifindex int) ([]map[string]interface{}, error) { + ae := netlink.NewAttributeEncoder() + ae.Uint32(unix.NL80211_ATTR_IFINDEX, uint32(ifindex)) + req, err := ae.Encode() + if err != nil { + return nil, fmt.Errorf("encode get_survey request: %w", err) + } + + msgs, err := c.execute(unix.NL80211_CMD_GET_SURVEY, req, netlink.Request|netlink.Dump) + if err != nil { + return nil, err + } + + out := make([]map[string]interface{}, 0) + for _, msg := range msgs { + ad, err := 
netlink.NewAttributeDecoder(msg.Data) + if err != nil { + continue + } + for ad.Next() { + if ad.Type() != unix.NL80211_ATTR_SURVEY_INFO { + continue + } + entry := parseSurveyEntry(ad.Bytes()) + if entry != nil { + out = append(out, entry) + } + } + if err := ad.Err(); err != nil { + continue + } + } + + return out, nil +} + +func (c *Client) execute(cmd uint8, data []byte, flags netlink.HeaderFlags) ([]genetlink.Message, error) { + msgs, err := c.conn.Execute( + genetlink.Message{Header: genetlink.Header{Command: cmd, Version: c.family.Version}, Data: data}, + c.family.ID, + flags, + ) + if err != nil { + return nil, fmt.Errorf("nl80211 command %d: %w", cmd, err) + } + + return msgs, nil +} + +func parseWiphyIdent(data []byte) (int, string) { + ad, err := netlink.NewAttributeDecoder(data) + if err != nil { + return -1, "" + } + idx := -1 + name := "" + for ad.Next() { + switch ad.Type() { + case unix.NL80211_ATTR_WIPHY: + idx = int(ad.Uint32()) + case unix.NL80211_ATTR_WIPHY_NAME: + name = ad.String() + } + } + return idx, name +} + +type bandInfo struct { + frequencies []interface{} + htCapable bool + vhtCapable bool + heCapable bool +} + +func mergeBands(m map[uint16]*bandInfo, data []byte) { + ad, err := netlink.NewAttributeDecoder(data) + if err != nil { + return + } + for ad.Next() { + bandType := ad.Type() + bi, ok := m[bandType] + if !ok { + bi = &bandInfo{} + m[bandType] = bi + } + mergeBandAttrs(bi, ad.Bytes()) + } +} + +func mergeBandAttrs(bi *bandInfo, data []byte) { + nad, err := netlink.NewAttributeDecoder(data) + if err != nil { + return + } + for nad.Next() { + switch nad.Type() { + case unix.NL80211_BAND_ATTR_FREQS: + if freqs := parseBandFrequencies(nad.Bytes()); len(freqs) > 0 { + bi.frequencies = freqs + } + case unix.NL80211_BAND_ATTR_HT_CAPA: + if nad.Uint16() != 0 { + bi.htCapable = true + } + case unix.NL80211_BAND_ATTR_VHT_CAPA: + if nad.Uint32() != 0 { + bi.vhtCapable = true + } + case unix.NL80211_BAND_ATTR_IFTYPE_DATA: + if 
len(nad.Bytes()) > 0 { + bi.heCapable = true + } + } + } +} + +func finalizeBands(m map[uint16]*bandInfo) []interface{} { + keys := make([]int, 0, len(m)) + for k := range m { + keys = append(keys, int(k)) + } + sort.Ints(keys) + + out := make([]interface{}, 0, len(keys)) + for _, k := range keys { + bi := m[uint16(k)] + if len(bi.frequencies) == 0 && !bi.htCapable && !bi.vhtCapable && !bi.heCapable { + continue + } + out = append(out, map[string]interface{}{ + "band": k, + "name": detectBandName(bi.frequencies), + "ht_capable": bi.htCapable, + "vht_capable": bi.vhtCapable, + "he_capable": bi.heCapable, + "frequencies": bi.frequencies, + }) + } + return out +} + +func parseBandFrequencies(data []byte) []interface{} { + out := make([]interface{}, 0) + + ad, err := netlink.NewAttributeDecoder(data) + if err != nil { + return out + } + + for ad.Next() { + freq, ok := parseFrequencyEntry(ad.Bytes()) + if ok { + out = append(out, freq) + } + } + + return out +} + +func parseFrequencyEntry(data []byte) (int, bool) { + ad, err := netlink.NewAttributeDecoder(data) + if err != nil { + return 0, false + } + + freq := 0 + disabled := false + + for ad.Next() { + switch ad.Type() { + case unix.NL80211_FREQUENCY_ATTR_FREQ: + freq = int(ad.Uint32()) + case unix.NL80211_FREQUENCY_ATTR_DISABLED: + disabled = true + } + } + + if disabled || freq == 0 { + return 0, false + } + + return freq, true +} + +func detectBandName(freqs []interface{}) string { + has24 := false + has5 := false + has6 := false + + for _, f := range freqs { + freq, ok := f.(int) + if !ok { + continue + } + switch { + case freq >= 2400 && freq <= 2500: + has24 = true + case freq >= 5000 && freq <= 5900: + has5 = true + case freq >= 5925 && freq <= 7125: + has6 = true + } + } + + switch { + case has24: + return "2.4 GHz" + case has5: + return "5 GHz" + case has6: + return "6 GHz" + default: + return "Unknown" + } +} + +func parseInterfaceCombinations(data []byte) []interface{} { + out := make([]interface{}, 0) + + 
ad, err := netlink.NewAttributeDecoder(data) + if err != nil { + return out + } + + for ad.Next() { + comb := parseInterfaceCombination(ad.Bytes()) + if comb != nil { + out = append(out, comb) + } + } + + return out +} + +func parseInterfaceCombination(data []byte) map[string]interface{} { + limits := make([]interface{}, 0) + + ad, err := netlink.NewAttributeDecoder(data) + if err != nil { + return nil + } + + for ad.Next() { + if ad.Type() != unix.NL80211_IFACE_COMB_LIMITS { + continue + } + limits = parseInterfaceLimits(ad.Bytes()) + } + + if len(limits) == 0 { + return nil + } + + return map[string]interface{}{"limits": limits} +} + +func parseInterfaceLimits(data []byte) []interface{} { + out := make([]interface{}, 0) + + ad, err := netlink.NewAttributeDecoder(data) + if err != nil { + return out + } + + for ad.Next() { + entry := parseInterfaceLimitEntry(ad.Bytes()) + if entry != nil { + out = append(out, entry) + } + } + + return out +} + +func parseInterfaceLimitEntry(data []byte) map[string]interface{} { + max := 0 + types := make([]interface{}, 0) + + ad, err := netlink.NewAttributeDecoder(data) + if err != nil { + return nil + } + + for ad.Next() { + switch ad.Type() { + case unix.NL80211_IFACE_LIMIT_MAX: + max = int(ad.Uint32()) + case unix.NL80211_IFACE_LIMIT_TYPES: + types = parseIfaceLimitTypes(ad.Bytes()) + } + } + + if max == 0 || len(types) == 0 { + return nil + } + + return map[string]interface{}{ + "max": max, + "types": types, + } +} + +func parseIfaceLimitTypes(data []byte) []interface{} { + out := make([]interface{}, 0) + seen := make(map[string]bool) + + ad, err := netlink.NewAttributeDecoder(data) + if err != nil { + return out + } + + for ad.Next() { + if len(ad.Bytes()) == 4 { + iftype := int(ad.Uint32()) + if s, ok := iftypeName(iftype); ok { + if !seen[s] { + seen[s] = true + out = append(out, s) + } + } + continue + } + + iftype := int(ad.Type()) + if s, ok := iftypeName(iftype); ok { + if !seen[s] { + seen[s] = true + out = append(out, 
s) + } + } + } + + sort.Slice(out, func(i, j int) bool { + return out[i].(string) < out[j].(string) + }) + + return out +} + +func iftypeName(v int) (string, bool) { + switch v { + case 0: + return "unspecified", true + case 1: + return "adhoc", true + case 2: + return "station", true + case 3: + return "AP", true + case 4: + return "AP_VLAN", true + case 5: + return "WDS", true + case 6: + return "monitor", true + case 7: + return "mesh_point", true + case 8: + return "P2P_client", true + case 9: + return "P2P_GO", true + case 10: + return "P2P_device", true + default: + return "", false + } +} + +func parseSurveyEntry(data []byte) map[string]interface{} { + entry := map[string]interface{}{ + "frequency": 0, + "noise": 0, + "in_use": false, + "active_time": 0, + "busy_time": 0, + "receive_time": 0, + "transmit_time": 0, + } + + ad, err := netlink.NewAttributeDecoder(data) + if err != nil { + return nil + } + + hasFrequency := false + + for ad.Next() { + switch ad.Type() { + case unix.NL80211_SURVEY_INFO_FREQUENCY: + entry["frequency"] = int(readUint(ad.Bytes())) + hasFrequency = true + case unix.NL80211_SURVEY_INFO_NOISE: + entry["noise"] = int(ad.Int8()) + case unix.NL80211_SURVEY_INFO_IN_USE: + entry["in_use"] = true + case unix.NL80211_SURVEY_INFO_TIME: + entry["active_time"] = int(readUint(ad.Bytes())) + case unix.NL80211_SURVEY_INFO_TIME_BUSY: + entry["busy_time"] = int(readUint(ad.Bytes())) + case unix.NL80211_SURVEY_INFO_TIME_RX: + entry["receive_time"] = int(readUint(ad.Bytes())) + case unix.NL80211_SURVEY_INFO_TIME_TX: + entry["transmit_time"] = int(readUint(ad.Bytes())) + } + } + + if !hasFrequency { + return nil + } + + return entry +} + +func readUint(b []byte) uint64 { + switch len(b) { + case 1: + return uint64(b[0]) + case 2: + return uint64(binary.NativeEndian.Uint16(b)) + case 4: + return uint64(binary.NativeEndian.Uint32(b)) + case 8: + return binary.NativeEndian.Uint64(b) + default: + return 0 + } +} + +func readDriver(phyName string) string { + 
path := filepath.Join("/sys/class/ieee80211", phyName, "device", "driver") + target, err := os.Readlink(path) + if err != nil { + return "" + } + if base := filepath.Base(target); base != "." && base != "/" { + return base + } + return "" +} + +func readManufacturer(phyName string) string { + driver := readDriver(phyName) + if driver == "" { + return "Unknown" + } + d := strings.ToLower(driver) + switch { + case strings.Contains(d, "mt") || strings.Contains(d, "mediatek"): + return "MediaTek Inc." + case strings.Contains(d, "rtw") || strings.Contains(d, "realtek"): + return "Realtek Semiconductor Corp." + case strings.Contains(d, "ath") || strings.Contains(d, "qca"): + return "Qualcomm Atheros" + case strings.Contains(d, "iwl") || strings.Contains(d, "intel"): + return "Intel Corporation" + case strings.Contains(d, "brcm") || strings.Contains(d, "broadcom"): + return "Broadcom Inc." + default: + return "Unknown" + } +} diff --git a/src/yangerd/internal/stpquery/stpquery.go b/src/yangerd/internal/stpquery/stpquery.go new file mode 100644 index 000000000..de5ffc452 --- /dev/null +++ b/src/yangerd/internal/stpquery/stpquery.go @@ -0,0 +1,636 @@ +// Package stpquery provides a native Go client for querying mstpd's +// operational data over its abstract Unix datagram socket. It decodes +// the binary wire protocol directly — no subprocess, no CGo. +package stpquery + +import ( + "encoding/binary" + "encoding/json" + "fmt" + "os" + "strconv" + "sync/atomic" + "syscall" + "time" + "unsafe" +) + +const ( + cmdGetCISTBridgeStatus = 101 + cmdGetCISTPortStatus = 105 + serverSocketName = ".mstp_server" +) + +type ctlMsgHdr struct { + Cmd, Lin, Lout, Llog, Res int32 +} + +const hdrSize = 20 + +type Client struct { + fd int +} + +// sockaddrUN is the full-size struct sockaddr_un used by mstpd. +// mstpd passes sizeof(struct sockaddr_un) to bind/connect, so the +// abstract name is zero-padded to fill the entire sun_path[108]. 
+// Go's net package uses minimal length, which produces a different +// abstract socket name. We must match mstpd's behavior exactly. +type sockaddrUN struct { + Family uint16 + Path [108]byte +} + +func setSockAddr(sa *sockaddrUN, name string) { + sa.Family = syscall.AF_UNIX + copy(sa.Path[1:], name) +} + +var connSeq atomic.Uint64 + +func New() (*Client, error) { + fd, err := syscall.Socket(syscall.AF_UNIX, syscall.SOCK_DGRAM, 0) + if err != nil { + return nil, fmt.Errorf("socket: %w", err) + } + + seq := connSeq.Add(1) + var local sockaddrUN + setSockAddr(&local, fmt.Sprintf("MSTPCTL_%d_%d", os.Getpid(), seq)) + _, _, errno := syscall.Syscall(syscall.SYS_BIND, uintptr(fd), + uintptr(unsafe.Pointer(&local)), unsafe.Sizeof(local)) + if errno != 0 { + syscall.Close(fd) + return nil, fmt.Errorf("bind: %w", errno) + } + + var remote sockaddrUN + setSockAddr(&remote, serverSocketName) + _, _, errno = syscall.Syscall(syscall.SYS_CONNECT, uintptr(fd), + uintptr(unsafe.Pointer(&remote)), unsafe.Sizeof(remote)) + if errno != 0 { + syscall.Close(fd) + return nil, fmt.Errorf("connect to mstpd: %w", errno) + } + + return &Client{fd: fd}, nil +} + +func (c *Client) Close() error { + if c.fd >= 0 { + err := syscall.Close(c.fd) + c.fd = -1 + return err + } + return nil +} + +func (c *Client) roundTrip(cmd int32, in []byte, outSize int) ([]byte, error) { + hdr := ctlMsgHdr{ + Cmd: cmd, + Lin: int32(len(in)), + Lout: int32(outSize), + } + + buf := make([]byte, hdrSize+len(in)) + binary.NativeEndian.PutUint32(buf[0:4], uint32(hdr.Cmd)) + binary.NativeEndian.PutUint32(buf[4:8], uint32(hdr.Lin)) + binary.NativeEndian.PutUint32(buf[8:12], uint32(hdr.Lout)) + binary.NativeEndian.PutUint32(buf[12:16], uint32(hdr.Llog)) + binary.NativeEndian.PutUint32(buf[16:20], uint32(hdr.Res)) + copy(buf[hdrSize:], in) + + tv := syscall.Timeval{Sec: 5} + syscall.SetsockoptTimeval(c.fd, syscall.SOL_SOCKET, syscall.SO_SNDTIMEO, &tv) + syscall.SetsockoptTimeval(c.fd, syscall.SOL_SOCKET, 
syscall.SO_RCVTIMEO, &tv) + + if err := syscall.Sendmsg(c.fd, buf, nil, nil, 0); err != nil { + return nil, fmt.Errorf("write to mstpd: %w", err) + } + + resp := make([]byte, hdrSize+outSize+4096) + n, _, _, _, err := syscall.Recvmsg(c.fd, resp, nil, 0) + if err != nil { + return nil, fmt.Errorf("read from mstpd: %w", err) + } + if n < hdrSize { + return nil, fmt.Errorf("mstpd response too short: %d bytes", n) + } + + resCode := int32(binary.NativeEndian.Uint32(resp[16:20])) + if resCode != 0 { + return nil, fmt.Errorf("mstpd error: res=%d", resCode) + } + + lout := int(int32(binary.NativeEndian.Uint32(resp[8:12]))) + if n < hdrSize+lout { + return nil, fmt.Errorf("mstpd response truncated: got %d, need %d", n, hdrSize+lout) + } + + return resp[hdrSize : hdrSize+lout], nil +} + +// CISTBridgeStatus holds decoded bridge-level STP data from mstpd. +type CISTBridgeStatus struct { + BridgeID BridgeID + TimeSinceTopologyChange uint32 + TopologyChangeCount uint32 + TopologyChange bool + TopologyChangePort string // max 16 chars + LastTopologyChangePort string // max 16 chars + DesignatedRoot BridgeID + RootPathCost uint32 + RootPortID PortID + RootMaxAge uint8 + RootForwardDelay uint8 + BridgeMaxAge uint8 + BridgeForwardDelay uint8 + TxHoldCount uint32 + ProtocolVersion uint32 + RegionalRoot BridgeID + InternalPathCost uint32 + Enabled bool + AgeingTime uint32 + MaxHops uint8 + BridgeHelloTime uint8 + RootPortName string // from get_cist_bridge_status_OUT tail +} + +// CISTPortStatus holds decoded port-level STP data from mstpd. 
type CISTPortStatus struct {
	// Fields are listed in the same order in which GetPortStatus
	// decodes them from the reply buffer.
	Uptime                    uint32
	State                     uint32
	PortID                    PortID
	AdminExternalPortPathCost uint32
	ExternalPortPathCost      uint32
	DesignatedRoot            BridgeID
	DesignatedExternalCost    uint32
	DesignatedBridge          BridgeID
	DesignatedPort            PortID
	TcAck                     bool
	PortHelloTime             uint8
	AdminEdgePort             bool
	AutoEdgePort              bool
	OperEdgePort              bool
	Enabled                   bool
	AdminP2P                  uint32
	OperP2P                   bool
	RestrictedRole            bool
	RestrictedTCN             bool
	Role                      uint32
	Disputed                  bool
	DesignatedRegionalRoot    BridgeID
	DesignatedInternalCost    uint32
	AdminInternalPortPathCost uint32
	InternalPortPathCost      uint32
	BPDUGuardPort             bool
	BPDUGuardError            bool
	BPDUFilterPort            bool
	NetworkPort               bool
	BAInconsistent            bool
	NumRxBPDUFiltered         uint32
	NumRxBPDU                 uint32
	NumRxTCN                  uint32
	NumTxBPDU                 uint32
	NumTxTCN                  uint32
	NumTransFwd               uint32
	NumTransBlk               uint32
	RcvdBpdu                  bool
	RcvdRSTP                  bool
	RcvdSTP                   bool
	RcvdTcAck                 bool
	RcvdTcn                   bool
	SendRSTP                  bool
}

// BridgeID is an 8-byte STP bridge identifier: two bytes carrying the
// priority and system ID extension, followed by a 6-byte MAC address.
type BridgeID [8]byte

// Priority returns the upper nibble of the first byte, a value in the
// range 0-15.
func (b BridgeID) Priority() int {
	return int(b[0] >> 4)
}

// SystemID returns the 12-bit system extension: the lower nibble of the
// first byte followed by the whole second byte.
func (b BridgeID) SystemID() int {
	return int(binary.BigEndian.Uint16(b[:2]) & 0x0fff)
}

// Address formats bytes 2..7 as a lower-case, colon-separated MAC
// address string.
func (b BridgeID) Address() string {
	mac := b[2:]
	return fmt.Sprintf("%02x:%02x:%02x:%02x:%02x:%02x",
		mac[0], mac[1], mac[2], mac[3], mac[4], mac[5])
}

// PortID is a 2-byte STP port identifier (big-endian on wire).
type PortID [2]byte

// Priority returns the upper nibble of the first byte, a value in the
// range 0-15.
func (p PortID) Priority() int {
	return int(p[0] >> 4)
}

// Number returns the 12-bit port number held in the remaining bits.
func (p PortID) Number() int {
	return int(binary.BigEndian.Uint16(p[:]) & 0x0fff)
}

// GetBridgeStatus queries mstpd for CIST bridge status.
// brIndex is the kernel interface index of the bridge.
+func (c *Client) GetBridgeStatus(brIndex int) (*CISTBridgeStatus, error) { + // Input: 4-byte int32 br_index + in := make([]byte, 4) + binary.NativeEndian.PutUint32(in, uint32(int32(brIndex))) + + // Output: 128 bytes = 112 (CIST_BridgeStatus) + 16 (root_port_name) + out, err := c.roundTrip(cmdGetCISTBridgeStatus, in, 128) + if err != nil { + return nil, err + } + if len(out) < 128 { + return nil, fmt.Errorf("bridge status response too short: %d", len(out)) + } + + s := &CISTBridgeStatus{} + copy(s.BridgeID[:], out[0:8]) + s.TimeSinceTopologyChange = binary.NativeEndian.Uint32(out[8:12]) + s.TopologyChangeCount = binary.NativeEndian.Uint32(out[12:16]) + s.TopologyChange = out[16] != 0 + s.TopologyChangePort = cString(out[17:33]) + s.LastTopologyChangePort = cString(out[33:49]) + copy(s.DesignatedRoot[:], out[56:64]) + s.RootPathCost = binary.NativeEndian.Uint32(out[64:68]) + s.RootPortID = PortID{out[68], out[69]} + s.RootMaxAge = out[70] + s.RootForwardDelay = out[71] + s.BridgeMaxAge = out[72] + s.BridgeForwardDelay = out[73] + s.TxHoldCount = binary.NativeEndian.Uint32(out[76:80]) + s.ProtocolVersion = binary.NativeEndian.Uint32(out[80:84]) + copy(s.RegionalRoot[:], out[88:96]) + s.InternalPathCost = binary.NativeEndian.Uint32(out[96:100]) + s.Enabled = out[100] != 0 + s.AgeingTime = binary.NativeEndian.Uint32(out[104:108]) + s.MaxHops = out[108] + s.BridgeHelloTime = out[109] + // Bytes 112..127 = root_port_name[16] + s.RootPortName = cString(out[112:128]) + + return s, nil +} + +// GetPortStatus queries mstpd for CIST port status. +// brIndex and portIndex are kernel interface indices. 
+func (c *Client) GetPortStatus(brIndex, portIndex int) (*CISTPortStatus, error) { + // Input: 8 bytes = 2x int32 + in := make([]byte, 8) + binary.NativeEndian.PutUint32(in[0:4], uint32(int32(brIndex))) + binary.NativeEndian.PutUint32(in[4:8], uint32(int32(portIndex))) + + // Output: 136 bytes (CIST_PortStatus) + out, err := c.roundTrip(cmdGetCISTPortStatus, in, 136) + if err != nil { + return nil, err + } + if len(out) < 136 { + return nil, fmt.Errorf("port status response too short: %d", len(out)) + } + + s := &CISTPortStatus{} + s.Uptime = binary.NativeEndian.Uint32(out[0:4]) + s.State = binary.NativeEndian.Uint32(out[4:8]) + s.PortID = PortID{out[8], out[9]} + s.AdminExternalPortPathCost = binary.NativeEndian.Uint32(out[12:16]) + s.ExternalPortPathCost = binary.NativeEndian.Uint32(out[16:20]) + copy(s.DesignatedRoot[:], out[24:32]) + s.DesignatedExternalCost = binary.NativeEndian.Uint32(out[32:36]) + copy(s.DesignatedBridge[:], out[40:48]) + s.DesignatedPort = PortID{out[48], out[49]} + s.TcAck = out[50] != 0 + s.PortHelloTime = out[51] + s.AdminEdgePort = out[52] != 0 + s.AutoEdgePort = out[53] != 0 + s.OperEdgePort = out[54] != 0 + s.Enabled = out[55] != 0 + s.AdminP2P = binary.NativeEndian.Uint32(out[56:60]) + s.OperP2P = out[60] != 0 + s.RestrictedRole = out[61] != 0 + s.RestrictedTCN = out[62] != 0 + s.Role = binary.NativeEndian.Uint32(out[64:68]) + s.Disputed = out[68] != 0 + copy(s.DesignatedRegionalRoot[:], out[72:80]) + s.DesignatedInternalCost = binary.NativeEndian.Uint32(out[80:84]) + s.AdminInternalPortPathCost = binary.NativeEndian.Uint32(out[84:88]) + s.InternalPortPathCost = binary.NativeEndian.Uint32(out[88:92]) + s.BPDUGuardPort = out[92] != 0 + s.BPDUGuardError = out[93] != 0 + s.BPDUFilterPort = out[94] != 0 + s.NetworkPort = out[95] != 0 + s.BAInconsistent = out[96] != 0 + s.NumRxBPDUFiltered = binary.NativeEndian.Uint32(out[100:104]) + s.NumRxBPDU = binary.NativeEndian.Uint32(out[104:108]) + s.NumRxTCN = 
binary.NativeEndian.Uint32(out[108:112]) + s.NumTxBPDU = binary.NativeEndian.Uint32(out[112:116]) + s.NumTxTCN = binary.NativeEndian.Uint32(out[116:120]) + s.NumTransFwd = binary.NativeEndian.Uint32(out[120:124]) + s.NumTransBlk = binary.NativeEndian.Uint32(out[124:128]) + s.RcvdBpdu = out[128] != 0 + s.RcvdRSTP = out[129] != 0 + s.RcvdSTP = out[130] != 0 + s.RcvdTcAck = out[131] != 0 + s.RcvdTcn = out[132] != 0 + s.SendRSTP = out[133] != 0 + + return s, nil +} + +// cString extracts a NUL-terminated C string from a byte slice. +func cString(b []byte) string { + for i, c := range b { + if c == 0 { + return string(b[:i]) + } + } + return string(b) +} + +// protocolName maps mstpd protocol_version to YANG force-protocol value. +func protocolName(v uint32) string { + switch v { + case 0: + return "stp" + case 2: + return "rstp" + default: + return "rstp" + } +} + +// roleName maps mstpd port role to YANG role value. +func roleName(v uint32) string { + switch v { + case 0: + return "disabled" + case 1: + return "root" + case 2: + return "designated" + case 3: + return "alternate" + case 4: + return "backup" + case 5: + return "master" + default: + return "disabled" + } +} + +// bridgeIDMap returns a YANG bridge-id object. +func bridgeIDMap(b BridgeID) map[string]any { + return map[string]any{ + "priority": b.Priority(), + "system-id": b.SystemID(), + "address": b.Address(), + } +} + +// portIDMap returns a YANG port-id object. +func portIDMap(p PortID) map[string]any { + return map[string]any{ + "priority": p.Priority(), + "port-id": p.Number(), + } +} + +// IfIndexResolver looks up kernel interface indices by name. +type IfIndexResolver interface { + IfIndex(name string) (int, bool) +} + +// Query queries mstpd for STP data on all bridges found in the ip-json +// links data. Returns per-bridge and per-port STP JSON fragments ready +// for merging into the YANG interface tree. 
+// +// Query connects to mstpd, queries STP data for all bridges in links, +// and returns per-bridge and per-port STP JSON fragments. A fresh +// connection is established per call so that late-starting or restarted +// mstpd instances are handled gracefully. +func Query(links json.RawMessage, resolver IfIndexResolver) (bridgeSTP, portSTP map[string]json.RawMessage) { + brs := findBridges(links) + if len(brs) == 0 { + return nil, nil + } + + client, err := New() + if err != nil { + return nil, nil + } + defer client.Close() + + bridgeSTP = make(map[string]json.RawMessage) + portSTP = make(map[string]json.RawMessage) + + for _, br := range brs { + brIdx, ok := resolver.IfIndex(br.name) + if !ok { + continue + } + + bs, err := client.GetBridgeStatus(brIdx) + if err != nil { + continue + } + + stp := buildBridgeSTP(bs) + if data, err := json.Marshal(stp); err == nil { + bridgeSTP[br.name] = data + } + + for _, port := range br.ports { + portIdx, ok := resolver.IfIndex(port) + if !ok { + continue + } + ps, err := client.GetPortStatus(brIdx, portIdx) + if err != nil { + continue + } + pstp := buildPortSTP(ps) + if data, err := json.Marshal(pstp); err == nil { + portSTP[port] = data + } + } + } + + return bridgeSTP, portSTP +} + +func buildBridgeSTP(bs *CISTBridgeStatus) map[string]any { + cist := map[string]any{ + "bridge-id": bridgeIDMap(bs.BridgeID), + "root-id": bridgeIDMap(bs.DesignatedRoot), + } + + bid := bridgeIDMap(bs.BridgeID) + if prio, ok := bid["priority"]; ok { + cist["priority"] = prio + } + + if bs.RootPortName != "" { + cist["root-port"] = bs.RootPortName + } + + if bs.TopologyChangeCount > 0 { + tc := map[string]any{ + "count": bs.TopologyChangeCount, + "in-progress": bs.TopologyChange, + } + if bs.TopologyChangePort != "" { + tc["port"] = bs.TopologyChangePort + } + if bs.TimeSinceTopologyChange > 0 { + tc["time"] = time.Now().UTC().Add(-time.Duration(bs.TimeSinceTopologyChange) * time.Second).Format(time.RFC3339) + } + cist["topology-change"] = tc + 
} + + stp := map[string]any{ + "force-protocol": protocolName(bs.ProtocolVersion), + "hello-time": int(bs.BridgeHelloTime), + "forward-delay": int(bs.BridgeForwardDelay), + "max-age": int(bs.BridgeMaxAge), + "transmit-hold-count": int(bs.TxHoldCount), + "max-hops": int(bs.MaxHops), + "cist": cist, + } + + return stp +} + +func buildPortSTP(ps *CISTPortStatus) map[string]any { + cist := map[string]any{ + "port-id": portIDMap(ps.PortID), + "role": roleName(ps.Role), + "disputed": ps.Disputed, + "external-path-cost": int(ps.ExternalPortPathCost), + "designated": map[string]any{ + "bridge-id": bridgeIDMap(ps.DesignatedBridge), + "port-id": portIDMap(ps.DesignatedPort), + }, + } + + stp := map[string]any{ + "edge": ps.OperEdgePort, + "cist": cist, + "statistics": map[string]any{ + "in-bpdus": strconv.FormatUint(uint64(ps.NumRxBPDU), 10), + "in-bpdus-filtered": strconv.FormatUint(uint64(ps.NumRxBPDUFiltered), 10), + "in-tcns": strconv.FormatUint(uint64(ps.NumRxTCN), 10), + "out-bpdus": strconv.FormatUint(uint64(ps.NumTxBPDU), 10), + "out-tcns": strconv.FormatUint(uint64(ps.NumTxTCN), 10), + "to-blocking": strconv.FormatUint(uint64(ps.NumTransBlk), 10), + "to-forwarding": strconv.FormatUint(uint64(ps.NumTransFwd), 10), + }, + } + + return stp +} + +type bridgeInfo struct { + name string + ports []string +} + +func findBridges(links json.RawMessage) []bridgeInfo { + var ifaces []map[string]any + if json.Unmarshal(links, &ifaces) != nil { + return nil + } + + bridges := make(map[string]*bridgeInfo) + for _, iface := range ifaces { + linkinfo, _ := iface["linkinfo"].(map[string]any) + if linkinfo == nil { + continue + } + + name, _ := iface["ifname"].(string) + if name == "" { + continue + } + + if kind, _ := linkinfo["info_kind"].(string); kind == "bridge" { + if bridges[name] == nil { + bridges[name] = &bridgeInfo{name: name} + } + } + + if master, _ := iface["master"].(string); master != "" { + br := bridges[master] + if br == nil { + br = &bridgeInfo{name: master} + 
bridges[master] = br + } + br.ports = append(br.ports, name) + } + } + + var result []bridgeInfo + for _, br := range bridges { + if len(br.ports) > 0 { + result = append(result, *br) + } + } + return result +} + +// LinksIfIndexResolver resolves interface names to indices from ip-json link data. +type LinksIfIndexResolver struct { + idx map[string]int +} + +// NewLinksIfIndexResolver builds a resolver from ip-json link data. +func NewLinksIfIndexResolver(links json.RawMessage) *LinksIfIndexResolver { + r := &LinksIfIndexResolver{idx: make(map[string]int)} + var ifaces []map[string]any + if json.Unmarshal(links, &ifaces) != nil { + return r + } + for _, iface := range ifaces { + name, _ := iface["ifname"].(string) + if name == "" { + continue + } + switch v := iface["ifindex"].(type) { + case float64: + r.idx[name] = int(v) + case int: + r.idx[name] = v + } + } + return r +} + +// IfIndex returns the kernel interface index for the given name. +func (r *LinksIfIndexResolver) IfIndex(name string) (int, bool) { + idx, ok := r.idx[name] + return idx, ok +} + +// FindBridges is exported for testing. It extracts bridge info from +// ip-json link data. 
+func FindBridges(links json.RawMessage) []struct { + Name string + Ports []string +} { + brs := findBridges(links) + out := make([]struct { + Name string + Ports []string + }, len(brs)) + for i, br := range brs { + out[i].Name = br.name + out[i].Ports = br.ports + } + return out +} diff --git a/src/yangerd/internal/sysreaders/sysreaders.go b/src/yangerd/internal/sysreaders/sysreaders.go new file mode 100644 index 000000000..b103d9791 --- /dev/null +++ b/src/yangerd/internal/sysreaders/sysreaders.go @@ -0,0 +1,276 @@ +package sysreaders + +import ( + "bufio" + "bytes" + "encoding/json" + "fmt" + "os" + "path/filepath" + "regexp" + "strconv" + "strings" + "sync" +) + +var gmtOffsetRe = regexp.MustCompile(`Etc/GMT([+-]\d{1,2})$`) + +var zonePrefixes = []string{ + "/usr/share/zoneinfo/posix/", + "/usr/share/zoneinfo/right/", + "/usr/share/zoneinfo/", +} + +var userShellMap = map[string]string{ + "/bin/bash": "infix-system:bash", + "/bin/sh": "infix-system:sh", + "/usr/bin/clish": "infix-system:clish", + "/bin/false": "infix-system:false", + "/sbin/nologin": "infix-system:false", + "/usr/sbin/nologin": "infix-system:false", +} + +const SSHDKeysDir = "/var/run/sshd" + +func ReadHostname(path string) (json.RawMessage, error) { + data, err := os.ReadFile(path) + if err != nil { + return nil, err + } + name := strings.TrimSpace(string(data)) + return json.Marshal(map[string]string{"hostname": name}) +} + +func ReadTimezone(path string) (json.RawMessage, error) { + target, err := filepath.EvalSymlinks(path) + if err != nil { + return nil, err + } + + var tz string + for _, p := range zonePrefixes { + if strings.HasPrefix(target, p) { + tz = target[len(p):] + break + } + } + if tz == "" { + return nil, fmt.Errorf("unrecognized zoneinfo path: %s", target) + } + + clock := make(map[string]interface{}) + if m := gmtOffsetRe.FindStringSubmatch(tz); m != nil { + offset, _ := strconv.Atoi(m[1]) + clock["timezone-utc-offset"] = -offset + } else if tz == "Etc/UTC" { + 
clock["timezone-utc-offset"] = 0 + } else { + clock["timezone-name"] = tz + } + + return json.Marshal(map[string]interface{}{"clock": clock}) +} + +func ReadUsers(_ string) (json.RawMessage, error) { + passwdData, err := os.ReadFile("/etc/passwd") + if err != nil { + return nil, err + } + + passwdUsers := make(map[string]string) + scanner := bufio.NewScanner(bytes.NewReader(passwdData)) + for scanner.Scan() { + parts := strings.Split(scanner.Text(), ":") + if len(parts) < 7 { + continue + } + uid, err := strconv.Atoi(parts[2]) + if err != nil || uid < 1000 || uid >= 10000 { + continue + } + shell := strings.TrimSpace(parts[6]) + mapped, ok := userShellMap[shell] + if !ok { + mapped = "infix-system:false" + } + passwdUsers[parts[0]] = mapped + } + + shadowHashes := make(map[string]string) + shadowData, err := os.ReadFile("/etc/shadow") + if err == nil { + scanner = bufio.NewScanner(bytes.NewReader(shadowData)) + for scanner.Scan() { + parts := strings.SplitN(scanner.Text(), ":", 3) + if len(parts) < 2 { + continue + } + hash := parts[1] + if hash == "" || strings.HasPrefix(hash, "*") || strings.HasPrefix(hash, "!") { + continue + } + shadowHashes[parts[0]] = hash + } + } + + users := make([]interface{}, 0) + for username, shell := range passwdUsers { + user := map[string]interface{}{ + "name": username, + "infix-system:shell": shell, + } + if hash, ok := shadowHashes[username]; ok { + user["password"] = hash + } + + keysData, err := os.ReadFile(filepath.Join(SSHDKeysDir, username+".keys")) + if err == nil { + var authKeys []interface{} + for _, line := range strings.Split(string(keysData), "\n") { + line = strings.TrimSpace(line) + if line == "" || strings.HasPrefix(line, "#") { + continue + } + parts := strings.SplitN(line, " ", 3) + if len(parts) < 2 { + continue + } + keyName := fmt.Sprintf("%s-key-%d", username, len(authKeys)) + if len(parts) > 2 { + keyName = parts[2] + } + authKeys = append(authKeys, map[string]interface{}{ + "name": keyName, + "algorithm": 
parts[0], + "key-data": parts[1], + }) + } + if len(authKeys) > 0 { + user["authorized-key"] = authKeys + } + } + users = append(users, user) + } + + return json.Marshal(map[string]interface{}{ + "authentication": map[string]interface{}{ + "user": users, + }, + }) +} + +func ReadDNSResolver(_ string) (json.RawMessage, error) { + servers := make([]interface{}, 0) + var search []string + options := make(map[string]interface{}) + seen := make(map[string]bool) + + for _, path := range []string{"/etc/resolv.conf.head", "/var/lib/misc/resolv.conf"} { + data, err := os.ReadFile(path) + if err != nil { + continue + } + ParseResolvConf(string(data), &servers, &search, options, seen) + } + + dns := make(map[string]interface{}) + dns["server"] = servers + if len(search) > 0 { + dns["search"] = search + } + if len(options) > 0 { + dns["options"] = options + } + + return json.Marshal(map[string]interface{}{"infix-system:dns-resolver": dns}) +} + +func ParseResolvConf(data string, servers *[]interface{}, search *[]string, options map[string]interface{}, seen map[string]bool) { + for _, line := range strings.Split(data, "\n") { + line = strings.TrimSpace(line) + switch { + case strings.HasPrefix(line, "nameserver"): + ip := strings.TrimSpace(strings.TrimPrefix(line, "nameserver")) + if ip != "" && ip != "127.0.0.1" && ip != "::1" && !seen[ip] { + seen[ip] = true + *servers = append(*servers, map[string]interface{}{ + "address": ip, + }) + } + case strings.HasPrefix(line, "search"): + *search = append(*search, strings.Fields(line)[1:]...) 
+ case strings.HasPrefix(line, "options"): + for _, opt := range strings.Fields(line)[1:] { + if strings.HasPrefix(opt, "timeout:") { + if v, err := strconv.Atoi(strings.TrimPrefix(opt, "timeout:")); err == nil { + options["timeout"] = v + } + } else if strings.HasPrefix(opt, "attempts:") { + if v, err := strconv.Atoi(strings.TrimPrefix(opt, "attempts:")); err == nil { + options["attempts"] = v + } + } + } + } + } +} + +// ForwardingAggregator tracks all /proc/sys/net/ipv{4,6}/conf/*/forwarding +// files and rebuilds the complete interfaces list on every change. +type ForwardingAggregator struct { + mu sync.Mutex +} + +func NewForwardingAggregator() *ForwardingAggregator { + return &ForwardingAggregator{} +} + +func (fa *ForwardingAggregator) HandleForwardingChange(_ string) (json.RawMessage, error) { + fa.mu.Lock() + defer fa.mu.Unlock() + + enabled := make(map[string]bool) + + for _, family := range []string{"ipv4", "ipv6"} { + sysctl := "forwarding" + if family == "ipv6" { + sysctl = "force_forwarding" + } + pattern := fmt.Sprintf("/proc/sys/net/%s/conf/*/%s", family, sysctl) + matches, err := filepath.Glob(pattern) + if err != nil { + continue + } + for _, path := range matches { + b, err := os.ReadFile(path) + if err != nil { + continue + } + if strings.TrimSpace(string(b)) != "1" { + continue + } + parts := strings.Split(filepath.Clean(path), string(os.PathSeparator)) + if len(parts) >= 7 { + ifname := parts[len(parts)-2] + if ifname != "all" && ifname != "default" && ifname != "lo" { + enabled[ifname] = true + } + } + } + } + + ifnames := make([]string, 0) + for name := range enabled { + ifnames = append(ifnames, name) + } + + data, err := json.Marshal(map[string]interface{}{ + "interfaces": map[string]interface{}{ + "interface": ifnames, + }, + }) + if err != nil { + return nil, err + } + return json.RawMessage(data), nil +} diff --git a/src/yangerd/internal/testutil/mock.go b/src/yangerd/internal/testutil/mock.go new file mode 100644 index 
000000000..b5217b78a --- /dev/null +++ b/src/yangerd/internal/testutil/mock.go @@ -0,0 +1,49 @@ +package testutil + +import ( + "context" + "fmt" +) + +// MockRunner records command invocations and returns pre-configured output. +type MockRunner struct { + Results map[string][]byte + Errors map[string]error +} + +// Run returns the pre-configured result for the command name. +func (m *MockRunner) Run(_ context.Context, name string, args ...string) ([]byte, error) { + key := name + for _, a := range args { + key += " " + a + } + if err, ok := m.Errors[key]; ok { + return nil, err + } + if data, ok := m.Results[key]; ok { + return data, nil + } + return nil, fmt.Errorf("mock: no result for %q", key) +} + +// MockFileReader returns pre-configured file contents. +type MockFileReader struct { + Files map[string][]byte + Globs map[string][]string +} + +// ReadFile returns pre-configured data for the path. +func (m *MockFileReader) ReadFile(path string) ([]byte, error) { + if data, ok := m.Files[path]; ok { + return data, nil + } + return nil, fmt.Errorf("mock: file not found: %s", path) +} + +// Glob returns pre-configured matches for the pattern. +func (m *MockFileReader) Glob(pattern string) ([]string, error) { + if matches, ok := m.Globs[pattern]; ok { + return matches, nil + } + return nil, nil +} diff --git a/src/yangerd/internal/tree/tree.go b/src/yangerd/internal/tree/tree.go new file mode 100644 index 000000000..5873af875 --- /dev/null +++ b/src/yangerd/internal/tree/tree.go @@ -0,0 +1,258 @@ +// Package tree provides a concurrent in-memory store for per-module +// YANG operational data, keyed by module-qualified names like +// "ietf-system:system-state". +// +// Each module has its own read-write mutex, so writers for different +// modules never block each other. All methods are safe for concurrent +// use. +package tree + +import ( + "encoding/json" + "sync" + "time" +) + +// OnDemandFunc returns a JSON blob computed at call time. 
+// Registered providers are invoked on every Get/GetMulti to supply +// fields that must always be fresh (e.g. uptime, current-datetime). +type OnDemandFunc func() json.RawMessage + +// modelEntry holds a single YANG module's pre-serialized JSON blob +// and its own read-write mutex. +type modelEntry struct { + mu sync.RWMutex + data json.RawMessage + updated time.Time +} + +// Tree holds the operational YANG data in per-module JSON blobs. +type Tree struct { + mu sync.RWMutex // protects the models map itself + models map[string]*modelEntry + providers map[string]OnDemandFunc // on-demand overlay providers +} + +// New creates an empty Tree. +func New() *Tree { + return &Tree{ + models: make(map[string]*modelEntry), + providers: make(map[string]OnDemandFunc), + } +} + +// RegisterProvider adds an on-demand overlay for the given key. +// When Get or GetMulti reads this key the provider is called and its +// result is shallow-merged on top of the cached data. The cached +// entry is never mutated — a merged copy is returned. +func (t *Tree) RegisterProvider(key string, fn OnDemandFunc) { + t.mu.Lock() + t.providers[key] = fn + t.mu.Unlock() +} + +// Set replaces the entire subtree at the given YANG module key. +// Only the target module's write lock is held; other modules remain +// readable and writable. +func (t *Tree) Set(key string, v json.RawMessage) { + t.mu.RLock() + entry, ok := t.models[key] + t.mu.RUnlock() + if !ok { + t.mu.Lock() + entry, ok = t.models[key] + if !ok { + entry = &modelEntry{} + t.models[key] = entry + } + t.mu.Unlock() + } + entry.mu.Lock() + entry.data = v + entry.updated = time.Now() + entry.mu.Unlock() +} + +// Get returns the raw JSON for the given module key. +// If a provider is registered for key its output is shallow-merged on +// top of the cached data without mutating the cache. 
+func (t *Tree) Get(key string) json.RawMessage { + t.mu.RLock() + entry, ok := t.models[key] + provider := t.providers[key] + t.mu.RUnlock() + if !ok { + return nil + } + entry.mu.RLock() + data := entry.data + entry.mu.RUnlock() + + if provider == nil { + return data + } + return shallowMerge(data, provider()) +} + +// GetMulti returns the raw JSON for multiple module keys. +// Each module's read lock is acquired and released individually — +// the result is eventually consistent, not a snapshot. +// Providers are applied per-key, same as Get. +func (t *Tree) GetMulti(keys []string) []json.RawMessage { + result := make([]json.RawMessage, 0, len(keys)) + t.mu.RLock() + defer t.mu.RUnlock() + for _, key := range keys { + entry, ok := t.models[key] + if !ok { + continue + } + entry.mu.RLock() + data := entry.data + entry.mu.RUnlock() + + if provider, has := t.providers[key]; has { + data = shallowMerge(data, provider()) + } + result = append(result, data) + } + return result +} + +// Keys returns all registered module keys. +func (t *Tree) Keys() []string { + t.mu.RLock() + defer t.mu.RUnlock() + keys := make([]string, 0, len(t.models)) + for k := range t.models { + keys = append(keys, k) + } + return keys +} + +// ModelInfo holds metadata for a single model key. +type ModelInfo struct { + LastUpdated time.Time + SizeBytes int +} + +// Merge performs a shallow first-level JSON merge of partial into +// the existing blob at key. If the key does not exist yet, partial +// becomes the entire value. Each top-level field in partial +// overwrites the corresponding field in the existing object; fields +// not mentioned in partial are preserved. +// +// Both the existing data and partial must be JSON objects (maps). +// If either is not a valid JSON object, Merge falls back to Set. +func (t *Tree) Merge(key string, partial json.RawMessage) { + t.mu.RLock() + entry, ok := t.models[key] + t.mu.RUnlock() + + if !ok { + // No existing entry — just set. 
+ t.Set(key, partial) + return + } + + entry.mu.Lock() + defer entry.mu.Unlock() + + // Unmarshal existing data. + var base map[string]json.RawMessage + if len(entry.data) == 0 || json.Unmarshal(entry.data, &base) != nil { + base = make(map[string]json.RawMessage) + } + + // Unmarshal partial. + var overlay map[string]json.RawMessage + if json.Unmarshal(partial, &overlay) != nil { + // partial is not a JSON object — fall back to full replace. + entry.data = partial + entry.updated = time.Now() + return + } + + for k, v := range overlay { + base[k] = v + } + + merged, err := json.Marshal(base) + if err != nil { + // Should never happen with valid JSON inputs. + entry.data = partial + entry.updated = time.Now() + return + } + entry.data = merged + entry.updated = time.Now() +} + +// Delete removes a key from the tree entirely. +func (t *Tree) Delete(key string) { + t.mu.Lock() + delete(t.models, key) + t.mu.Unlock() +} + +// shallowMerge overlays the top-level fields of overlay onto base and +// returns a new JSON blob. Neither input is modified. If either +// is not a valid JSON object the overlay wins outright. +func shallowMerge(base, overlay json.RawMessage) json.RawMessage { + if len(overlay) == 0 { + return base + } + if len(base) == 0 { + return overlay + } + + var bm map[string]json.RawMessage + if json.Unmarshal(base, &bm) != nil { + return overlay + } + var om map[string]json.RawMessage + if json.Unmarshal(overlay, &om) != nil { + return overlay + } + + for k, v := range om { + bm[k] = v + } + + merged, err := json.Marshal(bm) + if err != nil { + return overlay + } + return merged +} + +// GetCached returns the raw cached JSON for the given module key +// WITHOUT invoking any registered provider. This is safe to call +// from inside a provider closure (no recursion risk). 
+func (t *Tree) GetCached(key string) json.RawMessage { + t.mu.RLock() + entry, ok := t.models[key] + t.mu.RUnlock() + if !ok { + return nil + } + entry.mu.RLock() + defer entry.mu.RUnlock() + return entry.data +} + +// Info returns metadata for the given module key. +func (t *Tree) Info(key string) (ModelInfo, bool) { + t.mu.RLock() + entry, ok := t.models[key] + t.mu.RUnlock() + if !ok { + return ModelInfo{}, false + } + entry.mu.RLock() + defer entry.mu.RUnlock() + return ModelInfo{ + LastUpdated: entry.updated, + SizeBytes: len(entry.data), + }, true +} diff --git a/src/yangerd/internal/tree/tree_test.go b/src/yangerd/internal/tree/tree_test.go new file mode 100644 index 000000000..890563e25 --- /dev/null +++ b/src/yangerd/internal/tree/tree_test.go @@ -0,0 +1,325 @@ +package tree + +import ( + "encoding/json" + "sync" + "testing" +) + +func TestSetGet(t *testing.T) { + tr := New() + tr.Set("ietf-system:system", json.RawMessage(`{"hostname":"r1"}`)) + + got := tr.Get("ietf-system:system") + if string(got) != `{"hostname":"r1"}` { + t.Fatalf("unexpected: %s", got) + } +} + +func TestGetMissing(t *testing.T) { + tr := New() + if got := tr.Get("nonexistent"); got != nil { + t.Fatalf("expected nil, got: %s", got) + } +} + +func TestSetOverwrite(t *testing.T) { + tr := New() + tr.Set("key", json.RawMessage(`"v1"`)) + tr.Set("key", json.RawMessage(`"v2"`)) + + if got := tr.Get("key"); string(got) != `"v2"` { + t.Fatalf("expected v2, got: %s", got) + } +} + +func TestGetMulti(t *testing.T) { + tr := New() + tr.Set("a", json.RawMessage(`1`)) + tr.Set("b", json.RawMessage(`2`)) + tr.Set("c", json.RawMessage(`3`)) + + results := tr.GetMulti([]string{"a", "c"}) + if len(results) != 2 { + t.Fatalf("expected 2 results, got %d", len(results)) + } + if string(results[0]) != "1" || string(results[1]) != "3" { + t.Fatalf("unexpected results: %s, %s", results[0], results[1]) + } +} + +func TestGetMultiMissing(t *testing.T) { + tr := New() + tr.Set("a", json.RawMessage(`1`)) + + 
results := tr.GetMulti([]string{"a", "missing"}) + if len(results) != 1 { + t.Fatalf("expected 1 result, got %d", len(results)) + } +} + +func TestKeys(t *testing.T) { + tr := New() + tr.Set("x", json.RawMessage(`1`)) + tr.Set("y", json.RawMessage(`2`)) + + keys := tr.Keys() + if len(keys) != 2 { + t.Fatalf("expected 2 keys, got %d", len(keys)) + } +} + +func TestInfo(t *testing.T) { + tr := New() + tr.Set("k", json.RawMessage(`{"data":true}`)) + + info, ok := tr.Info("k") + if !ok { + t.Fatal("expected ok") + } + if info.SizeBytes != len(`{"data":true}`) { + t.Fatalf("expected size %d, got %d", len(`{"data":true}`), info.SizeBytes) + } + if info.LastUpdated.IsZero() { + t.Fatal("expected non-zero LastUpdated") + } +} + +func TestInfoMissing(t *testing.T) { + tr := New() + _, ok := tr.Info("missing") + if ok { + t.Fatal("expected !ok for missing key") + } +} + +func TestConcurrentSetGet(t *testing.T) { + tr := New() + var wg sync.WaitGroup + const N = 100 + + for i := 0; i < N; i++ { + wg.Add(2) + go func(i int) { + defer wg.Done() + tr.Set("shared", json.RawMessage(`{"i":`+string(rune('0'+i%10))+`}`)) + }(i) + go func() { + defer wg.Done() + tr.Get("shared") + }() + } + wg.Wait() + + if got := tr.Get("shared"); got == nil { + t.Fatal("expected non-nil after concurrent writes") + } +} + +func TestMerge(t *testing.T) { + tests := []struct { + name string + existing string + partial string + want map[string]json.RawMessage + }{ + { + name: "merge into existing preserves old fields", + existing: `{"a":"1","b":"2"}`, + partial: `{"c":"3"}`, + want: map[string]json.RawMessage{ + "a": json.RawMessage(`"1"`), + "b": json.RawMessage(`"2"`), + "c": json.RawMessage(`"3"`), + }, + }, + { + name: "merge overwrites overlapping field", + existing: `{"a":"old","b":"keep"}`, + partial: `{"a":"new"}`, + want: map[string]json.RawMessage{ + "a": json.RawMessage(`"new"`), + "b": json.RawMessage(`"keep"`), + }, + }, + { + name: "merge with complex nested values", + existing: 
`{"protocols":{"ospf":true}}`, + partial: `{"ribs":{"rib":[{"name":"ipv4"}]}}`, + want: map[string]json.RawMessage{ + "protocols": json.RawMessage(`{"ospf":true}`), + "ribs": json.RawMessage(`{"rib":[{"name":"ipv4"}]}`), + }, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + tr := New() + tr.Set("k", json.RawMessage(tc.existing)) + tr.Merge("k", json.RawMessage(tc.partial)) + + var got map[string]json.RawMessage + if err := json.Unmarshal(tr.Get("k"), &got); err != nil { + t.Fatalf("unmarshal result: %v", err) + } + for field, wantVal := range tc.want { + gotVal, ok := got[field] + if !ok { + t.Fatalf("missing field %q", field) + } + if string(gotVal) != string(wantVal) { + t.Fatalf("field %q: got %s, want %s", field, gotVal, wantVal) + } + } + if len(got) != len(tc.want) { + t.Fatalf("got %d fields, want %d", len(got), len(tc.want)) + } + }) + } +} + +func TestMergeIntoEmpty(t *testing.T) { + tr := New() + tr.Merge("new-key", json.RawMessage(`{"x":1}`)) + got := tr.Get("new-key") + if string(got) != `{"x":1}` { + t.Fatalf("expected {\"x\":1}, got %s", got) + } +} + +func TestMergeNonObjectFallback(t *testing.T) { + tr := New() + tr.Set("k", json.RawMessage(`{"a":"1"}`)) + tr.Merge("k", json.RawMessage(`"plain string"`)) + got := tr.Get("k") + if string(got) != `"plain string"` { + t.Fatalf("expected plain string fallback, got %s", got) + } +} + +func TestDelete(t *testing.T) { + tr := New() + tr.Set("k", json.RawMessage(`{"data":true}`)) + tr.Delete("k") + if got := tr.Get("k"); got != nil { + t.Fatalf("expected nil after delete, got %s", got) + } +} + +func TestDeleteMissing(t *testing.T) { + tr := New() + tr.Delete("nonexistent") +} + +func TestGetWithProvider(t *testing.T) { + tr := New() + tr.Set("k", json.RawMessage(`{"cached":"yes","kept":"ok"}`)) + tr.RegisterProvider("k", func() json.RawMessage { + return json.RawMessage(`{"live":"data","cached":"overridden"}`) + }) + + got := tr.Get("k") + var m map[string]string + if err := 
json.Unmarshal(got, &m); err != nil { + t.Fatalf("unmarshal: %v", err) + } + if m["live"] != "data" { + t.Fatalf("expected live=data, got %q", m["live"]) + } + if m["cached"] != "overridden" { + t.Fatalf("expected provider to override cached field, got %q", m["cached"]) + } + if m["kept"] != "ok" { + t.Fatalf("expected kept=ok preserved, got %q", m["kept"]) + } +} + +func TestGetWithProviderDoesNotMutateCache(t *testing.T) { + tr := New() + tr.Set("k", json.RawMessage(`{"a":"1"}`)) + tr.RegisterProvider("k", func() json.RawMessage { + return json.RawMessage(`{"b":"2"}`) + }) + + tr.Get("k") + + // Read the raw cached entry — remove the provider to bypass merge. + tr.RegisterProvider("k", nil) + raw := tr.Get("k") + if string(raw) != `{"a":"1"}` { + t.Fatalf("cache was mutated: %s", raw) + } +} + +func TestGetMultiWithProvider(t *testing.T) { + tr := New() + tr.Set("a", json.RawMessage(`{"x":"1"}`)) + tr.Set("b", json.RawMessage(`{"y":"2"}`)) + tr.RegisterProvider("a", func() json.RawMessage { + return json.RawMessage(`{"live":"yes"}`) + }) + + results := tr.GetMulti([]string{"a", "b"}) + if len(results) != 2 { + t.Fatalf("expected 2 results, got %d", len(results)) + } + + var m map[string]string + json.Unmarshal(results[0], &m) + if m["live"] != "yes" || m["x"] != "1" { + t.Fatalf("provider not applied to first result: %s", results[0]) + } + + // b has no provider — should return as-is + if string(results[1]) != `{"y":"2"}` { + t.Fatalf("unexpected second result: %s", results[1]) + } +} + +func TestGetWithProviderEmptyOverlay(t *testing.T) { + tr := New() + tr.Set("k", json.RawMessage(`{"a":"1"}`)) + tr.RegisterProvider("k", func() json.RawMessage { + return nil + }) + + got := tr.Get("k") + if string(got) != `{"a":"1"}` { + t.Fatalf("nil overlay should return base, got %s", got) + } +} + +func TestGetWithProviderNoBaseData(t *testing.T) { + tr := New() + tr.Set("k", json.RawMessage(nil)) + tr.RegisterProvider("k", func() json.RawMessage { + return 
json.RawMessage(`{"live":"yes"}`) + }) + + got := tr.Get("k") + if string(got) != `{"live":"yes"}` { + t.Fatalf("expected overlay to win with empty base, got %s", got) + } +} + +func TestConcurrentMerge(t *testing.T) { + tr := New() + tr.Set("shared", json.RawMessage(`{}`)) + + var wg sync.WaitGroup + const N = 50 + for i := 0; i < N; i++ { + wg.Add(1) + go func(i int) { + defer wg.Done() + tr.Merge("shared", json.RawMessage(`{"f`+string(rune('a'+i%26))+`":true}`)) + }(i) + } + wg.Wait() + + got := tr.Get("shared") + if got == nil { + t.Fatal("expected non-nil after concurrent merges") + } +} diff --git a/src/yangerd/internal/wgquery/wgquery.go b/src/yangerd/internal/wgquery/wgquery.go new file mode 100644 index 000000000..c9a35b24a --- /dev/null +++ b/src/yangerd/internal/wgquery/wgquery.go @@ -0,0 +1,107 @@ +package wgquery + +import ( + "encoding/json" + "strconv" + "time" + + "golang.zx2c4.com/wireguard/wgctrl" +) + +func Query(links json.RawMessage) map[string]json.RawMessage { + wgIfaces := findWireguardIfaces(links) + if len(wgIfaces) == 0 { + return nil + } + + client, err := wgctrl.New() + if err != nil { + return nil + } + defer client.Close() + + result := make(map[string]json.RawMessage) + now := time.Now().UTC() + + for _, ifname := range wgIfaces { + dev, err := client.Device(ifname) + if err != nil { + continue + } + if len(dev.Peers) == 0 { + continue + } + + var peers []map[string]any + for _, p := range dev.Peers { + peer := map[string]any{ + "public-key": p.PublicKey.String(), + "connection-status": connectionStatus(p.LastHandshakeTime, now), + } + + if !p.LastHandshakeTime.IsZero() { + peer["latest-handshake"] = p.LastHandshakeTime.UTC().Format("2006-01-02T15:04:05+00:00") + } + + if p.Endpoint != nil { + peer["endpoint-address"] = p.Endpoint.IP.String() + peer["endpoint-port"] = p.Endpoint.Port + } + + if p.TransmitBytes > 0 || p.ReceiveBytes > 0 { + peer["transfer"] = map[string]any{ + "tx-bytes": strconv.FormatInt(p.TransmitBytes, 10), + 
"rx-bytes": strconv.FormatInt(p.ReceiveBytes, 10), + } + } + + peers = append(peers, peer) + } + + if len(peers) == 0 { + continue + } + + out, err := json.Marshal(map[string]any{"peer-status": map[string]any{"peer": peers}}) + if err != nil { + continue + } + result[ifname] = out + } + + if len(result) == 0 { + return nil + } + return result +} + +func findWireguardIfaces(links json.RawMessage) []string { + var ifaces []map[string]any + if json.Unmarshal(links, &ifaces) != nil { + return nil + } + + var result []string + for _, iface := range ifaces { + linkinfo, _ := iface["linkinfo"].(map[string]any) + if linkinfo == nil { + continue + } + if kind, _ := linkinfo["info_kind"].(string); kind == "wireguard" { + if name, _ := iface["ifname"].(string); name != "" { + result = append(result, name) + } + } + } + return result +} + +func connectionStatus(handshake time.Time, now time.Time) string { + if handshake.IsZero() { + return "down" + } + if now.Sub(handshake) < 180*time.Second { + return "up" + } + return "down" +} diff --git a/src/yangerd/internal/wpactrl/allstations_test.go b/src/yangerd/internal/wpactrl/allstations_test.go new file mode 100644 index 000000000..459d762b1 --- /dev/null +++ b/src/yangerd/internal/wpactrl/allstations_test.go @@ -0,0 +1,142 @@ +package wpactrl + +import ( + "net" + "strings" + "testing" + "time" +) + +// fakeHostapd serves the hostapd control protocol for station +// enumeration: STA-FIRST returns the first station block, STA-NEXT +// the one after it, and an empty datagram past the last station. 
+func fakeHostapd(t *testing.T, stations []string) string { + t.Helper() + + dir := t.TempDir() + serverPath := dir + "/wlan0" + + serverAddr := &net.UnixAddr{Name: serverPath, Net: "unixgram"} + server, err := net.ListenUnixgram("unixgram", serverAddr) + if err != nil { + t.Fatalf("listen: %v", err) + } + t.Cleanup(func() { server.Close() }) + + addrOf := func(block string) string { + return strings.SplitN(block, "\n", 2)[0] + } + + go func() { + buf := make([]byte, 4096) + for { + n, raddr, err := server.ReadFromUnix(buf) + if err != nil { + return + } + cmd := string(buf[:n]) + + var resp string + switch { + case cmd == "STA-FIRST": + if len(stations) > 0 { + resp = stations[0] + } + case strings.HasPrefix(cmd, "STA-NEXT "): + prev := strings.TrimPrefix(cmd, "STA-NEXT ") + for i, st := range stations { + if addrOf(st) == prev && i+1 < len(stations) { + resp = stations[i+1] + break + } + } + default: + resp = "UNKNOWN COMMAND\n" + } + server.WriteToUnix([]byte(resp), raddr) + } + }() + + return serverPath +} + +func TestAllStations(t *testing.T) { + sta1 := "02:00:00:00:00:01\nflags=[AUTH][ASSOC][AUTHORIZED]\n" + + "signal=-57\nconnected_time=120\nrx_bytes=1000\ntx_bytes=2000\n" + sta2 := "02:00:00:00:00:02\nflags=[AUTH][ASSOC][AUTHORIZED]\n" + + "signal=-78\nconnected_time=60\nrx_bytes=300\ntx_bytes=400\n" + + path := fakeHostapd(t, []string{sta1, sta2}) + + conn, err := DialTimeout(path, 2*time.Second) + if err != nil { + t.Fatalf("dial: %v", err) + } + defer conn.Close() + + stas, err := conn.AllStations() + if err != nil { + t.Fatalf("AllStations: %v", err) + } + if len(stas) != 2 { + t.Fatalf("got %d stations, want 2", len(stas)) + } + if stas[0]["addr"] != "02:00:00:00:00:01" || stas[1]["addr"] != "02:00:00:00:00:02" { + t.Errorf("addrs = %q, %q", stas[0]["addr"], stas[1]["addr"]) + } + if stas[0]["signal"] != "-57" { + t.Errorf("sta[0] signal = %q", stas[0]["signal"]) + } + if stas[1]["connected_time"] != "60" { + t.Errorf("sta[1] connected_time = %q", 
stas[1]["connected_time"]) + } +} + +func TestAllStationsNone(t *testing.T) { + path := fakeHostapd(t, nil) + + conn, err := DialTimeout(path, 2*time.Second) + if err != nil { + t.Fatalf("dial: %v", err) + } + defer conn.Close() + + stas, err := conn.AllStations() + if err != nil { + t.Fatalf("AllStations: %v", err) + } + if len(stas) != 0 { + t.Fatalf("got %d stations, want 0", len(stas)) + } +} + +func TestAllStationsUnsupported(t *testing.T) { + dir := t.TempDir() + serverPath := dir + "/wlan0" + + serverAddr := &net.UnixAddr{Name: serverPath, Net: "unixgram"} + server, err := net.ListenUnixgram("unixgram", serverAddr) + if err != nil { + t.Fatalf("listen: %v", err) + } + defer server.Close() + + go func() { + buf := make([]byte, 4096) + n, raddr, err := server.ReadFromUnix(buf) + if err != nil || n == 0 { + return + } + server.WriteToUnix([]byte("UNKNOWN COMMAND\n"), raddr) + }() + + conn, err := DialTimeout(serverPath, 2*time.Second) + if err != nil { + t.Fatalf("dial: %v", err) + } + defer conn.Close() + + if _, err := conn.AllStations(); err == nil { + t.Fatal("expected error for UNKNOWN COMMAND") + } +} diff --git a/src/yangerd/internal/wpactrl/attach.go b/src/yangerd/internal/wpactrl/attach.go new file mode 100644 index 000000000..532235a31 --- /dev/null +++ b/src/yangerd/internal/wpactrl/attach.go @@ -0,0 +1,159 @@ +package wpactrl + +import ( + "context" + "fmt" + "net" + "os" + "strings" + "time" +) + +const attachBufSize = 4096 + +// Event is an unsolicited event from wpa_supplicant or hostapd, +// received after sending the ATTACH command. +type Event struct { + Priority int + Name string + Data string + Raw string +} + +// EventHandler is called for each unsolicited event. +type EventHandler func(Event) + +// AttachConn is a persistent event listener on a wpa_supplicant or +// hostapd control socket. After sending ATTACH, the daemon pushes +// unsolicited events like CTRL-EVENT-SIGNAL-CHANGE, AP-STA-CONNECTED, +// etc. 
The connection reads these in a loop and dispatches them to a +// handler. +type AttachConn struct { + conn *net.UnixConn + local string + handler EventHandler +} + +// Attach connects to the control socket at serverPath and sends the +// ATTACH command. On success, the daemon will send unsolicited events +// to this connection. Call Run to start reading them. +func Attach(serverPath string) (*AttachConn, error) { + seq := clientSeq.Add(1) + localPath := fmt.Sprintf("/tmp/wpactrl_attach_%d_%d", os.Getpid(), seq) + os.Remove(localPath) + + laddr := &net.UnixAddr{Name: localPath, Net: "unixgram"} + raddr := &net.UnixAddr{Name: serverPath, Net: "unixgram"} + + conn, err := net.DialUnix("unixgram", laddr, raddr) + if err != nil { + os.Remove(localPath) + return nil, fmt.Errorf("dial %s: %w", serverPath, err) + } + + conn.SetDeadline(time.Now().Add(DefaultTimeout)) + if _, err := conn.Write([]byte("ATTACH")); err != nil { + conn.Close() + os.Remove(localPath) + return nil, fmt.Errorf("send ATTACH: %w", err) + } + + buf := make([]byte, 64) + n, err := conn.Read(buf) + if err != nil { + conn.Close() + os.Remove(localPath) + return nil, fmt.Errorf("read ATTACH response: %w", err) + } + resp := strings.TrimSpace(string(buf[:n])) + if resp != "OK" { + conn.Close() + os.Remove(localPath) + return nil, fmt.Errorf("ATTACH rejected: %q", resp) + } + + conn.SetDeadline(time.Time{}) + return &AttachConn{conn: conn, local: localPath}, nil +} + +// SetHandler sets the callback for received events. +func (a *AttachConn) SetHandler(fn EventHandler) { + a.handler = fn +} + +// Run reads events until ctx is cancelled or the socket errors (daemon +// died). Returns nil on context cancellation, error on socket failure. 
+func (a *AttachConn) Run(ctx context.Context) error { + done := make(chan struct{}) + go func() { + select { + case <-ctx.Done(): + a.conn.SetReadDeadline(time.Now()) + case <-done: + } + }() + defer close(done) + + buf := make([]byte, attachBufSize) + for { + n, err := a.conn.Read(buf) + if err != nil { + if ctx.Err() != nil { + return nil + } + return fmt.Errorf("read: %w", err) + } + if a.handler == nil { + continue + } + ev, ok := ParseEvent(string(buf[:n])) + if ok { + a.handler(ev) + } + } +} + +// Close sends DETACH and closes the connection. +func (a *AttachConn) Close() error { + a.conn.SetDeadline(time.Now().Add(DefaultTimeout)) + a.conn.Write([]byte("DETACH")) + err := a.conn.Close() + os.Remove(a.local) + return err +} + +// ParseEvent parses a single unsolicited event line. Format: +// EVENT-NAME optional-data +// where N is a priority digit (0-4). Some events like +// AP-STA-CONNECTED have no priority prefix. +func ParseEvent(line string) (Event, bool) { + line = strings.TrimSpace(line) + if line == "" { + return Event{}, false + } + + ev := Event{Raw: line} + + if len(line) >= 3 && line[0] == '<' { + end := strings.IndexByte(line, '>') + if end > 1 { + for _, c := range line[1:end] { + if c < '0' || c > '9' { + goto noPriority + } + } + fmt.Sscanf(line[1:end], "%d", &ev.Priority) + line = line[end+1:] + } + } +noPriority: + + if idx := strings.IndexByte(line, ' '); idx > 0 { + ev.Name = line[:idx] + ev.Data = line[idx+1:] + } else { + ev.Name = line + } + + return ev, ev.Name != "" +} diff --git a/src/yangerd/internal/wpactrl/attach_test.go b/src/yangerd/internal/wpactrl/attach_test.go new file mode 100644 index 000000000..9bed7412d --- /dev/null +++ b/src/yangerd/internal/wpactrl/attach_test.go @@ -0,0 +1,156 @@ +package wpactrl + +import ( + "context" + "net" + "os" + "testing" + "time" +) + +func TestParseEvent(t *testing.T) { + tests := []struct { + line string + wantOK bool + wantPri int + wantName string + wantData string + }{ + 
{"<3>CTRL-EVENT-SIGNAL-CHANGE above=0 signal=-88 noise=-92 txrate=6000", true, 3, "CTRL-EVENT-SIGNAL-CHANGE", "above=0 signal=-88 noise=-92 txrate=6000"}, + {"<3>CTRL-EVENT-CONNECTED - Connection to 02:00:00:00:01:00 completed", true, 3, "CTRL-EVENT-CONNECTED", "- Connection to 02:00:00:00:01:00 completed"}, + {"<3>CTRL-EVENT-SCAN-RESULTS ", true, 3, "CTRL-EVENT-SCAN-RESULTS", ""}, + {"<3>CTRL-EVENT-DISCONNECTED bssid=02:00:00:00:01:00 reason=3", true, 3, "CTRL-EVENT-DISCONNECTED", "bssid=02:00:00:00:01:00 reason=3"}, + {"<2>AP-STA-CONNECTED 9e:61:6b:cf:d8:15", true, 2, "AP-STA-CONNECTED", "9e:61:6b:cf:d8:15"}, + {"<2>AP-STA-DISCONNECTED 9e:61:6b:cf:d8:15", true, 2, "AP-STA-DISCONNECTED", "9e:61:6b:cf:d8:15"}, + {"AP-STA-CONNECTED 9e:61:6b:cf:d8:15", true, 0, "AP-STA-CONNECTED", "9e:61:6b:cf:d8:15"}, + {"<3>CTRL-EVENT-TERMINATING", true, 3, "CTRL-EVENT-TERMINATING", ""}, + {"", false, 0, "", ""}, + {" ", false, 0, "", ""}, + } + + for _, tt := range tests { + ev, ok := ParseEvent(tt.line) + if ok != tt.wantOK { + t.Errorf("ParseEvent(%q): ok=%v, want %v", tt.line, ok, tt.wantOK) + continue + } + if !ok { + continue + } + if ev.Priority != tt.wantPri { + t.Errorf("ParseEvent(%q): priority=%d, want %d", tt.line, ev.Priority, tt.wantPri) + } + if ev.Name != tt.wantName { + t.Errorf("ParseEvent(%q): name=%q, want %q", tt.line, ev.Name, tt.wantName) + } + if ev.Data != tt.wantData { + t.Errorf("ParseEvent(%q): data=%q, want %q", tt.line, ev.Data, tt.wantData) + } + } +} + +func TestAttachAndReceiveEvents(t *testing.T) { + dir := t.TempDir() + serverPath := dir + "/hostapd_test" + + serverAddr := &net.UnixAddr{Name: serverPath, Net: "unixgram"} + server, err := net.ListenUnixgram("unixgram", serverAddr) + if err != nil { + t.Fatalf("listen: %v", err) + } + defer server.Close() + + go func() { + buf := make([]byte, 4096) + n, raddr, err := server.ReadFromUnix(buf) + if err != nil { + return + } + if string(buf[:n]) == "ATTACH" { + server.WriteToUnix([]byte("OK\n"), raddr) 
+ } + time.Sleep(50 * time.Millisecond) + server.WriteToUnix([]byte("<2>AP-STA-CONNECTED 9e:61:6b:cf:d8:15"), raddr) + time.Sleep(50 * time.Millisecond) + server.WriteToUnix([]byte("<3>CTRL-EVENT-SIGNAL-CHANGE above=0 signal=-55"), raddr) + + n, _, err = server.ReadFromUnix(buf) + if err != nil { + return + } + if string(buf[:n]) == "DETACH" { + server.WriteToUnix([]byte("OK\n"), raddr) + } + }() + + ac, err := Attach(serverPath) + if err != nil { + t.Fatalf("Attach: %v", err) + } + defer ac.Close() + + var events []Event + ac.SetHandler(func(ev Event) { + events = append(events, ev) + }) + + ctx, cancel := context.WithTimeout(context.Background(), 500*time.Millisecond) + defer cancel() + + ac.Run(ctx) + + if len(events) < 2 { + t.Fatalf("got %d events, want >= 2", len(events)) + } + if events[0].Name != "AP-STA-CONNECTED" { + t.Errorf("events[0].Name = %q, want AP-STA-CONNECTED", events[0].Name) + } + if events[0].Data != "9e:61:6b:cf:d8:15" { + t.Errorf("events[0].Data = %q", events[0].Data) + } + if events[1].Name != "CTRL-EVENT-SIGNAL-CHANGE" { + t.Errorf("events[1].Name = %q, want CTRL-EVENT-SIGNAL-CHANGE", events[1].Name) + } + + os.Remove(ac.local) +} + +func TestAttachContextCancel(t *testing.T) { + dir := t.TempDir() + serverPath := dir + "/wpa_test" + + serverAddr := &net.UnixAddr{Name: serverPath, Net: "unixgram"} + server, err := net.ListenUnixgram("unixgram", serverAddr) + if err != nil { + t.Fatalf("listen: %v", err) + } + defer server.Close() + + go func() { + buf := make([]byte, 4096) + n, raddr, err := server.ReadFromUnix(buf) + if err != nil { + return + } + if string(buf[:n]) == "ATTACH" { + server.WriteToUnix([]byte("OK\n"), raddr) + } + n, _, _ = server.ReadFromUnix(buf) + }() + + ac, err := Attach(serverPath) + if err != nil { + t.Fatalf("Attach: %v", err) + } + defer ac.Close() + + ac.SetHandler(func(ev Event) {}) + + ctx, cancel := context.WithTimeout(context.Background(), 200*time.Millisecond) + defer cancel() + + err = ac.Run(ctx) + if err 
!= nil { + t.Errorf("expected nil on context cancel, got %v", err) + } +} diff --git a/src/yangerd/internal/wpactrl/parse.go b/src/yangerd/internal/wpactrl/parse.go new file mode 100644 index 000000000..31bad3cbc --- /dev/null +++ b/src/yangerd/internal/wpactrl/parse.go @@ -0,0 +1,143 @@ +package wpactrl + +import ( + "strconv" + "strings" +) + +// ScanResult is a single entry from SCAN_RESULTS. +type ScanResult struct { + BSSID string + Frequency int + Signal int + Flags string + SSID string +} + +// ParseKV parses a wpa_supplicant/hostapd key=value response. +func ParseKV(resp string) map[string]string { + m := make(map[string]string) + for _, line := range strings.Split(resp, "\n") { + line = strings.TrimSpace(line) + if idx := strings.IndexByte(line, '='); idx > 0 { + m[line[:idx]] = line[idx+1:] + } + } + return m +} + +// ParseScanResults parses wpa_supplicant SCAN_RESULTS output. +// Format: bssid / frequency / signal level / flags / ssid +// First line is a header, subsequent lines are tab-separated. +func ParseScanResults(resp string) []ScanResult { + var results []ScanResult + for _, line := range strings.Split(resp, "\n") { + line = strings.TrimSpace(line) + if line == "" || strings.HasPrefix(line, "bssid") { + continue + } + fields := strings.SplitN(line, "\t", 5) + if len(fields) < 4 { + continue + } + freq, _ := strconv.Atoi(fields[1]) + sig, _ := strconv.Atoi(fields[2]) + ssid := "" + if len(fields) >= 5 { + ssid = fields[4] + } + results = append(results, ScanResult{ + BSSID: fields[0], + Frequency: freq, + Signal: sig, + Flags: fields[3], + SSID: ssid, + }) + } + return results +} + +// ParseStationResp parses a hostapd STA-FIRST/STA-NEXT response. +// First line is the station MAC, subsequent lines are key=value pairs. 
func ParseStationResp(resp string) map[string]string {
	// strings.Split with a non-empty separator always returns at least one
	// element, so a first line always exists and the result is never nil:
	// an empty response yields an empty (non-nil) map.
	lines := strings.Split(resp, "\n")
	sta := make(map[string]string)
	if addr := strings.TrimSpace(lines[0]); addr != "" {
		sta["addr"] = addr
	}
	for _, line := range lines[1:] {
		storeKV(sta, line)
	}
	return sta
}

// ParseAllStations parses hostapd ALL_STA response containing multiple
// stations. Each station block starts with a MAC address line (xx:xx:xx:xx:xx:xx)
// followed by key=value lines. Every returned map carries the MAC under the
// "addr" key. Lines seen before the first MAC line are ignored, and the
// result is nil when the response contains no MAC line at all.
func ParseAllStations(resp string) []map[string]string {
	var stations []map[string]string
	var current map[string]string

	for _, line := range strings.Split(resp, "\n") {
		line = strings.TrimSpace(line)
		switch {
		case line == "":
			// Blank lines carry no data and do not end a station block.
		case isMACAddress(line):
			// A MAC line closes the previous station and opens a new one.
			if current != nil {
				stations = append(stations, current)
			}
			current = map[string]string{"addr": line}
		case current != nil:
			storeKV(current, line)
		}
	}
	if current != nil {
		stations = append(stations, current)
	}
	return stations
}

// storeKV records a "key=value" line in m after trimming surrounding
// whitespace. Lines without '=' or with an empty key are ignored; the value
// may be empty and may itself contain '=' (the split is on the first one).
func storeKV(m map[string]string, line string) {
	key, val, found := strings.Cut(strings.TrimSpace(line), "=")
	if found && key != "" {
		m[key] = val
	}
}

// isMACAddress reports whether s has the exact form xx:xx:xx:xx:xx:xx,
// where each x is an upper- or lower-case hexadecimal digit.
func isMACAddress(s string) bool {
	const macLen = 17 // six two-digit octets plus five ':' separators
	if len(s) != macLen {
		return false
	}
	// Index by byte: every position is checked and non-ASCII bytes simply
	// fail the hex/colon tests.
	for i := 0; i < len(s); i++ {
		c := s[i]
		if i%3 == 2 {
			if c != ':' {
				return false
			}
			continue
		}
		isHex := (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')
		if !isHex {
			return false
		}
	}
	return true
}

// FrequencyToChannel converts a WiFi frequency in MHz to a channel number.
+func FrequencyToChannel(freq int) int { + switch { + case freq == 2484: + return 14 + case freq >= 2412 && freq <= 2472: + return (freq-2412)/5 + 1 + case freq >= 5170 && freq <= 5825: + return (freq - 5000) / 5 + case freq >= 5955 && freq <= 7115: + return (freq - 5950) / 5 + } + return 0 +} diff --git a/src/yangerd/internal/wpactrl/wpactrl.go b/src/yangerd/internal/wpactrl/wpactrl.go new file mode 100644 index 000000000..866b7ded9 --- /dev/null +++ b/src/yangerd/internal/wpactrl/wpactrl.go @@ -0,0 +1,212 @@ +// Package wpactrl provides a native Go client for wpa_supplicant and +// hostapd control sockets. It speaks the same text-based protocol as +// wpa_cli/hostapd_cli — Unix datagram sockets with ASCII +// command/response framing. No subprocess, no CGo. +// +// wpa_supplicant listens at /var/run/wpa_supplicant/ +// hostapd listens at /var/run/hostapd/ +// +// The client binds its own temporary socket, sends a command string, +// and reads back the text response. +package wpactrl + +import ( + "fmt" + "net" + "os" + "path/filepath" + "strings" + "sync/atomic" + "time" +) + +const ( + DefaultTimeout = 5 * time.Second + + maxResponse = 64 * 1024 +) + +// WPADirs lists directories where wpa_supplicant control sockets may live. +var WPADirs = []string{"/run/wpa_supplicant", "/var/run/wpa_supplicant"} + +// HostapdDirs lists directories where hostapd control sockets may live. +var HostapdDirs = []string{"/run/hostapd", "/var/run/hostapd"} + +var clientSeq atomic.Uint64 + +// SocketInfo describes a discovered control socket. +type SocketInfo struct { + Path string + Iface string + Daemon string // "wpa_supplicant" or "hostapd" +} + +// ScanSockets discovers wpa_supplicant and hostapd control sockets by +// listing the well-known directories. Returns a map from interface +// name to SocketInfo. 
+func ScanSockets() map[string]SocketInfo { + result := make(map[string]SocketInfo) + for _, dir := range HostapdDirs { + scanDir(dir, "hostapd", result) + } + for _, dir := range WPADirs { + scanDir(dir, "wpa_supplicant", result) + } + return result +} + +func scanDir(dir, daemon string, out map[string]SocketInfo) { + entries, err := os.ReadDir(dir) + if err != nil { + return + } + for _, e := range entries { + name := e.Name() + if _, exists := out[name]; exists { + continue + } + path := filepath.Join(dir, name) + fi, err := os.Stat(path) + if err != nil { + continue + } + if fi.Mode()&os.ModeSocket != 0 { + out[name] = SocketInfo{ + Path: path, + Iface: name, + Daemon: daemon, + } + } + } +} + +// Conn is a connection to a wpa_supplicant or hostapd control socket. +type Conn struct { + conn *net.UnixConn + local string // path to our client socket (for cleanup) + timeout time.Duration +} + +// Dial connects to a wpa_supplicant or hostapd control socket at the +// given path (e.g. "/var/run/wpa_supplicant/wlan0"). The caller must +// call Close when done. +func Dial(serverPath string) (*Conn, error) { + return DialTimeout(serverPath, DefaultTimeout) +} + +// DialTimeout connects with a custom timeout. +func DialTimeout(serverPath string, timeout time.Duration) (*Conn, error) { + // Create a unique client socket path in /tmp. + seq := clientSeq.Add(1) + localPath := fmt.Sprintf("/tmp/wpactrl_%d_%d", os.Getpid(), seq) + + // Clean up stale socket file if it exists. + os.Remove(localPath) + + laddr := &net.UnixAddr{Name: localPath, Net: "unixgram"} + raddr := &net.UnixAddr{Name: serverPath, Net: "unixgram"} + + conn, err := net.DialUnix("unixgram", laddr, raddr) + if err != nil { + os.Remove(localPath) + return nil, fmt.Errorf("dial %s: %w", serverPath, err) + } + + return &Conn{ + conn: conn, + local: localPath, + timeout: timeout, + }, nil +} + +// Close closes the connection and removes the client socket file. 
+func (c *Conn) Close() error { + err := c.conn.Close() + os.Remove(c.local) + return err +} + +// Command sends a command string and returns the response. +func (c *Conn) Command(cmd string) (string, error) { + c.conn.SetDeadline(time.Now().Add(c.timeout)) + + _, err := c.conn.Write([]byte(cmd)) + if err != nil { + return "", fmt.Errorf("write %q: %w", cmd, err) + } + + buf := make([]byte, maxResponse) + n, err := c.conn.Read(buf) + if err != nil { + return "", fmt.Errorf("read response to %q: %w", cmd, err) + } + + return string(buf[:n]), nil +} + +// Ping sends a PING command and returns true if the response is PONG. +func (c *Conn) Ping() bool { + resp, err := c.Command("PING") + return err == nil && len(resp) >= 4 && resp[:4] == "PONG" +} + +// Status sends the STATUS command and returns the parsed key=value pairs. +func (c *Conn) Status() (map[string]string, error) { + resp, err := c.Command("STATUS") + if err != nil { + return nil, err + } + return ParseKV(resp), nil +} + +// SignalPoll sends SIGNAL_POLL and returns parsed key=value pairs. +// Returns RSSI, LINKSPEED, NOISE, FREQUENCY, etc. +// Only meaningful for wpa_supplicant (station mode). +func (c *Conn) SignalPoll() (map[string]string, error) { + resp, err := c.Command("SIGNAL_POLL") + if err != nil { + return nil, err + } + return ParseKV(resp), nil +} + +// ScanResults sends SCAN_RESULTS and returns parsed results. +// This is only meaningful for wpa_supplicant (station mode). +func (c *Conn) ScanResults() ([]ScanResult, error) { + resp, err := c.Command("SCAN_RESULTS") + if err != nil { + return nil, err + } + return ParseScanResults(resp), nil +} + +// AllStations enumerates all associated stations via STA-FIRST/STA-NEXT. +// Only meaningful for hostapd. 
+func (c *Conn) AllStations() ([]map[string]string, error) { + resp, err := c.Command("STA-FIRST") + if err != nil { + return nil, fmt.Errorf("STA-FIRST: %w", err) + } + if resp == "" || resp == "\n" || resp == "FAIL\n" { + return nil, nil + } + if strings.HasPrefix(resp, "UNKNOWN") { + return nil, fmt.Errorf("STA-FIRST not supported: %q", strings.TrimSpace(resp)) + } + + var stations []map[string]string + st := ParseStationResp(resp) + for st != nil { + stations = append(stations, st) + addr := st["addr"] + if addr == "" { + break + } + resp, err = c.Command("STA-NEXT " + addr) + if err != nil || resp == "" || resp == "\n" || resp == "FAIL\n" || strings.HasPrefix(resp, "UNKNOWN") { + break + } + st = ParseStationResp(resp) + } + return stations, nil +} diff --git a/src/yangerd/internal/wpactrl/wpactrl_test.go b/src/yangerd/internal/wpactrl/wpactrl_test.go new file mode 100644 index 000000000..ea4379939 --- /dev/null +++ b/src/yangerd/internal/wpactrl/wpactrl_test.go @@ -0,0 +1,242 @@ +package wpactrl + +import ( + "net" + "os" + "testing" + "time" +) + +func TestParseKV(t *testing.T) { + resp := `bssid=02:00:00:00:01:00 +freq=2412 +ssid=TestNetwork +id=0 +mode=station +pairwise_cipher=CCMP +group_cipher=CCMP +key_mgmt=WPA2-PSK +wpa_state=COMPLETED +address=02:00:00:00:00:01 +` + m := ParseKV(resp) + if m["ssid"] != "TestNetwork" { + t.Errorf("ssid = %q, want TestNetwork", m["ssid"]) + } + if m["wpa_state"] != "COMPLETED" { + t.Errorf("wpa_state = %q, want COMPLETED", m["wpa_state"]) + } + if m["freq"] != "2412" { + t.Errorf("freq = %q, want 2412", m["freq"]) + } + if m["mode"] != "station" { + t.Errorf("mode = %q, want station", m["mode"]) + } +} + +func TestParseKVEmpty(t *testing.T) { + m := ParseKV("") + if len(m) != 0 { + t.Errorf("expected empty map, got %v", m) + } +} + +func TestParseScanResults(t *testing.T) { + resp := "bssid / frequency / signal level / flags / ssid\n" + + "02:00:00:00:01:00\t2412\t-50\t[WPA2-PSK-CCMP][ESS]\tMyNetwork\n" + + 
"02:00:00:00:02:00\t5180\t-70\t[WPA2-EAP-CCMP][ESS]\tOffice\n" + + "02:00:00:00:03:00\t2437\t-85\t[ESS]\t\n" + + results := ParseScanResults(resp) + if len(results) != 3 { + t.Fatalf("got %d results, want 3", len(results)) + } + + r := results[0] + if r.BSSID != "02:00:00:00:01:00" { + t.Errorf("bssid = %q", r.BSSID) + } + if r.Frequency != 2412 { + t.Errorf("freq = %d, want 2412", r.Frequency) + } + if r.Signal != -50 { + t.Errorf("signal = %d, want -50", r.Signal) + } + if r.SSID != "MyNetwork" { + t.Errorf("ssid = %q, want MyNetwork", r.SSID) + } + + if results[1].Frequency != 5180 { + t.Errorf("results[1].freq = %d, want 5180", results[1].Frequency) + } +} + +func TestParseScanResultsEmpty(t *testing.T) { + results := ParseScanResults("bssid / frequency / signal level / flags / ssid\n") + if len(results) != 0 { + t.Errorf("expected empty, got %d", len(results)) + } +} + +func TestParseStationResp(t *testing.T) { + resp := "02:00:00:00:00:01\nflags=[AUTH][ASSOC][AUTHORIZED]\naid=1\n" + + "rx_bytes=12345\ntx_bytes=67890\nconnected_time=120\n" + + m := ParseStationResp(resp) + if m["addr"] != "02:00:00:00:00:01" { + t.Errorf("addr = %q", m["addr"]) + } + if m["rx_bytes"] != "12345" { + t.Errorf("rx_bytes = %q", m["rx_bytes"]) + } + if m["connected_time"] != "120" { + t.Errorf("connected_time = %q", m["connected_time"]) + } +} + +func TestParseStationRespEmpty(t *testing.T) { + m := ParseStationResp("") + if m["addr"] != "" { + t.Errorf("expected empty addr, got %q", m["addr"]) + } +} + +func TestParseAllStations(t *testing.T) { + resp := "c8:69:cd:69:35:da\n" + + "flags=[AUTH][ASSOC][AUTHORIZED]\n" + + "rx_bytes=4825331939\n" + + "tx_bytes=216392802676\n" + + "signal=-57\n" + + "connected_time=1846085\n" + + "d8:3a:dd:72:8e:b1\n" + + "flags=[AUTH][ASSOC][AUTHORIZED]\n" + + "rx_bytes=237629088\n" + + "tx_bytes=190760338\n" + + "signal=-78\n" + + "connected_time=3435639\n" + + stas := ParseAllStations(resp) + if len(stas) != 2 { + t.Fatalf("got %d stations, want 2", 
len(stas)) + } + if stas[0]["addr"] != "c8:69:cd:69:35:da" { + t.Errorf("sta[0] addr = %q", stas[0]["addr"]) + } + if stas[0]["signal"] != "-57" { + t.Errorf("sta[0] signal = %q", stas[0]["signal"]) + } + if stas[1]["addr"] != "d8:3a:dd:72:8e:b1" { + t.Errorf("sta[1] addr = %q", stas[1]["addr"]) + } + if stas[1]["rx_bytes"] != "237629088" { + t.Errorf("sta[1] rx_bytes = %q", stas[1]["rx_bytes"]) + } +} + +func TestParseAllStationsEmpty(t *testing.T) { + stas := ParseAllStations("") + if len(stas) != 0 { + t.Errorf("expected empty, got %d", len(stas)) + } +} + +func TestIsMACAddress(t *testing.T) { + if !isMACAddress("c8:69:cd:69:35:da") { + t.Error("valid MAC rejected") + } + if isMACAddress("not-a-mac") { + t.Error("invalid string accepted") + } + if isMACAddress("signal=-57") { + t.Error("key=value accepted as MAC") + } +} + +func TestFrequencyToChannel(t *testing.T) { + tests := []struct { + freq int + ch int + }{ + {2412, 1}, + {2437, 6}, + {2462, 11}, + {2484, 14}, + {5180, 36}, + {5240, 48}, + {5745, 149}, + {5825, 165}, + {5955, 1}, + {6115, 33}, + {1000, 0}, + } + for _, tt := range tests { + got := FrequencyToChannel(tt.freq) + if got != tt.ch { + t.Errorf("FrequencyToChannel(%d) = %d, want %d", tt.freq, got, tt.ch) + } + } +} + +func TestDialAndCommand(t *testing.T) { + dir := t.TempDir() + serverPath := dir + "/test_server" + clientDone := make(chan struct{}) + + serverAddr := &net.UnixAddr{Name: serverPath, Net: "unixgram"} + server, err := net.ListenUnixgram("unixgram", serverAddr) + if err != nil { + t.Fatalf("listen: %v", err) + } + defer server.Close() + + go func() { + defer close(clientDone) + buf := make([]byte, 4096) + n, raddr, err := server.ReadFromUnix(buf) + if err != nil { + t.Errorf("server read: %v", err) + return + } + cmd := string(buf[:n]) + var resp string + switch cmd { + case "PING": + resp = "PONG\n" + case "STATUS": + resp = "wpa_state=COMPLETED\nssid=Test\n" + default: + resp = "UNKNOWN COMMAND\n" + } + 
server.WriteToUnix([]byte(resp), raddr) + + n, raddr, err = server.ReadFromUnix(buf) + if err != nil { + t.Errorf("server read 2: %v", err) + return + } + if string(buf[:n]) == "STATUS" { + server.WriteToUnix([]byte("wpa_state=COMPLETED\nssid=Test\n"), raddr) + } + }() + + conn, err := DialTimeout(serverPath, 2*time.Second) + if err != nil { + t.Fatalf("dial: %v", err) + } + defer conn.Close() + + if !conn.Ping() { + t.Error("Ping failed") + } + + status, err := conn.Status() + if err != nil { + t.Fatalf("Status: %v", err) + } + if status["ssid"] != "Test" { + t.Errorf("ssid = %q, want Test", status["ssid"]) + } + + <-clientDone + os.Remove(conn.local) +} diff --git a/src/yangerd/internal/zapi/zapi.go b/src/yangerd/internal/zapi/zapi.go new file mode 100644 index 000000000..3a4f12ea7 --- /dev/null +++ b/src/yangerd/internal/zapi/zapi.go @@ -0,0 +1,558 @@ +// Package zapi implements a minimal ZAPI v6 client for FRR 10.5. +// +// It speaks only the subset of the Zebra wire protocol needed by +// yangerd: Hello, RouterIDAdd, RedistributeAdd, and decoding of +// RedistributeRouteAdd/Del messages. +package zapi + +import ( + "encoding/binary" + "encoding/hex" + "fmt" + "io" + "log/slog" + "net" + "syscall" +) + +// Wire constants for ZAPI v6. +const ( + HeaderSize = 10 + HeaderMarker = 0xFE + HeaderVersion = 6 + + DefaultVrf uint32 = 0 +) + +// Command IDs for FRR 10.5 ZAPI v6 (from lib/zclient.h). 
type Command uint16

// Named command IDs. The numbering is not contiguous: values 24-30 have no
// named constant in this package.
const (
	CmdInterfaceAdd          Command = 0
	CmdInterfaceDelete       Command = 1
	CmdInterfaceAddrAdd      Command = 2
	CmdInterfaceAddrDelete   Command = 3
	CmdInterfaceUp           Command = 4
	CmdInterfaceDown         Command = 5
	CmdInterfaceSetMaster    Command = 6
	CmdInterfaceSetARP       Command = 7 // new in FRR 10.x
	CmdInterfaceSetProtodown Command = 8
	CmdRouteAdd              Command = 9
	CmdRouteDelete           Command = 10
	CmdRouteNotifyOwner      Command = 11
	CmdRedistributeAdd       Command = 12
	CmdRedistributeDelete    Command = 13
	CmdRedistDefaultAdd      Command = 14
	CmdRedistDefaultDelete   Command = 15
	CmdRouterIDAdd           Command = 16
	CmdRouterIDDelete        Command = 17
	CmdRouterIDUpdate        Command = 18
	CmdHello                 Command = 19
	CmdCapabilities          Command = 20
	CmdNexthopRegister       Command = 21
	CmdNexthopUnregister     Command = 22
	CmdNexthopUpdate         Command = 23

	CmdRedistRouteAdd Command = 31
	CmdRedistRouteDel Command = 32
)

// RouteType identifies the source protocol of a route. It is carried as a
// single byte on the wire (first byte of a route body, and the routeType
// byte of a RedistributeAdd body).
type RouteType uint8

const (
	RouteSystem  RouteType = 0
	RouteKernel  RouteType = 1
	RouteConnect RouteType = 2
	RouteLocal   RouteType = 3
	RouteStatic  RouteType = 4
	RouteRIP     RouteType = 5
	RouteRIPNG   RouteType = 6
	RouteOSPF    RouteType = 7
	RouteOSPF6   RouteType = 8
	RouteISIS    RouteType = 9
	RouteBGP     RouteType = 10
)

// AFI values, used as the one-byte address-family selector in the
// RouterIDAdd and RedistributeAdd message bodies.
const (
	AFIIPv4 uint8 = 1
	AFIIPv6 uint8 = 2
)

// Route flags from FRR (lib/zebra.h: ZEBRA_FLAG_*). These are bits of the
// 32-bit Route.Flags field.
//
// NOTE(review): recent FRR headers (lib/zclient.h) appear to define
// ZEBRA_FLAG_IBGP as 0x04 and ZEBRA_FLAG_SELECTED as 0x08 — confirm this
// value against the FRR 10.5 sources before relying on it.
const (
	FlagSelected uint32 = 0x04
)

// Message flags (from struct zapi_route.message). Each bit announces that
// the corresponding optional field is present in the route body.
type MsgFlag uint32

const (
	MsgNexthop  MsgFlag = 0x01
	MsgDistance MsgFlag = 0x02
	MsgMetric   MsgFlag = 0x04
	MsgTag      MsgFlag = 0x08
	MsgMTU      MsgFlag = 0x10
	MsgSrcPfx   MsgFlag = 0x20
	MsgBackupNH MsgFlag = 0x40
	MsgNHG      MsgFlag = 0x80
	MsgTableID  MsgFlag = 0x100
	MsgSRTE     MsgFlag = 0x200
	MsgOpaque   MsgFlag = 0x400
)

// Nexthop type values (from lib/nexthop.h).
+type NHType uint8 + +const ( + NHIFIndex NHType = 1 + NHIPv4 NHType = 2 + NHIPv4IFIndex NHType = 3 + NHIPv6 NHType = 4 + NHIPv6IFIndex NHType = 5 + NHBlackhole NHType = 6 +) + +// Nexthop flags (from lib/zclient.h: ZAPI_NEXTHOP_FLAG_*). +const ( + nhFlagOnlink uint8 = 0x01 + nhFlagLabel uint8 = 0x02 + nhFlagWeight uint8 = 0x04 + nhFlagHasBackup uint8 = 0x08 + nhFlagSeg6 uint8 = 0x10 + nhFlagSeg6Local uint8 = 0x20 + nhFlagEVPN uint8 = 0x40 +) + +// Header is a ZAPI v6 message header. +type Header struct { + Length uint16 + Marker uint8 + Version uint8 + VrfID uint32 + Command Command +} + +// Nexthop holds one entry from the nexthop list in a route message. +type Nexthop struct { + Type NHType + Gate net.IP + Ifindex uint32 +} + +// Route is the decoded content of a RedistributeRouteAdd/Del message. +type Route struct { + Type RouteType + Flags uint32 + Message MsgFlag + Prefix net.IPNet + Nexthops []Nexthop + Distance uint8 + Metric uint32 + Tag uint32 + MTU uint32 +} + +// Message is a decoded ZAPI message received from zebra. +type Message struct { + Header Header + Route *Route // non-nil only for route messages +} + +// EncodeHeader serializes a ZAPI v6 header. +func EncodeHeader(length uint16, vrfID uint32, cmd Command) []byte { + buf := make([]byte, HeaderSize) + binary.BigEndian.PutUint16(buf[0:2], length) + buf[2] = HeaderMarker + buf[3] = HeaderVersion + binary.BigEndian.PutUint32(buf[4:8], vrfID) + binary.BigEndian.PutUint16(buf[8:10], uint16(cmd)) + return buf +} + +// DecodeHeader parses a ZAPI v6 header from exactly HeaderSize bytes. 
+func DecodeHeader(data []byte) (Header, error) { + if len(data) < HeaderSize { + return Header{}, fmt.Errorf("header too short: %d bytes", len(data)) + } + h := Header{ + Length: binary.BigEndian.Uint16(data[0:2]), + Marker: data[2], + Version: data[3], + VrfID: binary.BigEndian.Uint32(data[4:8]), + Command: Command(binary.BigEndian.Uint16(data[8:10])), + } + if h.Marker != HeaderMarker { + return Header{}, fmt.Errorf("bad marker: 0x%02x", h.Marker) + } + if h.Version != HeaderVersion { + return Header{}, fmt.Errorf("unsupported version: %d", h.Version) + } + return h, nil +} + +// EncodeHello builds a Hello message body. +// Fields: redistDefault(1), instance(2), sessionID(4), synchronous(1) = 8 bytes. +// We send zeros for everything (redistDefault=0 means ZEBRA_ROUTE_SYSTEM). +func EncodeHello() []byte { + return make([]byte, 8) +} + +// EncodeRouterIDAdd builds a RouterIDAdd message body. +// Body is just the AFI value (1 byte). +func EncodeRouterIDAdd(afi uint8) []byte { + return []byte{afi} +} + +// EncodeRedistributeAdd builds a RedistributeAdd body. +// Body: afi(1), routeType(1), instance(2). +func EncodeRedistributeAdd(afi uint8, rt RouteType) []byte { + buf := make([]byte, 4) + buf[0] = afi + buf[1] = uint8(rt) + // instance = 0 (already zeroed) + return buf +} + +// BuildMessage constructs a complete wire message from command and body. +func BuildMessage(cmd Command, vrfID uint32, body []byte) []byte { + length := uint16(HeaderSize + len(body)) + hdr := EncodeHeader(length, vrfID, cmd) + return append(hdr, body...) +} + +// ReadMessage reads one complete ZAPI message from the connection. +// It returns the header and the raw body bytes. 
+func ReadMessage(r io.Reader) (Header, []byte, error) { + hdrBuf := make([]byte, HeaderSize) + if _, err := io.ReadFull(r, hdrBuf); err != nil { + return Header{}, nil, fmt.Errorf("read header: %w", err) + } + + hdr, err := DecodeHeader(hdrBuf) + if err != nil { + return Header{}, nil, err + } + + bodyLen := int(hdr.Length) - HeaderSize + if bodyLen < 0 { + return Header{}, nil, fmt.Errorf("invalid message length: %d", hdr.Length) + } + if bodyLen == 0 { + return hdr, nil, nil + } + + body := make([]byte, bodyLen) + if _, err := io.ReadFull(r, body); err != nil { + return Header{}, nil, fmt.Errorf("read body: %w", err) + } + + return hdr, body, nil +} + +// DecodeRoute parses an IPRouteBody from the given body bytes. +// This handles the FRR 10.5 / ZAPI v6 (frr >= 7.5) format: +// +// type(1) instance(2) flags(4) message(4) safi(1) family(1) prefixlen(1) prefix(var) ... +func DecodeRoute(body []byte) (*Route, error) { + return DecodeRouteLog(body, nil) +} + +// DecodeRouteLog is like DecodeRoute but logs decode positions if log is non-nil. 
+func DecodeRouteLog(body []byte, log *slog.Logger) (*Route, error) { + if len(body) < 10 { + return nil, fmt.Errorf("route body too short: %d bytes", len(body)) + } + + if log != nil { + log.Debug("zapi decode: raw body", "len", len(body), "hex", hex.EncodeToString(body)) + } + + r := &Route{} + pos := 0 + + r.Type = RouteType(body[pos]) + pos++ + + // instance(2) + pos += 2 + + r.Flags = binary.BigEndian.Uint32(body[pos : pos+4]) + pos += 4 + + r.Message = MsgFlag(binary.BigEndian.Uint32(body[pos : pos+4])) + pos += 4 + + if log != nil { + log.Debug("zapi decode: header", "type", r.Type, "flags", r.Flags, "message", fmt.Sprintf("0x%x", r.Message), "pos", pos) + } + + // safi(1) + pos++ + + if pos >= len(body) { + return nil, fmt.Errorf("truncated at family") + } + family := body[pos] + pos++ + + addrLen, err := addressByteLen(family) + if err != nil { + return nil, err + } + + if pos >= len(body) { + return nil, fmt.Errorf("truncated at prefixlen") + } + prefixLen := body[pos] + pos++ + + byteLen := int((prefixLen + 7) / 8) + if pos+byteLen > len(body) { + return nil, fmt.Errorf("truncated at prefix data") + } + ipBuf := make([]byte, addrLen) + copy(ipBuf, body[pos:pos+byteLen]) + pos += byteLen + + var ip net.IP + if family == syscall.AF_INET { + ip = net.IP(ipBuf).To4() + } else { + ip = net.IP(ipBuf).To16() + } + mask := net.CIDRMask(int(prefixLen), addrLen*8) + r.Prefix = net.IPNet{IP: ip, Mask: mask} + + if log != nil { + log.Debug("zapi decode: prefix", "prefix", r.Prefix.String(), "pos", pos) + } + + if r.Message&MsgSrcPfx != 0 { + if pos >= len(body) { + return nil, fmt.Errorf("truncated at src prefix") + } + srcPfxLen := body[pos] + pos++ + srcByteLen := int((srcPfxLen + 7) / 8) + if pos+srcByteLen > len(body) { + return nil, fmt.Errorf("truncated at src prefix data") + } + pos += srcByteLen + } + + if r.Message&MsgNHG != 0 { + if pos+4 > len(body) { + return nil, fmt.Errorf("truncated at nhg") + } + pos += 4 + } + + if r.Message&MsgNexthop != 0 { + if 
pos+2 > len(body) { + return nil, fmt.Errorf("truncated at nexthop count") + } + numNH := binary.BigEndian.Uint16(body[pos : pos+2]) + pos += 2 + + if log != nil { + log.Debug("zapi decode: nexthops", "count", numNH, "pos", pos) + } + + r.Nexthops = make([]Nexthop, 0, numNH) + for i := uint16(0); i < numNH; i++ { + nh, n, err := decodeNexthop(body[pos:], r.Message) + if err != nil { + return nil, fmt.Errorf("nexthop %d: %w", i, err) + } + if log != nil { + log.Debug("zapi decode: nexthop", "i", i, "type", nh.Type, "gate", nh.Gate, "ifindex", nh.Ifindex, "consumed", n, "nextPos", pos+n) + } + r.Nexthops = append(r.Nexthops, nh) + pos += n + } + } + + if r.Message&MsgBackupNH != 0 { + if pos+2 > len(body) { + return nil, fmt.Errorf("truncated at backup nexthop count") + } + numBackup := binary.BigEndian.Uint16(body[pos : pos+2]) + pos += 2 + for i := uint16(0); i < numBackup; i++ { + _, n, err := decodeNexthop(body[pos:], r.Message) + if err != nil { + return nil, fmt.Errorf("backup nexthop %d: %w", i, err) + } + pos += n + } + } + + if r.Message&MsgDistance != 0 { + if pos >= len(body) { + return nil, fmt.Errorf("truncated at distance") + } + r.Distance = body[pos] + if log != nil { + log.Debug("zapi decode: distance", "distance", r.Distance, "pos", pos, "byte", fmt.Sprintf("0x%02x", body[pos])) + } + pos++ + } + + if r.Message&MsgMetric != 0 { + if pos+4 > len(body) { + return nil, fmt.Errorf("truncated at metric") + } + r.Metric = binary.BigEndian.Uint32(body[pos : pos+4]) + if log != nil { + log.Debug("zapi decode: metric", "metric", r.Metric, "pos", pos) + } + pos += 4 + } + + if r.Message&MsgTag != 0 { + if pos+4 > len(body) { + return nil, fmt.Errorf("truncated at tag") + } + r.Tag = binary.BigEndian.Uint32(body[pos : pos+4]) + pos += 4 + } + + if r.Message&MsgMTU != 0 { + if pos+4 > len(body) { + return nil, fmt.Errorf("truncated at mtu") + } + r.MTU = binary.BigEndian.Uint32(body[pos : pos+4]) + pos += 4 + } + + return r, nil +} + +// decodeNexthop decodes 
one nexthop matching FRR 10.5 zapi_nexthop_decode.
+// Wire format: vrfID(4) type(1) flags(1) [gate/ifindex] [labels] [weight]
+//   [rmac] [srte_color] [backup] [seg6local] [seg6]
+//
+// Only Type, Gate and Ifindex are stored in the returned Nexthop; every
+// other section is skipped over. The int result is the number of bytes
+// consumed from data. An error is returned as soon as a section would run
+// past the end of data, so a successful call never reports more bytes than
+// data holds and the caller can safely advance by the returned length.
+func decodeNexthop(data []byte, routeMsg MsgFlag) (Nexthop, int, error) {
+	// Upper bound on the label stack. NOTE(review): 16 is the limit the
+	// previous code clamped to; it is believed to match FRR's
+	// MPLS_MAX_LABELS -- confirm against the FRR version in use.
+	const maxLabels = 16
+
+	nh := Nexthop{}
+	pos := 0
+
+	if pos+6 > len(data) {
+		return nh, 0, fmt.Errorf("truncated at nh header")
+	}
+	pos += 4 // vrfID
+	nh.Type = NHType(data[pos])
+	pos++
+	flags := data[pos]
+	pos++
+
+	switch nh.Type {
+	case NHBlackhole:
+		if pos >= len(data) {
+			return nh, 0, fmt.Errorf("truncated at blackhole type")
+		}
+		pos++
+	case NHIPv4, NHIPv4IFIndex:
+		if pos+8 > len(data) {
+			return nh, 0, fmt.Errorf("truncated at ipv4 gate+ifindex")
+		}
+		nh.Gate = net.IP(data[pos : pos+4]).To4()
+		pos += 4
+		nh.Ifindex = binary.BigEndian.Uint32(data[pos : pos+4])
+		pos += 4
+	case NHIFIndex:
+		if pos+4 > len(data) {
+			return nh, 0, fmt.Errorf("truncated at ifindex")
+		}
+		nh.Ifindex = binary.BigEndian.Uint32(data[pos : pos+4])
+		pos += 4
+	case NHIPv6, NHIPv6IFIndex:
+		if pos+20 > len(data) {
+			return nh, 0, fmt.Errorf("truncated at ipv6 gate+ifindex")
+		}
+		nh.Gate = net.IP(data[pos : pos+16]).To16()
+		pos += 16
+		nh.Ifindex = binary.BigEndian.Uint32(data[pos : pos+4])
+		pos += 4
+	}
+
+	if flags&nhFlagLabel != 0 {
+		if pos+2 > len(data) {
+			return nh, 0, fmt.Errorf("truncated at labels")
+		}
+		labelNum := int(data[pos])
+		pos += 2 // label_num (1 byte) + label_type (1 byte)
+		// Skipping fewer labels than the sender wrote would leave pos in
+		// the middle of the label stack and mis-parse everything after
+		// it, so an oversized count is an error rather than clamped.
+		if labelNum > maxLabels {
+			return nh, 0, fmt.Errorf("too many labels: %d", labelNum)
+		}
+		if pos+labelNum*4 > len(data) {
+			return nh, 0, fmt.Errorf("truncated at label stack")
+		}
+		pos += labelNum * 4
+	}
+
+	if flags&nhFlagWeight != 0 {
+		if pos+8 > len(data) {
+			return nh, 0, fmt.Errorf("truncated at weight")
+		}
+		pos += 8 // uint64
+	}
+
+	if flags&nhFlagEVPN != 0 {
+		if pos+6 > len(data) {
+			return nh, 0, fmt.Errorf("truncated at evpn rmac")
+		}
+		pos += 6 // struct ethaddr
+	}
+
+	if routeMsg&MsgSRTE != 0 {
+		if pos+4 > len(data) {
+			return nh, 0, fmt.Errorf("truncated at srte color")
+		}
+		pos += 4
+	}
+
+	if flags&nhFlagHasBackup != 0 {
+		if pos >= len(data) {
+			return nh, 0, fmt.Errorf("truncated at backup count")
+		}
+		backupNum := int(data[pos])
+		pos++
+		// One index byte per backup; verify they are all present before
+		// stepping over them.
+		if pos+backupNum > len(data) {
+			return nh, 0, fmt.Errorf("truncated at backup indexes")
+		}
+		pos += backupNum
+	}
+
+	// SEG6LOCAL comes before SEG6 in FRR's decode order
+	if flags&nhFlagSeg6Local != 0 {
+		// seg6local_action(4) + seg6local_context(sizeof(struct seg6local_context))
+		// struct seg6local_context is 24 bytes in FRR (nh_seg.h)
+		if pos+28 > len(data) {
+			return nh, 0, fmt.Errorf("truncated at seg6local")
+		}
+		pos += 28
+	}
+
+	if flags&nhFlagSeg6 != 0 {
+		if pos >= len(data) {
+			return nh, 0, fmt.Errorf("truncated at seg6 count")
+		}
+		segNum := int(data[pos])
+		pos++
+		// segs(segNum*16) + behavior(4)
+		skip := segNum*16 + 4
+		if pos+skip > len(data) {
+			return nh, 0, fmt.Errorf("truncated at seg6 data")
+		}
+		pos += skip
+	}
+
+	return nh, pos, nil
+}
+
+// addressByteLen returns the size in bytes of a full address for the given
+// address family (4 for AF_INET, 16 for AF_INET6) and an error for any
+// other family.
+func addressByteLen(family uint8) (int, error) {
+	switch family {
+	case syscall.AF_INET:
+		return 4, nil
+	case syscall.AF_INET6:
+		return 16, nil
+	default:
+		return 0, fmt.Errorf("unsupported address family: %d", family)
+	}
+}
diff --git a/src/yangerd/internal/zapi/zapi_test.go b/src/yangerd/internal/zapi/zapi_test.go
new file mode 100644
index 000000000..92521e004
--- /dev/null
+++ b/src/yangerd/internal/zapi/zapi_test.go
@@ -0,0 +1,462 @@
+package zapi
+
+import (
+	"bytes"
+	"encoding/binary"
+	"net"
+	"syscall"
+	"testing"
+)
+
+// TestEncodeDecodeHeader round-trips a header through EncodeHeader and
+// DecodeHeader and checks length, command and VRF id.
+func TestEncodeDecodeHeader(t *testing.T) {
+	raw := EncodeHeader(42, 0, CmdHello)
+	hdr, err := DecodeHeader(raw)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if hdr.Length != 42 {
+		t.Errorf("Length = %d, want 42", hdr.Length)
+	}
+	if hdr.Command != CmdHello {
+		t.Errorf("Command = %d, want %d", hdr.Command, CmdHello)
+	}
+	if hdr.VrfID != 0 {
+		t.Errorf("VrfID = %d, want 0", hdr.VrfID)
+	}
+}
+
+// TestDecodeHeaderBadMarker zeroes byte 2 of an encoded header and expects
+// DecodeHeader to reject it.
+func TestDecodeHeaderBadMarker(t *testing.T) {
+	raw := EncodeHeader(10, 0, CmdHello)
+	raw[2] = 0x00
+	_, err := DecodeHeader(raw)
+	if err == nil {
+		t.Fatal("expected error for bad marker")
+	}
+}
+
+// TestDecodeHeaderBadVersion overwrites byte 3 of an encoded header with 5
+// and expects DecodeHeader to reject it.
+func TestDecodeHeaderBadVersion(t *testing.T) {
+	raw := EncodeHeader(10, 0, CmdHello)
+	raw[3] = 5
+	_, err := DecodeHeader(raw)
+	if err == nil {
+		t.Fatal("expected error for bad version")
+	}
+}
+
+// TestBuildMessage checks that BuildMessage prepends a header whose Length
+// covers header plus body.
+func TestBuildMessage(t *testing.T) {
+	body := EncodeHello()
+	msg := BuildMessage(CmdHello, DefaultVrf, body)
+	if len(msg) != HeaderSize+len(body) {
+		t.Errorf("message len = %d, want %d", len(msg), HeaderSize+len(body))
+	}
+	hdr, err := DecodeHeader(msg[:HeaderSize])
+	if err != nil {
+		t.Fatal(err)
+	}
+	if hdr.Command != CmdHello {
+		t.Errorf("Command = %d, want %d", hdr.Command, CmdHello)
+	}
+	if int(hdr.Length) != len(msg) {
+		t.Errorf("Length = %d, want %d", hdr.Length, len(msg))
+	}
+}
+
+// TestEncodeRedistributeAdd checks the 4-byte body layout: afi first,
+// route type second.
+func TestEncodeRedistributeAdd(t *testing.T) {
+	body := EncodeRedistributeAdd(AFIIPv4, RouteStatic)
+	if len(body) != 4 {
+		t.Fatalf("body len = %d, want 4", len(body))
+	}
+	if body[0] != AFIIPv4 {
+		t.Errorf("afi = %d, want %d", body[0], AFIIPv4)
+	}
+	if body[1] != uint8(RouteStatic) {
+		t.Errorf("routeType = %d, want %d", body[1], RouteStatic)
+	}
+}
+
+// TestReadMessage feeds one framed message through ReadMessage and checks
+// that command and body come back unchanged.
+func TestReadMessage(t *testing.T) {
+	body := []byte{0x01, 0x02, 0x03}
+	msg := BuildMessage(CmdRouterIDUpdate, DefaultVrf, body)
+	r := bytes.NewReader(msg)
+
+	hdr, gotBody, err := ReadMessage(r)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if hdr.Command != CmdRouterIDUpdate {
+		t.Errorf("Command = %d, want %d", hdr.Command, CmdRouterIDUpdate)
+	}
+	if !bytes.Equal(gotBody, body) {
+		t.Errorf("body = %v, want %v", gotBody, body)
+	}
+}
+
+// TestDecodeRouteIPv4 decodes an IPv4 /24 with one gateway nexthop,
+// distance and metric.
+func TestDecodeRouteIPv4(t *testing.T) {
+	body := buildRouteBody(t, syscall.AF_INET,
+		net.ParseIP("10.0.0.0").To4(), 24,
+		uint32(MsgNexthop|MsgDistance|MsgMetric),
+		RouteConnect, 0,
+		[]testNexthop{
+			{nhType: NHIPv4IFIndex, gate: net.ParseIP("10.0.0.1").To4(), ifindex: 2},
+		},
+		10, 100,
+	)
+
+	route, err := DecodeRoute(body)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if route.Type != RouteConnect {
+		t.Errorf("Type = %d, want %d", route.Type, RouteConnect)
+	}
+	want := "10.0.0.0/24"
+	if route.Prefix.String() != want {
+		t.Errorf("Prefix = %s, want %s", route.Prefix.String(), want)
+	}
+	if len(route.Nexthops) != 1 {
+		t.Fatalf("Nexthops = %d, want 1", len(route.Nexthops))
+	}
+	if route.Nexthops[0].Gate.String() != "10.0.0.1" {
+		t.Errorf("Gate = %s, want 10.0.0.1", route.Nexthops[0].Gate)
+	}
+	if route.Nexthops[0].Ifindex != 2 {
+		t.Errorf("Ifindex = %d, want 2", route.Nexthops[0].Ifindex)
+	}
+	if route.Distance != 10 {
+		t.Errorf("Distance = %d, want 10", route.Distance)
+	}
+	if route.Metric != 100 {
+		t.Errorf("Metric = %d, want 100", route.Metric)
+	}
+}
+
+// TestDecodeRouteIPv6 decodes an IPv6 /48 with one gateway nexthop and a
+// metric.
+func TestDecodeRouteIPv6(t *testing.T) {
+	body := buildRouteBody(t, syscall.AF_INET6,
+		net.ParseIP("2001:db8::").To16(), 48,
+		uint32(MsgNexthop|MsgMetric),
+		RouteOSPF, 0,
+		[]testNexthop{
+			{nhType: NHIPv6IFIndex, gate: net.ParseIP("2001:db8::1").To16(), ifindex: 3},
+		},
+		0, 200,
+	)
+
+	route, err := DecodeRoute(body)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	want := "2001:db8::/48"
+	if route.Prefix.String() != want {
+		t.Errorf("Prefix = %s, want %s", route.Prefix.String(), want)
+	}
+	if route.Metric != 200 {
+		t.Errorf("Metric = %d, want 200", route.Metric)
+	}
+	if len(route.Nexthops) != 1 {
+		t.Fatalf("Nexthops = %d, want 1", len(route.Nexthops))
+	}
+	if route.Nexthops[0].Gate.String() != "2001:db8::1" {
+		t.Errorf("Gate = %s, want 2001:db8::1", route.Nexthops[0].Gate)
+	}
+}
+
+// TestDecodeRouteNoNexthops decodes a body without the nexthop flag and
+// expects an empty nexthop list.
+func TestDecodeRouteNoNexthops(t *testing.T) {
+	body := buildRouteBody(t, syscall.AF_INET,
+		net.ParseIP("192.168.1.0").To4(), 24,
+		uint32(MsgMetric),
+		RouteKernel, 0,
+		nil,
+		0, 50,
+	)
+
+	route, err := DecodeRoute(body)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if len(route.Nexthops) != 0 {
+		t.Errorf("Nexthops = %d, want 0", len(route.Nexthops))
+	}
+	if route.Metric != 50 {
+		t.Errorf("Metric = %d, want 50", route.Metric)
+	}
+}
+
+// TestDecodeRouteMultipleNexthops checks that two consecutive nexthops are
+// both decoded, in order.
+func TestDecodeRouteMultipleNexthops(t *testing.T) {
+	body := buildRouteBody(t, syscall.AF_INET,
+		net.ParseIP("10.0.0.0").To4(), 8,
+		uint32(MsgNexthop),
+		RouteStatic, 0,
+		[]testNexthop{
+			{nhType: NHIPv4IFIndex, gate: net.ParseIP("10.0.0.1").To4(), ifindex: 1},
+			{nhType: NHIPv4IFIndex, gate: net.ParseIP("10.0.0.2").To4(), ifindex: 2},
+		},
+		0, 0,
+	)
+
+	route, err := DecodeRoute(body)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if len(route.Nexthops) != 2 {
+		t.Fatalf("Nexthops = %d, want 2", len(route.Nexthops))
+	}
+	if route.Nexthops[1].Gate.String() != "10.0.0.2" {
+		t.Errorf("Gate[1] = %s, want 10.0.0.2", route.Nexthops[1].Gate)
+	}
+}
+
+// TestDecodeRouteDefaultRoute decodes a zero-length prefix, which carries
+// no prefix bytes on the wire.
+func TestDecodeRouteDefaultRoute(t *testing.T) {
+	body := buildRouteBody(t, syscall.AF_INET,
+		net.ParseIP("0.0.0.0").To4(), 0,
+		uint32(MsgNexthop|MsgMetric),
+		RouteStatic, 0,
+		[]testNexthop{
+			{nhType: NHIPv4IFIndex, gate: net.ParseIP("192.168.1.1").To4(), ifindex: 5},
+		},
+		0, 0,
+	)
+
+	route, err := DecodeRoute(body)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if route.Prefix.String() != "0.0.0.0/0" {
+		t.Errorf("Prefix = %s, want 0.0.0.0/0", route.Prefix.String())
+	}
+}
+
+// TestDecodeRouteIFIndexOnly decodes a nexthop that carries only an
+// interface index and no gateway.
+func TestDecodeRouteIFIndexOnly(t *testing.T) {
+	body := buildRouteBody(t, syscall.AF_INET,
+		net.ParseIP("10.0.0.0").To4(), 24,
+		uint32(MsgNexthop),
+		RouteConnect, 0,
+		[]testNexthop{
+			{nhType: NHIFIndex, ifindex: 7},
+		},
+		0, 0,
+	)
+
+	route, err := DecodeRoute(body)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if len(route.Nexthops) != 1 {
+		t.Fatalf("Nexthops = %d, want 1", len(route.Nexthops))
+	}
+	if route.Nexthops[0].Ifindex != 7 {
+		t.Errorf("Ifindex = %d, want 7", route.Nexthops[0].Ifindex)
+	}
+}
+
+// testNexthop describes one nexthop for the test encoders below.
+type testNexthop struct {
+	nhType  NHType
+	gate    net.IP
+	ifindex uint32
+}
+
+// TestDecodeRouteFRRRedistribute tests the exact wire format that FRR's
+// zsend_redistribute_route produces: message flags 0x17 = nexthop|distance|metric|mtu.
+func TestDecodeRouteFRRRedistribute(t *testing.T) {
+	msgFlags := uint32(MsgNexthop | MsgDistance | MsgMetric | MsgMTU)
+
+	// The body is assembled by hand, field by field, in wire order rather
+	// than through buildRouteBody, so the byte layout itself is under test.
+	var body []byte
+	body = append(body, uint8(RouteRIP)) // type
+	body = append(body, 0, 0)            // instance
+	tmp := make([]byte, 4)
+	binary.BigEndian.PutUint32(tmp, 0) // flags
+	body = append(body, tmp...)
+	binary.BigEndian.PutUint32(tmp, msgFlags)
+	body = append(body, tmp...)
+	body = append(body, 1)               // safi=unicast
+	body = append(body, syscall.AF_INET) // family
+	body = append(body, 24)              // prefixlen
+	body = append(body, 192, 168, 10)    // prefix (3 bytes for /24)
+
+	// 1 nexthop: type=IFINDEX, flags=0, ifindex=5
+	binary.BigEndian.PutUint16(tmp[:2], 1) // nexthop count
+	body = append(body, tmp[:2]...)
+	body = append(body, 0, 0, 0, 0)       // vrfID
+	body = append(body, uint8(NHIFIndex)) // nh type
+	body = append(body, 0)                // nh flags
+	binary.BigEndian.PutUint32(tmp, 5)    // ifindex
+	body = append(body, tmp...)
+
+	// Trailing optional fields, in the order the message flags imply.
+	body = append(body, 120)           // distance (RIP=120)
+	binary.BigEndian.PutUint32(tmp, 3) // metric
+	body = append(body, tmp...)
+	binary.BigEndian.PutUint32(tmp, 1500) // mtu
+	body = append(body, tmp...)
+
+	route, err := DecodeRoute(body)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if route.Type != RouteRIP {
+		t.Errorf("Type = %d, want %d", route.Type, RouteRIP)
+	}
+	// Only three prefix bytes were sent; the decoder must pad the fourth.
+	if route.Prefix.String() != "192.168.10.0/24" {
+		t.Errorf("Prefix = %s, want 192.168.10.0/24", route.Prefix.String())
+	}
+	if route.Distance != 120 {
+		t.Errorf("Distance = %d, want 120", route.Distance)
+	}
+	if route.Metric != 3 {
+		t.Errorf("Metric = %d, want 3", route.Metric)
+	}
+	if route.MTU != 1500 {
+		t.Errorf("MTU = %d, want 1500", route.MTU)
+	}
+	if len(route.Nexthops) != 1 {
+		t.Fatalf("Nexthops = %d, want 1", len(route.Nexthops))
+	}
+	if route.Nexthops[0].Ifindex != 5 {
+		t.Errorf("Ifindex = %d, want 5", route.Nexthops[0].Ifindex)
+	}
+}
+
+// TestDecodeRouteWithTag tests message flags 0x1f = nexthop|distance|metric|tag|mtu.
+func TestDecodeRouteWithTag(t *testing.T) {
+	msgFlags := uint32(MsgNexthop | MsgDistance | MsgMetric | MsgTag | MsgMTU)
+
+	var body []byte
+	body = append(body, uint8(RouteOSPF)) // type
+	body = append(body, 0, 0)             // instance
+	tmp := make([]byte, 4)
+	binary.BigEndian.PutUint32(tmp, 0)
+	body = append(body, tmp...) // flags
+	binary.BigEndian.PutUint32(tmp, msgFlags)
+	body = append(body, tmp...)          // message
+	body = append(body, 1)               // safi
+	body = append(body, syscall.AF_INET) // family
+	body = append(body, 32)              // prefixlen
+	body = append(body, 10, 1, 1, 1)     // prefix
+
+	binary.BigEndian.PutUint16(tmp[:2], 1)
+	body = append(body, tmp[:2]...) // nexthop count
+	body = append(body, 0, 0, 0, 0) // vrfID
+	body = append(body, uint8(NHIPv4IFIndex))
+	body = append(body, 0)           // nh flags
+	body = append(body, 10, 0, 0, 1) // gate
+	binary.BigEndian.PutUint32(tmp, 3)
+	body = append(body, tmp...) // ifindex
+
+	body = append(body, 110) // distance (OSPF=110)
+	binary.BigEndian.PutUint32(tmp, 20)
+	body = append(body, tmp...) // metric
+	binary.BigEndian.PutUint32(tmp, 42)
+	body = append(body, tmp...) // tag
+	binary.BigEndian.PutUint32(tmp, 9000)
+	body = append(body, tmp...) // mtu
+
+	route, err := DecodeRoute(body)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if route.Distance != 110 {
+		t.Errorf("Distance = %d, want 110", route.Distance)
+	}
+	if route.Metric != 20 {
+		t.Errorf("Metric = %d, want 20", route.Metric)
+	}
+	if route.Tag != 42 {
+		t.Errorf("Tag = %d, want 42", route.Tag)
+	}
+	if route.MTU != 9000 {
+		t.Errorf("MTU = %d, want 9000", route.MTU)
+	}
+}
+
+// buildRouteBody assembles a route message body in wire order for the
+// decoder tests. Nexthops, distance and metric are emitted only when the
+// matching bit is set in msgFlags; tag and MTU are never emitted.
+func buildRouteBody(t *testing.T, family uint8, prefix net.IP, prefixLen uint8, msgFlags uint32, routeType RouteType, flags uint32, nexthops []testNexthop, distance uint8, metric uint32) []byte {
+	t.Helper()
+	var buf []byte
+
+	buf = append(buf, uint8(routeType))
+
+	// instance(2)
+	buf = append(buf, 0, 0)
+
+	// flags(4)
+	tmp := make([]byte, 4)
+	binary.BigEndian.PutUint32(tmp, flags)
+	buf = append(buf, tmp...)
+
+	// message(4)
+	binary.BigEndian.PutUint32(tmp, msgFlags)
+	buf = append(buf, tmp...)
+
+	// safi(1) = unicast
+	buf = append(buf, 1)
+
+	// family(1)
+	buf = append(buf, family)
+
+	// prefixlen(1)
+	buf = append(buf, prefixLen)
+
+	// prefix bytes
+	byteLen := int((prefixLen + 7) / 8)
+	buf = append(buf, prefix[:byteLen]...)
+
+	// nexthops
+	if MsgFlag(msgFlags)&MsgNexthop != 0 {
+		nhCount := make([]byte, 2)
+		binary.BigEndian.PutUint16(nhCount, uint16(len(nexthops)))
+		buf = append(buf, nhCount...)
+
+		for _, nh := range nexthops {
+			buf = append(buf, encodeTestNexthop(nh)...)
+		}
+	}
+
+	// distance
+	if MsgFlag(msgFlags)&MsgDistance != 0 {
+		buf = append(buf, distance)
+	}
+
+	// metric
+	if MsgFlag(msgFlags)&MsgMetric != 0 {
+		binary.BigEndian.PutUint32(tmp, metric)
+		buf = append(buf, tmp...)
+	}
+
+	return buf
+}
+
+// encodeTestNexthop encodes one nexthop with vrfID 0 and no flags. The
+// plain IPv4/IPv6 types are written with the same gate+ifindex layout as
+// their *IFIndex variants.
+func encodeTestNexthop(nh testNexthop) []byte {
+	var buf []byte
+
+	// vrfID(4) = 0
+	buf = append(buf, 0, 0, 0, 0)
+
+	// type(1)
+	buf = append(buf, uint8(nh.nhType))
+
+	// flags(1) = 0
+	buf = append(buf, 0)
+
+	nhType := nh.nhType
+	switch nhType {
+	case NHIPv4:
+		nhType = NHIPv4IFIndex
+	case NHIPv6:
+		nhType = NHIPv6IFIndex
+	}
+
+	switch nhType {
+	case NHIPv4IFIndex:
+		buf = append(buf, nh.gate.To4()...)
+	case NHIPv6IFIndex:
+		buf = append(buf, nh.gate.To16()...)
+	}
+
+	switch nhType {
+	case NHIFIndex, NHIPv4IFIndex, NHIPv6IFIndex:
+		tmp := make([]byte, 4)
+		binary.BigEndian.PutUint32(tmp, nh.ifindex)
+		buf = append(buf, tmp...)
+	}
+
+	return buf
+}
diff --git a/src/yangerd/internal/zapiwatcher/zapiwatcher.go b/src/yangerd/internal/zapiwatcher/zapiwatcher.go
new file mode 100644
index 000000000..6e4e8c638
--- /dev/null
+++ b/src/yangerd/internal/zapiwatcher/zapiwatcher.go
@@ -0,0 +1,471 @@
+package zapiwatcher
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"log/slog"
+	"net"
+	"regexp"
+	"strconv"
+	"strings"
+	"time"
+
+	"github.com/kernelkit/infix/src/yangerd/internal/backoff"
+	"github.com/kernelkit/infix/src/yangerd/internal/tree"
+	"github.com/kernelkit/infix/src/yangerd/internal/zapi"
+)
+
+const (
+	zapiSocketPath = "/var/run/frr/zserv.api"
+	routingTreeKey = "ietf-routing:routing"
+
+	// debounceDelay coalesces a burst of ZAPI route notifications into a
+	// single RIB read. FRR emits many RouteAdd/Del messages while a
+	// protocol converges; without debouncing we would re-read the table
+	// dozens of times in a few milliseconds.
+	debounceDelay = 200 * time.Millisecond
+)
+
+// subscribeTypes are the route types we ask zebra to redistribute. We do
+// not use the route payloads themselves -- redistribution only ever
+// delivers the selected route per destination and does not reliably send
+// a delete when a route is superseded. The subscription exists purely so
+// zebra notifies us that *something* changed; the authoritative table is
+// then read from zebra's vty socket (see RouteQuerier).
+var subscribeTypes = []zapi.RouteType{
+	zapi.RouteKernel,
+	zapi.RouteConnect,
+	zapi.RouteLocal,
+	zapi.RouteStatic,
+	zapi.RouteRIP,
+	zapi.RouteOSPF,
+}
+
+// RouteQuerier runs a "show ... json" command against FRR and returns its
+// raw output. Production code uses an frrvty.Client (zebra's vty socket);
+// tests inject a fake.
+type RouteQuerier interface {
+	Query(ctx context.Context, command string) ([]byte, error)
+}
+
+// ZAPIWatcher keeps the operational RIB (ietf-routing:routing/ribs) in
+// sync with FRR. It does NOT reconstruct routes from the ZAPI stream:
+// the ZAPI socket is used only as a change trigger, and the full routing
+// table -- every candidate per destination, with FRR's own
+// selected/installed flags, exactly as "show ip route" renders it -- is
+// read from zebra's vty socket on each change. Because every refresh is
+// a complete snapshot, a route removed from zebra simply disappears; we
+// never depend on receiving a ZAPI delete.
+type ZAPIWatcher struct {
+	tree    *tree.Tree
+	querier RouteQuerier
+	log     *slog.Logger
+	refresh chan struct{}
+}
+
+// New returns a watcher that writes into t and reads routes through
+// querier. A nil log falls back to slog.Default().
+func New(t *tree.Tree, querier RouteQuerier, log *slog.Logger) *ZAPIWatcher {
+	if log == nil {
+		log = slog.Default()
+	}
+	return &ZAPIWatcher{
+		tree:    t,
+		querier: querier,
+		log:     log,
+		refresh: make(chan struct{}, 1),
+	}
+}
+
+// Run starts the refresh worker and then keeps a ZAPI connection to zebra
+// open, reconnecting with backoff, until ctx is cancelled. It returns
+// ctx's error, or the error reported by backoff.Sleep.
+func (w *ZAPIWatcher) Run(ctx context.Context) error {
+	// The refresh worker owns all writes to the tree and runs for the
+	// lifetime of the watcher, independent of the ZAPI connection.
+	go w.refreshLoop(ctx)
+
+	bo := backoff.Default()
+	delay := bo.Initial
+
+	for {
+		conn, err := w.connect(ctx)
+		if err != nil {
+			if ctx.Err() != nil {
+				return ctx.Err()
+			}
+
+			w.log.Warn("zapi watcher: connect failed", "err", err, "delay", delay)
+			if err := backoff.Sleep(ctx, delay); err != nil {
+				return err
+			}
+			delay = bo.Next(delay)
+			continue
+		}
+
+		delay = bo.Initial
+		w.log.Info("zapi watcher: connected", "socket", zapiSocketPath)
+
+		// Read the current table now that we are subscribed, so we have
+		// data even if no further events arrive.
+		w.triggerRefresh()
+
+		err = w.processMessages(ctx, conn)
+		_ = conn.Close()
+		if ctx.Err() != nil {
+			return ctx.Err()
+		}
+
+		w.log.Warn("zapi watcher: disconnected", "err", err)
+
+		// Pause before redialling. If zebra accepts the connection and
+		// then drops it straight away, reconnecting without a delay would
+		// spin this loop and flood the log.
+		if err := backoff.Sleep(ctx, delay); err != nil {
+			return err
+		}
+	}
+}
+
+// connect dials zebra's ZAPI socket and sends the hello, router-id and
+// redistribute subscriptions. The connection is closed again if any of
+// those writes fails.
+func (w *ZAPIWatcher) connect(ctx context.Context) (net.Conn, error) {
+	d := net.Dialer{}
+	conn, err := d.DialContext(ctx, "unix", zapiSocketPath)
+	if err != nil {
+		return nil, fmt.Errorf("dial zserv: %w", err)
+	}
+
+	hello := zapi.BuildMessage(zapi.CmdHello, zapi.DefaultVrf, zapi.EncodeHello())
+	if _, err := conn.Write(hello); err != nil {
+		conn.Close()
+		return nil, fmt.Errorf("send hello: %w", err)
+	}
+
+	for _, afi := range []uint8{zapi.AFIIPv4, zapi.AFIIPv6} {
+		msg := zapi.BuildMessage(zapi.CmdRouterIDAdd, zapi.DefaultVrf, zapi.EncodeRouterIDAdd(afi))
+		if _, err := conn.Write(msg); err != nil {
+			conn.Close()
+			return nil, fmt.Errorf("send router-id-add: %w", err)
+		}
+	}
+
+	for _, rt := range subscribeTypes {
+		for _, afi := range []uint8{zapi.AFIIPv4, zapi.AFIIPv6} {
+			msg := zapi.BuildMessage(zapi.CmdRedistributeAdd, zapi.DefaultVrf, zapi.EncodeRedistributeAdd(afi, rt))
+			if _, err := conn.Write(msg); err != nil {
+				conn.Close()
+				return nil, fmt.Errorf("send redistribute-add: %w", err)
+			}
+			w.log.Debug("zapi watcher: subscribed", "afi", afi, "routeType", rt)
+		}
+	}
+
+	return conn, nil
+}
+
+// processMessages drains the ZAPI stream. We only care *that* a route
+// changed, not what changed -- each route add/delete triggers a debounced
+// re-read of the full table.
+//
+// It returns when reading fails or ctx is cancelled. Cancellation closes
+// conn so that a read already in progress is interrupted.
+func (w *ZAPIWatcher) processMessages(ctx context.Context, conn net.Conn) error {
+	// The ctx check below only runs between messages. Closing the
+	// connection on cancellation makes a pending read return, so shutdown
+	// does not wait for zebra to send something.
+	stop := context.AfterFunc(ctx, func() { _ = conn.Close() })
+	defer stop()
+
+	for {
+		select {
+		case <-ctx.Done():
+			return ctx.Err()
+		default:
+		}
+
+		hdr, _, err := zapi.ReadMessage(conn)
+		if err != nil {
+			return fmt.Errorf("read message: %w", err)
+		}
+
+		switch hdr.Command {
+		case zapi.CmdRedistRouteAdd, zapi.CmdRedistRouteDel:
+			w.log.Debug("zapi watcher: route change", "cmd", hdr.Command, "vrf", hdr.VrfID)
+			w.triggerRefresh()
+		}
+	}
+}
+
+// triggerRefresh requests a table re-read. The buffered channel collapses
+// multiple pending requests into one.
+func (w *ZAPIWatcher) triggerRefresh() {
+	select {
+	case w.refresh <- struct{}{}:
+	default:
+	}
+}
+
+// refreshLoop waits for refresh requests and, after debounceDelay, writes
+// a fresh snapshot of the routing table. It returns when ctx is cancelled.
+func (w *ZAPIWatcher) refreshLoop(ctx context.Context) {
+	for {
+		select {
+		case <-ctx.Done():
+			return
+		case <-w.refresh:
+		}
+
+		// Let a burst of notifications settle before reading.
+		select {
+		case <-ctx.Done():
+			return
+		case <-time.After(debounceDelay):
+		}
+		// Drain a request that arrived during the debounce window; the
+		// upcoming read already reflects it.
+		select {
+		case <-w.refresh:
+		default:
+		}
+
+		w.writeRibs(ctx)
+	}
+}
+
+// writeRibs reads the full IPv4 and IPv6 routing tables from zebra and
+// replaces the ribs subtree. On a query error it leaves the previous
+// data untouched rather than blanking the table.
+func (w *ZAPIWatcher) writeRibs(ctx context.Context) {
+	// Fetch both families before touching the tree; a failure on either
+	// one abandons the whole update.
+	v4Routes, err := w.collectRoutes(ctx, "ipv4")
+	if err != nil {
+		w.log.Warn("zapi watcher: read ipv4 routes", "err", err)
+		return
+	}
+	v6Routes, err := w.collectRoutes(ctx, "ipv6")
+	if err != nil {
+		w.log.Warn("zapi watcher: read ipv6 routes", "err", err)
+		return
+	}
+
+	// makeRib builds one entry of the "rib" list.
+	makeRib := func(name, addressFamily string, routes []json.RawMessage) map[string]any {
+		return map[string]any{
+			"name":           name,
+			"address-family": addressFamily,
+			"routes":         map[string]any{"route": routes},
+		}
+	}
+
+	payload := map[string]any{
+		"ribs": map[string]any{
+			"rib": []map[string]any{
+				makeRib("ipv4", "ietf-routing:ipv4", v4Routes),
+				makeRib("ipv6", "ietf-routing:ipv6", v6Routes),
+			},
+		},
+	}
+
+	encoded, err := json.Marshal(payload)
+	if err != nil {
+		w.log.Error("zapi watcher: marshal ribs", "err", err)
+		return
+	}
+
+	w.tree.Merge(routingTreeKey, encoded)
+}
+
+// collectRoutes queries FRR for one address family ("show ipv6 route
+// json" when family is "ipv6", "show ip route json" otherwise) and turns
+// every candidate route into an ietf-routing route node. Keys without a
+// "/" are skipped. The result is never nil on success, so an empty table
+// marshals as an empty list.
+func (w *ZAPIWatcher) collectRoutes(ctx context.Context, family string) ([]json.RawMessage, error) {
+	var command string
+	switch family {
+	case "ipv6":
+		command = "show ipv6 route json"
+	default:
+		command = "show ip route json"
+	}
+
+	raw, err := w.querier.Query(ctx, command)
+	if err != nil {
+		return nil, err
+	}
+
+	// An empty table arrives as "{}"; anything else is an object keyed by
+	// prefix whose values are lists of candidate routes.
+	var byPrefix map[string][]map[string]any
+	if err := json.Unmarshal(raw, &byPrefix); err != nil {
+		return nil, fmt.Errorf("parse %q: %w", command, err)
+	}
+
+	// One timestamp for the whole snapshot, so every last-updated value
+	// is computed against the same instant.
+	stamp := time.Now()
+	result := make([]json.RawMessage, 0, len(byPrefix))
+	for prefix, candidates := range byPrefix {
+		if strings.Contains(prefix, "/") {
+			for _, candidate := range candidates {
+				result = append(result, transformRoute(family, prefix, candidate, stamp))
+			}
+		}
+	}
+	return result, nil
+}
+
+// protocolMap maps FRR's protocol names to IETF routing-protocol
+// identities. Unknown protocols fall back to kernel so they still
+// validate against the model.
+var protocolMap = map[string]string{
+	"kernel":    "infix-routing:kernel",
+	"connected": "ietf-routing:direct",
+	"local":     "ietf-routing:direct",
+	"static":    "ietf-routing:static",
+	"ospf":      "ietf-ospf:ospfv2",
+	"ospf6":     "ietf-ospf:ospfv3",
+	"rip":       "ietf-rip:rip",
+	"ripng":     "ietf-rip:rip",
+}
+
+// protocolName looks frr up in protocolMap and returns
+// "infix-routing:kernel" for names that are not listed there.
+func protocolName(frr string) string {
+	mapped, known := protocolMap[frr]
+	if !known {
+		return "infix-routing:kernel"
+	}
+	return mapped
+}
+
+// transformRoute builds the ietf-routing route node for a single FRR JSON
+// route entry and returns it as encoded JSON ("{}" if encoding fails).
+// family selects the ipv4 or ipv6 leaf names, prefixKey is used when the
+// entry has no "prefix" of its own, and now is the reference time for
+// last-updated. It mirrors the legacy yanger ietf_routing.py:add_protocol.
+func transformRoute(family, prefixKey string, route map[string]any, now time.Time) json.RawMessage {
+	// Leaf names and the host prefix length differ per address family.
+	var addrKey, dpKey, nhAddrKey, hostLen string
+	if family == "ipv6" {
+		addrKey = "ietf-ipv6-unicast-routing:address"
+		dpKey = "ietf-ipv6-unicast-routing:destination-prefix"
+		nhAddrKey = "ietf-ipv6-unicast-routing:next-hop-address"
+		hostLen = "128"
+	} else {
+		addrKey = "ietf-ipv4-unicast-routing:address"
+		dpKey = "ietf-ipv4-unicast-routing:destination-prefix"
+		nhAddrKey = "ietf-ipv4-unicast-routing:next-hop-address"
+		hostLen = "32"
+	}
+
+	// Destination: the entry's own prefix, else the table key. A bare
+	// address gets the entry's prefixLen, or the host length as a last
+	// resort.
+	destination := stringField(route, "prefix")
+	if destination == "" {
+		destination = prefixKey
+	}
+	if !strings.Contains(destination, "/") {
+		length := hostLen
+		if raw, present := route["prefixLen"]; present {
+			length = strconv.Itoa(toInt(raw))
+		}
+		destination = destination + "/" + length
+	}
+
+	protocol := stringField(route, "protocol")
+	age := parseUptime(stringField(route, "uptime"))
+
+	node := map[string]any{}
+	node[dpKey] = destination
+	node["source-protocol"] = protocolName(protocol)
+	node["route-preference"] = toInt(route["distance"])
+	node["last-updated"] = now.Add(-age).Format(time.RFC3339)
+
+	// Metric is modelled only for OSPF and RIP routes.
+	if strings.Contains(protocol, "ospf") {
+		node["ietf-ospf:metric"] = toInt(route["metric"])
+	} else if strings.Contains(protocol, "rip") {
+		node["ietf-rip:metric"] = toInt(route["metric"])
+	}
+
+	// "selected" is FRR's own best-path decision -- the '>' in
+	// "show ip route". active is a presence leaf, encoded as [null].
+	if boolField(route, "selected") {
+		node["active"] = []any{nil}
+	}
+
+	routeInstalled := boolField(route, "installed")
+
+	hopList := make([]map[string]any, 0)
+	rawHops, _ := route["nexthops"].([]any)
+	for _, rawHop := range rawHops {
+		hop, isObject := rawHop.(map[string]any)
+		if !isObject {
+			continue
+		}
+
+		entry := map[string]any{}
+		// A gateway address takes precedence; the interface name is used
+		// only when no address is present.
+		gateway := stringField(hop, "ip")
+		if gateway != "" {
+			entry[addrKey] = gateway
+		} else if iface := stringField(hop, "interfaceName"); iface != "" {
+			entry["outgoing-interface"] = iface
+		}
+		// zebra marks the nexthop programmed into the FIB with
+		// "fib":true (see zebra/zebra_vty.c).
+		if routeInstalled && boolField(hop, "fib") {
+			entry["infix-routing:installed"] = []any{nil}
+		}
+		if len(entry) == 0 {
+			continue
+		}
+		hopList = append(hopList, entry)
+	}
+
+	if len(hopList) == 0 {
+		// No usable nexthop list: fall back to a simple next-hop built
+		// from the route entry itself.
+		simple := map[string]any{}
+		switch protocol {
+		case "blackhole":
+			simple["special-next-hop"] = "blackhole"
+		case "unreachable":
+			simple["special-next-hop"] = "unreachable"
+		default:
+			if iface := stringField(route, "interfaceName"); iface != "" {
+				simple["outgoing-interface"] = iface
+			}
+			if gateway := stringField(route, "nexthop"); gateway != "" {
+				simple[nhAddrKey] = gateway
+			}
+		}
+		node["next-hop"] = simple
+	} else {
+		node["next-hop"] = map[string]any{
+			"next-hop-list": map[string]any{
+				"next-hop": hopList,
+			},
+		}
+	}
+
+	out, err := json.Marshal(node)
+	if err != nil {
+		return json.RawMessage(`{}`)
+	}
+	return out
+}
+
+// stringField returns m[key] when it holds a string, otherwise "".
+func stringField(m map[string]any, key string) string {
+	s, _ := m[key].(string)
+	return s
+}
+
+// boolField returns m[key] when it holds a bool, otherwise false.
+func boolField(m map[string]any, key string) bool {
+	if b, ok := m[key].(bool); ok {
+		return b
+	}
+	return false
+}
+
+// toInt converts the numeric shapes a decoded JSON value can take
+// (float64, int, int64, json.Number, numeric string) to int. Conversion
+// errors are ignored and any other type yields 0.
+func toInt(v any) int {
+	switch value := v.(type) {
+	case int:
+		return value
+	case int64:
+		return int(value)
+	case float64:
+		return int(value)
+	case json.Number:
+		parsed, _ := value.Int64()
+		return int(parsed)
+	case string:
+		parsed, _ := strconv.Atoi(value)
+		return parsed
+	default:
+		return 0
+	}
+}
+
+// FRR uptime string formats (frrtime), ported from yanger's
+// uptime2datetime: "HH:MM:SS", "XdXXhXXm", "XXwXdXXh".
+var (
+	uptimeHMS = regexp.MustCompile(`^(\d{2}):(\d{2}):(\d{2})$`)
+	uptimeDHM = regexp.MustCompile(`^(\d+)d(\d{2})h(\d{2})m$`)
+	uptimeWDH = regexp.MustCompile(`^(\d{2})w(\d)d(\d{2})h$`)
+)
+
+// parseUptime turns an FRR uptime string into a duration, which the
+// caller subtracts from the current time to obtain last-updated. Each
+// recognised layout has three numeric fields; input that matches none of
+// them yields zero (i.e. last-updated == now).
+func parseUptime(s string) time.Duration {
+	// field parses one captured group; the patterns only capture digits,
+	// so the conversion error is ignored.
+	field := func(text string) time.Duration {
+		n, _ := strconv.Atoi(text)
+		return time.Duration(n)
+	}
+
+	// Each layout pairs a pattern with the unit of its three capture
+	// groups, in capture order.
+	layouts := []struct {
+		pattern *regexp.Regexp
+		units   [3]time.Duration
+	}{
+		{uptimeHMS, [3]time.Duration{time.Hour, time.Minute, time.Second}},
+		{uptimeDHM, [3]time.Duration{24 * time.Hour, time.Hour, time.Minute}},
+		{uptimeWDH, [3]time.Duration{7 * 24 * time.Hour, 24 * time.Hour, time.Hour}},
+	}
+
+	for _, layout := range layouts {
+		groups := layout.pattern.FindStringSubmatch(s)
+		if groups == nil {
+			continue
+		}
+		var total time.Duration
+		for i, unit := range layout.units {
+			total += field(groups[i+1]) * unit
+		}
+		return total
+	}
+	return 0
+}
diff --git a/src/yangerd/internal/zapiwatcher/zapiwatcher_test.go b/src/yangerd/internal/zapiwatcher/zapiwatcher_test.go
new file mode 100644
index 000000000..1541809a9
--- /dev/null
+++ b/src/yangerd/internal/zapiwatcher/zapiwatcher_test.go
@@ -0,0 +1,269 @@
+package zapiwatcher
+
+import (
+	"context"
+	"encoding/json"
+	"testing"
+	"time"
+
+	"github.com/kernelkit/infix/src/yangerd/internal/tree"
+)
+
+// fakeQuerier returns canned vtysh JSON per command.
+type fakeQuerier struct {
+	ipv4 string // reply for any command other than "show ipv6 route json"
+	ipv6 string // reply for "show ipv6 route json"
+	err  error  // when set, every Query fails with this error
+}
+
+// Query returns the canned reply for command. An unset reply is served as
+// "{}".
+func (f fakeQuerier) Query(_ context.Context, command string) ([]byte, error) {
+	if f.err != nil {
+		return nil, f.err
+	}
+	if command == "show ipv6 route json" {
+		if f.ipv6 == "" {
+			return []byte("{}"), nil
+		}
+		return []byte(f.ipv6), nil
+	}
+	if f.ipv4 == "" {
+		return []byte("{}"), nil
+	}
+	return []byte(f.ipv4), nil
+}
+
+// ipv4Routes reads the routing key from tr and returns the route list of
+// the rib named "ipv4". It fails the test when the key or the rib is
+// missing.
+func ipv4Routes(t *testing.T, tr *tree.Tree) []map[string]any {
+	t.Helper()
+	data := tr.Get(routingTreeKey)
+	if data == nil {
+		t.Fatal("routing tree key not set")
+	}
+	var routing map[string]any
+	if err := json.Unmarshal(data, &routing); err != nil {
+		t.Fatalf("unmarshal routing: %v", err)
+	}
+	ribs := routing["ribs"].(map[string]any)
+	for _, rib := range ribs["rib"].([]any) {
+		rm := rib.(map[string]any)
+		if rm["name"] == "ipv4" {
+			out := []map[string]any{}
+			for _, r := range rm["routes"].(map[string]any)["route"].([]any) {
+				out = append(out, r.(map[string]any))
+			}
+			return out
+		}
+	}
+	t.Fatal("ipv4 rib not found")
+	return nil
+}
+
+// TestProtocolName checks the FRR-to-IETF protocol mapping, including the
+// kernel fallback for names that are not in the map.
+func TestProtocolName(t *testing.T) {
+	cases := map[string]string{
+		"kernel":    "infix-routing:kernel",
+		"connected": "ietf-routing:direct",
+		"local":     "ietf-routing:direct",
+		"static":    "ietf-routing:static",
+		"ospf":      "ietf-ospf:ospfv2",
+		"rip":       "ietf-rip:rip",
+		"bgp":       "infix-routing:kernel", // unknown -> kernel
+		"":          "infix-routing:kernel",
+	}
+	for in, want := range cases {
+		if got := protocolName(in); got != want {
+			t.Errorf("protocolName(%q) = %q, want %q", in, got, want)
+		}
+	}
+}
+
+// TestParseUptime covers each of the three uptime layouts plus input that
+// matches none of them.
+func TestParseUptime(t *testing.T) {
+	cases := map[string]time.Duration{
+		"02:09:02": 2*time.Hour + 9*time.Minute + 2*time.Second,
+		"00:00:30": 30 * time.Second,
+		"3d04h05m": 3*24*time.Hour + 4*time.Hour + 5*time.Minute,
+		"02w3d04h": 2*7*24*time.Hour + 3*24*time.Hour + 4*time.Hour,
+		"bogus":    0,
+		"":         0,
+	}
+	for in, want := range cases {
+		if got := parseUptime(in); got != want {
+			t.Errorf("parseUptime(%q) = %v, want %v", in, got, want)
+		}
+	}
+}
+
+// The user's exact bug: a static route is in the FIB (selected, distance
+// 120) while a stale OSPF entry (distance 110) is still listed but not
+// selected. active must follow FRR's "selected" flag, not the lowest
+// admin distance.
+func TestActiveFollowsSelectedNotDistance(t *testing.T) {
+	const j = `{
+		"192.168.20.0/24":[
+			{"prefix":"192.168.20.0/24","protocol":"ospf","selected":false,"distance":110,"metric":100,"uptime":"02:11:49",
+			 "nexthops":[{"ip":"192.168.60.2","interfaceName":"e3","active":true}]},
+			{"prefix":"192.168.20.0/24","protocol":"static","selected":true,"installed":true,"distance":120,"metric":0,"uptime":"02:09:02",
+			 "nexthops":[{"ip":"192.168.50.2","interfaceName":"e7","fib":true,"active":true}]}
+		]
+	}`
+
+	tr := tree.New()
+	w := New(tr, fakeQuerier{ipv4: j}, nil)
+	w.writeRibs(context.Background())
+
+	routes := ipv4Routes(t, tr)
+	if len(routes) != 2 {
+		t.Fatalf("expected 2 candidate routes, got %d", len(routes))
+	}
+
+	// The two candidates are told apart by route-preference, because the
+	// order of the route list is not fixed.
+	for _, r := range routes {
+		pref := toInt(r["route-preference"])
+		_, active := r["active"]
+		switch pref {
+		case 120:
+			if !active {
+				t.Error("static route (pref 120, selected) must be active")
+			}
+		case 110:
+			if active {
+				t.Error("ospf route (pref 110, not selected) must NOT be active")
+			}
+		default:
+			t.Errorf("unexpected route-preference %d", pref)
+		}
+	}
+}
+
+// A full snapshot read means a route removed from zebra disappears from
+// the cache without any ZAPI delete.
+func TestSnapshotPurgesRemovedRoutes(t *testing.T) {
+	const before = `{
+		"192.168.20.0/24":[
+			{"prefix":"192.168.20.0/24","protocol":"ospf","selected":true,"distance":110,"uptime":"00:05:00","nexthops":[{"ip":"192.168.60.2"}]}
+		]
+	}`
+	const after = `{
+		"192.168.20.0/24":[
+			{"prefix":"192.168.20.0/24","protocol":"static","selected":true,"installed":true,"distance":120,"uptime":"00:01:00","nexthops":[{"ip":"192.168.50.2","fib":true}]}
+		]
+	}`
+
+	tr := tree.New()
+	New(tr, fakeQuerier{ipv4: before}, nil).writeRibs(context.Background())
+	if got := len(ipv4Routes(t, tr)); got != 1 {
+		t.Fatalf("before: expected 1 route, got %d", got)
+	}
+
+	// zebra now has only the static route; the OSPF route is gone with no
+	// delete event. A fresh snapshot must not carry the corpse forward.
+	New(tr, fakeQuerier{ipv4: after}, nil).writeRibs(context.Background())
+	routes := ipv4Routes(t, tr)
+	if len(routes) != 1 {
+		t.Fatalf("after: expected 1 route, got %d", len(routes))
+	}
+	if got := protocolName("static"); routes[0]["source-protocol"] != got {
+		t.Errorf("surviving route protocol = %v, want %v", routes[0]["source-protocol"], got)
+	}
+}
+
+// TestTransformRouteFields checks every leaf transformRoute emits for a
+// selected, installed OSPF route with one FIB nexthop.
+func TestTransformRouteFields(t *testing.T) {
+	entry := map[string]any{
+		"prefix":    "10.0.0.0/24",
+		"protocol":  "ospf",
+		"selected":  true,
+		"installed": true,
+		"distance":  float64(110),
+		"metric":    float64(20),
+		"uptime":    "01:00:00",
+		"nexthops": []any{
+			map[string]any{"ip": "192.168.1.1", "interfaceName": "e1", "fib": true},
+		},
+	}
+
+	// A fixed reference time makes last-updated deterministic.
+	now := time.Date(2026, 6, 10, 12, 0, 0, 0, time.UTC)
+	var parsed map[string]any
+	if err := json.Unmarshal(transformRoute("ipv4", "10.0.0.0/24", entry, now), &parsed); err != nil {
+		t.Fatalf("unmarshal: %v", err)
+	}
+
+	if parsed["ietf-ipv4-unicast-routing:destination-prefix"] != "10.0.0.0/24" {
+		t.Errorf("destination-prefix = %v", parsed["ietf-ipv4-unicast-routing:destination-prefix"])
+	}
+	if parsed["source-protocol"] != "ietf-ospf:ospfv2" {
+		t.Errorf("source-protocol = %v", parsed["source-protocol"])
+	}
+	if toInt(parsed["route-preference"]) != 110 {
+		t.Errorf("route-preference = %v", parsed["route-preference"])
+	}
+	if toInt(parsed["ietf-ospf:metric"]) != 20 {
+		t.Errorf("ietf-ospf:metric = %v", parsed["ietf-ospf:metric"])
+	}
+	if _, ok := parsed["active"]; !ok {
+		t.Error("selected route must have active leaf")
+	}
+	// last-updated = now - 1h
+	if parsed["last-updated"] != "2026-06-10T11:00:00Z" {
+		t.Errorf("last-updated = %v, want 2026-06-10T11:00:00Z", parsed["last-updated"])
+	}
+
+	hops := parsed["next-hop"].(map[string]any)["next-hop-list"].(map[string]any)["next-hop"].([]any)
+	if len(hops) != 1 {
+		t.Fatalf("expected 1 nexthop, got %d", len(hops))
+	}
+	hop := hops[0].(map[string]any)
+	if hop["ietf-ipv4-unicast-routing:address"] != "192.168.1.1" {
+		t.Errorf("nexthop address = %v", hop["ietf-ipv4-unicast-routing:address"])
+	}
+	if _, ok := hop["infix-routing:installed"]; !ok {
+		t.Error("fib nexthop must have infix-routing:installed")
+	}
+}
+
+// TestTransformRouteBlackhole checks that an entry with protocol
+// "blackhole" and no nexthops gets special-next-hop "blackhole".
+func TestTransformRouteBlackhole(t *testing.T) {
+	entry := map[string]any{
+		"prefix":   "10.1.0.0/24",
+		"protocol": "blackhole",
+		"distance": float64(0),
+		"uptime":   "00:00:10",
+	}
+	var parsed map[string]any
+	if err := json.Unmarshal(transformRoute("ipv4", "10.1.0.0/24", entry, time.Now()), &parsed); err != nil {
+		t.Fatalf("unmarshal: %v", err)
+	}
+	nh := parsed["next-hop"].(map[string]any)
+	if nh["special-next-hop"] != "blackhole" {
+		t.Errorf("special-next-hop = %v, want blackhole", nh["special-next-hop"])
+	}
+}
+
+// TestWriteRibsQueryErrorKeepsData checks that a failing query leaves the
+// stored routing data byte-for-byte unchanged.
+func TestWriteRibsQueryErrorKeepsData(t *testing.T) {
+	tr := tree.New()
+	// Seed with good data.
+	New(tr, fakeQuerier{ipv4: `{"10.0.0.0/24":[{"prefix":"10.0.0.0/24","protocol":"static","selected":true,"distance":1,"uptime":"00:00:05","nexthops":[{"ip":"10.0.0.1"}]}]}`}, nil).
+		writeRibs(context.Background())
+	before := tr.Get(routingTreeKey)
+
+	// A failing query must not blank the table.
+	New(tr, fakeQuerier{err: context.DeadlineExceeded}, nil).writeRibs(context.Background())
+	after := tr.Get(routingTreeKey)
+
+	if string(before) != string(after) {
+		t.Error("query error overwrote existing rib data")
+	}
+}
+
+// TestWriteRibsBothFamilies checks that one writeRibs call produces both
+// the ipv4 and the ipv6 rib.
+func TestWriteRibsBothFamilies(t *testing.T) {
+	tr := tree.New()
+	w := New(tr, fakeQuerier{
+		ipv4: `{"10.0.0.0/24":[{"prefix":"10.0.0.0/24","protocol":"static","selected":true,"distance":1,"uptime":"00:00:05","nexthops":[{"ip":"10.0.0.1"}]}]}`,
+		ipv6: `{"2001:db8::/64":[{"prefix":"2001:db8::/64","protocol":"connected","selected":true,"distance":0,"uptime":"00:00:05","nexthops":[{"interfaceName":"e1"}]}]}`,
+	}, nil)
+	w.writeRibs(context.Background())
+
+	var routing map[string]any
+	if err := json.Unmarshal(tr.Get(routingTreeKey), &routing); err != nil {
+		t.Fatalf("unmarshal: %v", err)
+	}
+	ribs := routing["ribs"].(map[string]any)["rib"].([]any)
+	if len(ribs) != 2 {
+		t.Fatalf("expected 2 ribs, got %d", len(ribs))
+	}
+}
diff --git a/src/yangerd/plan.md b/src/yangerd/plan.md
new file mode 100644
index 000000000..cdc8ba577
--- /dev/null
+++ b/src/yangerd/plan.md
@@ -0,0 +1,687 @@
+# yangerd Phase 1: Core + ietf-system
+
+**Status**: Complete
+**Started**: 2026-03-28
+**Design doc**: `src/statd/doc/yangerd-design.md`
+
+## Overview
+
+Build the yangerd Go daemon core infrastructure alongside the simplest
+module (`ietf-system`). This establishes the project skeleton, in-memory
+tree, IPC server, collector framework, and Buildroot packaging. All
+subsequent modules build on this foundation.
+ +## Constraints + +- No CGo (hard, non-negotiable — design doc Section 2.3) +- Not a sysrepo plugin — yangerd has no sysrepo dependency +- `uint64`/`int64`/`decimal64` YANG types serialized as JSON strings (RFC 7951 / libyang) +- Socket owned by `root:yangerd`, permissions `0660` +- Sources at `src/yangerd/`, module path `github.com/kernelkit/infix/src/yangerd` +- Buildroot: vendored deps (`GOFLAGS=-mod=vendor`), `$(eval $(golang-package))` +- NTP handled by separate `internal/collector/ntp.go` — excluded from system.go +- Ignore gRPC for FRR + +## Steps + +### Step 1: Project scaffolding +- **Status**: ✅ complete +- `src/yangerd/go.mod` — `module github.com/kernelkit/infix/src/yangerd`, go 1.21 +- `cmd/yangerd/main.go` — daemon entry: config → socket → tree → collectors → ready → signal → IPC serve +- `cmd/yangerctl/main.go` — CLI: get/health/dump subcommands with --socket/--timeout flags +- Directory tree per design doc Section 6 + +### Step 2: Core tree (`internal/tree/tree.go`) +- **Status**: ✅ complete +- `modelEntry` struct: `sync.RWMutex`, `json.RawMessage`, `time.Time` +- `Tree` struct: top-level `sync.RWMutex` protecting `map[string]*modelEntry` +- Methods: `New()`, `Set()`, `Get()`, `GetMulti()`, `Keys()`, `Info()` +- Double-checked locking in `Set()` for new keys +- Eventual consistency in `GetMulti()` (per-model read locks, not snapshot) + +### Step 3: IPC protocol + server +- **Status**: ✅ complete +- `internal/ipc/protocol.go` — framing (ver:1 + uint32 BE length + JSON), Request/Response types +- `internal/ipc/server.go` — Unix socket listener, connection handler, method routing, ready flag +- `internal/ipc/client.go` — Dial/query helper for yangerctl + +### Step 4: Collector framework +- **Status**: ✅ complete +- `internal/collector/collector.go` — Collector interface + RunAll() scheduling +- `internal/collector/runner.go` — CommandRunner + FileReader interfaces + production impls + +### Step 5: System collector (`internal/collector/system.go`) +- 
**Status**: ✅ complete +- ~658 lines implementing full ietf-system data collection +- Produces: `ietf-system:system` and `ietf-system:system-state` tree keys +- Sub-methods: addHostname, addTimezone, addUsers, addPlatform, addClock, + addSoftware, addSoftwareSlots, getBootOrder, addDNS, addServices, addResourceUsage, + addMemory, addLoadAvg, addFilesystems + +### Step 6: Config (`internal/config/config.go`) +- **Status**: ✅ complete +- Env var parsing: YANGERD_SOCKET, LOG_LEVEL, STARTUP_TIMEOUT, POLL_INTERVAL_SYSTEM, + ENABLE_WIFI/CONTAINERS/GPS + +### Step 7: Wire main.go startup +- **Status**: ✅ complete +- Config → socket → tree → collectors → ready → signal handling → IPC serve + +### Step 8: yangerctl (`cmd/yangerctl/main.go`) +- **Status**: ✅ complete +- Subcommands: get, health, dump with --socket/--timeout flags + +### Step 9: Tests +- **Status**: ✅ complete +- `internal/tree/tree_test.go` — Set/Get/GetMulti/Keys/Info + concurrent access (5 tests) +- `internal/ipc/protocol_test.go` — framing round-trip, version mismatch, oversized payload (3 tests) +- `internal/ipc/server_test.go` — get/dump/health/notReady/unknownMethod (5 tests) +- `internal/collector/system_test.go` — 17 tests covering all data sources, edge cases, failures +- `internal/testutil/mock.go` — reusable MockRunner + MockFileReader +- All tests pass with `-race` flag + +### Step 10: Buildroot packaging +- **Status**: ✅ complete +- `package/yangerd/yangerd.mk` — golang-package recipe with build targets + feature flags +- `package/yangerd/Config.in` — Kconfig entry with Go arch dependency +- `package/yangerd/yangerd.svc` — Finit service definition +- `package/Config.in` — source entry added + +## Bug fixes found during testing + +- **`fmt.Sprintf("%v", float64)` producing scientific notation**: In `addServices()`, JSON-decoded + float64 values like `4096000.0` were formatted as `"4.096e+06"` instead of `"4096000"`. Fixed by + using `strconv.Itoa(toInt(...))` for uint64 string encoding. 
Same fix applied to slot size in + `addSoftwareSlots()`. + +## File inventory + +### Production code (11 files) +| File | Lines | Description | +|------|-------|-------------| +| `go.mod` | 3 | Module definition | +| `internal/config/config.go` | ~85 | Environment variable parsing | +| `internal/tree/tree.go` | 119 | Per-model locked tree store | +| `internal/ipc/protocol.go` | ~120 | IPC framing + request/response types | +| `internal/ipc/server.go` | ~150 | Unix socket server | +| `internal/ipc/client.go` | ~80 | Client helper for yangerctl | +| `internal/collector/collector.go` | 51 | Collector interface + RunAll scheduler | +| `internal/collector/runner.go` | 40 | CommandRunner/FileReader interfaces | +| `internal/collector/system.go` | 658 | System data collector | +| `cmd/yangerd/main.go` | ~100 | Daemon entry point | +| `cmd/yangerctl/main.go` | ~120 | CLI diagnostic tool | + +### Test code (5 files) +| File | Tests | Description | +|------|-------|-------------| +| `internal/tree/tree_test.go` | 5 | Tree operations + concurrency | +| `internal/ipc/protocol_test.go` | 3 | Framing protocol | +| `internal/ipc/server_test.go` | 5 | IPC server end-to-end | +| `internal/collector/system_test.go` | 17 | System collector with mocks | +| `internal/testutil/mock.go` | — | Shared test mocks | + +### Buildroot packaging (4 files) +| File | Description | +|------|-------------| +| `package/yangerd/yangerd.mk` | Buildroot Go package recipe | +| `package/yangerd/Config.in` | Kconfig menu entry | +| `package/yangerd/yangerd.svc` | Finit service definition | +| `package/Config.in` | Updated: added yangerd source | + +## Reference files + +| File | Purpose | +|------|---------| +| `src/statd/doc/yangerd-design.md` | Authoritative design document | +| `src/statd/python/yanger/ietf_system.py` | Python reference impl (461 lines) | +| `src/confd/yang/confd/ietf-system@2014-08-06.yang` | Standard YANG model | +| `src/confd/yang/confd/infix-system.yang` | Infix augmentations | +| 
`src/confd/yang/confd/infix-system-software.yang` | Software submodule | +| `src/netbrowse/` | Existing Go project (pattern reference) | +| `package/netbrowse/netbrowse.mk` | Buildroot Go package template | + +## Notes + +- NTP data is handled by `internal/collector/ntp.go` (Phase 2), not system.go +- DNS statistics (cache-size, cache-hits, cache-misses from infix-system.yang) not in current Python impl +- `addFilesystems` uses `syscall.Statfs()` directly (not mockable via FileReader); not unit-tested +- `addHostname` uses `os.Hostname()` directly; not unit-tested in isolation +- `addTimezone` uses `filepath.EvalSymlinks()` directly; not unit-tested in isolation + +--- + +# yangerd Phase 2: Polling Collectors + +**Status**: Complete +**Started**: 2026-03-28 +**Completed**: 2026-03-29 + +## Overview + +Implement all polling-based collectors that replace the Python yanger +scripts. Reactive collectors (netlink/interfaces, ZAPI/routing-table, +LLDP, D-Bus/DHCP/firewall) are deferred to Phase 3. 
+ +## Collectors implemented + +### RoutingCollector (`routing.go`, ~748 lines) +- **Tree key**: `ietf-routing:routing` +- **Interval**: 10s (configurable: `YANGERD_POLL_INTERVAL_ROUTING`) +- Merges OSPF, RIP, and BFD into a single `control-plane-protocols` list +- OSPF: `ospf-status` helper + `vtysh show ip ospf json` for areas, interfaces, neighbors, routes +- RIP: `vtysh -c 'show ip rip status'` text parsing + `show ip route rip json` +- BFD: `vtysh -c 'show bfd peers json'`, filters out multihop sessions + +### NTPCollector (`ntp.go`, ~434 lines) +- **Tree key**: `ietf-ntp:ntp` +- **Interval**: 60s (configurable: `YANGERD_POLL_INTERVAL_NTP`) +- Parses `chronyc -c` CSV output: sources, sourcestats, tracking, serverstats +- Detects NTP listening port via `ss -ulnp` +- Infix augmentations: clock-state frequency/offset details + +### HardwareCollector (`hardware.go`, ~1122 lines) +- **Tree key**: `ietf-hardware:hardware` +- **Interval**: 10s (configurable: `YANGERD_POLL_INTERVAL_HARDWARE`) +- Motherboard from `/run/system.json` +- VPD components with vendor extensions +- USB port components with lock/unlock state +- hwmon sensors: temp, fan, PWM, voltage, current, power with parent/child relationships +- Thermal zones from `/sys/class/thermal/` +- WiFi radios via `iw.py` (gated by `YANGERD_ENABLE_WIFI`) +- GPS receivers via gpsd TCP (gated by `YANGERD_ENABLE_GPS`) + +### ContainerCollector (`containers.go`, ~466 lines) +- **Tree key**: `infix-containers:containers` +- **Interval**: 10s (configurable: `YANGERD_POLL_INTERVAL_CONTAINERS`) +- Feature-gated by `YANGERD_ENABLE_CONTAINERS` +- Podman ps/inspect/stats integration +- Cgroup v2 resource limits (memory.max, cpu.max) +- Network info (host/bridge + port publishing) +- Resource usage stats (memory, CPU, block I/O, net I/O, PIDs) + +## Bug fixes + +- **NTP `ss` parsing**: `addServerStatus()` used `fields[4]` for local address + in `ss -ulnp` output, but `strings.Fields` puts local address at index 3. 
+ Fixed to `fields[3]`. + +## Test summary + +| File | Tests | Status | +|------|-------|--------| +| `routing_test.go` | 16 | ✅ pass | +| `ntp_test.go` | 10 | ✅ pass | +| `hardware_test.go` | 10 | ✅ pass | +| `containers_test.go` | 12 | ✅ pass | +| **Phase 2 total** | **48** | **✅ all pass** | +| **Overall total** | **65** | **✅ all pass** | + +## File inventory (Phase 2 additions) + +### Production code (4 files) +| File | Lines | Description | +|------|-------|-------------| +| `internal/collector/routing.go` | 748 | OSPF+RIP+BFD routing collector | +| `internal/collector/ntp.go` | 434 | NTP collector (chronyc) | +| `internal/collector/hardware.go` | 1122 | Hardware/sensor collector | +| `internal/collector/containers.go` | 466 | Container collector (podman) | + +### Test code (4 files) +| File | Tests | Description | +|------|-------|-------------| +| `internal/collector/routing_test.go` | 16 | Routing protocol tests | +| `internal/collector/ntp_test.go` | 10 | NTP chronyc parsing tests | +| `internal/collector/hardware_test.go` | 10 | Sensor/VPD/thermal tests | +| `internal/collector/containers_test.go` | 12 | Podman/cgroup parsing tests | + +### Modified files +| File | Change | +|------|--------| +| `internal/config/config.go` | Added PollRouting/NTP/Hardware/Containers fields | +| `cmd/yangerd/main.go` | Registered all Phase 2 collectors | + +--- + +# yangerd Phase 3: Reactive/Event-Driven Collectors + +**Status**: Complete +**Started**: 2026-03-29 +**Completed**: 2026-03-29 + +## Overview + +Reactive collectors replace polling with persistent subscriptions (netlink, +ZAPI, D-Bus signals, subprocess watchers). They implement a +`Run(ctx context.Context) error` goroutine pattern instead of the polling +`Collector` interface. 
+ +## Packages implemented + +### Foundation infrastructure + +| Package | File | Lines | Description | +|---------|------|-------|-------------| +| `ipbatch` | `internal/ipbatch/ipbatch.go` | 212 | Persistent `ip -json` subprocess with mutex-serialized Query(), exponential backoff restart (100ms→30s), canary queries, 4MiB scanner | +| `bridgebatch` | `internal/bridgebatch/bridgebatch.go` | 201 | Persistent `bridge -json` subprocess, same pattern as ipbatch | +| `fswatcher` | `internal/fswatcher/fswatcher.go` | 176 | Inotify watcher with per-path debounce, glob expansion, initial read | + +### Reactive monitors + +| Package | File | Lines | Description | +|---------|------|-------|-------------| +| `monitor` | `internal/monitor/monitor.go` | 509 | NLMonitor — netlink link/addr/neigh/MDB subscription, stores raw ip-json at sub-paths | +| `ethmonitor` | `internal/ethmonitor/ethmonitor.go` | 223 | EthMonitor — ethtool genetlink for speed/duplex/auto-negotiation | +| `iwmonitor` | `internal/iwmonitor/iwmonitor.go` | 311 | IWMonitor — `iw event -t` parser + station dump/info queries | +| `lldpmonitor` | `internal/lldpmonitor/lldpmonitor.go` | 315 | LLDPMonitor — `lldpcli json0 watch` for neighbor discovery | +| `zapiwatcher` | `internal/zapiwatcher/zapiwatcher.go` | 305 | ZAPIWatcher — FRR ZAPI v6 route redistribution | +| `dbusmonitor` | `internal/dbusmonitor/dbusmonitor.go` | 1020 | DBusMonitor — dnsmasq lease events + firewalld Reloaded/NameOwnerChanged | + +### Transformer + +| Package | File | Lines | Description | +|---------|------|-------|-------------| +| `iface` | `internal/iface/iface.go` | 813 | Pure interface transformer: raw `ip -json` → YANG JSON (type mapping, oper-state, counters, IPv4/IPv6, VLAN/VETH/GRE/VXLAN/LAG/bridge augments) | + +## External dependencies added + +- `github.com/fsnotify/fsnotify` v1.9.0 +- `github.com/vishvananda/netlink` +- `github.com/mdlayher/genetlink` + `github.com/mdlayher/ethtool` +- `github.com/osrg/gobgp/v4/pkg/zebra` +- 
`github.com/godbus/dbus/v5` + +## Key design decisions + +- **Event-as-trigger pattern**: Most reactive sources use events solely as triggers + and then re-read canonical data (e.g., netlink notification triggers `ip -json link show`) +- **Persistent subprocess managers** (IPBatch/BridgeBatch): Mutex-serialized Query(), + dead/alive atomic state, ErrBatchDead sentinel error, exponential backoff restart +- **Iface transformer is pure**: Takes raw `ip -json` arrays, returns YANG JSON. + Uses `FileChecker` interface for IPv6 MTU and WiFi detection +- **Interface filtering**: Skip `group=="internal"` or `link_type` in `("can","vcan")` +- **RFC 7951 compliance**: Counter values (uint64) encoded as JSON strings + +## Test summary + +| File | Tests | Status | +|------|-------|--------| +| `internal/iface/iface_test.go` | 20+ | ✅ pass | +| `internal/iwmonitor/iwmonitor_test.go` | 8+ | ✅ pass | +| `internal/lldpmonitor/lldpmonitor_test.go` | 8+ | ✅ pass | +| `internal/monitor/monitor_test.go` | 8+ | ✅ pass | +| `internal/dbusmonitor/dbusmonitor_test.go` | 25+ | ✅ pass | +| `internal/zapiwatcher/zapiwatcher_test.go` | 8 | ✅ pass | +| `internal/fswatcher/fswatcher_test.go` | 11 | ✅ pass | +| **Phase 3 total** | **93** | **✅ all pass** | +| **Overall total** | **158** | **✅ all pass with -race** | + +## Modified files + +| File | Change | +|------|--------| +| `internal/config/config.go` | Added reactive config fields (ZAPISocket, DBus paths, WiFi/LLDP enables) | +| `cmd/yangerd/main.go` | All reactive subsystems wired (229 lines total) | + +## Packages without tests (intentional) + +- `ipbatch`, `bridgebatch` — require real subprocesses (`ip`, `bridge`) +- `ethmonitor` — requires genetlink socket (kernel interface) + +--- + +# yangerd Phase 4: Architecture Fix — Transform-on-Write + +**Status**: Complete +**Started**: 2026-03-29 +**Completed**: 2026-03-29 + +## Problem + +NLMonitor stored raw ip-json fragments at per-interface sub-paths +(e.g. 
`/ietf-interfaces:interfaces/interface[name='eth0']`, +`/addresses`, `/statistics`). These are not valid YANG keys and the +output did not match what Python yanger produces — a single complete +`{"ietf-interfaces:interfaces":{"interface":[...]}}` document. + +The `iface.Transform()` function existed but was never called. +EthMonitor and IWMonitor also stored at fragment paths. + +## Fix: Transform-on-write + +NLMonitor is now the central coordinator. It owns staging data (raw +link/addr/stats arrays) and after every netlink event: + +1. Runs `iface.Transform(links, addrs, stats, fc)` to produce the base + YANG document +2. Merges augment data (ethernet, wifi, bridge FDB/MDB) from staging + maps into the matching interface entries +3. Stores the complete result at a single tree key `ietf-interfaces:interfaces` + +EthMonitor and IWMonitor no longer write to the tree directly. They call +`NLMonitor.SetEthernetData(ifname, data)` and +`NLMonitor.SetWifiData(ifname, data)` which update staging maps and +trigger a rebuild. + +## Changes + +| File | Change | +|------|--------| +| `internal/monitor/monitor.go` | Added staging fields (`links`, `addrs`, `stats`, `fdb`, `mdb`, `ethernet`, `wifi`), `rebuild()` method calling `iface.Transform()` + `mergeAugments()`, `replaceByIfName()` helper, `SetEthernetData()`/`SetWifiData()` public methods. Removed per-interface fragment tree paths. Added `iface.FileChecker` parameter to `New()`. | +| `internal/monitor/monitor_test.go` | Removed `TestPathHelpers` (old fragment paths). Added tests: `TestReplaceByIfName`, `TestReplaceByIfNamePreservesUpdatedData`, `TestMergeAugments`, `TestMergeAugmentsNoOp`, `TestMergeAugmentsInvalidDoc`, `TestTreeKey`. | +| `internal/ethmonitor/ethmonitor.go` | Removed `tree` field and tree import. Added `onUpdate` callback. `New()` no longer takes `*tree.Tree`. `refreshEthernetSettings()` calls `onUpdate(ifname, data)` instead of `tree.Set()`. 
| +| `internal/iwmonitor/iwmonitor.go` | Removed `tree` field and tree import. Added `onUpdate` callback + `publishWifi()` method. `New()` no longer takes `*tree.Tree`. Assembles combined wifi JSON per interface. | +| `cmd/yangerd/main.go` | Added `osFileChecker` type implementing `iface.FileChecker`. Updated `monitor.New()` call with FileChecker. Wired `ethMon.SetOnUpdate(nlmon.SetEthernetData)` and `iwmon.SetOnUpdate(nlmon.SetWifiData)`. Updated `ethmonitor.New()` and `iwmonitor.New()` signatures. | + +## Test summary + +| Metric | Value | +|--------|-------| +| New tests added | 6 | +| Tests removed | 1 (TestPathHelpers — obsolete fragment paths) | +| **Total tests** | **163** | +| **Status** | **✅ all pass with -race** | + +--- + +# yangerd Phase 5: Buildroot Compatibility Fixes + +**Status**: Complete +**Completed**: 2026-03-29 + +## Problem + +`make yangerd-rebuild` failed: Buildroot ships Go 1.23.12 but go.mod +had `go 1.24.5` (from gobgp/v4's requirement). Two other dependencies +also required go 1.24+. + +## Fixes + +| Change | Before | After | Reason | +|--------|--------|-------|--------| +| `go.mod` go directive | `go 1.24.5` | `go 1.23.0` | Buildroot has Go 1.23.12 | +| `osrg/gobgp` | v4 (go 1.24.5) | v3 v3.37.0 (go 1.23.0) | All v4 releases need 1.24+ | +| `mdlayher/ethtool` | v0.5.1 (go 1.24.0) | v0.4.1 (go 1.23.0) | API-compatible downgrade | +| `golang.org/x/sys` | v0.40.0 (go 1.24.0) | v0.35.0 (go 1.23.0) | Sufficient for all deps | + +### gobgp v3 vs v4 API changes + +- `Nexthop.Gate`: `netip.Addr` (v4) → `net.IP` (v3) +- `Prefix.Prefix`: `netip.Addr` (v4) → `net.IP` (v3) +- `NewClient()`: `*slog.Logger` (v4) → `log.Logger` interface (v3) +- Created `slogAdapter` in zapiwatcher.go to bridge `*slog.Logger` → gobgp v3 `log.Logger` + +### ip batch `-s` flag fix + +`ip -json -force -batch -` does not accept `-s` as a batch line subcommand. +Added `-s -d` as global flags to the persistent batch process args instead. 
+Removed separate `-s link show` queries and the `stats` staging field. + +## Verification + +- All 163 tests pass with `-mod=vendor` and `-race` +- `go build ./cmd/yangerd` and `go build ./cmd/yangerctl` succeed +- `go vet ./...` clean + +--- + +# yangerd Phase 6: statd C Integration + +**Status**: Complete (code-complete, pending real device testing) +**Completed**: 2026-03-29 + +## Overview + +statd (C daemon) now queries yangerd over Unix socket IPC instead of +fork/exec'ing Python yanger scripts, with automatic fallback to yanger +when yangerd is unavailable. + +## Architecture + +``` +statd (C) ──yangerd_query()──► /run/yangerd.sock ──► yangerd (Go) + │ │ + │ fallback if yangerd unavailable │ tree.Get(key) + ▼ ▼ +ly_add_yanger_data() in-memory Tree store + (fork/exec Python) (per-module JSON blobs) +``` + +## Wire protocol (C↔Go) + +``` +Frame: [ver:1byte=0x01] [length:4bytes big-endian] [JSON body] +Request: {"method":"get","path":"ietf-interfaces:interfaces"} +Response: {"status":"ok","data":{"ietf-interfaces:interfaces":{...}}} +``` + +## Files created + +| File | Lines | Description | +|------|-------|-------------| +| `src/statd/yangerd.h` | 28 | Header: socket path, timeout, max payload, proto version, `yangerd_query()` | +| `src/statd/yangerd.c` | 245 | Full C IPC client: connect, framed I/O, jansson JSON parsing | + +## Files modified + +| File | Change | +|------|--------| +| `src/statd/statd.c` | Added `#include "yangerd.h"`, `ly_add_yangerd_data()` wrapper, `xpath_to_yangerd_path()` helper; updated all 5 sysrepo callbacks | +| `src/statd/Makefile.am` | Added `yangerd.c yangerd.h` to `statd_SOURCES` | + +## Key functions + +- **`yangerd_query(path, &buf, &len)`** — Connect to socket, send framed "get" request, receive framed response, extract "data" JSON field +- **`ly_add_yangerd_data(ctx, parent, path, yanger_args)`** — Try yangerd first; on failure, fall back to `ly_add_yanger_data()` (fork/exec Python) +- **`xpath_to_yangerd_path(xpath, buf, 
bufsz)`** — Strip leading `/`, take first path segment (maps sysrepo xpath to yangerd tree key) + +## Subscription → yangerd key mapping + +| statd subscription xpath | yangerd tree key | +|---|---| +| `/ietf-interfaces:interfaces` | `ietf-interfaces:interfaces` | +| `/ietf-routing:routing/ribs` | `ietf-routing:routing` | +| `/ietf-hardware:hardware` | `ietf-hardware:hardware` | +| `/ietf-system:system` | `ietf-system:system` | +| `/ietf-system:system-state` | `ietf-system:system-state` | +| `/ieee802-dot1ab-lldp:lldp` | `ieee802-dot1ab-lldp:lldp` | +| `/infix-containers:containers` | `infix-containers:containers` | +| `/infix-dhcp-server:dhcp-server` | `infix-dhcp-server:dhcp-server` | +| `/infix-firewall:firewall` | `infix-firewall:firewall` | +| `/ietf-ntp:ntp` | `ietf-ntp:ntp` | +| OSPF/RIP/BFD callbacks | `ietf-routing:routing` (hardcoded) | + +## Remaining + +- ~~Real device testing~~ Done — bugs found and fixed (see Phase 7) +- ~~Verify fallback~~ Removed — no fallback by design +- Performance comparison (optional): yangerd IPC vs fork/exec Python yanger + +--- + +# yangerd Phase 7: Real Device Bug Fixes + +**Status**: Complete (Bugs 1-7) +**Completed**: 2026-03-30 + +## Overview + +Two rounds of `yangerctl dump` on a real Infix x86_64 device exposed +seven bugs across four categories. All seven are now fixed. + +## Bug 1: Double-wrapping (FIXED) + +### Problem + +The IPC server's `handleGet()` wraps stored data in `{key: data}`, but +some collectors ALSO stored their data pre-wrapped. This caused +double-nesting, e.g. `{"ietf-interfaces:interfaces":{"ietf-interfaces:interfaces":{...}}}`. + +### Contract established + +Collectors store data WITHOUT the module key wrapper. The server adds it. 
+ +### Fixes + +| File | Change | +|------|--------| +| `internal/iface/iface.go` | `Transform()` returns `{"interface":[...]}` (removed outer wrapper) | +| `internal/iface/iface_test.go` | Updated `mustInterfaces()` to parse unwrapped format | +| `internal/monitor/monitor.go` | `mergeAugments()` updated to parse unwrapped format | +| `internal/monitor/monitor_test.go` | `TestMergeAugments`/`TestMergeAugmentsNoOp` updated | +| `internal/dbusmonitor/dbusmonitor.go` | `buildDHCPTree()`/`buildFirewallTree()` removed wrappers | +| `internal/dbusmonitor/dbusmonitor_test.go` | Tests updated for unwrapped format | + +## Bug 2: Fragmented routing tree keys (FIXED) + +### Problem + +`ietf-routing:routing` is a shared tree written by three different +sources — RoutingCollector (control-plane-protocols), ZAPIWatcher (ribs), +and FSWatcher (forwarding interfaces). Each wrote to its own sub-path +key or used `tree.Set()` which overwrote the others' data. + +### Solution: `Tree.Merge()` + +Added a shallow first-level JSON merge method to `Tree` that allows each +writer to merge its fields into the shared key without overwriting +others' data. All three writers now use `tree.Merge("ietf-routing:routing", ...)`. 
+ +### Fixes + +| File | Change | +|------|--------| +| `internal/tree/tree.go` | Added `Merge()` and `Delete()` methods | +| `internal/tree/tree_test.go` | 7 new tests (Merge subtests, empty, non-object, delete, concurrent) | +| `internal/zapiwatcher/zapiwatcher.go` | Refactored: internal `routes` map, builds complete ribs, writes via `Merge()` | +| `internal/zapiwatcher/zapiwatcher_test.go` | Removed obsolete `routePath`/`routeKey` tests | +| `internal/collector/routing.go` | Changed `t.Set()` → `t.Merge()` on line 59 | +| `internal/fswatcher/fswatcher.go` | Added `UseMerge` field to `WatchHandler`; `InitialRead()`/`fireHandler()` respect it | +| `cmd/yangerd/main.go` | Replaced per-file `forwardingTreeKey()` with `forwardingAggregator` that scans all forwarding files and writes a complete `interfaces` list via `Merge()` | + +### Forwarding aggregator + +Replaces the old per-file approach (each `/proc/sys/net/ipv{4,6}/conf/*/forwarding` +file got its own sub-path tree key) with an aggregator that: +1. On any forwarding file change, rescans ALL forwarding files +2. Builds the complete `{"interfaces":{"interface":["e1","e2",...]}}` list +3. Writes via `tree.Merge("ietf-routing:routing", ...)` — coexists with ribs and control-plane-protocols +4. Matches Python yanger `get_routing_interfaces()` output format +5. Uses `forwarding` for IPv4, `force_forwarding` for IPv6 (matching Python behavior) + +## Bug 3: Duplicate interface entries (FIXED) + +### Problem + +When Infix renames `eth0` to `e1`, `ip -json link show` reports both the +old and new names with the same ifindex. Both appeared in the YANG output. + +### Solution: `dedup()` in iface transformer + +Added `dedup()` function that runs before `skipInterface()`. When +multiple entries share the same ifindex, keeps the one with +`operstate=="UP"` (or the first seen if neither is UP). 
+ +### Fixes + +| File | Change | +|------|--------| +| `internal/iface/iface.go` | Added `dedup()` function; `Transform()` calls `dedup(decodeObjects(linkData))` | +| `internal/iface/iface_test.go` | 4 new test cases: UP-over-DOWN, both-DOWN-keeps-first, different-ifindex, zero-ifindex | + +## Verification + +- All tests pass (including new tests for Merge, dedup, and forwarding aggregator) +- `go build ./cmd/yangerd` and `go build ./cmd/yangerctl` succeed +- `go vet ./...` clean + +## Bug 4: Interface removal not handled (FIXED) + +### Problem + +When an interface is removed, its `/proc/sys/net/*/conf/IFNAME/forwarding` +file disappears. The inotify Remove event was only calling `rewatch()`, +not updating the tree — the removed interface stayed in the YANG data. + +### Solution: `handleRemove()` in FSWatcher + +Replaced the old Remove→handleEvent→rewatch sequence with a dedicated +`handleRemove()` method that handles the two handler types differently: + +- **UseMerge handlers** (forwarding aggregator): fires the handler, which + rescans via glob and naturally excludes the removed file +- **Plain handlers**: calls `tree.Delete()` to clear stale data + +After handling, attempts to re-add the inotify watch. If the file is +permanently gone, cleans up the handler and debounce timer entries. + +### Fixes + +| File | Change | +|------|--------| +| `internal/fswatcher/fswatcher.go` | Added `handleRemove()`, removed `rewatch()`, `Run()` dispatches Remove to `handleRemove()` | +| `internal/fswatcher/fswatcher_test.go` | 4 new tests: merge-handler removal, plain-handler removal, unknown path, rewatch-succeeds | + +## Bug 5: Phantom GPS devices (FIXED) + +### Problem + +`gps0`–`gps3` appeared in hardware output even when `/dev/gps*` didn't +exist. `readlink -f` on non-existent paths succeeds (returns the +canonical form), so the loop at `hardware.go:941` never skipped them. 
+ +### Solution + +Added an `ls /dev/gpsN` existence check before the `readlink -f` call, +matching the Python reference (`ietf_hardware.py:727`: `HOST.exists()`). + +### Fixes + +| File | Change | +|------|--------| +| `internal/collector/hardware.go` | Added `ls` existence check before `readlink -f` in GPS loop | +| `internal/collector/hardware_test.go` | `TestHardwareGPSDeviceNotFound` — verifies no phantom GPS when devices missing | + +## Bug 6: `null` JSON arrays instead of `[]` (FIXED) + +### Problem + +Go `nil` slices marshal to `null` in JSON. YANG lists must be arrays, +so libyang rejects `null`. Three locations used `var slice []Type` +(nil) instead of `make([]Type, 0)` (empty array). + +### Fixes + +| File | Line | Change | +|------|------|--------| +| `cmd/yangerd/main.go` | 282 | `var ifnames []string` → `ifnames := make([]string, 0)` | +| `internal/collector/system.go` | 172 | `var users []interface{}` → `users := make([]interface{}, 0)` | +| `internal/collector/system.go` | 422 | `var servers []interface{}` → `servers := make([]interface{}, 0)` | +| `internal/collector/system_test.go` | — | `TestSystemCollectorNoUsersEmptyArray`, `TestSystemCollectorNoDNSEmptyArray` | + +## Bug 7: Firewall data without firewalld running (FIXED) + +### Problem + +`refreshFirewall()` was called on initial D-Bus connect. When firewalld +wasn't on the bus, all D-Bus calls failed but execution continued. +`getFirewallPolicies()` unconditionally appended a hardcoded +"default-drop" policy, producing phantom firewall data. + +### Solution + +Made `getDefaultZone` the gate: if it fails, `refreshFirewall()` returns +early without writing to the tree. This is the minimal fix — the +`NameOwnerChanged` handler already clears the tree when firewalld exits. 
+ +### Fixes + +| File | Change | +|------|--------| +| `internal/dbusmonitor/dbusmonitor.go` | `refreshFirewall()` returns early if `getDefaultZone` fails | + +## Verification + +- All tests pass (173+ tests) +- `go build ./cmd/yangerd` and `go build ./cmd/yangerctl` succeed +- `go vet ./...` clean +- Ready for re-test on real device diff --git a/src/yangerd/vendor/github.com/fsnotify/fsnotify/.cirrus.yml b/src/yangerd/vendor/github.com/fsnotify/fsnotify/.cirrus.yml new file mode 100644 index 000000000..7f257e99a --- /dev/null +++ b/src/yangerd/vendor/github.com/fsnotify/fsnotify/.cirrus.yml @@ -0,0 +1,14 @@ +freebsd_task: + name: 'FreeBSD' + freebsd_instance: + image_family: freebsd-14-2 + install_script: + - pkg update -f + - pkg install -y go + test_script: + # run tests as user "cirrus" instead of root + - pw useradd cirrus -m + - chown -R cirrus:cirrus . + - FSNOTIFY_BUFFER=4096 sudo --preserve-env=FSNOTIFY_BUFFER -u cirrus go test -parallel 1 -race ./... + - sudo --preserve-env=FSNOTIFY_BUFFER -u cirrus go test -parallel 1 -race ./... + - FSNOTIFY_DEBUG=1 sudo --preserve-env=FSNOTIFY_BUFFER -u cirrus go test -parallel 1 -race -v ./... 
diff --git a/src/yangerd/vendor/github.com/fsnotify/fsnotify/.gitignore b/src/yangerd/vendor/github.com/fsnotify/fsnotify/.gitignore new file mode 100644 index 000000000..daea9dd6d --- /dev/null +++ b/src/yangerd/vendor/github.com/fsnotify/fsnotify/.gitignore @@ -0,0 +1,10 @@ +# go test -c output +*.test +*.test.exe + +# Output of go build ./cmd/fsnotify +/fsnotify +/fsnotify.exe + +/test/kqueue +/test/a.out diff --git a/src/yangerd/vendor/github.com/fsnotify/fsnotify/.mailmap b/src/yangerd/vendor/github.com/fsnotify/fsnotify/.mailmap new file mode 100644 index 000000000..a04f2907f --- /dev/null +++ b/src/yangerd/vendor/github.com/fsnotify/fsnotify/.mailmap @@ -0,0 +1,2 @@ +Chris Howey +Nathan Youngman <4566+nathany@users.noreply.github.com> diff --git a/src/yangerd/vendor/github.com/fsnotify/fsnotify/CHANGELOG.md b/src/yangerd/vendor/github.com/fsnotify/fsnotify/CHANGELOG.md new file mode 100644 index 000000000..6468d2cf4 --- /dev/null +++ b/src/yangerd/vendor/github.com/fsnotify/fsnotify/CHANGELOG.md @@ -0,0 +1,602 @@ +# Changelog + +1.9.0 2025-04-04 +---------------- + +### Changes and fixes + +- all: make BufferedWatcher buffered again ([#657]) + +- inotify: fix race when adding/removing watches while a watched path is being + deleted ([#678], [#686]) + +- inotify: don't send empty event if a watched path is unmounted ([#655]) + +- inotify: don't register duplicate watches when watching both a symlink and its + target; previously that would get "half-added" and removing the second would + panic ([#679]) + +- kqueue: fix watching relative symlinks ([#681]) + +- kqueue: correctly mark pre-existing entries when watching a link to a dir on + kqueue ([#682]) + +- illumos: don't send error if changed file is deleted while processing the + event ([#678]) + + +[#657]: https://github.com/fsnotify/fsnotify/pull/657 +[#678]: https://github.com/fsnotify/fsnotify/pull/678 +[#686]: https://github.com/fsnotify/fsnotify/pull/686 +[#655]:
https://github.com/fsnotify/fsnotify/pull/655 +[#681]: https://github.com/fsnotify/fsnotify/pull/681 +[#679]: https://github.com/fsnotify/fsnotify/pull/679 +[#682]: https://github.com/fsnotify/fsnotify/pull/682 + +1.8.0 2024-10-31 +---------------- + +### Additions + +- all: add `FSNOTIFY_DEBUG` to print debug logs to stderr ([#619]) + +### Changes and fixes + +- windows: fix behaviour of `WatchList()` to be consistent with other platforms ([#610]) + +- kqueue: ignore events with Ident=0 ([#590]) + +- kqueue: set O_CLOEXEC to prevent passing file descriptors to children ([#617]) + +- kqueue: emit events as "/path/dir/file" instead of "path/link/file" when watching a symlink ([#625]) + +- inotify: don't send event for IN_DELETE_SELF when also watching the parent ([#620]) + +- inotify: fix panic when calling Remove() in a goroutine ([#650]) + +- fen: allow watching subdirectories of watched directories ([#621]) + +[#590]: https://github.com/fsnotify/fsnotify/pull/590 +[#610]: https://github.com/fsnotify/fsnotify/pull/610 +[#617]: https://github.com/fsnotify/fsnotify/pull/617 +[#619]: https://github.com/fsnotify/fsnotify/pull/619 +[#620]: https://github.com/fsnotify/fsnotify/pull/620 +[#621]: https://github.com/fsnotify/fsnotify/pull/621 +[#625]: https://github.com/fsnotify/fsnotify/pull/625 +[#650]: https://github.com/fsnotify/fsnotify/pull/650 + +1.7.0 - 2023-10-22 +------------------ +This version of fsnotify needs Go 1.17. + +### Additions + +- illumos: add FEN backend to support illumos and Solaris. ([#371]) + +- all: add `NewBufferedWatcher()` to use a buffered channel, which can be useful + in cases where you can't control the kernel buffer and receive a large number + of events in bursts. ([#550], [#572]) + +- all: add `AddWith()`, which is identical to `Add()` but allows passing + options. 
([#521]) + +- windows: allow setting the ReadDirectoryChangesW() buffer size with + `fsnotify.WithBufferSize()`; the default of 64K is the highest value that + works on all platforms and is enough for most purposes, but in some cases a + higher buffer is needed. ([#521]) + +### Changes and fixes + +- inotify: remove watcher if a watched path is renamed ([#518]) + + After a rename the reported name wasn't updated, or even an empty string. + Inotify doesn't provide any good facilities to update it, so just remove the + watcher. This is already how it worked on kqueue and FEN. + + On Windows this does work, and remains working. + +- windows: don't listen for file attribute changes ([#520]) + + File attribute changes are sent as `FILE_ACTION_MODIFIED` by the Windows API, + with no way to see if they're a file write or attribute change, so would show + up as a fsnotify.Write event. This is never useful, and could result in many + spurious Write events. + +- windows: return `ErrEventOverflow` if the buffer is full ([#525]) + + Before it would merely return "short read", making it hard to detect this + error. + +- kqueue: make sure events for all files are delivered properly when removing a + watched directory ([#526]) + + Previously they would get sent with `""` (empty string) or `"."` as the path + name. + +- kqueue: don't emit spurious Create events for symbolic links ([#524]) + + The link would get resolved but kqueue would "forget" it already saw the link + itself, resulting in a Create for every Write event for the directory. + +- all: return `ErrClosed` on `Add()` when the watcher is closed ([#516]) + +- other: add `Watcher.Errors` and `Watcher.Events` to the no-op `Watcher` in + `backend_other.go`, making it easier to use on unsupported platforms such as + WASM, AIX, etc. ([#528]) + +- other: use the `backend_other.go` no-op if the `appengine` build tag is set; + Google AppEngine forbids usage of the unsafe package so the inotify backend + won't compile there.
+ +[#371]: https://github.com/fsnotify/fsnotify/pull/371 +[#516]: https://github.com/fsnotify/fsnotify/pull/516 +[#518]: https://github.com/fsnotify/fsnotify/pull/518 +[#520]: https://github.com/fsnotify/fsnotify/pull/520 +[#521]: https://github.com/fsnotify/fsnotify/pull/521 +[#524]: https://github.com/fsnotify/fsnotify/pull/524 +[#525]: https://github.com/fsnotify/fsnotify/pull/525 +[#526]: https://github.com/fsnotify/fsnotify/pull/526 +[#528]: https://github.com/fsnotify/fsnotify/pull/528 +[#537]: https://github.com/fsnotify/fsnotify/pull/537 +[#550]: https://github.com/fsnotify/fsnotify/pull/550 +[#572]: https://github.com/fsnotify/fsnotify/pull/572 + +1.6.0 - 2022-10-13 +------------------ +This version of fsnotify needs Go 1.16 (this was already the case since 1.5.1, +but not documented). It also increases the minimum Linux version to 2.6.32. + +### Additions + +- all: add `Event.Has()` and `Op.Has()` ([#477]) + + This makes checking events a lot easier; for example: + + if event.Op&Write == Write && !(event.Op&Remove == Remove) { + } + + Becomes: + + if event.Has(Write) && !event.Has(Remove) { + } + +- all: add cmd/fsnotify ([#463]) + + A command-line utility for testing and some examples. + +### Changes and fixes + +- inotify: don't ignore events for files that don't exist ([#260], [#470]) + + Previously the inotify watcher would call `os.Lstat()` to check if a file + still exists before emitting events. + + This was inconsistent with other platforms and resulted in inconsistent event + reporting (e.g. when a file is quickly removed and re-created), and generally + a source of confusion. It was added in 2013 to fix a memory leak that no + longer exists. + +- all: return `ErrNonExistentWatch` when `Remove()` is called on a path that's + not watched ([#460]) + +- inotify: replace epoll() with non-blocking inotify ([#434]) + + Non-blocking inotify was not generally available at the time this library was + written in 2014, but now it is. 
As a result, the minimum Linux version is + bumped from 2.6.27 to 2.6.32. This hugely simplifies the code and is faster. + +- kqueue: don't check for events every 100ms ([#480]) + + The watcher would wake up every 100ms, even when there was nothing to do. Now + it waits until there is something to do. + +- macos: retry opening files on EINTR ([#475]) + +- kqueue: skip unreadable files ([#479]) + + kqueue requires a file descriptor for every file in a directory; this would + fail if a file was unreadable by the current user. Now these files are simply + skipped. + +- windows: fix renaming a watched directory if the parent is also watched ([#370]) + +- windows: increase buffer size from 4K to 64K ([#485]) + +- windows: close file handle on Remove() ([#288]) + +- kqueue: put pathname in the error if watching a file fails ([#471]) + +- inotify, windows: calling Close() more than once could race ([#465]) + +- kqueue: improve Close() performance ([#233]) + +- all: various documentation additions and clarifications. 
+ +[#233]: https://github.com/fsnotify/fsnotify/pull/233 +[#260]: https://github.com/fsnotify/fsnotify/pull/260 +[#288]: https://github.com/fsnotify/fsnotify/pull/288 +[#370]: https://github.com/fsnotify/fsnotify/pull/370 +[#434]: https://github.com/fsnotify/fsnotify/pull/434 +[#460]: https://github.com/fsnotify/fsnotify/pull/460 +[#463]: https://github.com/fsnotify/fsnotify/pull/463 +[#465]: https://github.com/fsnotify/fsnotify/pull/465 +[#470]: https://github.com/fsnotify/fsnotify/pull/470 +[#471]: https://github.com/fsnotify/fsnotify/pull/471 +[#475]: https://github.com/fsnotify/fsnotify/pull/475 +[#477]: https://github.com/fsnotify/fsnotify/pull/477 +[#479]: https://github.com/fsnotify/fsnotify/pull/479 +[#480]: https://github.com/fsnotify/fsnotify/pull/480 +[#485]: https://github.com/fsnotify/fsnotify/pull/485 + +## [1.5.4] - 2022-04-25 + +* Windows: add missing defer to `Watcher.WatchList` [#447](https://github.com/fsnotify/fsnotify/pull/447) +* go.mod: use latest x/sys [#444](https://github.com/fsnotify/fsnotify/pull/444) +* Fix compilation for OpenBSD [#443](https://github.com/fsnotify/fsnotify/pull/443) + +## [1.5.3] - 2022-04-22 + +* This version is retracted. 
An incorrect branch was published accidentally [#445](https://github.com/fsnotify/fsnotify/issues/445) + +## [1.5.2] - 2022-04-21 + +* Add a feature to return the directories and files that are being monitored [#374](https://github.com/fsnotify/fsnotify/pull/374) +* Fix potential crash on windows if `raw.FileNameLength` exceeds `syscall.MAX_PATH` [#361](https://github.com/fsnotify/fsnotify/pull/361) +* Allow build on unsupported GOOS [#424](https://github.com/fsnotify/fsnotify/pull/424) +* Don't set `poller.fd` twice in `newFdPoller` [#406](https://github.com/fsnotify/fsnotify/pull/406) +* fix go vet warnings: call to `(*T).Fatalf` from a non-test goroutine [#416](https://github.com/fsnotify/fsnotify/pull/416) + +## [1.5.1] - 2021-08-24 + +* Revert Add AddRaw to not follow symlinks [#394](https://github.com/fsnotify/fsnotify/pull/394) + +## [1.5.0] - 2021-08-20 + +* Go: Increase minimum required version to Go 1.12 [#381](https://github.com/fsnotify/fsnotify/pull/381) +* Feature: Add AddRaw method which does not follow symlinks when adding a watch [#289](https://github.com/fsnotify/fsnotify/pull/298) +* Windows: Follow symlinks by default like on all other systems [#289](https://github.com/fsnotify/fsnotify/pull/289) +* CI: Use GitHub Actions for CI and cover go 1.12-1.17 + [#378](https://github.com/fsnotify/fsnotify/pull/378) + [#381](https://github.com/fsnotify/fsnotify/pull/381) + [#385](https://github.com/fsnotify/fsnotify/pull/385) +* Go 1.14+: Fix unsafe pointer conversion [#325](https://github.com/fsnotify/fsnotify/pull/325) + +## [1.4.9] - 2020-03-11 + +* Move example usage to the readme #329. This may resolve #328.
+ +## [1.4.8] - 2020-03-10 + +* CI: test more go versions (@nathany 1d13583d846ea9d66dcabbfefbfb9d8e6fb05216) +* Tests: Queued inotify events could have been read by the test before max_queued_events was hit (@matthias-stone #265) +* Tests: t.Fatalf -> t.Errorf in go routines (@gdey #266) +* CI: Less verbosity (@nathany #267) +* Tests: Darwin: Exchangedata is deprecated on 10.13 (@nathany #267) +* Tests: Check if channels are closed in the example (@alexeykazakov #244) +* CI: Only run golint on latest version of go and fix issues (@cpuguy83 #284) +* CI: Add windows to travis matrix (@cpuguy83 #284) +* Docs: Remove appveyor badge (@nathany 11844c0959f6fff69ba325d097fce35bd85a8e93) +* Linux: create epoll and pipe fds with close-on-exec (@JohannesEbke #219) +* Linux: open files with close-on-exec (@linxiulei #273) +* Docs: Plan to support fanotify (@nathany ab058b44498e8b7566a799372a39d150d9ea0119 ) +* Project: Add go.mod (@nathany #309) +* Project: Revise editor config (@nathany #309) +* Project: Update copyright for 2019 (@nathany #309) +* CI: Drop go1.8 from CI matrix (@nathany #309) +* Docs: Updating the FAQ section for supportability with NFS & FUSE filesystems (@Pratik32 4bf2d1fec78374803a39307bfb8d340688f4f28e ) + +## [1.4.7] - 2018-01-09 + +* BSD/macOS: Fix possible deadlock on closing the watcher on kqueue (thanks @nhooyr and @glycerine) +* Tests: Fix missing verb on format string (thanks @rchiossi) +* Linux: Fix deadlock in Remove (thanks @aarondl) +* Linux: Watch.Add improvements (avoid race, fix consistency, reduce garbage) (thanks @twpayne) +* Docs: Moved FAQ into the README (thanks @vahe) +* Linux: Properly handle inotify's IN_Q_OVERFLOW event (thanks @zeldovich) +* Docs: replace references to OS X with macOS + +## [1.4.2] - 2016-10-10 + +* Linux: use InotifyInit1 with IN_CLOEXEC to stop leaking a file descriptor to a child process when using fork/exec [#178](https://github.com/fsnotify/fsnotify/pull/178) (thanks @pattyshack) + +## [1.4.1] - 2016-10-04
+ +* Fix flaky inotify stress test on Linux [#177](https://github.com/fsnotify/fsnotify/pull/177) (thanks @pattyshack) + +## [1.4.0] - 2016-10-01 + +* add a String() method to Event.Op [#165](https://github.com/fsnotify/fsnotify/pull/165) (thanks @oozie) + +## [1.3.1] - 2016-06-28 + +* Windows: fix for double backslash when watching the root of a drive [#151](https://github.com/fsnotify/fsnotify/issues/151) (thanks @brunoqc) + +## [1.3.0] - 2016-04-19 + +* Support linux/arm64 by [patching](https://go-review.googlesource.com/#/c/21971/) x/sys/unix and switching to it from syscall (thanks @suihkulokki) [#135](https://github.com/fsnotify/fsnotify/pull/135) + +## [1.2.10] - 2016-03-02 + +* Fix golint errors in windows.go [#121](https://github.com/fsnotify/fsnotify/pull/121) (thanks @tiffanyfj) + +## [1.2.9] - 2016-01-13 + +* kqueue: Fix logic for CREATE after REMOVE [#111](https://github.com/fsnotify/fsnotify/pull/111) (thanks @bep) + +## [1.2.8] - 2015-12-17 + +* kqueue: fix race condition in Close [#105](https://github.com/fsnotify/fsnotify/pull/105) (thanks @djui for reporting the issue and @ppknap for writing a failing test) +* inotify: fix race in test +* enable race detection for continuous integration (Linux, Mac, Windows) + +## [1.2.5] - 2015-10-17 + +* inotify: use epoll_create1 for arm64 support (requires Linux 2.6.27 or later) [#100](https://github.com/fsnotify/fsnotify/pull/100) (thanks @suihkulokki) +* inotify: fix path leaks [#73](https://github.com/fsnotify/fsnotify/pull/73) (thanks @chamaken) +* kqueue: watch for rename events on subdirectories [#83](https://github.com/fsnotify/fsnotify/pull/83) (thanks @guotie) +* kqueue: avoid infinite loops from symlinks cycles [#101](https://github.com/fsnotify/fsnotify/pull/101) (thanks @illicitonion) + +## [1.2.1] - 2015-10-14 + +* kqueue: don't watch named pipes [#98](https://github.com/fsnotify/fsnotify/pull/98) (thanks @evanphx) + +## [1.2.0] - 2015-02-08 + +* inotify: use epoll to wake up readEvents
[#66](https://github.com/fsnotify/fsnotify/pull/66) (thanks @PieterD) +* inotify: closing watcher should now always shut down goroutine [#63](https://github.com/fsnotify/fsnotify/pull/63) (thanks @PieterD) +* kqueue: close kqueue after removing watches, fixes [#59](https://github.com/fsnotify/fsnotify/issues/59) + +## [1.1.1] - 2015-02-05 + +* inotify: Retry read on EINTR [#61](https://github.com/fsnotify/fsnotify/issues/61) (thanks @PieterD) + +## [1.1.0] - 2014-12-12 + +* kqueue: rework internals [#43](https://github.com/fsnotify/fsnotify/pull/43) + * add low-level functions + * only need to store flags on directories + * less mutexes [#13](https://github.com/fsnotify/fsnotify/issues/13) + * done can be an unbuffered channel + * remove calls to os.NewSyscallError +* More efficient string concatenation for Event.String() [#52](https://github.com/fsnotify/fsnotify/pull/52) (thanks @mdlayher) +* kqueue: fix regression in rework causing subdirectories to be watched [#48](https://github.com/fsnotify/fsnotify/issues/48) +* kqueue: cleanup internal watch before sending remove event [#51](https://github.com/fsnotify/fsnotify/issues/51) + +## [1.0.4] - 2014-09-07 + +* kqueue: add dragonfly to the build tags. +* Rename source code files, rearrange code so exported APIs are at the top. +* Add done channel to example code. [#37](https://github.com/fsnotify/fsnotify/pull/37) (thanks @chenyukang) + +## [1.0.3] - 2014-08-19 + +* [Fix] Windows MOVED_TO now translates to Create like on BSD and Linux. [#36](https://github.com/fsnotify/fsnotify/issues/36) + +## [1.0.2] - 2014-08-17 + +* [Fix] Missing create events on macOS. [#14](https://github.com/fsnotify/fsnotify/issues/14) (thanks @zhsso) +* [Fix] Make ./path and path equivalent. (thanks @zhsso) + +## [1.0.0] - 2014-08-15 + +* [API] Remove AddWatch on Windows, use Add. +* Improve documentation for exported identifiers. [#30](https://github.com/fsnotify/fsnotify/issues/30) +* Minor updates based on feedback from golint. 
+ +## dev / 2014-07-09 + +* Moved to [github.com/fsnotify/fsnotify](https://github.com/fsnotify/fsnotify). +* Use os.NewSyscallError instead of returning errno (thanks @hariharan-uno) + +## dev / 2014-07-04 + +* kqueue: fix incorrect mutex used in Close() +* Update example to demonstrate usage of Op. + +## dev / 2014-06-28 + +* [API] Don't set the Write Op for attribute notifications [#4](https://github.com/fsnotify/fsnotify/issues/4) +* Fix for String() method on Event (thanks Alex Brainman) +* Don't build on Plan 9 or Solaris (thanks @4ad) + +## dev / 2014-06-21 + +* Events channel of type Event rather than *Event. +* [internal] use syscall constants directly for inotify and kqueue. +* [internal] kqueue: rename events to kevents and fileEvent to event. + +## dev / 2014-06-19 + +* Go 1.3+ required on Windows (uses syscall.ERROR_MORE_DATA internally). +* [internal] remove cookie from Event struct (unused). +* [internal] Event struct has the same definition across every OS. +* [internal] remove internal watch and removeWatch methods. + +## dev / 2014-06-12 + +* [API] Renamed Watch() to Add() and RemoveWatch() to Remove(). +* [API] Pluralized channel names: Events and Errors. +* [API] Renamed FileEvent struct to Event. +* [API] Op constants replace methods like IsCreate(). + +## dev / 2014-06-12 + +* Fix data race on kevent buffer (thanks @tilaks) [#98](https://github.com/howeyc/fsnotify/pull/98) + +## dev / 2014-05-23 + +* [API] Remove current implementation of WatchFlags. 
+ * current implementation doesn't take advantage of OS for efficiency + * provides little benefit over filtering events as they are received, but has extra bookkeeping and mutexes + * no tests for the current implementation + * not fully implemented on Windows [#93](https://github.com/howeyc/fsnotify/issues/93#issuecomment-39285195) + +## [0.9.3] - 2014-12-31 + +* kqueue: cleanup internal watch before sending remove event [#51](https://github.com/fsnotify/fsnotify/issues/51) + +## [0.9.2] - 2014-08-17 + +* [Backport] Fix missing create events on macOS. [#14](https://github.com/fsnotify/fsnotify/issues/14) (thanks @zhsso) + +## [0.9.1] - 2014-06-12 + +* Fix data race on kevent buffer (thanks @tilaks) [#98](https://github.com/howeyc/fsnotify/pull/98) + +## [0.9.0] - 2014-01-17 + +* IsAttrib() for events that only concern a file's metadata [#79][] (thanks @abustany) +* [Fix] kqueue: fix deadlock [#77][] (thanks @cespare) +* [NOTICE] Development has moved to `code.google.com/p/go.exp/fsnotify` in preparation for inclusion in the Go standard library. 
+ +## [0.8.12] - 2013-11-13 + +* [API] Remove FD_SET and friends from Linux adapter + +## [0.8.11] - 2013-11-02 + +* [Doc] Add Changelog [#72][] (thanks @nathany) +* [Doc] Spotlight and double modify events on macOS [#62][] (reported by @paulhammond) + +## [0.8.10] - 2013-10-19 + +* [Fix] kqueue: remove file watches when parent directory is removed [#71][] (reported by @mdwhatcott) +* [Fix] kqueue: race between Close and readEvents [#70][] (reported by @bernerdschaefer) +* [Doc] specify OS-specific limits in README (thanks @debrando) + +## [0.8.9] - 2013-09-08 + +* [Doc] Contributing (thanks @nathany) +* [Doc] update package path in example code [#63][] (thanks @paulhammond) +* [Doc] GoCI badge in README (Linux only) [#60][] +* [Doc] Cross-platform testing with Vagrant [#59][] (thanks @nathany) + +## [0.8.8] - 2013-06-17 + +* [Fix] Windows: handle `ERROR_MORE_DATA` on Windows [#49][] (thanks @jbowtie) + +## [0.8.7] - 2013-06-03 + +* [API] Make syscall flags internal +* [Fix] inotify: ignore event changes +* [Fix] race in symlink test [#45][] (reported by @srid) +* [Fix] tests on Windows +* lower case error messages + +## [0.8.6] - 2013-05-23 + +* kqueue: Use EVT_ONLY flag on Darwin +* [Doc] Update README with full example + +## [0.8.5] - 2013-05-09 + +* [Fix] inotify: allow monitoring of "broken" symlinks (thanks @tsg) + +## [0.8.4] - 2013-04-07 + +* [Fix] kqueue: watch all file events [#40][] (thanks @ChrisBuchholz) + +## [0.8.3] - 2013-03-13 + +* [Fix] inotify/kqueue memory leak [#36][] (reported by @nbkolchin) +* [Fix] kqueue: use fsnFlags for watching a directory [#33][] (reported by @nbkolchin) + +## [0.8.2] - 2013-02-07 + +* [Doc] add Authors +* [Fix] fix data races for map access [#29][] (thanks @fsouza) + +## [0.8.1] - 2013-01-09 + +* [Fix] Windows path separators +* [Doc] BSD License + +## [0.8.0] - 2012-11-09 + +* kqueue: directory watching improvements (thanks @vmirage) +* inotify: add `IN_MOVED_TO` [#25][] (requested by @cpisto) +* [Fix] kqueue:
deleting watched directory [#24][] (reported by @jakerr) + +## [0.7.4] - 2012-10-09 + +* [Fix] inotify: fixes from https://codereview.appspot.com/5418045/ (ugorji) +* [Fix] kqueue: preserve watch flags when watching for delete [#21][] (reported by @robfig) +* [Fix] kqueue: watch the directory even if it isn't a new watch (thanks @robfig) +* [Fix] kqueue: modify after recreation of file + +## [0.7.3] - 2012-09-27 + +* [Fix] kqueue: watch with an existing folder inside the watched folder (thanks @vmirage) +* [Fix] kqueue: no longer get duplicate CREATE events + +## [0.7.2] - 2012-09-01 + +* kqueue: events for created directories + +## [0.7.1] - 2012-07-14 + +* [Fix] for renaming files + +## [0.7.0] - 2012-07-02 + +* [Feature] FSNotify flags +* [Fix] inotify: Added file name back to event path + +## [0.6.0] - 2012-06-06 + +* kqueue: watch files after directory created (thanks @tmc) + +## [0.5.1] - 2012-05-22 + +* [Fix] inotify: remove all watches before Close() + +## [0.5.0] - 2012-05-03 + +* [API] kqueue: return errors during watch instead of sending over channel +* kqueue: match symlink behavior on Linux +* inotify: add `DELETE_SELF` (requested by @taralx) +* [Fix] kqueue: handle EINTR (reported by @robfig) +* [Doc] Godoc example [#1][] (thanks @davecheney) + +## [0.4.0] - 2012-03-30 + +* Go 1 released: build with go tool +* [Feature] Windows support using winfsnotify +* Windows does not have attribute change notifications +* Roll attribute notifications into IsModify + +## [0.3.0] - 2012-02-19 + +* kqueue: add files when watch directory + +## [0.2.0] - 2011-12-30 + +* update to latest Go weekly code + +## [0.1.0] - 2011-10-19 + +* kqueue: add watch on file creation to match inotify +* kqueue: create file event +* inotify: ignore `IN_IGNORED` events +* event String() +* linux: common FileEvent functions +* initial commit + +[#79]: https://github.com/howeyc/fsnotify/pull/79 +[#77]: https://github.com/howeyc/fsnotify/pull/77 +[#72]: 
https://github.com/howeyc/fsnotify/issues/72 +[#71]: https://github.com/howeyc/fsnotify/issues/71 +[#70]: https://github.com/howeyc/fsnotify/issues/70 +[#63]: https://github.com/howeyc/fsnotify/issues/63 +[#62]: https://github.com/howeyc/fsnotify/issues/62 +[#60]: https://github.com/howeyc/fsnotify/issues/60 +[#59]: https://github.com/howeyc/fsnotify/issues/59 +[#49]: https://github.com/howeyc/fsnotify/issues/49 +[#45]: https://github.com/howeyc/fsnotify/issues/45 +[#40]: https://github.com/howeyc/fsnotify/issues/40 +[#36]: https://github.com/howeyc/fsnotify/issues/36 +[#33]: https://github.com/howeyc/fsnotify/issues/33 +[#29]: https://github.com/howeyc/fsnotify/issues/29 +[#25]: https://github.com/howeyc/fsnotify/issues/25 +[#24]: https://github.com/howeyc/fsnotify/issues/24 +[#21]: https://github.com/howeyc/fsnotify/issues/21 diff --git a/src/yangerd/vendor/github.com/fsnotify/fsnotify/CONTRIBUTING.md b/src/yangerd/vendor/github.com/fsnotify/fsnotify/CONTRIBUTING.md new file mode 100644 index 000000000..4cc40fa59 --- /dev/null +++ b/src/yangerd/vendor/github.com/fsnotify/fsnotify/CONTRIBUTING.md @@ -0,0 +1,145 @@ +Thank you for your interest in contributing to fsnotify! We try to review and +merge PRs in a reasonable timeframe, but please be aware that: + +- To avoid "wasted" work, please discuss changes on the issue tracker first. You + can just send PRs, but they may end up being rejected for one reason or the + other. + +- fsnotify is a cross-platform library, and changes must work reasonably well on + all supported platforms. + +- Changes will need to be compatible; old code should still compile, and the + runtime behaviour can't change in ways that are likely to lead to problems for + users. + +Testing +------- +Just `go test ./...` runs all the tests; the CI runs this on all supported +platforms. Testing different platforms locally can be done with something like +[goon] or [Vagrant], but this isn't super-easy to set up at the moment. 
+ +Use the `-short` flag to make the "stress test" run faster. + +Writing new tests +----------------- +Scripts in the testdata directory allow creating test cases in a "shell-like" +syntax. The basic format is: + + script + + Output: + desired output + +For example: + + # Create a new empty file with some data. + watch / + echo data >/file + + Output: + create /file + write /file + +Just create a new file to add a new test; select which tests to run with +`-run TestScript/[path]`. + +script +------ +The script is a "shell-like" script: + + cmd arg arg + +Comments are supported with `#`: + + # Comment + cmd arg arg # Comment + +All operations are done in a temp directory; a path like "/foo" is rewritten to +"/tmp/TestFoo/foo". + +Arguments can be quoted with `"` or `'`; there are no escapes and they're +functionally identical right now, but this may change in the future, so best to +assume shell-like rules. + + touch "/file with spaces" + +End-of-line escapes with `\` are not supported. + +### Supported commands + + watch path [ops] # Watch the path, reporting events for it. Nothing is + # watched by default. Optionally a list of ops can be + # given, as with AddWith(path, WithOps(...)). + unwatch path # Stop watching the path. + watchlist n # Assert watchlist length. + + stop # Stop running the script; for debugging. + debug [yes/no] # Enable/disable FSNOTIFY_DEBUG (tests are run in + parallel by default, so -parallel=1 is probably a good + idea). + print [any strings] # Print text to stdout; for debugging. + + touch path + mkdir [-p] dir + ln -s target link # Only ln -s supported. + mkfifo path + mknod dev path + mv src dst + rm [-r] path + chmod mode path # Octal only + sleep time-in-ms + + cat path # Read path (does nothing with the data; just reads it). + echo str >>path # Append "str" to "path". + echo str >path # Truncate "path" and write "str". 
+ + require reason # Skip the test if "reason" is true; "skip" and + skip reason # "require" behave identical; it supports both for + # readability. Possible reasons are: + # + # always Always skip this test. + # symlink Symlinks are supported (requires admin + # permissions on Windows). + # mkfifo Platform doesn't support FIFO named sockets. + # mknod Platform doesn't support device nodes. + + +output +------ +After `Output:` the desired output is given; this is indented by convention, but +that's not required. + +The format of that is: + + # Comment + event path # Comment + + system: + event path + system2: + event path + +Every event is one line, and any whitespace between the event and path are +ignored. The path can optionally be surrounded in ". Anything after a "#" is +ignored. + +Platform-specific tests can be added after GOOS; for example: + + watch / + touch /file + + Output: + # Tested if nothing else matches + create /file + + # Windows-specific test. + windows: + write /file + +You can specify multiple platforms with a comma (e.g. "windows, linux:"). +"kqueue" is a shortcut for all kqueue systems (BSD, macOS). + + +[goon]: https://github.com/arp242/goon +[Vagrant]: https://www.vagrantup.com/ +[integration_test.go]: /integration_test.go diff --git a/src/yangerd/vendor/github.com/fsnotify/fsnotify/LICENSE b/src/yangerd/vendor/github.com/fsnotify/fsnotify/LICENSE new file mode 100644 index 000000000..fb03ade75 --- /dev/null +++ b/src/yangerd/vendor/github.com/fsnotify/fsnotify/LICENSE @@ -0,0 +1,25 @@ +Copyright © 2012 The Go Authors. All rights reserved. +Copyright © fsnotify Authors. All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. 
+* Redistributions in binary form must reproduce the above copyright notice, this + list of conditions and the following disclaimer in the documentation and/or + other materials provided with the distribution. +* Neither the name of Google Inc. nor the names of its contributors may be used + to endorse or promote products derived from this software without specific + prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/src/yangerd/vendor/github.com/fsnotify/fsnotify/README.md b/src/yangerd/vendor/github.com/fsnotify/fsnotify/README.md new file mode 100644 index 000000000..1f4eb583d --- /dev/null +++ b/src/yangerd/vendor/github.com/fsnotify/fsnotify/README.md @@ -0,0 +1,182 @@ +fsnotify is a Go library to provide cross-platform filesystem notifications on +Windows, Linux, macOS, BSD, and illumos. 
+ +Go 1.17 or newer is required; the full documentation is at +https://pkg.go.dev/github.com/fsnotify/fsnotify + +--- + +Platform support: + +| Backend | OS | Status | +| :-------------------- | :--------- | :------------------------------------------------------------------------ | +| inotify | Linux | Supported | +| kqueue | BSD, macOS | Supported | +| ReadDirectoryChangesW | Windows | Supported | +| FEN | illumos | Supported | +| fanotify | Linux 5.9+ | [Not yet](https://github.com/fsnotify/fsnotify/issues/114) | +| FSEvents | macOS | [Needs support in x/sys/unix][fsevents] | +| USN Journals | Windows | [Needs support in x/sys/windows][usn] | +| Polling | *All* | [Not yet](https://github.com/fsnotify/fsnotify/issues/9) | + +Linux and illumos should include Android and Solaris, but these are currently +untested. + +[fsevents]: https://github.com/fsnotify/fsnotify/issues/11#issuecomment-1279133120 +[usn]: https://github.com/fsnotify/fsnotify/issues/53#issuecomment-1279829847 + +Usage +----- +A basic example: + +```go +package main + +import ( + "log" + + "github.com/fsnotify/fsnotify" +) + +func main() { + // Create new watcher. + watcher, err := fsnotify.NewWatcher() + if err != nil { + log.Fatal(err) + } + defer watcher.Close() + + // Start listening for events. + go func() { + for { + select { + case event, ok := <-watcher.Events: + if !ok { + return + } + log.Println("event:", event) + if event.Has(fsnotify.Write) { + log.Println("modified file:", event.Name) + } + case err, ok := <-watcher.Errors: + if !ok { + return + } + log.Println("error:", err) + } + } + }() + + // Add a path. + err = watcher.Add("/tmp") + if err != nil { + log.Fatal(err) + } + + // Block main goroutine forever. 
+ <-make(chan struct{}) +} +``` + +Some more examples can be found in [cmd/fsnotify](cmd/fsnotify), which can be +run with: + + % go run ./cmd/fsnotify + +Further detailed documentation can be found in godoc: +https://pkg.go.dev/github.com/fsnotify/fsnotify + +FAQ +--- +### Will a file still be watched when it's moved to another directory? +No, not unless you are watching the location it was moved to. + +### Are subdirectories watched? +No, you must add watches for any directory you want to watch (a recursive +watcher is on the roadmap: [#18]). + +[#18]: https://github.com/fsnotify/fsnotify/issues/18 + +### Do I have to watch the Error and Event channels in a goroutine? +Yes. You can read both channels in the same goroutine using `select` (you don't +need a separate goroutine for both channels; see the example). + +### Why don't notifications work with NFS, SMB, FUSE, /proc, or /sys? +fsnotify requires support from underlying OS to work. The current NFS and SMB +protocols does not provide network level support for file notifications, and +neither do the /proc and /sys virtual filesystems. + +This could be fixed with a polling watcher ([#9]), but it's not yet implemented. + +[#9]: https://github.com/fsnotify/fsnotify/issues/9 + +### Why do I get many Chmod events? +Some programs may generate a lot of attribute changes; for example Spotlight on +macOS, anti-virus programs, backup applications, and some others are known to do +this. As a rule, it's typically best to ignore Chmod events. They're often not +useful, and tend to cause problems. + +Spotlight indexing on macOS can result in multiple events (see [#15]). A +temporary workaround is to add your folder(s) to the *Spotlight Privacy +settings* until we have a native FSEvents implementation (see [#11]). 
+ +[#11]: https://github.com/fsnotify/fsnotify/issues/11 +[#15]: https://github.com/fsnotify/fsnotify/issues/15 + +### Watching a file doesn't work well +Watching individual files (rather than directories) is generally not recommended +as many programs (especially editors) update files atomically: it will write to +a temporary file which is then moved to to destination, overwriting the original +(or some variant thereof). The watcher on the original file is now lost, as that +no longer exists. + +The upshot of this is that a power failure or crash won't leave a half-written +file. + +Watch the parent directory and use `Event.Name` to filter out files you're not +interested in. There is an example of this in `cmd/fsnotify/file.go`. + +Platform-specific notes +----------------------- +### Linux +When a file is removed a REMOVE event won't be emitted until all file +descriptors are closed; it will emit a CHMOD instead: + + fp := os.Open("file") + os.Remove("file") // CHMOD + fp.Close() // REMOVE + +This is the event that inotify sends, so not much can be changed about this. + +The `fs.inotify.max_user_watches` sysctl variable specifies the upper limit for +the number of watches per user, and `fs.inotify.max_user_instances` specifies +the maximum number of inotify instances per user. Every Watcher you create is an +"instance", and every path you add is a "watch". 
+ +These are also exposed in `/proc` as `/proc/sys/fs/inotify/max_user_watches` and +`/proc/sys/fs/inotify/max_user_instances` + +To increase them you can use `sysctl` or write the value to proc file: + + # The default values on Linux 5.18 + sysctl fs.inotify.max_user_watches=124983 + sysctl fs.inotify.max_user_instances=128 + +To make the changes persist on reboot edit `/etc/sysctl.conf` or +`/usr/lib/sysctl.d/50-default.conf` (details differ per Linux distro; check your +distro's documentation): + + fs.inotify.max_user_watches=124983 + fs.inotify.max_user_instances=128 + +Reaching the limit will result in a "no space left on device" or "too many open +files" error. + +### kqueue (macOS, all BSD systems) +kqueue requires opening a file descriptor for every file that's being watched; +so if you're watching a directory with five files then that's six file +descriptors. You will run in to your system's "max open files" limit faster on +these platforms. + +The sysctl variables `kern.maxfiles` and `kern.maxfilesperproc` can be used to +control the maximum number of open files. diff --git a/src/yangerd/vendor/github.com/fsnotify/fsnotify/backend_fen.go b/src/yangerd/vendor/github.com/fsnotify/fsnotify/backend_fen.go new file mode 100644 index 000000000..57fc69284 --- /dev/null +++ b/src/yangerd/vendor/github.com/fsnotify/fsnotify/backend_fen.go @@ -0,0 +1,467 @@ +//go:build solaris + +// FEN backend for illumos (supported) and Solaris (untested, but should work). +// +// See port_create(3c) etc. for docs. 
https://www.illumos.org/man/3C/port_create + +package fsnotify + +import ( + "errors" + "fmt" + "io/fs" + "os" + "path/filepath" + "sync" + "time" + + "github.com/fsnotify/fsnotify/internal" + "golang.org/x/sys/unix" +) + +type fen struct { + *shared + Events chan Event + Errors chan error + + mu sync.Mutex + port *unix.EventPort + dirs map[string]Op // Explicitly watched directories + watches map[string]Op // Explicitly watched non-directories +} + +var defaultBufferSize = 0 + +func newBackend(ev chan Event, errs chan error) (backend, error) { + w := &fen{ + shared: newShared(ev, errs), + Events: ev, + Errors: errs, + dirs: make(map[string]Op), + watches: make(map[string]Op), + } + + var err error + w.port, err = unix.NewEventPort() + if err != nil { + return nil, fmt.Errorf("fsnotify.NewWatcher: %w", err) + } + + go w.readEvents() + return w, nil +} + +func (w *fen) Close() error { + if w.shared.close() { + return nil + } + return w.port.Close() +} + +func (w *fen) Add(name string) error { return w.AddWith(name) } + +func (w *fen) AddWith(name string, opts ...addOpt) error { + if w.isClosed() { + return ErrClosed + } + if debug { + fmt.Fprintf(os.Stderr, "FSNOTIFY_DEBUG: %s AddWith(%q)\n", + time.Now().Format("15:04:05.000000000"), name) + } + + with := getOptions(opts...) + if !w.xSupports(with.op) { + return fmt.Errorf("%w: %s", xErrUnsupported, with.op) + } + + // Currently we resolve symlinks that were explicitly requested to be + // watched. Otherwise we would use LStat here. + stat, err := os.Stat(name) + if err != nil { + return err + } + + // Associate all files in the directory. 
+ if stat.IsDir() { + err := w.handleDirectory(name, stat, true, w.associateFile) + if err != nil { + return err + } + + w.mu.Lock() + w.dirs[name] = with.op + w.mu.Unlock() + return nil + } + + err = w.associateFile(name, stat, true) + if err != nil { + return err + } + + w.mu.Lock() + w.watches[name] = with.op + w.mu.Unlock() + return nil +} + +func (w *fen) Remove(name string) error { + if w.isClosed() { + return nil + } + if !w.port.PathIsWatched(name) { + return fmt.Errorf("%w: %s", ErrNonExistentWatch, name) + } + if debug { + fmt.Fprintf(os.Stderr, "FSNOTIFY_DEBUG: %s Remove(%q)\n", + time.Now().Format("15:04:05.000000000"), name) + } + + // The user has expressed an intent. Immediately remove this name from + // whichever watch list it might be in. If it's not in there the delete + // doesn't cause harm. + w.mu.Lock() + delete(w.watches, name) + delete(w.dirs, name) + w.mu.Unlock() + + stat, err := os.Stat(name) + if err != nil { + return err + } + + // Remove associations for every file in the directory. + if stat.IsDir() { + err := w.handleDirectory(name, stat, false, w.dissociateFile) + if err != nil { + return err + } + return nil + } + + err = w.port.DissociatePath(name) + if err != nil { + return err + } + + return nil +} + +// readEvents contains the main loop that runs in a goroutine watching for events. 
+func (w *fen) readEvents() { + // If this function returns, the watcher has been closed and we can close + // these channels + defer func() { + close(w.Errors) + close(w.Events) + }() + + pevents := make([]unix.PortEvent, 8) + for { + count, err := w.port.Get(pevents, 1, nil) + if err != nil && err != unix.ETIME { + // Interrupted system call (count should be 0) ignore and continue + if errors.Is(err, unix.EINTR) && count == 0 { + continue + } + // Get failed because we called w.Close() + if errors.Is(err, unix.EBADF) && w.isClosed() { + return + } + // There was an error not caused by calling w.Close() + if !w.sendError(fmt.Errorf("port.Get: %w", err)) { + return + } + } + + p := pevents[:count] + for _, pevent := range p { + if pevent.Source != unix.PORT_SOURCE_FILE { + // Event from unexpected source received; should never happen. + if !w.sendError(errors.New("Event from unexpected source received")) { + return + } + continue + } + + if debug { + internal.Debug(pevent.Path, pevent.Events) + } + + err = w.handleEvent(&pevent) + if !w.sendError(err) { + return + } + } + } +} + +func (w *fen) handleDirectory(path string, stat os.FileInfo, follow bool, handler func(string, os.FileInfo, bool) error) error { + files, err := os.ReadDir(path) + if err != nil { + return err + } + + // Handle all children of the directory. + for _, entry := range files { + finfo, err := entry.Info() + if err != nil { + return err + } + err = handler(filepath.Join(path, finfo.Name()), finfo, false) + if err != nil { + return err + } + } + + // And finally handle the directory itself. + return handler(path, stat, follow) +} + +// handleEvent might need to emit more than one fsnotify event if the events +// bitmap matches more than one event type (e.g. 
the file was both modified and +// had the attributes changed between when the association was created and the +// when event was returned) +func (w *fen) handleEvent(event *unix.PortEvent) error { + var ( + events = event.Events + path = event.Path + fmode = event.Cookie.(os.FileMode) + reRegister = true + ) + + w.mu.Lock() + _, watchedDir := w.dirs[path] + _, watchedPath := w.watches[path] + w.mu.Unlock() + isWatched := watchedDir || watchedPath + + if events&unix.FILE_DELETE != 0 { + if !w.sendEvent(Event{Name: path, Op: Remove}) { + return nil + } + reRegister = false + } + if events&unix.FILE_RENAME_FROM != 0 { + if !w.sendEvent(Event{Name: path, Op: Rename}) { + return nil + } + // Don't keep watching the new file name + reRegister = false + } + if events&unix.FILE_RENAME_TO != 0 { + // We don't report a Rename event for this case, because Rename events + // are interpreted as referring to the _old_ name of the file, and in + // this case the event would refer to the new name of the file. This + // type of rename event is not supported by fsnotify. + + // inotify reports a Remove event in this case, so we simulate this + // here. + if !w.sendEvent(Event{Name: path, Op: Remove}) { + return nil + } + // Don't keep watching the file that was removed + reRegister = false + } + + // The file is gone, nothing left to do. + if !reRegister { + if watchedDir { + w.mu.Lock() + delete(w.dirs, path) + w.mu.Unlock() + } + if watchedPath { + w.mu.Lock() + delete(w.watches, path) + w.mu.Unlock() + } + return nil + } + + // If we didn't get a deletion the file still exists and we're going to have + // to watch it again. Let's Stat it now so that we can compare permissions + // and have what we need to continue watching the file + + stat, err := os.Lstat(path) + if err != nil { + // This is unexpected, but we should still emit an event. 
This happens + // most often on "rm -r" of a subdirectory inside a watched directory We + // get a modify event of something happening inside, but by the time we + // get here, the sudirectory is already gone. Clearly we were watching + // this path but now it is gone. Let's tell the user that it was + // removed. + if !w.sendEvent(Event{Name: path, Op: Remove}) { + return nil + } + // Suppress extra write events on removed directories; they are not + // informative and can be confusing. + return nil + } + + // resolve symlinks that were explicitly watched as we would have at Add() + // time. this helps suppress spurious Chmod events on watched symlinks + if isWatched { + stat, err = os.Stat(path) + if err != nil { + // The symlink still exists, but the target is gone. Report the + // Remove similar to above. + if !w.sendEvent(Event{Name: path, Op: Remove}) { + return nil + } + // Don't return the error + } + } + + if events&unix.FILE_MODIFIED != 0 { + if fmode.IsDir() && watchedDir { + if err := w.updateDirectory(path); err != nil { + return err + } + } else { + if !w.sendEvent(Event{Name: path, Op: Write}) { + return nil + } + } + } + if events&unix.FILE_ATTRIB != 0 && stat != nil { + // Only send Chmod if perms changed + if stat.Mode().Perm() != fmode.Perm() { + if !w.sendEvent(Event{Name: path, Op: Chmod}) { + return nil + } + } + } + + if stat != nil { + // If we get here, it means we've hit an event above that requires us to + // continue watching the file or directory + err := w.associateFile(path, stat, isWatched) + if errors.Is(err, fs.ErrNotExist) { + // Path may have been removed since the stat. + err = nil + } + return err + } + return nil +} + +// The directory was modified, so we must find unwatched entities and watch +// them. If something was removed from the directory, nothing will happen, as +// everything else should still be watched. 
+func (w *fen) updateDirectory(path string) error { + files, err := os.ReadDir(path) + if err != nil { + // Directory no longer exists: probably just deleted since we got the + // event. + if errors.Is(err, fs.ErrNotExist) { + return nil + } + return err + } + + for _, entry := range files { + path := filepath.Join(path, entry.Name()) + if w.port.PathIsWatched(path) { + continue + } + + finfo, err := entry.Info() + if err != nil { + return err + } + err = w.associateFile(path, finfo, false) + if errors.Is(err, fs.ErrNotExist) { + // File may have disappeared between getting the dir listing and + // adding the port: that's okay to ignore. + continue + } + if !w.sendError(err) { + return nil + } + if !w.sendEvent(Event{Name: path, Op: Create}) { + return nil + } + } + return nil +} + +func (w *fen) associateFile(path string, stat os.FileInfo, follow bool) error { + if w.isClosed() { + return ErrClosed + } + // This is primarily protecting the call to AssociatePath but it is + // important and intentional that the call to PathIsWatched is also + // protected by this mutex. Without this mutex, AssociatePath has been seen + // to error out that the path is already associated. + w.mu.Lock() + defer w.mu.Unlock() + + if w.port.PathIsWatched(path) { + // Remove the old association in favor of this one If we get ENOENT, + // then while the x/sys/unix wrapper still thought that this path was + // associated, the underlying event port did not. This call will have + // cleared up that discrepancy. The most likely cause is that the event + // has fired but we haven't processed it yet. + err := w.port.DissociatePath(path) + if err != nil && !errors.Is(err, unix.ENOENT) { + return fmt.Errorf("port.DissociatePath(%q): %w", path, err) + } + } + + var events int + if !follow { + // Watch symlinks themselves rather than their targets unless this entry + // is explicitly watched. 
+ events |= unix.FILE_NOFOLLOW + } + if true { // TODO: implement withOps() + events |= unix.FILE_MODIFIED + } + if true { + events |= unix.FILE_ATTRIB + } + err := w.port.AssociatePath(path, stat, events, stat.Mode()) + if err != nil { + return fmt.Errorf("port.AssociatePath(%q): %w", path, err) + } + return nil +} + +func (w *fen) dissociateFile(path string, stat os.FileInfo, unused bool) error { + if !w.port.PathIsWatched(path) { + return nil + } + err := w.port.DissociatePath(path) + if err != nil { + return fmt.Errorf("port.DissociatePath(%q): %w", path, err) + } + return nil +} + +func (w *fen) WatchList() []string { + if w.isClosed() { + return nil + } + + w.mu.Lock() + defer w.mu.Unlock() + + entries := make([]string, 0, len(w.watches)+len(w.dirs)) + for pathname := range w.dirs { + entries = append(entries, pathname) + } + for pathname := range w.watches { + entries = append(entries, pathname) + } + + return entries +} + +func (w *fen) xSupports(op Op) bool { + if op.Has(xUnportableOpen) || op.Has(xUnportableRead) || + op.Has(xUnportableCloseWrite) || op.Has(xUnportableCloseRead) { + return false + } + return true +} diff --git a/src/yangerd/vendor/github.com/fsnotify/fsnotify/backend_inotify.go b/src/yangerd/vendor/github.com/fsnotify/fsnotify/backend_inotify.go new file mode 100644 index 000000000..a36cb89d7 --- /dev/null +++ b/src/yangerd/vendor/github.com/fsnotify/fsnotify/backend_inotify.go @@ -0,0 +1,583 @@ +//go:build linux && !appengine + +package fsnotify + +import ( + "errors" + "fmt" + "io" + "io/fs" + "os" + "path/filepath" + "strings" + "sync" + "time" + "unsafe" + + "github.com/fsnotify/fsnotify/internal" + "golang.org/x/sys/unix" +) + +type inotify struct { + *shared + Events chan Event + Errors chan error + + // Store fd here as os.File.Read() will no longer return on close after + // calling Fd(). 
See: https://github.com/golang/go/issues/26439 + fd int + inotifyFile *os.File + watches *watches + doneResp chan struct{} // Channel to respond to Close + + // Store rename cookies in an array, with the index wrapping to 0. Almost + // all of the time what we get is a MOVED_FROM to set the cookie and the + // next event inotify sends will be MOVED_TO to read it. However, this is + // not guaranteed – as described in inotify(7) – and we may get other events + // between the two MOVED_* events (including other MOVED_* ones). + // + // A second issue is that moving a file outside the watched directory will + // trigger a MOVED_FROM to set the cookie, but we never see the MOVED_TO to + // read and delete it. So just storing it in a map would slowly leak memory. + // + // Doing it like this gives us a simple fast LRU-cache that won't allocate. + // Ten items should be more than enough for our purpose, and a loop over + // such a short array is faster than a map access anyway (not that it hugely + // matters since we're talking about hundreds of ns at the most, but still). + cookies [10]koekje + cookieIndex uint8 + cookiesMu sync.Mutex +} + +type ( + watches struct { + wd map[uint32]*watch // wd → watch + path map[string]uint32 // pathname → wd + } + watch struct { + wd uint32 // Watch descriptor (as returned by the inotify_add_watch() syscall) + flags uint32 // inotify flags of this watch (see inotify(7) for the list of valid flags) + path string // Watch path. + recurse bool // Recursion with ./...? 
+ } + koekje struct { + cookie uint32 + path string + } +) + +func newWatches() *watches { + return &watches{ + wd: make(map[uint32]*watch), + path: make(map[string]uint32), + } +} + +func (w *watches) byPath(path string) *watch { return w.wd[w.path[path]] } +func (w *watches) byWd(wd uint32) *watch { return w.wd[wd] } +func (w *watches) len() int { return len(w.wd) } +func (w *watches) add(ww *watch) { w.wd[ww.wd] = ww; w.path[ww.path] = ww.wd } +func (w *watches) remove(watch *watch) { delete(w.path, watch.path); delete(w.wd, watch.wd) } + +func (w *watches) removePath(path string) ([]uint32, error) { + path, recurse := recursivePath(path) + wd, ok := w.path[path] + if !ok { + return nil, fmt.Errorf("%w: %s", ErrNonExistentWatch, path) + } + + watch := w.wd[wd] + if recurse && !watch.recurse { + return nil, fmt.Errorf("can't use /... with non-recursive watch %q", path) + } + + delete(w.path, path) + delete(w.wd, wd) + if !watch.recurse { + return []uint32{wd}, nil + } + + wds := make([]uint32, 0, 8) + wds = append(wds, wd) + for p, rwd := range w.path { + if strings.HasPrefix(p, path) { + delete(w.path, p) + delete(w.wd, rwd) + wds = append(wds, rwd) + } + } + return wds, nil +} + +func (w *watches) updatePath(path string, f func(*watch) (*watch, error)) error { + var existing *watch + wd, ok := w.path[path] + if ok { + existing = w.wd[wd] + } + + upd, err := f(existing) + if err != nil { + return err + } + if upd != nil { + w.wd[upd.wd] = upd + w.path[upd.path] = upd.wd + + if upd.wd != wd { + delete(w.wd, wd) + } + } + + return nil +} + +var defaultBufferSize = 0 + +func newBackend(ev chan Event, errs chan error) (backend, error) { + // Need to set nonblocking mode for SetDeadline to work, otherwise blocking + // I/O operations won't terminate on close. 
+ fd, errno := unix.InotifyInit1(unix.IN_CLOEXEC | unix.IN_NONBLOCK) + if fd == -1 { + return nil, errno + } + + w := &inotify{ + shared: newShared(ev, errs), + Events: ev, + Errors: errs, + fd: fd, + inotifyFile: os.NewFile(uintptr(fd), ""), + watches: newWatches(), + doneResp: make(chan struct{}), + } + + go w.readEvents() + return w, nil +} + +func (w *inotify) Close() error { + if w.shared.close() { + return nil + } + + // Causes any blocking reads to return with an error, provided the file + // still supports deadline operations. + err := w.inotifyFile.Close() + if err != nil { + return err + } + + <-w.doneResp // Wait for readEvents() to finish. + return nil +} + +func (w *inotify) Add(name string) error { return w.AddWith(name) } + +func (w *inotify) AddWith(path string, opts ...addOpt) error { + if w.isClosed() { + return ErrClosed + } + if debug { + fmt.Fprintf(os.Stderr, "FSNOTIFY_DEBUG: %s AddWith(%q)\n", + time.Now().Format("15:04:05.000000000"), path) + } + + with := getOptions(opts...) 
+ if !w.xSupports(with.op) { + return fmt.Errorf("%w: %s", xErrUnsupported, with.op) + } + + add := func(path string, with withOpts, recurse bool) error { + var flags uint32 + if with.noFollow { + flags |= unix.IN_DONT_FOLLOW + } + if with.op.Has(Create) { + flags |= unix.IN_CREATE + } + if with.op.Has(Write) { + flags |= unix.IN_MODIFY + } + if with.op.Has(Remove) { + flags |= unix.IN_DELETE | unix.IN_DELETE_SELF + } + if with.op.Has(Rename) { + flags |= unix.IN_MOVED_TO | unix.IN_MOVED_FROM | unix.IN_MOVE_SELF + } + if with.op.Has(Chmod) { + flags |= unix.IN_ATTRIB + } + if with.op.Has(xUnportableOpen) { + flags |= unix.IN_OPEN + } + if with.op.Has(xUnportableRead) { + flags |= unix.IN_ACCESS + } + if with.op.Has(xUnportableCloseWrite) { + flags |= unix.IN_CLOSE_WRITE + } + if with.op.Has(xUnportableCloseRead) { + flags |= unix.IN_CLOSE_NOWRITE + } + return w.register(path, flags, recurse) + } + + w.mu.Lock() + defer w.mu.Unlock() + path, recurse := recursivePath(path) + if recurse { + return filepath.WalkDir(path, func(root string, d fs.DirEntry, err error) error { + if err != nil { + return err + } + if !d.IsDir() { + if root == path { + return fmt.Errorf("fsnotify: not a directory: %q", path) + } + return nil + } + + // Send a Create event when adding new directory from a recursive + // watch; this is for "mkdir -p one/two/three". Usually all those + // directories will be created before we can set up watchers on the + // subdirectories, so only "one" would be sent as a Create event and + // not "one/two" and "one/two/three" (inotifywait -r has the same + // problem). 
+ if with.sendCreate && root != path { + w.sendEvent(Event{Name: root, Op: Create}) + } + + return add(root, with, true) + }) + } + + return add(path, with, false) +} + +func (w *inotify) register(path string, flags uint32, recurse bool) error { + return w.watches.updatePath(path, func(existing *watch) (*watch, error) { + if existing != nil { + flags |= existing.flags | unix.IN_MASK_ADD + } + + wd, err := unix.InotifyAddWatch(w.fd, path, flags) + if wd == -1 { + return nil, err + } + + if e, ok := w.watches.wd[uint32(wd)]; ok { + return e, nil + } + + if existing == nil { + return &watch{ + wd: uint32(wd), + path: path, + flags: flags, + recurse: recurse, + }, nil + } + + existing.wd = uint32(wd) + existing.flags = flags + return existing, nil + }) +} + +func (w *inotify) Remove(name string) error { + if w.isClosed() { + return nil + } + if debug { + fmt.Fprintf(os.Stderr, "FSNOTIFY_DEBUG: %s Remove(%q)\n", + time.Now().Format("15:04:05.000000000"), name) + } + + w.mu.Lock() + defer w.mu.Unlock() + return w.remove(filepath.Clean(name)) +} + +func (w *inotify) remove(name string) error { + wds, err := w.watches.removePath(name) + if err != nil { + return err + } + + for _, wd := range wds { + _, err := unix.InotifyRmWatch(w.fd, wd) + if err != nil { + // TODO: Perhaps it's not helpful to return an error here in every + // case; the only two possible errors are: + // + // EBADF, which happens when w.fd is not a valid file descriptor of + // any kind. + // + // EINVAL, which is when fd is not an inotify descriptor or wd is + // not a valid watch descriptor. Watch descriptors are invalidated + // when they are removed explicitly or implicitly; explicitly by + // inotify_rm_watch, implicitly when the file they are watching is + // deleted. 
+ return err + } + } + return nil +} + +func (w *inotify) WatchList() []string { + if w.isClosed() { + return nil + } + + w.mu.Lock() + defer w.mu.Unlock() + entries := make([]string, 0, w.watches.len()) + for pathname := range w.watches.path { + entries = append(entries, pathname) + } + return entries +} + +// readEvents reads from the inotify file descriptor, converts the +// received events into Event objects and sends them via the Events channel +func (w *inotify) readEvents() { + defer func() { + close(w.doneResp) + close(w.Errors) + close(w.Events) + }() + + var buf [unix.SizeofInotifyEvent * 4096]byte // Buffer for a maximum of 4096 raw events + for { + if w.isClosed() { + return + } + + n, err := w.inotifyFile.Read(buf[:]) + if err != nil { + if errors.Is(err, os.ErrClosed) { + return + } + if !w.sendError(err) { + return + } + continue + } + + if n < unix.SizeofInotifyEvent { + err := errors.New("notify: short read in readEvents()") // Read was too short. + if n == 0 { + err = io.EOF // If EOF is received. This should really never happen. + } + if !w.sendError(err) { + return + } + continue + } + + // We don't know how many events we just read into the buffer While the + // offset points to at least one whole event. + var offset uint32 + for offset <= uint32(n-unix.SizeofInotifyEvent) { + // Point to the event in the buffer. 
+ inEvent := (*unix.InotifyEvent)(unsafe.Pointer(&buf[offset])) + + if inEvent.Mask&unix.IN_Q_OVERFLOW != 0 { + if !w.sendError(ErrEventOverflow) { + return + } + } + + ev, ok := w.handleEvent(inEvent, &buf, offset) + if !ok { + return + } + if !w.sendEvent(ev) { + return + } + + // Move to the next event in the buffer + offset += unix.SizeofInotifyEvent + inEvent.Len + } + } +} + +func (w *inotify) handleEvent(inEvent *unix.InotifyEvent, buf *[65536]byte, offset uint32) (Event, bool) { + w.mu.Lock() + defer w.mu.Unlock() + + /// If the event happened to the watched directory or the watched file, the + /// kernel doesn't append the filename to the event, but we would like to + /// always fill the the "Name" field with a valid filename. We retrieve the + /// path of the watch from the "paths" map. + /// + /// Can be nil if Remove() was called in another goroutine for this path + /// inbetween reading the events from the kernel and reading the internal + /// state. Not much we can do about it, so just skip. See #616. + watch := w.watches.byWd(uint32(inEvent.Wd)) + if watch == nil { + return Event{}, true + } + + var ( + name = watch.path + nameLen = uint32(inEvent.Len) + ) + if nameLen > 0 { + /// Point "bytes" at the first byte of the filename + bb := *buf + bytes := (*[unix.PathMax]byte)(unsafe.Pointer(&bb[offset+unix.SizeofInotifyEvent]))[:nameLen:nameLen] + /// The filename is padded with NULL bytes. TrimRight() gets rid of those. + name += "/" + strings.TrimRight(string(bytes[0:nameLen]), "\x00") + } + + if debug { + internal.Debug(name, inEvent.Mask, inEvent.Cookie) + } + + if inEvent.Mask&unix.IN_IGNORED != 0 || inEvent.Mask&unix.IN_UNMOUNT != 0 { + w.watches.remove(watch) + return Event{}, true + } + + // inotify will automatically remove the watch on deletes; just need + // to clean our state here. 
+ if inEvent.Mask&unix.IN_DELETE_SELF == unix.IN_DELETE_SELF { + w.watches.remove(watch) + } + + // We can't really update the state when a watched path is moved; only + // IN_MOVE_SELF is sent and not IN_MOVED_{FROM,TO}. So remove the watch. + if inEvent.Mask&unix.IN_MOVE_SELF == unix.IN_MOVE_SELF { + if watch.recurse { // Do nothing + return Event{}, true + } + + err := w.remove(watch.path) + if err != nil && !errors.Is(err, ErrNonExistentWatch) { + if !w.sendError(err) { + return Event{}, false + } + } + } + + /// Skip if we're watching both this path and the parent; the parent will + /// already send a delete so no need to do it twice. + if inEvent.Mask&unix.IN_DELETE_SELF != 0 { + _, ok := w.watches.path[filepath.Dir(watch.path)] + if ok { + return Event{}, true + } + } + + ev := w.newEvent(name, inEvent.Mask, inEvent.Cookie) + // Need to update watch path for recurse. + if watch.recurse { + isDir := inEvent.Mask&unix.IN_ISDIR == unix.IN_ISDIR + /// New directory created: set up watch on it. + if isDir && ev.Has(Create) { + err := w.register(ev.Name, watch.flags, true) + if !w.sendError(err) { + return Event{}, false + } + + // This was a directory rename, so we need to update all the + // children. + // + // TODO: this is of course pretty slow; we should use a better data + // structure for storing all of this, e.g. store children in the + // watch. I have some code for this in my kqueue refactor we can use + // in the future. For now I'm okay with this as it's not publicly + // available. Correctness first, performance second. 
+ if ev.renamedFrom != "" { + for k, ww := range w.watches.wd { + if k == watch.wd || ww.path == ev.Name { + continue + } + if strings.HasPrefix(ww.path, ev.renamedFrom) { + ww.path = strings.Replace(ww.path, ev.renamedFrom, ev.Name, 1) + w.watches.wd[k] = ww + } + } + } + } + } + + return ev, true +} + +func (w *inotify) isRecursive(path string) bool { + ww := w.watches.byPath(path) + if ww == nil { // path could be a file, so also check the Dir. + ww = w.watches.byPath(filepath.Dir(path)) + } + return ww != nil && ww.recurse +} + +func (w *inotify) newEvent(name string, mask, cookie uint32) Event { + e := Event{Name: name} + if mask&unix.IN_CREATE == unix.IN_CREATE || mask&unix.IN_MOVED_TO == unix.IN_MOVED_TO { + e.Op |= Create + } + if mask&unix.IN_DELETE_SELF == unix.IN_DELETE_SELF || mask&unix.IN_DELETE == unix.IN_DELETE { + e.Op |= Remove + } + if mask&unix.IN_MODIFY == unix.IN_MODIFY { + e.Op |= Write + } + if mask&unix.IN_OPEN == unix.IN_OPEN { + e.Op |= xUnportableOpen + } + if mask&unix.IN_ACCESS == unix.IN_ACCESS { + e.Op |= xUnportableRead + } + if mask&unix.IN_CLOSE_WRITE == unix.IN_CLOSE_WRITE { + e.Op |= xUnportableCloseWrite + } + if mask&unix.IN_CLOSE_NOWRITE == unix.IN_CLOSE_NOWRITE { + e.Op |= xUnportableCloseRead + } + if mask&unix.IN_MOVE_SELF == unix.IN_MOVE_SELF || mask&unix.IN_MOVED_FROM == unix.IN_MOVED_FROM { + e.Op |= Rename + } + if mask&unix.IN_ATTRIB == unix.IN_ATTRIB { + e.Op |= Chmod + } + + if cookie != 0 { + if mask&unix.IN_MOVED_FROM == unix.IN_MOVED_FROM { + w.cookiesMu.Lock() + w.cookies[w.cookieIndex] = koekje{cookie: cookie, path: e.Name} + w.cookieIndex++ + if w.cookieIndex > 9 { + w.cookieIndex = 0 + } + w.cookiesMu.Unlock() + } else if mask&unix.IN_MOVED_TO == unix.IN_MOVED_TO { + w.cookiesMu.Lock() + var prev string + for _, c := range w.cookies { + if c.cookie == cookie { + prev = c.path + break + } + } + w.cookiesMu.Unlock() + e.renamedFrom = prev + } + } + return e +} + +func (w *inotify) xSupports(op Op) bool { + 
return true // Supports everything. +} + +func (w *inotify) state() { + w.mu.Lock() + defer w.mu.Unlock() + for wd, ww := range w.watches.wd { + fmt.Fprintf(os.Stderr, "%4d: recurse=%t %q\n", wd, ww.recurse, ww.path) + } +} diff --git a/src/yangerd/vendor/github.com/fsnotify/fsnotify/backend_kqueue.go b/src/yangerd/vendor/github.com/fsnotify/fsnotify/backend_kqueue.go new file mode 100644 index 000000000..340aeec06 --- /dev/null +++ b/src/yangerd/vendor/github.com/fsnotify/fsnotify/backend_kqueue.go @@ -0,0 +1,705 @@ +//go:build freebsd || openbsd || netbsd || dragonfly || darwin + +package fsnotify + +import ( + "errors" + "fmt" + "os" + "path/filepath" + "runtime" + "sync" + "time" + + "github.com/fsnotify/fsnotify/internal" + "golang.org/x/sys/unix" +) + +type kqueue struct { + *shared + Events chan Event + Errors chan error + + kq int // File descriptor (as returned by the kqueue() syscall). + closepipe [2]int // Pipe used for closing kq. + watches *watches +} + +type ( + watches struct { + mu sync.RWMutex + wd map[int]watch // wd → watch + path map[string]int // pathname → wd + byDir map[string]map[int]struct{} // dirname(path) → wd + seen map[string]struct{} // Keep track of if we know this file exists. + byUser map[string]struct{} // Watches added with Watcher.Add() + } + watch struct { + wd int + name string + linkName string // In case of links; name is the target, and this is the link. 
+ isDir bool + dirFlags uint32 + } +) + +func newWatches() *watches { + return &watches{ + wd: make(map[int]watch), + path: make(map[string]int), + byDir: make(map[string]map[int]struct{}), + seen: make(map[string]struct{}), + byUser: make(map[string]struct{}), + } +} + +func (w *watches) listPaths(userOnly bool) []string { + w.mu.RLock() + defer w.mu.RUnlock() + + if userOnly { + l := make([]string, 0, len(w.byUser)) + for p := range w.byUser { + l = append(l, p) + } + return l + } + + l := make([]string, 0, len(w.path)) + for p := range w.path { + l = append(l, p) + } + return l +} + +func (w *watches) watchesInDir(path string) []string { + w.mu.RLock() + defer w.mu.RUnlock() + + l := make([]string, 0, 4) + for fd := range w.byDir[path] { + info := w.wd[fd] + if _, ok := w.byUser[info.name]; !ok { + l = append(l, info.name) + } + } + return l +} + +// Mark path as added by the user. +func (w *watches) addUserWatch(path string) { + w.mu.Lock() + defer w.mu.Unlock() + w.byUser[path] = struct{}{} +} + +func (w *watches) addLink(path string, fd int) { + w.mu.Lock() + defer w.mu.Unlock() + + w.path[path] = fd + w.seen[path] = struct{}{} +} + +func (w *watches) add(path, linkPath string, fd int, isDir bool) { + w.mu.Lock() + defer w.mu.Unlock() + + w.path[path] = fd + w.wd[fd] = watch{wd: fd, name: path, linkName: linkPath, isDir: isDir} + + parent := filepath.Dir(path) + byDir, ok := w.byDir[parent] + if !ok { + byDir = make(map[int]struct{}, 1) + w.byDir[parent] = byDir + } + byDir[fd] = struct{}{} +} + +func (w *watches) byWd(fd int) (watch, bool) { + w.mu.RLock() + defer w.mu.RUnlock() + info, ok := w.wd[fd] + return info, ok +} + +func (w *watches) byPath(path string) (watch, bool) { + w.mu.RLock() + defer w.mu.RUnlock() + info, ok := w.wd[w.path[path]] + return info, ok +} + +func (w *watches) updateDirFlags(path string, flags uint32) bool { + w.mu.Lock() + defer w.mu.Unlock() + + fd, ok := w.path[path] + if !ok { // Already deleted: don't re-set it here. 
+ return false + } + info := w.wd[fd] + info.dirFlags = flags + w.wd[fd] = info + return true +} + +func (w *watches) remove(fd int, path string) bool { + w.mu.Lock() + defer w.mu.Unlock() + + isDir := w.wd[fd].isDir + delete(w.path, path) + delete(w.byUser, path) + + parent := filepath.Dir(path) + delete(w.byDir[parent], fd) + + if len(w.byDir[parent]) == 0 { + delete(w.byDir, parent) + } + + delete(w.wd, fd) + delete(w.seen, path) + return isDir +} + +func (w *watches) markSeen(path string, exists bool) { + w.mu.Lock() + defer w.mu.Unlock() + if exists { + w.seen[path] = struct{}{} + } else { + delete(w.seen, path) + } +} + +func (w *watches) seenBefore(path string) bool { + w.mu.RLock() + defer w.mu.RUnlock() + _, ok := w.seen[path] + return ok +} + +var defaultBufferSize = 0 + +func newBackend(ev chan Event, errs chan error) (backend, error) { + kq, closepipe, err := newKqueue() + if err != nil { + return nil, err + } + + w := &kqueue{ + shared: newShared(ev, errs), + Events: ev, + Errors: errs, + kq: kq, + closepipe: closepipe, + watches: newWatches(), + } + + go w.readEvents() + return w, nil +} + +// newKqueue creates a new kernel event queue and returns a descriptor. +// +// This registers a new event on closepipe, which will trigger an event when +// it's closed. This way we can use kevent() without timeout/polling; without +// the closepipe, it would block forever and we wouldn't be able to stop it at +// all. +func newKqueue() (kq int, closepipe [2]int, err error) { + kq, err = unix.Kqueue() + if err != nil { + return kq, closepipe, err + } + + // Register the close pipe. + err = unix.Pipe(closepipe[:]) + if err != nil { + unix.Close(kq) + return kq, closepipe, err + } + unix.CloseOnExec(closepipe[0]) + unix.CloseOnExec(closepipe[1]) + + // Register changes to listen on the closepipe. + changes := make([]unix.Kevent_t, 1) + // SetKevent converts int to the platform-specific types. 
+ unix.SetKevent(&changes[0], closepipe[0], unix.EVFILT_READ, + unix.EV_ADD|unix.EV_ENABLE|unix.EV_ONESHOT) + + ok, err := unix.Kevent(kq, changes, nil, nil) + if ok == -1 { + unix.Close(kq) + unix.Close(closepipe[0]) + unix.Close(closepipe[1]) + return kq, closepipe, err + } + return kq, closepipe, nil +} + +func (w *kqueue) Close() error { + if w.shared.close() { + return nil + } + + pathsToRemove := w.watches.listPaths(false) + for _, name := range pathsToRemove { + w.Remove(name) + } + + unix.Close(w.closepipe[1]) // Send "quit" message to readEvents + return nil +} + +func (w *kqueue) Add(name string) error { return w.AddWith(name) } + +func (w *kqueue) AddWith(name string, opts ...addOpt) error { + if debug { + fmt.Fprintf(os.Stderr, "FSNOTIFY_DEBUG: %s AddWith(%q)\n", + time.Now().Format("15:04:05.000000000"), name) + } + + with := getOptions(opts...) + if !w.xSupports(with.op) { + return fmt.Errorf("%w: %s", xErrUnsupported, with.op) + } + + _, err := w.addWatch(name, noteAllEvents, false) + if err != nil { + return err + } + w.watches.addUserWatch(name) + return nil +} + +func (w *kqueue) Remove(name string) error { + if debug { + fmt.Fprintf(os.Stderr, "FSNOTIFY_DEBUG: %s Remove(%q)\n", + time.Now().Format("15:04:05.000000000"), name) + } + return w.remove(name, true) +} + +func (w *kqueue) remove(name string, unwatchFiles bool) error { + if w.isClosed() { + return nil + } + + name = filepath.Clean(name) + info, ok := w.watches.byPath(name) + if !ok { + return fmt.Errorf("%w: %s", ErrNonExistentWatch, name) + } + + err := w.register([]int{info.wd}, unix.EV_DELETE, 0) + if err != nil { + return err + } + + unix.Close(info.wd) + + isDir := w.watches.remove(info.wd, name) + + // Find all watched paths that are in this directory that are not external. 
+ if unwatchFiles && isDir { + pathsToRemove := w.watches.watchesInDir(name) + for _, name := range pathsToRemove { + // Since these are internal, not much sense in propagating error to + // the user, as that will just confuse them with an error about a + // path they did not explicitly watch themselves. + w.Remove(name) + } + } + return nil +} + +func (w *kqueue) WatchList() []string { + if w.isClosed() { + return nil + } + return w.watches.listPaths(true) +} + +// Watch all events (except NOTE_EXTEND, NOTE_LINK, NOTE_REVOKE) +const noteAllEvents = unix.NOTE_DELETE | unix.NOTE_WRITE | unix.NOTE_ATTRIB | unix.NOTE_RENAME + +// addWatch adds name to the watched file set; the flags are interpreted as +// described in kevent(2). +// +// Returns the real path to the file which was added, with symlinks resolved. +func (w *kqueue) addWatch(name string, flags uint32, listDir bool) (string, error) { + if w.isClosed() { + return "", ErrClosed + } + + name = filepath.Clean(name) + + info, alreadyWatching := w.watches.byPath(name) + if !alreadyWatching { + fi, err := os.Lstat(name) + if err != nil { + return "", err + } + + // Don't watch sockets or named pipes. + if (fi.Mode()&os.ModeSocket == os.ModeSocket) || (fi.Mode()&os.ModeNamedPipe == os.ModeNamedPipe) { + return "", nil + } + + // Follow symlinks, but only for paths added with Add(), and not paths + // we're adding from internalWatch from a listdir. + if !listDir && fi.Mode()&os.ModeSymlink == os.ModeSymlink { + link, err := os.Readlink(name) + if err != nil { + return "", err + } + if !filepath.IsAbs(link) { + link = filepath.Join(filepath.Dir(name), link) + } + + _, alreadyWatching = w.watches.byPath(link) + if alreadyWatching { + // Add to watches so we don't get spurious Create events later + // on when we diff the directories. 
+ w.watches.addLink(name, 0) + return link, nil + } + + info.linkName = name + name = link + fi, err = os.Lstat(name) + if err != nil { + return "", err + } + } + + // Retry on EINTR; open() can return EINTR in practice on macOS. + // See #354, and Go issues 11180 and 39237. + for { + info.wd, err = unix.Open(name, openMode, 0) + if err == nil { + break + } + if errors.Is(err, unix.EINTR) { + continue + } + return "", err + } + + info.isDir = fi.IsDir() + } + + err := w.register([]int{info.wd}, unix.EV_ADD|unix.EV_CLEAR|unix.EV_ENABLE, flags) + if err != nil { + unix.Close(info.wd) + return "", err + } + + if !alreadyWatching { + w.watches.add(name, info.linkName, info.wd, info.isDir) + } + + // Watch the directory if it has not been watched before, or if it was + // watched before, but perhaps only a NOTE_DELETE (watchDirectoryFiles) + if info.isDir { + watchDir := (flags&unix.NOTE_WRITE) == unix.NOTE_WRITE && + (!alreadyWatching || (info.dirFlags&unix.NOTE_WRITE) != unix.NOTE_WRITE) + if !w.watches.updateDirFlags(name, flags) { + return "", nil + } + + if watchDir { + d := name + if info.linkName != "" { + d = info.linkName + } + if err := w.watchDirectoryFiles(d); err != nil { + return "", err + } + } + } + return name, nil +} + +// readEvents reads from kqueue and converts the received kevents into +// Event values that it sends down the Events channel. +func (w *kqueue) readEvents() { + defer func() { + close(w.Events) + close(w.Errors) + _ = unix.Close(w.kq) + unix.Close(w.closepipe[0]) + }() + + eventBuffer := make([]unix.Kevent_t, 10) + for { + kevents, err := w.read(eventBuffer) + // EINTR is okay, the syscall was interrupted before timeout expired. 
+ if err != nil && err != unix.EINTR { + if !w.sendError(fmt.Errorf("fsnotify.readEvents: %w", err)) { + return + } + } + + for _, kevent := range kevents { + var ( + wd = int(kevent.Ident) + mask = uint32(kevent.Fflags) + ) + + // Shut down the loop when the pipe is closed, but only after all + // other events have been processed. + if wd == w.closepipe[0] { + return + } + + path, ok := w.watches.byWd(wd) + if debug { + internal.Debug(path.name, &kevent) + } + + // On macOS it seems that sometimes an event with Ident=0 is + // delivered, and no other flags/information beyond that, even + // though we never saw such a file descriptor. For example in + // TestWatchSymlink/277 (usually at the end, but sometimes sooner): + // + // fmt.Printf("READ: %2d %#v\n", kevent.Ident, kevent) + // unix.Kevent_t{Ident:0x2a, Filter:-4, Flags:0x25, Fflags:0x2, Data:0, Udata:(*uint8)(nil)} + // unix.Kevent_t{Ident:0x0, Filter:-4, Flags:0x25, Fflags:0x2, Data:0, Udata:(*uint8)(nil)} + // + // The first is a normal event, the second with Ident 0. No error + // flag, no data, no ... nothing. + // + // I read a bit through bsd/kern_event.c from the xnu source, but I + // don't really see an obvious location where this is triggered – + // this doesn't seem intentional, but idk... + // + // Technically fd 0 is a valid descriptor, so only skip it if + // there's no path, and if we're on macOS. + if !ok && kevent.Ident == 0 && runtime.GOOS == "darwin" { + continue + } + + event := w.newEvent(path.name, path.linkName, mask) + + if event.Has(Rename) || event.Has(Remove) { + w.remove(event.Name, false) + w.watches.markSeen(event.Name, false) + } + + if path.isDir && event.Has(Write) && !event.Has(Remove) { + w.dirChange(event.Name) + } else if !w.sendEvent(event) { + return + } + + if event.Has(Remove) { + // Look for a file that may have overwritten this; for example, + // mv f1 f2 will delete f2, then create f2. 
+ if path.isDir { + fileDir := filepath.Clean(event.Name) + _, found := w.watches.byPath(fileDir) + if found { + // TODO: this branch is never triggered in any test. + // Added in d6220df (2012). + // isDir check added in 8611c35 (2016): https://github.com/fsnotify/fsnotify/pull/111 + // + // I don't really get how this can be triggered either. + // And it wasn't triggered in the patch that added it, + // either. + // + // Original also had a comment: + // make sure the directory exists before we watch for + // changes. When we do a recursive watch and perform + // rm -rf, the parent directory might have gone + // missing, ignore the missing directory and let the + // upcoming delete event remove the watch from the + // parent directory. + err := w.dirChange(fileDir) + if !w.sendError(err) { + return + } + } + } else { + path := filepath.Clean(event.Name) + if fi, err := os.Lstat(path); err == nil { + err := w.sendCreateIfNew(path, fi) + if !w.sendError(err) { + return + } + } + } + } + } + } +} + +// newEvent returns an platform-independent Event based on kqueue Fflags. +func (w *kqueue) newEvent(name, linkName string, mask uint32) Event { + e := Event{Name: name} + if linkName != "" { + // If the user watched "/path/link" then emit events as "/path/link" + // rather than "/path/target". + e.Name = linkName + } + + if mask&unix.NOTE_DELETE == unix.NOTE_DELETE { + e.Op |= Remove + } + if mask&unix.NOTE_WRITE == unix.NOTE_WRITE { + e.Op |= Write + } + if mask&unix.NOTE_RENAME == unix.NOTE_RENAME { + e.Op |= Rename + } + if mask&unix.NOTE_ATTRIB == unix.NOTE_ATTRIB { + e.Op |= Chmod + } + // No point sending a write and delete event at the same time: if it's gone, + // then it's gone. 
+ if e.Op.Has(Write) && e.Op.Has(Remove) { + e.Op &^= Write + } + return e +} + +// watchDirectoryFiles to mimic inotify when adding a watch on a directory +func (w *kqueue) watchDirectoryFiles(dirPath string) error { + files, err := os.ReadDir(dirPath) + if err != nil { + return err + } + + for _, f := range files { + path := filepath.Join(dirPath, f.Name()) + + fi, err := f.Info() + if err != nil { + return fmt.Errorf("%q: %w", path, err) + } + + cleanPath, err := w.internalWatch(path, fi) + if err != nil { + // No permission to read the file; that's not a problem: just skip. + // But do add it to w.fileExists to prevent it from being picked up + // as a "new" file later (it still shows up in the directory + // listing). + switch { + case errors.Is(err, unix.EACCES) || errors.Is(err, unix.EPERM): + cleanPath = filepath.Clean(path) + default: + return fmt.Errorf("%q: %w", path, err) + } + } + + w.watches.markSeen(cleanPath, true) + } + + return nil +} + +// Search the directory for new files and send an event for them. +// +// This functionality is to have the BSD watcher match the inotify, which sends +// a create event for files created in a watched directory. +func (w *kqueue) dirChange(dir string) error { + files, err := os.ReadDir(dir) + if err != nil { + // Directory no longer exists: we can ignore this safely. kqueue will + // still give us the correct events. + if errors.Is(err, os.ErrNotExist) { + return nil + } + return fmt.Errorf("fsnotify.dirChange %q: %w", dir, err) + } + + for _, f := range files { + fi, err := f.Info() + if err != nil { + if errors.Is(err, os.ErrNotExist) { + return nil + } + return fmt.Errorf("fsnotify.dirChange: %w", err) + } + + err = w.sendCreateIfNew(filepath.Join(dir, fi.Name()), fi) + if err != nil { + // Don't need to send an error if this file isn't readable. 
+ if errors.Is(err, unix.EACCES) || errors.Is(err, unix.EPERM) || errors.Is(err, os.ErrNotExist) { + return nil + } + return fmt.Errorf("fsnotify.dirChange: %w", err) + } + } + return nil +} + +// Send a create event if the file isn't already being tracked, and start +// watching this file. +func (w *kqueue) sendCreateIfNew(path string, fi os.FileInfo) error { + if !w.watches.seenBefore(path) { + if !w.sendEvent(Event{Name: path, Op: Create}) { + return nil + } + } + + // Like watchDirectoryFiles, but without doing another ReadDir. + path, err := w.internalWatch(path, fi) + if err != nil { + return err + } + w.watches.markSeen(path, true) + return nil +} + +func (w *kqueue) internalWatch(name string, fi os.FileInfo) (string, error) { + if fi.IsDir() { + // mimic Linux providing delete events for subdirectories, but preserve + // the flags used if currently watching subdirectory + info, _ := w.watches.byPath(name) + return w.addWatch(name, info.dirFlags|unix.NOTE_DELETE|unix.NOTE_RENAME, true) + } + + // Watch file to mimic Linux inotify. + return w.addWatch(name, noteAllEvents, true) +} + +// Register events with the queue. +func (w *kqueue) register(fds []int, flags int, fflags uint32) error { + changes := make([]unix.Kevent_t, len(fds)) + for i, fd := range fds { + // SetKevent converts int to the platform-specific types. + unix.SetKevent(&changes[i], fd, unix.EVFILT_VNODE, flags) + changes[i].Fflags = fflags + } + + // Register the events. + success, err := unix.Kevent(w.kq, changes, nil, nil) + if success == -1 { + return err + } + return nil +} + +// read retrieves pending events, or waits until an event occurs. +func (w *kqueue) read(events []unix.Kevent_t) ([]unix.Kevent_t, error) { + n, err := unix.Kevent(w.kq, nil, events, nil) + if err != nil { + return nil, err + } + return events[0:n], nil +} + +func (w *kqueue) xSupports(op Op) bool { + //if runtime.GOOS == "freebsd" { + // return true // Supports everything. 
+ //} + if op.Has(xUnportableOpen) || op.Has(xUnportableRead) || + op.Has(xUnportableCloseWrite) || op.Has(xUnportableCloseRead) { + return false + } + return true +} diff --git a/src/yangerd/vendor/github.com/fsnotify/fsnotify/backend_other.go b/src/yangerd/vendor/github.com/fsnotify/fsnotify/backend_other.go new file mode 100644 index 000000000..b8c0ad722 --- /dev/null +++ b/src/yangerd/vendor/github.com/fsnotify/fsnotify/backend_other.go @@ -0,0 +1,22 @@ +//go:build appengine || (!darwin && !dragonfly && !freebsd && !openbsd && !linux && !netbsd && !solaris && !windows) + +package fsnotify + +import "errors" + +type other struct { + Events chan Event + Errors chan error +} + +var defaultBufferSize = 0 + +func newBackend(ev chan Event, errs chan error) (backend, error) { + return nil, errors.New("fsnotify not supported on the current platform") +} +func (w *other) Close() error { return nil } +func (w *other) WatchList() []string { return nil } +func (w *other) Add(name string) error { return nil } +func (w *other) AddWith(name string, opts ...addOpt) error { return nil } +func (w *other) Remove(name string) error { return nil } +func (w *other) xSupports(op Op) bool { return false } diff --git a/src/yangerd/vendor/github.com/fsnotify/fsnotify/backend_windows.go b/src/yangerd/vendor/github.com/fsnotify/fsnotify/backend_windows.go new file mode 100644 index 000000000..3433642d6 --- /dev/null +++ b/src/yangerd/vendor/github.com/fsnotify/fsnotify/backend_windows.go @@ -0,0 +1,680 @@ +//go:build windows + +// Windows backend based on ReadDirectoryChangesW() +// +// https://learn.microsoft.com/en-us/windows/win32/api/winbase/nf-winbase-readdirectorychangesw + +package fsnotify + +import ( + "errors" + "fmt" + "os" + "path/filepath" + "reflect" + "runtime" + "strings" + "sync" + "time" + "unsafe" + + "github.com/fsnotify/fsnotify/internal" + "golang.org/x/sys/windows" +) + +type readDirChangesW struct { + Events chan Event + Errors chan error + + port windows.Handle // 
Handle to completion port + input chan *input // Inputs to the reader are sent on this channel + done chan chan<- error + + mu sync.Mutex // Protects access to watches, closed + watches watchMap // Map of watches (key: i-number) + closed bool // Set to true when Close() is first called +} + +var defaultBufferSize = 50 + +func newBackend(ev chan Event, errs chan error) (backend, error) { + port, err := windows.CreateIoCompletionPort(windows.InvalidHandle, 0, 0, 0) + if err != nil { + return nil, os.NewSyscallError("CreateIoCompletionPort", err) + } + w := &readDirChangesW{ + Events: ev, + Errors: errs, + port: port, + watches: make(watchMap), + input: make(chan *input, 1), + done: make(chan chan<- error, 1), + } + go w.readEvents() + return w, nil +} + +func (w *readDirChangesW) isClosed() bool { + w.mu.Lock() + defer w.mu.Unlock() + return w.closed +} + +func (w *readDirChangesW) sendEvent(name, renamedFrom string, mask uint64) bool { + if mask == 0 { + return false + } + + event := w.newEvent(name, uint32(mask)) + event.renamedFrom = renamedFrom + select { + case ch := <-w.done: + w.done <- ch + case w.Events <- event: + } + return true +} + +// Returns true if the error was sent, or false if watcher is closed. 
+func (w *readDirChangesW) sendError(err error) bool { + if err == nil { + return true + } + select { + case <-w.done: + return false + case w.Errors <- err: + return true + } +} + +func (w *readDirChangesW) Close() error { + if w.isClosed() { + return nil + } + + w.mu.Lock() + w.closed = true + w.mu.Unlock() + + // Send "done" message to the reader goroutine + ch := make(chan error) + w.done <- ch + if err := w.wakeupReader(); err != nil { + return err + } + return <-ch +} + +func (w *readDirChangesW) Add(name string) error { return w.AddWith(name) } + +func (w *readDirChangesW) AddWith(name string, opts ...addOpt) error { + if w.isClosed() { + return ErrClosed + } + if debug { + fmt.Fprintf(os.Stderr, "FSNOTIFY_DEBUG: %s AddWith(%q)\n", + time.Now().Format("15:04:05.000000000"), filepath.ToSlash(name)) + } + + with := getOptions(opts...) + if !w.xSupports(with.op) { + return fmt.Errorf("%w: %s", xErrUnsupported, with.op) + } + if with.bufsize < 4096 { + return fmt.Errorf("fsnotify.WithBufferSize: buffer size cannot be smaller than 4096 bytes") + } + + in := &input{ + op: opAddWatch, + path: filepath.Clean(name), + flags: sysFSALLEVENTS, + reply: make(chan error), + bufsize: with.bufsize, + } + w.input <- in + if err := w.wakeupReader(); err != nil { + return err + } + return <-in.reply +} + +func (w *readDirChangesW) Remove(name string) error { + if w.isClosed() { + return nil + } + if debug { + fmt.Fprintf(os.Stderr, "FSNOTIFY_DEBUG: %s Remove(%q)\n", + time.Now().Format("15:04:05.000000000"), filepath.ToSlash(name)) + } + + in := &input{ + op: opRemoveWatch, + path: filepath.Clean(name), + reply: make(chan error), + } + w.input <- in + if err := w.wakeupReader(); err != nil { + return err + } + return <-in.reply +} + +func (w *readDirChangesW) WatchList() []string { + if w.isClosed() { + return nil + } + + w.mu.Lock() + defer w.mu.Unlock() + + entries := make([]string, 0, len(w.watches)) + for _, entry := range w.watches { + for _, watchEntry := range entry { + 
for name := range watchEntry.names { + entries = append(entries, filepath.Join(watchEntry.path, name)) + } + // the directory itself is being watched + if watchEntry.mask != 0 { + entries = append(entries, watchEntry.path) + } + } + } + + return entries +} + +// These options are from the old golang.org/x/exp/winfsnotify, where you could +// add various options to the watch. This has long since been removed. +// +// The "sys" in the name is misleading as they're not part of any "system". +// +// This should all be removed at some point, and just use windows.FILE_NOTIFY_* +const ( + sysFSALLEVENTS = 0xfff + sysFSCREATE = 0x100 + sysFSDELETE = 0x200 + sysFSDELETESELF = 0x400 + sysFSMODIFY = 0x2 + sysFSMOVE = 0xc0 + sysFSMOVEDFROM = 0x40 + sysFSMOVEDTO = 0x80 + sysFSMOVESELF = 0x800 + sysFSIGNORED = 0x8000 +) + +func (w *readDirChangesW) newEvent(name string, mask uint32) Event { + e := Event{Name: name} + if mask&sysFSCREATE == sysFSCREATE || mask&sysFSMOVEDTO == sysFSMOVEDTO { + e.Op |= Create + } + if mask&sysFSDELETE == sysFSDELETE || mask&sysFSDELETESELF == sysFSDELETESELF { + e.Op |= Remove + } + if mask&sysFSMODIFY == sysFSMODIFY { + e.Op |= Write + } + if mask&sysFSMOVE == sysFSMOVE || mask&sysFSMOVESELF == sysFSMOVESELF || mask&sysFSMOVEDFROM == sysFSMOVEDFROM { + e.Op |= Rename + } + return e +} + +const ( + opAddWatch = iota + opRemoveWatch +) + +const ( + provisional uint64 = 1 << (32 + iota) +) + +type input struct { + op int + path string + flags uint32 + bufsize int + reply chan error +} + +type inode struct { + handle windows.Handle + volume uint32 + index uint64 +} + +type watch struct { + ov windows.Overlapped + ino *inode // i-number + recurse bool // Recursive watch? 
+ path string // Directory path + mask uint64 // Directory itself is being watched with these notify flags + names map[string]uint64 // Map of names being watched and their notify flags + rename string // Remembers the old name while renaming a file + buf []byte // buffer, allocated later +} + +type ( + indexMap map[uint64]*watch + watchMap map[uint32]indexMap +) + +func (w *readDirChangesW) wakeupReader() error { + err := windows.PostQueuedCompletionStatus(w.port, 0, 0, nil) + if err != nil { + return os.NewSyscallError("PostQueuedCompletionStatus", err) + } + return nil +} + +func (w *readDirChangesW) getDir(pathname string) (dir string, err error) { + attr, err := windows.GetFileAttributes(windows.StringToUTF16Ptr(pathname)) + if err != nil { + return "", os.NewSyscallError("GetFileAttributes", err) + } + if attr&windows.FILE_ATTRIBUTE_DIRECTORY != 0 { + dir = pathname + } else { + dir, _ = filepath.Split(pathname) + dir = filepath.Clean(dir) + } + return +} + +func (w *readDirChangesW) getIno(path string) (ino *inode, err error) { + h, err := windows.CreateFile(windows.StringToUTF16Ptr(path), + windows.FILE_LIST_DIRECTORY, + windows.FILE_SHARE_READ|windows.FILE_SHARE_WRITE|windows.FILE_SHARE_DELETE, + nil, windows.OPEN_EXISTING, + windows.FILE_FLAG_BACKUP_SEMANTICS|windows.FILE_FLAG_OVERLAPPED, 0) + if err != nil { + return nil, os.NewSyscallError("CreateFile", err) + } + + var fi windows.ByHandleFileInformation + err = windows.GetFileInformationByHandle(h, &fi) + if err != nil { + windows.CloseHandle(h) + return nil, os.NewSyscallError("GetFileInformationByHandle", err) + } + ino = &inode{ + handle: h, + volume: fi.VolumeSerialNumber, + index: uint64(fi.FileIndexHigh)<<32 | uint64(fi.FileIndexLow), + } + return ino, nil +} + +// Must run within the I/O thread. +func (m watchMap) get(ino *inode) *watch { + if i := m[ino.volume]; i != nil { + return i[ino.index] + } + return nil +} + +// Must run within the I/O thread. 
+func (m watchMap) set(ino *inode, watch *watch) {
+	// Create the per-volume map on first use, then store by file index.
+	i := m[ino.volume]
+	if i == nil {
+		i = make(indexMap)
+		m[ino.volume] = i
+	}
+	i[ino.index] = watch
+}
+
+// addWatch registers pathname with flags. The watch is keyed by the directory
+// that contains pathname (or pathname itself when it is a directory); a new
+// watch entry with a buffer of bufsize bytes is created when that directory is
+// not watched yet, otherwise the flags are merged into the existing entry.
+//
+// Must run within the I/O thread.
+func (w *readDirChangesW) addWatch(pathname string, flags uint64, bufsize int) error {
+	pathname, recurse := recursivePath(pathname)
+
+	dir, err := w.getDir(pathname)
+	if err != nil {
+		return err
+	}
+
+	ino, err := w.getIno(dir)
+	if err != nil {
+		return err
+	}
+	w.mu.Lock()
+	watchEntry := w.watches.get(ino)
+	w.mu.Unlock()
+	if watchEntry == nil {
+		// New directory: the handle opened by getIno becomes the watch handle.
+		_, err := windows.CreateIoCompletionPort(ino.handle, w.port, 0, 0)
+		if err != nil {
+			windows.CloseHandle(ino.handle)
+			return os.NewSyscallError("CreateIoCompletionPort", err)
+		}
+		watchEntry = &watch{
+			ino:     ino,
+			path:    dir,
+			names:   make(map[string]uint64),
+			recurse: recurse,
+			buf:     make([]byte, bufsize),
+		}
+		w.mu.Lock()
+		w.watches.set(ino, watchEntry)
+		w.mu.Unlock()
+		// Marked provisional until the first startRead below has succeeded.
+		flags |= provisional
+	} else {
+		// Already watched: the handle was only needed for the lookup.
+		windows.CloseHandle(ino.handle)
+	}
+	if pathname == dir {
+		watchEntry.mask |= flags
+	} else {
+		watchEntry.names[filepath.Base(pathname)] |= flags
+	}
+
+	err = w.startRead(watchEntry)
+	if err != nil {
+		return err
+	}
+
+	if pathname == dir {
+		watchEntry.mask &= ^provisional
+	} else {
+		watchEntry.names[filepath.Base(pathname)] &= ^provisional
+	}
+	return nil
+}
+
+// remWatch removes the watch on pathname and restarts the read on its
+// directory with the remaining flags. It returns an error wrapping
+// ErrNonExistentWatch when the directory is not being watched.
+//
+// Must run within the I/O thread.
+func (w *readDirChangesW) remWatch(pathname string) error {
+	pathname, recurse := recursivePath(pathname)
+
+	dir, err := w.getDir(pathname)
+	if err != nil {
+		return err
+	}
+	ino, err := w.getIno(dir)
+	if err != nil {
+		return err
+	}
+
+	w.mu.Lock()
+	watch := w.watches.get(ino)
+	w.mu.Unlock()
+
+	// getIno opened this handle only to identify the directory. Release it
+	// before any of the returns below so that none of them leaks it.
+	err = windows.CloseHandle(ino.handle)
+	if err != nil {
+		w.sendError(os.NewSyscallError("CloseHandle", err))
+	}
+	// The nil check has to come before the first field access on watch.
+	if watch == nil {
+		return fmt.Errorf("%w: %s", ErrNonExistentWatch, pathname)
+	}
+	if recurse && !watch.recurse {
+		return fmt.Errorf("can't use \\... with non-recursive watch %q", pathname)
+	}
+
+	if pathname == dir {
+		w.sendEvent(watch.path, "", watch.mask&sysFSIGNORED)
+		watch.mask = 0
+	} else {
+		name := filepath.Base(pathname)
+		w.sendEvent(filepath.Join(watch.path, name), "", watch.names[name]&sysFSIGNORED)
+		delete(watch.names, name)
+	}
+
+	return w.startRead(watch)
+}
+
+// deleteWatch clears every name and the directory mask of watch. For masks
+// that are not provisional it first calls sendEvent with the sysFSIGNORED bit
+// of the mask.
+//
+// Must run within the I/O thread.
+func (w *readDirChangesW) deleteWatch(watch *watch) {
+	for name, mask := range watch.names {
+		if mask&provisional == 0 {
+			w.sendEvent(filepath.Join(watch.path, name), "", mask&sysFSIGNORED)
+		}
+		delete(watch.names, name)
+	}
+	if watch.mask != 0 {
+		if watch.mask&provisional == 0 {
+			w.sendEvent(watch.path, "", watch.mask&sysFSIGNORED)
+		}
+		watch.mask = 0
+	}
+}
+
+// startRead cancels pending I/O on the watch handle and issues a new
+// ReadDirectoryChanges call for the union of the directory mask and all name
+// masks. When nothing is left to watch it closes the handle and drops the
+// watch from w.watches instead.
+//
+// Must run within the I/O thread.
+func (w *readDirChangesW) startRead(watch *watch) error {
+	err := windows.CancelIo(watch.ino.handle)
+	if err != nil {
+		w.sendError(os.NewSyscallError("CancelIo", err))
+		w.deleteWatch(watch)
+	}
+	mask := w.toWindowsFlags(watch.mask)
+	for _, m := range watch.names {
+		mask |= w.toWindowsFlags(m)
+	}
+	if mask == 0 {
+		err := windows.CloseHandle(watch.ino.handle)
+		if err != nil {
+			w.sendError(os.NewSyscallError("CloseHandle", err))
+		}
+		w.mu.Lock()
+		delete(w.watches[watch.ino.volume], watch.ino.index)
+		w.mu.Unlock()
+		return nil
+	}
+
+	// We need to pass the array, rather than the slice.
+	hdr := (*reflect.SliceHeader)(unsafe.Pointer(&watch.buf))
+	rdErr := windows.ReadDirectoryChanges(watch.ino.handle,
+		(*byte)(unsafe.Pointer(hdr.Data)), uint32(hdr.Len),
+		watch.recurse, mask, nil, &watch.ov, 0)
+	if rdErr != nil {
+		err := os.NewSyscallError("ReadDirectoryChanges", rdErr)
+		if rdErr == windows.ERROR_ACCESS_DENIED && watch.mask&provisional == 0 {
+			// Watched directory was probably removed
+			w.sendEvent(watch.path, "", watch.mask&sysFSDELETESELF)
+			err = nil
+		}
+		// deleteWatch leaves the mask empty, so this nested call takes the
+		// "nothing left to watch" path above and releases the handle.
+		w.deleteWatch(watch)
+		w.startRead(watch)
+		return err
+	}
+	return nil
+}
+
+// readEvents reads from the I/O completion port, converts the
+// received events into Event objects and sends them via the Events channel.
+// Entry point to the I/O thread.
+func (w *readDirChangesW) readEvents() {
+	var (
+		n   uint32
+		key uintptr
+		ov  *windows.Overlapped
+	)
+	runtime.LockOSThread()
+
+	for {
+		// This error is handled after the watch == nil check below.
+		qErr := windows.GetQueuedCompletionStatus(w.port, &n, &key, &ov, windows.INFINITE)
+
+		watch := (*watch)(unsafe.Pointer(ov))
+		if watch == nil {
+			select {
+			case ch := <-w.done:
+				w.mu.Lock()
+				var indexes []indexMap
+				for _, index := range w.watches {
+					indexes = append(indexes, index)
+				}
+				w.mu.Unlock()
+				for _, index := range indexes {
+					for _, watch := range index {
+						w.deleteWatch(watch)
+						w.startRead(watch)
+					}
+				}
+
+				err := windows.CloseHandle(w.port)
+				if err != nil {
+					err = os.NewSyscallError("CloseHandle", err)
+				}
+				close(w.Events)
+				close(w.Errors)
+				ch <- err
+				return
+			case in := <-w.input:
+				switch in.op {
+				case opAddWatch:
+					in.reply <- w.addWatch(in.path, uint64(in.flags), in.bufsize)
+				case opRemoveWatch:
+					in.reply <- w.remWatch(in.path)
+				}
+			default:
+			}
+			continue
+		}
+
+		switch qErr {
+		case nil:
+			// No error
+		case windows.ERROR_MORE_DATA:
+			if watch == nil {
+				w.sendError(errors.New("ERROR_MORE_DATA has unexpectedly null lpOverlapped buffer"))
+			} else {
+				// The i/o succeeded but the buffer is full.
+ // In theory we should be building up a full packet. + // In practice we can get away with just carrying on. + n = uint32(unsafe.Sizeof(watch.buf)) + } + case windows.ERROR_ACCESS_DENIED: + // Watched directory was probably removed + w.sendEvent(watch.path, "", watch.mask&sysFSDELETESELF) + w.deleteWatch(watch) + w.startRead(watch) + continue + case windows.ERROR_OPERATION_ABORTED: + // CancelIo was called on this handle + continue + default: + w.sendError(os.NewSyscallError("GetQueuedCompletionPort", qErr)) + continue + } + + var offset uint32 + for { + if n == 0 { + w.sendError(ErrEventOverflow) + break + } + + // Point "raw" to the event in the buffer + raw := (*windows.FileNotifyInformation)(unsafe.Pointer(&watch.buf[offset])) + + // Create a buf that is the size of the path name + size := int(raw.FileNameLength / 2) + var buf []uint16 + // TODO: Use unsafe.Slice in Go 1.17; https://stackoverflow.com/questions/51187973 + sh := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + sh.Data = uintptr(unsafe.Pointer(&raw.FileName)) + sh.Len = size + sh.Cap = size + name := windows.UTF16ToString(buf) + fullname := filepath.Join(watch.path, name) + + if debug { + internal.Debug(fullname, raw.Action) + } + + var mask uint64 + switch raw.Action { + case windows.FILE_ACTION_REMOVED: + mask = sysFSDELETESELF + case windows.FILE_ACTION_MODIFIED: + mask = sysFSMODIFY + case windows.FILE_ACTION_RENAMED_OLD_NAME: + watch.rename = name + case windows.FILE_ACTION_RENAMED_NEW_NAME: + // Update saved path of all sub-watches. 
+ old := filepath.Join(watch.path, watch.rename) + w.mu.Lock() + for _, watchMap := range w.watches { + for _, ww := range watchMap { + if strings.HasPrefix(ww.path, old) { + ww.path = filepath.Join(fullname, strings.TrimPrefix(ww.path, old)) + } + } + } + w.mu.Unlock() + + if watch.names[watch.rename] != 0 { + watch.names[name] |= watch.names[watch.rename] + delete(watch.names, watch.rename) + mask = sysFSMOVESELF + } + } + + if raw.Action != windows.FILE_ACTION_RENAMED_NEW_NAME { + w.sendEvent(fullname, "", watch.names[name]&mask) + } + if raw.Action == windows.FILE_ACTION_REMOVED { + w.sendEvent(fullname, "", watch.names[name]&sysFSIGNORED) + delete(watch.names, name) + } + + if watch.rename != "" && raw.Action == windows.FILE_ACTION_RENAMED_NEW_NAME { + w.sendEvent(fullname, filepath.Join(watch.path, watch.rename), watch.mask&w.toFSnotifyFlags(raw.Action)) + } else { + w.sendEvent(fullname, "", watch.mask&w.toFSnotifyFlags(raw.Action)) + } + + if raw.Action == windows.FILE_ACTION_RENAMED_NEW_NAME { + w.sendEvent(filepath.Join(watch.path, watch.rename), "", watch.names[name]&mask) + } + + // Move to the next event in the buffer + if raw.NextEntryOffset == 0 { + break + } + offset += raw.NextEntryOffset + + // Error! 
+ if offset >= n { + //lint:ignore ST1005 Windows should be capitalized + w.sendError(errors.New("Windows system assumed buffer larger than it is, events have likely been missed")) + break + } + } + + if err := w.startRead(watch); err != nil { + w.sendError(err) + } + } +} + +func (w *readDirChangesW) toWindowsFlags(mask uint64) uint32 { + var m uint32 + if mask&sysFSMODIFY != 0 { + m |= windows.FILE_NOTIFY_CHANGE_LAST_WRITE + } + if mask&(sysFSMOVE|sysFSCREATE|sysFSDELETE) != 0 { + m |= windows.FILE_NOTIFY_CHANGE_FILE_NAME | windows.FILE_NOTIFY_CHANGE_DIR_NAME + } + return m +} + +func (w *readDirChangesW) toFSnotifyFlags(action uint32) uint64 { + switch action { + case windows.FILE_ACTION_ADDED: + return sysFSCREATE + case windows.FILE_ACTION_REMOVED: + return sysFSDELETE + case windows.FILE_ACTION_MODIFIED: + return sysFSMODIFY + case windows.FILE_ACTION_RENAMED_OLD_NAME: + return sysFSMOVEDFROM + case windows.FILE_ACTION_RENAMED_NEW_NAME: + return sysFSMOVEDTO + } + return 0 +} + +func (w *readDirChangesW) xSupports(op Op) bool { + if op.Has(xUnportableOpen) || op.Has(xUnportableRead) || + op.Has(xUnportableCloseWrite) || op.Has(xUnportableCloseRead) { + return false + } + return true +} diff --git a/src/yangerd/vendor/github.com/fsnotify/fsnotify/fsnotify.go b/src/yangerd/vendor/github.com/fsnotify/fsnotify/fsnotify.go new file mode 100644 index 000000000..f64be4bf9 --- /dev/null +++ b/src/yangerd/vendor/github.com/fsnotify/fsnotify/fsnotify.go @@ -0,0 +1,496 @@ +// Package fsnotify provides a cross-platform interface for file system +// notifications. +// +// Currently supported systems: +// +// - Linux via inotify +// - BSD, macOS via kqueue +// - Windows via ReadDirectoryChangesW +// - illumos via FEN +// +// # FSNOTIFY_DEBUG +// +// Set the FSNOTIFY_DEBUG environment variable to "1" to print debug messages to +// stderr. This can be useful to track down some problems, especially in cases +// where fsnotify is used as an indirect dependency. 
+// +// Every event will be printed as soon as there's something useful to print, +// with as little processing from fsnotify. +// +// Example output: +// +// FSNOTIFY_DEBUG: 11:34:23.633087586 256:IN_CREATE → "/tmp/file-1" +// FSNOTIFY_DEBUG: 11:34:23.633202319 4:IN_ATTRIB → "/tmp/file-1" +// FSNOTIFY_DEBUG: 11:34:28.989728764 512:IN_DELETE → "/tmp/file-1" +package fsnotify + +import ( + "errors" + "fmt" + "os" + "path/filepath" + "strings" +) + +// Watcher watches a set of paths, delivering events on a channel. +// +// A watcher should not be copied (e.g. pass it by pointer, rather than by +// value). +// +// # Linux notes +// +// When a file is removed a Remove event won't be emitted until all file +// descriptors are closed, and deletes will always emit a Chmod. For example: +// +// fp := os.Open("file") +// os.Remove("file") // Triggers Chmod +// fp.Close() // Triggers Remove +// +// This is the event that inotify sends, so not much can be changed about this. +// +// The fs.inotify.max_user_watches sysctl variable specifies the upper limit +// for the number of watches per user, and fs.inotify.max_user_instances +// specifies the maximum number of inotify instances per user. Every Watcher you +// create is an "instance", and every path you add is a "watch". +// +// These are also exposed in /proc as /proc/sys/fs/inotify/max_user_watches and +// /proc/sys/fs/inotify/max_user_instances +// +// To increase them you can use sysctl or write the value to the /proc file: +// +// # Default values on Linux 5.18 +// sysctl fs.inotify.max_user_watches=124983 +// sysctl fs.inotify.max_user_instances=128 +// +// To make the changes persist on reboot edit /etc/sysctl.conf or +// /usr/lib/sysctl.d/50-default.conf (details differ per Linux distro; check +// your distro's documentation): +// +// fs.inotify.max_user_watches=124983 +// fs.inotify.max_user_instances=128 +// +// Reaching the limit will result in a "no space left on device" or "too many open +// files" error. 
+// +// # kqueue notes (macOS, BSD) +// +// kqueue requires opening a file descriptor for every file that's being watched; +// so if you're watching a directory with five files then that's six file +// descriptors. You will run in to your system's "max open files" limit faster on +// these platforms. +// +// The sysctl variables kern.maxfiles and kern.maxfilesperproc can be used to +// control the maximum number of open files, as well as /etc/login.conf on BSD +// systems. +// +// # Windows notes +// +// Paths can be added as "C:\\path\\to\\dir", but forward slashes +// ("C:/path/to/dir") will also work. +// +// When a watched directory is removed it will always send an event for the +// directory itself, but may not send events for all files in that directory. +// Sometimes it will send events for all files, sometimes it will send no +// events, and often only for some files. +// +// The default ReadDirectoryChangesW() buffer size is 64K, which is the largest +// value that is guaranteed to work with SMB filesystems. If you have many +// events in quick succession this may not be enough, and you will have to use +// [WithBufferSize] to increase the value. +type Watcher struct { + b backend + + // Events sends the filesystem change events. + // + // fsnotify can send the following events; a "path" here can refer to a + // file, directory, symbolic link, or special file like a FIFO. + // + // fsnotify.Create A new path was created; this may be followed by one + // or more Write events if data also gets written to a + // file. + // + // fsnotify.Remove A path was removed. + // + // fsnotify.Rename A path was renamed. A rename is always sent with the + // old path as Event.Name, and a Create event will be + // sent with the new name. Renames are only sent for + // paths that are currently watched; e.g. moving an + // unmonitored file into a monitored directory will + // show up as just a Create. 
Similarly, renaming a file + // to outside a monitored directory will show up as + // only a Rename. + // + // fsnotify.Write A file or named pipe was written to. A Truncate will + // also trigger a Write. A single "write action" + // initiated by the user may show up as one or multiple + // writes, depending on when the system syncs things to + // disk. For example when compiling a large Go program + // you may get hundreds of Write events, and you may + // want to wait until you've stopped receiving them + // (see the dedup example in cmd/fsnotify). + // + // Some systems may send Write event for directories + // when the directory content changes. + // + // fsnotify.Chmod Attributes were changed. On Linux this is also sent + // when a file is removed (or more accurately, when a + // link to an inode is removed). On kqueue it's sent + // when a file is truncated. On Windows it's never + // sent. + Events chan Event + + // Errors sends any errors. + Errors chan error +} + +// Event represents a file system notification. +type Event struct { + // Path to the file or directory. + // + // Paths are relative to the input; for example with Add("dir") the Name + // will be set to "dir/file" if you create that file, but if you use + // Add("/path/to/dir") it will be "/path/to/dir/file". + Name string + + // File operation that triggered the event. + // + // This is a bitmask and some systems may send multiple operations at once. + // Use the Event.Has() method instead of comparing with ==. + Op Op + + // Create events will have this set to the old path if it's a rename. This + // only works when both the source and destination are watched. It's not + // reliable when watching individual files, only directories. + // + // For example "mv /tmp/file /tmp/rename" will emit: + // + // Event{Op: Rename, Name: "/tmp/file"} + // Event{Op: Create, Name: "/tmp/rename", RenamedFrom: "/tmp/file"} + renamedFrom string +} + +// Op describes a set of file operations. 
+type Op uint32 + +// The operations fsnotify can trigger; see the documentation on [Watcher] for a +// full description, and check them with [Event.Has]. +const ( + // A new pathname was created. + Create Op = 1 << iota + + // The pathname was written to; this does *not* mean the write has finished, + // and a write can be followed by more writes. + Write + + // The path was removed; any watches on it will be removed. Some "remove" + // operations may trigger a Rename if the file is actually moved (for + // example "remove to trash" is often a rename). + Remove + + // The path was renamed to something else; any watches on it will be + // removed. + Rename + + // File attributes were changed. + // + // It's generally not recommended to take action on this event, as it may + // get triggered very frequently by some software. For example, Spotlight + // indexing on macOS, anti-virus software, backup software, etc. + Chmod + + // File descriptor was opened. + // + // Only works on Linux and FreeBSD. + xUnportableOpen + + // File was read from. + // + // Only works on Linux and FreeBSD. + xUnportableRead + + // File opened for writing was closed. + // + // Only works on Linux and FreeBSD. + // + // The advantage of using this over Write is that it's more reliable than + // waiting for Write events to stop. It's also faster (if you're not + // listening to Write events): copying a file of a few GB can easily + // generate tens of thousands of Write events in a short span of time. + xUnportableCloseWrite + + // File opened for reading was closed. + // + // Only works on Linux and FreeBSD. + xUnportableCloseRead +) + +var ( + // ErrNonExistentWatch is used when Remove() is called on a path that's not + // added. + ErrNonExistentWatch = errors.New("fsnotify: can't remove non-existent watch") + + // ErrClosed is used when trying to operate on a closed Watcher. 
+ ErrClosed = errors.New("fsnotify: watcher already closed") + + // ErrEventOverflow is reported from the Errors channel when there are too + // many events: + // + // - inotify: inotify returns IN_Q_OVERFLOW – because there are too + // many queued events (the fs.inotify.max_queued_events + // sysctl can be used to increase this). + // - windows: The buffer size is too small; WithBufferSize() can be used to increase it. + // - kqueue, fen: Not used. + ErrEventOverflow = errors.New("fsnotify: queue or buffer overflow") + + // ErrUnsupported is returned by AddWith() when WithOps() specified an + // Unportable event that's not supported on this platform. + //lint:ignore ST1012 not relevant + xErrUnsupported = errors.New("fsnotify: not supported with this backend") +) + +// NewWatcher creates a new Watcher. +func NewWatcher() (*Watcher, error) { + ev, errs := make(chan Event, defaultBufferSize), make(chan error) + b, err := newBackend(ev, errs) + if err != nil { + return nil, err + } + return &Watcher{b: b, Events: ev, Errors: errs}, nil +} + +// NewBufferedWatcher creates a new Watcher with a buffered Watcher.Events +// channel. +// +// The main use case for this is situations with a very large number of events +// where the kernel buffer size can't be increased (e.g. due to lack of +// permissions). An unbuffered Watcher will perform better for almost all use +// cases, and whenever possible you will be better off increasing the kernel +// buffers instead of adding a large userspace buffer. +func NewBufferedWatcher(sz uint) (*Watcher, error) { + ev, errs := make(chan Event, sz), make(chan error) + b, err := newBackend(ev, errs) + if err != nil { + return nil, err + } + return &Watcher{b: b, Events: ev, Errors: errs}, nil +} + +// Add starts monitoring the path for changes. +// +// A path can only be watched once; watching it more than once is a no-op and will +// not return an error. Paths that do not yet exist on the filesystem cannot be +// watched. 
+// +// A watch will be automatically removed if the watched path is deleted or +// renamed. The exception is the Windows backend, which doesn't remove the +// watcher on renames. +// +// Notifications on network filesystems (NFS, SMB, FUSE, etc.) or special +// filesystems (/proc, /sys, etc.) generally don't work. +// +// Returns [ErrClosed] if [Watcher.Close] was called. +// +// See [Watcher.AddWith] for a version that allows adding options. +// +// # Watching directories +// +// All files in a directory are monitored, including new files that are created +// after the watcher is started. Subdirectories are not watched (i.e. it's +// non-recursive). +// +// # Watching files +// +// Watching individual files (rather than directories) is generally not +// recommended as many programs (especially editors) update files atomically: it +// will write to a temporary file which is then moved to destination, +// overwriting the original (or some variant thereof). The watcher on the +// original file is now lost, as that no longer exists. +// +// The upshot of this is that a power failure or crash won't leave a +// half-written file. +// +// Watch the parent directory and use Event.Name to filter out files you're not +// interested in. There is an example of this in cmd/fsnotify/file.go. +func (w *Watcher) Add(path string) error { return w.b.Add(path) } + +// AddWith is like [Watcher.Add], but allows adding options. When using Add() +// the defaults described below are used. +// +// Possible options are: +// +// - [WithBufferSize] sets the buffer size for the Windows backend; no-op on +// other platforms. The default is 64K (65536 bytes). +func (w *Watcher) AddWith(path string, opts ...addOpt) error { return w.b.AddWith(path, opts...) } + +// Remove stops monitoring the path for changes. +// +// Directories are always removed non-recursively. For example, if you added +// /tmp/dir and /tmp/dir/subdir then you will need to remove both. 
+// +// Removing a path that has not yet been added returns [ErrNonExistentWatch]. +// +// Returns nil if [Watcher.Close] was called. +func (w *Watcher) Remove(path string) error { return w.b.Remove(path) } + +// Close removes all watches and closes the Events channel. +func (w *Watcher) Close() error { return w.b.Close() } + +// WatchList returns all paths explicitly added with [Watcher.Add] (and are not +// yet removed). +// +// The order is undefined, and may differ per call. Returns nil if +// [Watcher.Close] was called. +func (w *Watcher) WatchList() []string { return w.b.WatchList() } + +// xSupports reports if all the listed operations are supported by this platform. +// +// Create, Write, Remove, Rename, and Chmod are always supported. It can only +// return false for an Op whose name starts with xUnportable. +func (w *Watcher) xSupports(op Op) bool { return w.b.xSupports(op) } + +func (o Op) String() string { + var b strings.Builder + if o.Has(Create) { + b.WriteString("|CREATE") + } + if o.Has(Remove) { + b.WriteString("|REMOVE") + } + if o.Has(Write) { + b.WriteString("|WRITE") + } + if o.Has(xUnportableOpen) { + b.WriteString("|OPEN") + } + if o.Has(xUnportableRead) { + b.WriteString("|READ") + } + if o.Has(xUnportableCloseWrite) { + b.WriteString("|CLOSE_WRITE") + } + if o.Has(xUnportableCloseRead) { + b.WriteString("|CLOSE_READ") + } + if o.Has(Rename) { + b.WriteString("|RENAME") + } + if o.Has(Chmod) { + b.WriteString("|CHMOD") + } + if b.Len() == 0 { + return "[no events]" + } + return b.String()[1:] +} + +// Has reports if this operation has at least one bit of the given operation set. +func (o Op) Has(h Op) bool { return o&h != 0 } + +// Has reports if this event's Op has at least one bit of the given operation set. +func (e Event) Has(op Op) bool { return e.Op.Has(op) } + +// String returns a string representation of the event with its path.
+func (e Event) String() string { + if e.renamedFrom != "" { + return fmt.Sprintf("%-13s %q ← %q", e.Op.String(), e.Name, e.renamedFrom) + } + return fmt.Sprintf("%-13s %q", e.Op.String(), e.Name) +} + +type ( + backend interface { + Add(string) error + AddWith(string, ...addOpt) error + Remove(string) error + WatchList() []string + Close() error + xSupports(Op) bool + } + addOpt func(opt *withOpts) + withOpts struct { + bufsize int + op Op + noFollow bool + sendCreate bool + } +) + +var debug = func() bool { + // Check for exactly "1" (rather than mere existence) so we can add + // options/flags in the future. I don't know if we ever want that, but it's + // nice to leave the option open. + return os.Getenv("FSNOTIFY_DEBUG") == "1" +}() + +var defaultOpts = withOpts{ + bufsize: 65536, // 64K + op: Create | Write | Remove | Rename | Chmod, +} + +func getOptions(opts ...addOpt) withOpts { + with := defaultOpts + for _, o := range opts { + if o != nil { + o(&with) + } + } + return with +} + +// WithBufferSize sets the [ReadDirectoryChangesW] buffer size. +// +// This only has effect on Windows systems, and is a no-op for other backends. +// +// The default value is 64K (65536 bytes) which is the highest value that works +// on all filesystems and should be enough for most applications, but if you +// have a large burst of events it may not be enough. You can increase it if +// you're hitting "queue or buffer overflow" errors ([ErrEventOverflow]). +// +// [ReadDirectoryChangesW]: https://learn.microsoft.com/en-gb/windows/win32/api/winbase/nf-winbase-readdirectorychangesw +func WithBufferSize(bytes int) addOpt { + return func(opt *withOpts) { opt.bufsize = bytes } +} + +// WithOps sets which operations to listen for. The default is [Create], +// [Write], [Remove], [Rename], and [Chmod]. 
+// +// Excluding operations you're not interested in can save quite a bit of CPU +// time; in some use cases there may be hundreds of thousands of useless Write +// or Chmod operations per second. +// +// This can also be used to add unportable operations not supported by all +// platforms; unportable operations all start with "xUnportable": +// xUnportableOpen, xUnportableRead, xUnportableCloseWrite, and +// xUnportableCloseRead. +// +// AddWith returns an error when using an unportable operation that's not +// supported. Use the Watcher's xSupports method to check for support. +func withOps(op Op) addOpt { + return func(opt *withOpts) { opt.op = op } +} + +// withNoFollow disables following symlinks, so the symlinks themselves are +// watched. +func withNoFollow() addOpt { + return func(opt *withOpts) { opt.noFollow = true } +} + +// "Internal" option for recursive watches on inotify. +func withCreate() addOpt { + return func(opt *withOpts) { opt.sendCreate = true } +} + +var enableRecurse = false + +// Check if this path is recursive (ends with "/..." or "\..."), and return the +// path with the /... stripped. +func recursivePath(path string) (string, bool) { + path = filepath.Clean(path) + if !enableRecurse { // Only enabled in tests for now. + return path, false + } + if filepath.Base(path) == "..."
{ + return filepath.Dir(path), true + } + return path, false +} diff --git a/src/yangerd/vendor/github.com/fsnotify/fsnotify/internal/darwin.go b/src/yangerd/vendor/github.com/fsnotify/fsnotify/internal/darwin.go new file mode 100644 index 000000000..0b01bc182 --- /dev/null +++ b/src/yangerd/vendor/github.com/fsnotify/fsnotify/internal/darwin.go @@ -0,0 +1,39 @@ +//go:build darwin + +package internal + +import ( + "syscall" + + "golang.org/x/sys/unix" +) + +var ( + ErrSyscallEACCES = syscall.EACCES + ErrUnixEACCES = unix.EACCES +) + +var maxfiles uint64 + +func SetRlimit() { + // Go 1.19 will do this automatically: https://go-review.googlesource.com/c/go/+/393354/ + var l syscall.Rlimit + err := syscall.Getrlimit(syscall.RLIMIT_NOFILE, &l) + if err == nil && l.Cur != l.Max { + l.Cur = l.Max + syscall.Setrlimit(syscall.RLIMIT_NOFILE, &l) + } + maxfiles = l.Cur + + if n, err := syscall.SysctlUint32("kern.maxfiles"); err == nil && uint64(n) < maxfiles { + maxfiles = uint64(n) + } + + if n, err := syscall.SysctlUint32("kern.maxfilesperproc"); err == nil && uint64(n) < maxfiles { + maxfiles = uint64(n) + } +} + +func Maxfiles() uint64 { return maxfiles } +func Mkfifo(path string, mode uint32) error { return unix.Mkfifo(path, mode) } +func Mknod(path string, mode uint32, dev int) error { return unix.Mknod(path, mode, dev) } diff --git a/src/yangerd/vendor/github.com/fsnotify/fsnotify/internal/debug_darwin.go b/src/yangerd/vendor/github.com/fsnotify/fsnotify/internal/debug_darwin.go new file mode 100644 index 000000000..928319fb0 --- /dev/null +++ b/src/yangerd/vendor/github.com/fsnotify/fsnotify/internal/debug_darwin.go @@ -0,0 +1,57 @@ +package internal + +import "golang.org/x/sys/unix" + +var names = []struct { + n string + m uint32 +}{ + {"NOTE_ABSOLUTE", unix.NOTE_ABSOLUTE}, + {"NOTE_ATTRIB", unix.NOTE_ATTRIB}, + {"NOTE_BACKGROUND", unix.NOTE_BACKGROUND}, + {"NOTE_CHILD", unix.NOTE_CHILD}, + {"NOTE_CRITICAL", unix.NOTE_CRITICAL}, + {"NOTE_DELETE", unix.NOTE_DELETE}, 
+ {"NOTE_EXEC", unix.NOTE_EXEC}, + {"NOTE_EXIT", unix.NOTE_EXIT}, + {"NOTE_EXITSTATUS", unix.NOTE_EXITSTATUS}, + {"NOTE_EXIT_CSERROR", unix.NOTE_EXIT_CSERROR}, + {"NOTE_EXIT_DECRYPTFAIL", unix.NOTE_EXIT_DECRYPTFAIL}, + {"NOTE_EXIT_DETAIL", unix.NOTE_EXIT_DETAIL}, + {"NOTE_EXIT_DETAIL_MASK", unix.NOTE_EXIT_DETAIL_MASK}, + {"NOTE_EXIT_MEMORY", unix.NOTE_EXIT_MEMORY}, + {"NOTE_EXIT_REPARENTED", unix.NOTE_EXIT_REPARENTED}, + {"NOTE_EXTEND", unix.NOTE_EXTEND}, + {"NOTE_FFAND", unix.NOTE_FFAND}, + {"NOTE_FFCOPY", unix.NOTE_FFCOPY}, + {"NOTE_FFCTRLMASK", unix.NOTE_FFCTRLMASK}, + {"NOTE_FFLAGSMASK", unix.NOTE_FFLAGSMASK}, + {"NOTE_FFNOP", unix.NOTE_FFNOP}, + {"NOTE_FFOR", unix.NOTE_FFOR}, + {"NOTE_FORK", unix.NOTE_FORK}, + {"NOTE_FUNLOCK", unix.NOTE_FUNLOCK}, + {"NOTE_LEEWAY", unix.NOTE_LEEWAY}, + {"NOTE_LINK", unix.NOTE_LINK}, + {"NOTE_LOWAT", unix.NOTE_LOWAT}, + {"NOTE_MACHTIME", unix.NOTE_MACHTIME}, + {"NOTE_MACH_CONTINUOUS_TIME", unix.NOTE_MACH_CONTINUOUS_TIME}, + {"NOTE_NONE", unix.NOTE_NONE}, + {"NOTE_NSECONDS", unix.NOTE_NSECONDS}, + {"NOTE_OOB", unix.NOTE_OOB}, + //{"NOTE_PCTRLMASK", unix.NOTE_PCTRLMASK}, -0x100000 (?!) 
+ {"NOTE_PDATAMASK", unix.NOTE_PDATAMASK}, + {"NOTE_REAP", unix.NOTE_REAP}, + {"NOTE_RENAME", unix.NOTE_RENAME}, + {"NOTE_REVOKE", unix.NOTE_REVOKE}, + {"NOTE_SECONDS", unix.NOTE_SECONDS}, + {"NOTE_SIGNAL", unix.NOTE_SIGNAL}, + {"NOTE_TRACK", unix.NOTE_TRACK}, + {"NOTE_TRACKERR", unix.NOTE_TRACKERR}, + {"NOTE_TRIGGER", unix.NOTE_TRIGGER}, + {"NOTE_USECONDS", unix.NOTE_USECONDS}, + {"NOTE_VM_ERROR", unix.NOTE_VM_ERROR}, + {"NOTE_VM_PRESSURE", unix.NOTE_VM_PRESSURE}, + {"NOTE_VM_PRESSURE_SUDDEN_TERMINATE", unix.NOTE_VM_PRESSURE_SUDDEN_TERMINATE}, + {"NOTE_VM_PRESSURE_TERMINATE", unix.NOTE_VM_PRESSURE_TERMINATE}, + {"NOTE_WRITE", unix.NOTE_WRITE}, +} diff --git a/src/yangerd/vendor/github.com/fsnotify/fsnotify/internal/debug_dragonfly.go b/src/yangerd/vendor/github.com/fsnotify/fsnotify/internal/debug_dragonfly.go new file mode 100644 index 000000000..3186b0c34 --- /dev/null +++ b/src/yangerd/vendor/github.com/fsnotify/fsnotify/internal/debug_dragonfly.go @@ -0,0 +1,33 @@ +package internal + +import "golang.org/x/sys/unix" + +var names = []struct { + n string + m uint32 +}{ + {"NOTE_ATTRIB", unix.NOTE_ATTRIB}, + {"NOTE_CHILD", unix.NOTE_CHILD}, + {"NOTE_DELETE", unix.NOTE_DELETE}, + {"NOTE_EXEC", unix.NOTE_EXEC}, + {"NOTE_EXIT", unix.NOTE_EXIT}, + {"NOTE_EXTEND", unix.NOTE_EXTEND}, + {"NOTE_FFAND", unix.NOTE_FFAND}, + {"NOTE_FFCOPY", unix.NOTE_FFCOPY}, + {"NOTE_FFCTRLMASK", unix.NOTE_FFCTRLMASK}, + {"NOTE_FFLAGSMASK", unix.NOTE_FFLAGSMASK}, + {"NOTE_FFNOP", unix.NOTE_FFNOP}, + {"NOTE_FFOR", unix.NOTE_FFOR}, + {"NOTE_FORK", unix.NOTE_FORK}, + {"NOTE_LINK", unix.NOTE_LINK}, + {"NOTE_LOWAT", unix.NOTE_LOWAT}, + {"NOTE_OOB", unix.NOTE_OOB}, + {"NOTE_PCTRLMASK", unix.NOTE_PCTRLMASK}, + {"NOTE_PDATAMASK", unix.NOTE_PDATAMASK}, + {"NOTE_RENAME", unix.NOTE_RENAME}, + {"NOTE_REVOKE", unix.NOTE_REVOKE}, + {"NOTE_TRACK", unix.NOTE_TRACK}, + {"NOTE_TRACKERR", unix.NOTE_TRACKERR}, + {"NOTE_TRIGGER", unix.NOTE_TRIGGER}, + {"NOTE_WRITE", unix.NOTE_WRITE}, +} diff --git 
a/src/yangerd/vendor/github.com/fsnotify/fsnotify/internal/debug_freebsd.go b/src/yangerd/vendor/github.com/fsnotify/fsnotify/internal/debug_freebsd.go new file mode 100644 index 000000000..f69fdb930 --- /dev/null +++ b/src/yangerd/vendor/github.com/fsnotify/fsnotify/internal/debug_freebsd.go @@ -0,0 +1,42 @@ +package internal + +import "golang.org/x/sys/unix" + +var names = []struct { + n string + m uint32 +}{ + {"NOTE_ABSTIME", unix.NOTE_ABSTIME}, + {"NOTE_ATTRIB", unix.NOTE_ATTRIB}, + {"NOTE_CHILD", unix.NOTE_CHILD}, + {"NOTE_CLOSE", unix.NOTE_CLOSE}, + {"NOTE_CLOSE_WRITE", unix.NOTE_CLOSE_WRITE}, + {"NOTE_DELETE", unix.NOTE_DELETE}, + {"NOTE_EXEC", unix.NOTE_EXEC}, + {"NOTE_EXIT", unix.NOTE_EXIT}, + {"NOTE_EXTEND", unix.NOTE_EXTEND}, + {"NOTE_FFAND", unix.NOTE_FFAND}, + {"NOTE_FFCOPY", unix.NOTE_FFCOPY}, + {"NOTE_FFCTRLMASK", unix.NOTE_FFCTRLMASK}, + {"NOTE_FFLAGSMASK", unix.NOTE_FFLAGSMASK}, + {"NOTE_FFNOP", unix.NOTE_FFNOP}, + {"NOTE_FFOR", unix.NOTE_FFOR}, + {"NOTE_FILE_POLL", unix.NOTE_FILE_POLL}, + {"NOTE_FORK", unix.NOTE_FORK}, + {"NOTE_LINK", unix.NOTE_LINK}, + {"NOTE_LOWAT", unix.NOTE_LOWAT}, + {"NOTE_MSECONDS", unix.NOTE_MSECONDS}, + {"NOTE_NSECONDS", unix.NOTE_NSECONDS}, + {"NOTE_OPEN", unix.NOTE_OPEN}, + {"NOTE_PCTRLMASK", unix.NOTE_PCTRLMASK}, + {"NOTE_PDATAMASK", unix.NOTE_PDATAMASK}, + {"NOTE_READ", unix.NOTE_READ}, + {"NOTE_RENAME", unix.NOTE_RENAME}, + {"NOTE_REVOKE", unix.NOTE_REVOKE}, + {"NOTE_SECONDS", unix.NOTE_SECONDS}, + {"NOTE_TRACK", unix.NOTE_TRACK}, + {"NOTE_TRACKERR", unix.NOTE_TRACKERR}, + {"NOTE_TRIGGER", unix.NOTE_TRIGGER}, + {"NOTE_USECONDS", unix.NOTE_USECONDS}, + {"NOTE_WRITE", unix.NOTE_WRITE}, +} diff --git a/src/yangerd/vendor/github.com/fsnotify/fsnotify/internal/debug_kqueue.go b/src/yangerd/vendor/github.com/fsnotify/fsnotify/internal/debug_kqueue.go new file mode 100644 index 000000000..607e683bd --- /dev/null +++ b/src/yangerd/vendor/github.com/fsnotify/fsnotify/internal/debug_kqueue.go @@ -0,0 +1,32 @@ +//go:build 
freebsd || openbsd || netbsd || dragonfly || darwin + +package internal + +import ( + "fmt" + "os" + "strings" + "time" + + "golang.org/x/sys/unix" +) + +func Debug(name string, kevent *unix.Kevent_t) { + mask := uint32(kevent.Fflags) + + var ( + l []string + unknown = mask + ) + for _, n := range names { + if mask&n.m == n.m { + l = append(l, n.n) + unknown ^= n.m + } + } + if unknown > 0 { + l = append(l, fmt.Sprintf("0x%x", unknown)) + } + fmt.Fprintf(os.Stderr, "FSNOTIFY_DEBUG: %s %10d:%-60s → %q\n", + time.Now().Format("15:04:05.000000000"), mask, strings.Join(l, " | "), name) +} diff --git a/src/yangerd/vendor/github.com/fsnotify/fsnotify/internal/debug_linux.go b/src/yangerd/vendor/github.com/fsnotify/fsnotify/internal/debug_linux.go new file mode 100644 index 000000000..35c734be4 --- /dev/null +++ b/src/yangerd/vendor/github.com/fsnotify/fsnotify/internal/debug_linux.go @@ -0,0 +1,56 @@ +package internal + +import ( + "fmt" + "os" + "strings" + "time" + + "golang.org/x/sys/unix" +) + +func Debug(name string, mask, cookie uint32) { + names := []struct { + n string + m uint32 + }{ + {"IN_ACCESS", unix.IN_ACCESS}, + {"IN_ATTRIB", unix.IN_ATTRIB}, + {"IN_CLOSE", unix.IN_CLOSE}, + {"IN_CLOSE_NOWRITE", unix.IN_CLOSE_NOWRITE}, + {"IN_CLOSE_WRITE", unix.IN_CLOSE_WRITE}, + {"IN_CREATE", unix.IN_CREATE}, + {"IN_DELETE", unix.IN_DELETE}, + {"IN_DELETE_SELF", unix.IN_DELETE_SELF}, + {"IN_IGNORED", unix.IN_IGNORED}, + {"IN_ISDIR", unix.IN_ISDIR}, + {"IN_MODIFY", unix.IN_MODIFY}, + {"IN_MOVE", unix.IN_MOVE}, + {"IN_MOVED_FROM", unix.IN_MOVED_FROM}, + {"IN_MOVED_TO", unix.IN_MOVED_TO}, + {"IN_MOVE_SELF", unix.IN_MOVE_SELF}, + {"IN_OPEN", unix.IN_OPEN}, + {"IN_Q_OVERFLOW", unix.IN_Q_OVERFLOW}, + {"IN_UNMOUNT", unix.IN_UNMOUNT}, + } + + var ( + l []string + unknown = mask + ) + for _, n := range names { + if mask&n.m == n.m { + l = append(l, n.n) + unknown ^= n.m + } + } + if unknown > 0 { + l = append(l, fmt.Sprintf("0x%x", unknown)) + } + var c string + if cookie > 0 { + 
c = fmt.Sprintf("(cookie: %d) ", cookie) + } + fmt.Fprintf(os.Stderr, "FSNOTIFY_DEBUG: %s %-30s → %s%q\n", + time.Now().Format("15:04:05.000000000"), strings.Join(l, "|"), c, name) +} diff --git a/src/yangerd/vendor/github.com/fsnotify/fsnotify/internal/debug_netbsd.go b/src/yangerd/vendor/github.com/fsnotify/fsnotify/internal/debug_netbsd.go new file mode 100644 index 000000000..e5b3b6f69 --- /dev/null +++ b/src/yangerd/vendor/github.com/fsnotify/fsnotify/internal/debug_netbsd.go @@ -0,0 +1,25 @@ +package internal + +import "golang.org/x/sys/unix" + +var names = []struct { + n string + m uint32 +}{ + {"NOTE_ATTRIB", unix.NOTE_ATTRIB}, + {"NOTE_CHILD", unix.NOTE_CHILD}, + {"NOTE_DELETE", unix.NOTE_DELETE}, + {"NOTE_EXEC", unix.NOTE_EXEC}, + {"NOTE_EXIT", unix.NOTE_EXIT}, + {"NOTE_EXTEND", unix.NOTE_EXTEND}, + {"NOTE_FORK", unix.NOTE_FORK}, + {"NOTE_LINK", unix.NOTE_LINK}, + {"NOTE_LOWAT", unix.NOTE_LOWAT}, + {"NOTE_PCTRLMASK", unix.NOTE_PCTRLMASK}, + {"NOTE_PDATAMASK", unix.NOTE_PDATAMASK}, + {"NOTE_RENAME", unix.NOTE_RENAME}, + {"NOTE_REVOKE", unix.NOTE_REVOKE}, + {"NOTE_TRACK", unix.NOTE_TRACK}, + {"NOTE_TRACKERR", unix.NOTE_TRACKERR}, + {"NOTE_WRITE", unix.NOTE_WRITE}, +} diff --git a/src/yangerd/vendor/github.com/fsnotify/fsnotify/internal/debug_openbsd.go b/src/yangerd/vendor/github.com/fsnotify/fsnotify/internal/debug_openbsd.go new file mode 100644 index 000000000..1dd455bc5 --- /dev/null +++ b/src/yangerd/vendor/github.com/fsnotify/fsnotify/internal/debug_openbsd.go @@ -0,0 +1,28 @@ +package internal + +import "golang.org/x/sys/unix" + +var names = []struct { + n string + m uint32 +}{ + {"NOTE_ATTRIB", unix.NOTE_ATTRIB}, + // {"NOTE_CHANGE", unix.NOTE_CHANGE}, // Not on 386? 
+ {"NOTE_CHILD", unix.NOTE_CHILD}, + {"NOTE_DELETE", unix.NOTE_DELETE}, + {"NOTE_EOF", unix.NOTE_EOF}, + {"NOTE_EXEC", unix.NOTE_EXEC}, + {"NOTE_EXIT", unix.NOTE_EXIT}, + {"NOTE_EXTEND", unix.NOTE_EXTEND}, + {"NOTE_FORK", unix.NOTE_FORK}, + {"NOTE_LINK", unix.NOTE_LINK}, + {"NOTE_LOWAT", unix.NOTE_LOWAT}, + {"NOTE_PCTRLMASK", unix.NOTE_PCTRLMASK}, + {"NOTE_PDATAMASK", unix.NOTE_PDATAMASK}, + {"NOTE_RENAME", unix.NOTE_RENAME}, + {"NOTE_REVOKE", unix.NOTE_REVOKE}, + {"NOTE_TRACK", unix.NOTE_TRACK}, + {"NOTE_TRACKERR", unix.NOTE_TRACKERR}, + {"NOTE_TRUNCATE", unix.NOTE_TRUNCATE}, + {"NOTE_WRITE", unix.NOTE_WRITE}, +} diff --git a/src/yangerd/vendor/github.com/fsnotify/fsnotify/internal/debug_solaris.go b/src/yangerd/vendor/github.com/fsnotify/fsnotify/internal/debug_solaris.go new file mode 100644 index 000000000..f1b2e73bd --- /dev/null +++ b/src/yangerd/vendor/github.com/fsnotify/fsnotify/internal/debug_solaris.go @@ -0,0 +1,45 @@ +package internal + +import ( + "fmt" + "os" + "strings" + "time" + + "golang.org/x/sys/unix" +) + +func Debug(name string, mask int32) { + names := []struct { + n string + m int32 + }{ + {"FILE_ACCESS", unix.FILE_ACCESS}, + {"FILE_MODIFIED", unix.FILE_MODIFIED}, + {"FILE_ATTRIB", unix.FILE_ATTRIB}, + {"FILE_TRUNC", unix.FILE_TRUNC}, + {"FILE_NOFOLLOW", unix.FILE_NOFOLLOW}, + {"FILE_DELETE", unix.FILE_DELETE}, + {"FILE_RENAME_TO", unix.FILE_RENAME_TO}, + {"FILE_RENAME_FROM", unix.FILE_RENAME_FROM}, + {"UNMOUNTED", unix.UNMOUNTED}, + {"MOUNTEDOVER", unix.MOUNTEDOVER}, + {"FILE_EXCEPTION", unix.FILE_EXCEPTION}, + } + + var ( + l []string + unknown = mask + ) + for _, n := range names { + if mask&n.m == n.m { + l = append(l, n.n) + unknown ^= n.m + } + } + if unknown > 0 { + l = append(l, fmt.Sprintf("0x%x", unknown)) + } + fmt.Fprintf(os.Stderr, "FSNOTIFY_DEBUG: %s %10d:%-30s → %q\n", + time.Now().Format("15:04:05.000000000"), mask, strings.Join(l, " | "), name) +} diff --git 
a/src/yangerd/vendor/github.com/fsnotify/fsnotify/internal/debug_windows.go b/src/yangerd/vendor/github.com/fsnotify/fsnotify/internal/debug_windows.go new file mode 100644 index 000000000..52bf4ce53 --- /dev/null +++ b/src/yangerd/vendor/github.com/fsnotify/fsnotify/internal/debug_windows.go @@ -0,0 +1,40 @@ +package internal + +import ( + "fmt" + "os" + "path/filepath" + "strings" + "time" + + "golang.org/x/sys/windows" +) + +func Debug(name string, mask uint32) { + names := []struct { + n string + m uint32 + }{ + {"FILE_ACTION_ADDED", windows.FILE_ACTION_ADDED}, + {"FILE_ACTION_REMOVED", windows.FILE_ACTION_REMOVED}, + {"FILE_ACTION_MODIFIED", windows.FILE_ACTION_MODIFIED}, + {"FILE_ACTION_RENAMED_OLD_NAME", windows.FILE_ACTION_RENAMED_OLD_NAME}, + {"FILE_ACTION_RENAMED_NEW_NAME", windows.FILE_ACTION_RENAMED_NEW_NAME}, + } + + var ( + l []string + unknown = mask + ) + for _, n := range names { + if mask&n.m == n.m { + l = append(l, n.n) + unknown ^= n.m + } + } + if unknown > 0 { + l = append(l, fmt.Sprintf("0x%x", unknown)) + } + fmt.Fprintf(os.Stderr, "FSNOTIFY_DEBUG: %s %-65s → %q\n", + time.Now().Format("15:04:05.000000000"), strings.Join(l, " | "), filepath.ToSlash(name)) +} diff --git a/src/yangerd/vendor/github.com/fsnotify/fsnotify/internal/freebsd.go b/src/yangerd/vendor/github.com/fsnotify/fsnotify/internal/freebsd.go new file mode 100644 index 000000000..5ac8b5079 --- /dev/null +++ b/src/yangerd/vendor/github.com/fsnotify/fsnotify/internal/freebsd.go @@ -0,0 +1,31 @@ +//go:build freebsd + +package internal + +import ( + "syscall" + + "golang.org/x/sys/unix" +) + +var ( + ErrSyscallEACCES = syscall.EACCES + ErrUnixEACCES = unix.EACCES +) + +var maxfiles uint64 + +func SetRlimit() { + // Go 1.19 will do this automatically: https://go-review.googlesource.com/c/go/+/393354/ + var l syscall.Rlimit + err := syscall.Getrlimit(syscall.RLIMIT_NOFILE, &l) + if err == nil && l.Cur != l.Max { + l.Cur = l.Max + syscall.Setrlimit(syscall.RLIMIT_NOFILE, &l) + } + 
maxfiles = uint64(l.Cur) +} + +func Maxfiles() uint64 { return maxfiles } +func Mkfifo(path string, mode uint32) error { return unix.Mkfifo(path, mode) } +func Mknod(path string, mode uint32, dev int) error { return unix.Mknod(path, mode, uint64(dev)) } diff --git a/src/yangerd/vendor/github.com/fsnotify/fsnotify/internal/internal.go b/src/yangerd/vendor/github.com/fsnotify/fsnotify/internal/internal.go new file mode 100644 index 000000000..7daa45e19 --- /dev/null +++ b/src/yangerd/vendor/github.com/fsnotify/fsnotify/internal/internal.go @@ -0,0 +1,2 @@ +// Package internal contains some helpers. +package internal diff --git a/src/yangerd/vendor/github.com/fsnotify/fsnotify/internal/unix.go b/src/yangerd/vendor/github.com/fsnotify/fsnotify/internal/unix.go new file mode 100644 index 000000000..b251fb803 --- /dev/null +++ b/src/yangerd/vendor/github.com/fsnotify/fsnotify/internal/unix.go @@ -0,0 +1,31 @@ +//go:build !windows && !darwin && !freebsd && !plan9 + +package internal + +import ( + "syscall" + + "golang.org/x/sys/unix" +) + +var ( + ErrSyscallEACCES = syscall.EACCES + ErrUnixEACCES = unix.EACCES +) + +var maxfiles uint64 + +func SetRlimit() { + // Go 1.19 will do this automatically: https://go-review.googlesource.com/c/go/+/393354/ + var l syscall.Rlimit + err := syscall.Getrlimit(syscall.RLIMIT_NOFILE, &l) + if err == nil && l.Cur != l.Max { + l.Cur = l.Max + syscall.Setrlimit(syscall.RLIMIT_NOFILE, &l) + } + maxfiles = uint64(l.Cur) +} + +func Maxfiles() uint64 { return maxfiles } +func Mkfifo(path string, mode uint32) error { return unix.Mkfifo(path, mode) } +func Mknod(path string, mode uint32, dev int) error { return unix.Mknod(path, mode, dev) } diff --git a/src/yangerd/vendor/github.com/fsnotify/fsnotify/internal/unix2.go b/src/yangerd/vendor/github.com/fsnotify/fsnotify/internal/unix2.go new file mode 100644 index 000000000..37dfeddc2 --- /dev/null +++ b/src/yangerd/vendor/github.com/fsnotify/fsnotify/internal/unix2.go @@ -0,0 +1,7 @@ +//go:build 
!windows + +package internal + +func HasPrivilegesForSymlink() bool { + return true +} diff --git a/src/yangerd/vendor/github.com/fsnotify/fsnotify/internal/windows.go b/src/yangerd/vendor/github.com/fsnotify/fsnotify/internal/windows.go new file mode 100644 index 000000000..896bc2e5a --- /dev/null +++ b/src/yangerd/vendor/github.com/fsnotify/fsnotify/internal/windows.go @@ -0,0 +1,41 @@ +//go:build windows + +package internal + +import ( + "errors" + + "golang.org/x/sys/windows" +) + +// Just a dummy. +var ( + ErrSyscallEACCES = errors.New("dummy") + ErrUnixEACCES = errors.New("dummy") +) + +func SetRlimit() {} +func Maxfiles() uint64 { return 1<<64 - 1 } +func Mkfifo(path string, mode uint32) error { return errors.New("no FIFOs on Windows") } +func Mknod(path string, mode uint32, dev int) error { return errors.New("no device nodes on Windows") } + +func HasPrivilegesForSymlink() bool { + var sid *windows.SID + err := windows.AllocateAndInitializeSid( + &windows.SECURITY_NT_AUTHORITY, + 2, + windows.SECURITY_BUILTIN_DOMAIN_RID, + windows.DOMAIN_ALIAS_RID_ADMINS, + 0, 0, 0, 0, 0, 0, + &sid) + if err != nil { + return false + } + defer windows.FreeSid(sid) + token := windows.Token(0) + member, err := token.IsMember(sid) + if err != nil { + return false + } + return member || token.IsElevated() +} diff --git a/src/yangerd/vendor/github.com/fsnotify/fsnotify/shared.go b/src/yangerd/vendor/github.com/fsnotify/fsnotify/shared.go new file mode 100644 index 000000000..3ee9b58f1 --- /dev/null +++ b/src/yangerd/vendor/github.com/fsnotify/fsnotify/shared.go @@ -0,0 +1,64 @@ +package fsnotify + +import "sync" + +type shared struct { + Events chan Event + Errors chan error + done chan struct{} + mu sync.Mutex +} + +func newShared(ev chan Event, errs chan error) *shared { + return &shared{ + Events: ev, + Errors: errs, + done: make(chan struct{}), + } +} + +// Returns true if the event was sent, or false if watcher is closed. 
+func (w *shared) sendEvent(e Event) bool { + if e.Op == 0 { + return true + } + select { + case <-w.done: + return false + case w.Events <- e: + return true + } +} + +// Returns true if the error was sent, or false if watcher is closed. +func (w *shared) sendError(err error) bool { + if err == nil { + return true + } + select { + case <-w.done: + return false + case w.Errors <- err: + return true + } +} + +func (w *shared) isClosed() bool { + select { + case <-w.done: + return true + default: + return false + } +} + +// Mark as closed; returns true if it was already closed. +func (w *shared) close() bool { + w.mu.Lock() + defer w.mu.Unlock() + if w.isClosed() { + return true + } + close(w.done) + return false +} diff --git a/src/yangerd/vendor/github.com/fsnotify/fsnotify/staticcheck.conf b/src/yangerd/vendor/github.com/fsnotify/fsnotify/staticcheck.conf new file mode 100644 index 000000000..8fa7351f0 --- /dev/null +++ b/src/yangerd/vendor/github.com/fsnotify/fsnotify/staticcheck.conf @@ -0,0 +1,3 @@ +checks = ['all', + '-U1000', # Don't complain about unused functions. 
+] diff --git a/src/yangerd/vendor/github.com/fsnotify/fsnotify/system_bsd.go b/src/yangerd/vendor/github.com/fsnotify/fsnotify/system_bsd.go new file mode 100644 index 000000000..f65e8fe3e --- /dev/null +++ b/src/yangerd/vendor/github.com/fsnotify/fsnotify/system_bsd.go @@ -0,0 +1,7 @@ +//go:build freebsd || openbsd || netbsd || dragonfly + +package fsnotify + +import "golang.org/x/sys/unix" + +const openMode = unix.O_NONBLOCK | unix.O_RDONLY | unix.O_CLOEXEC diff --git a/src/yangerd/vendor/github.com/fsnotify/fsnotify/system_darwin.go b/src/yangerd/vendor/github.com/fsnotify/fsnotify/system_darwin.go new file mode 100644 index 000000000..a29fc7aab --- /dev/null +++ b/src/yangerd/vendor/github.com/fsnotify/fsnotify/system_darwin.go @@ -0,0 +1,8 @@ +//go:build darwin + +package fsnotify + +import "golang.org/x/sys/unix" + +// note: this constant is not defined on BSD +const openMode = unix.O_EVTONLY | unix.O_CLOEXEC diff --git a/src/yangerd/vendor/github.com/godbus/dbus/v5/.cirrus.yml b/src/yangerd/vendor/github.com/godbus/dbus/v5/.cirrus.yml new file mode 100644 index 000000000..6e2090296 --- /dev/null +++ b/src/yangerd/vendor/github.com/godbus/dbus/v5/.cirrus.yml @@ -0,0 +1,11 @@ +# See https://cirrus-ci.org/guide/FreeBSD/ +freebsd_instance: + image_family: freebsd-14-3 + +task: + name: Test on FreeBSD + install_script: pkg install -y go125 dbus + test_script: | + /usr/local/etc/rc.d/dbus onestart && \ + eval `dbus-launch --sh-syntax` && \ + go125 test -v ./... 
diff --git a/src/yangerd/vendor/github.com/godbus/dbus/v5/.golangci.yml b/src/yangerd/vendor/github.com/godbus/dbus/v5/.golangci.yml new file mode 100644 index 000000000..5bbdd9342 --- /dev/null +++ b/src/yangerd/vendor/github.com/godbus/dbus/v5/.golangci.yml @@ -0,0 +1,13 @@ +version: "2" + +linters: + enable: + - unconvert + - unparam + exclusions: + presets: + - std-error-handling + +formatters: + enable: + - gofumpt diff --git a/src/yangerd/vendor/github.com/godbus/dbus/v5/CONTRIBUTING.md b/src/yangerd/vendor/github.com/godbus/dbus/v5/CONTRIBUTING.md new file mode 100644 index 000000000..c88f9b2bd --- /dev/null +++ b/src/yangerd/vendor/github.com/godbus/dbus/v5/CONTRIBUTING.md @@ -0,0 +1,50 @@ +# How to Contribute + +## Getting Started + +- Fork the repository on GitHub +- Read the [README](README.markdown) for build and test instructions +- Play with the project, submit bugs, submit patches! + +## Contribution Flow + +This is a rough outline of what a contributor's workflow looks like: + +- Create a topic branch from where you want to base your work (usually master). +- Make commits of logical units. +- Make sure your commit messages are in the proper format (see below). +- Push your changes to a topic branch in your fork of the repository. +- Make sure the tests pass, and add any new tests as appropriate. +- Submit a pull request to the original repository. + +Thanks for your contributions! + +### Format of the Commit Message + +We follow a rough convention for commit messages that is designed to answer two +questions: what changed and why. The subject line should feature the what and +the body of the commit should describe the why. + +``` +scripts: add the test-cluster command + +this uses tmux to setup a test cluster that you can easily kill and +start for debugging. + +Fixes #38 +``` + +The format can be described more formally as follows: + +``` +: + + + +