Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,11 @@ $(BUILD_DIR)/test-pthread: tests/test-pthread.c | $(BUILD_DIR)
@echo " CROSS $< (with -lpthread)"
$(Q)$(CROSS_COMPILE)gcc -D_GNU_SOURCE -static -O2 -o $@ $< -lpthread

# test-scm-creds blocks in accept() on a pthread while the listener's socket
# option is changed.
$(BUILD_DIR)/test-scm-creds: tests/test-scm-creds.c | $(BUILD_DIR)
@echo " CROSS $< (with -lpthread)"
$(Q)$(CROSS_COMPILE)gcc -D_GNU_SOURCE -static -O2 -o $@ $< -lpthread

# test-shim-cred-race spawns a pthread reader while the main thread
# toggles setresuid; the reader spins on the identity fast path.
$(BUILD_DIR)/test-shim-cred-race: tests/test-shim-cred-race.c | $(BUILD_DIR)
Expand Down
15 changes: 11 additions & 4 deletions src/syscall/abi.h
Original file line number Diff line number Diff line change
Expand Up @@ -682,6 +682,12 @@ enum {
SOCK_OPT_TCP_KEEPINTVL,
SOCK_OPT_IPV6_V6ONLY,
SOCK_OPT_PASSCRED,
SOCK_OPT_IP_TOS,
SOCK_OPT_IP_TTL,
SOCK_OPT_IP_HDRINCL,
SOCK_OPT_IP_PKTINFO,
SOCK_OPT_IP_RECVTTL,
SOCK_OPT_IP_RECVTOS,
/* IP_MTU_DISCOVER value stored verbatim so getsockopt round-trips the
* Linux PMTUD mode the guest set. The host accepts the value but does
* not honour every Linux mode; see sys_setsockopt for the IP_DONTFRAG
Expand All @@ -697,10 +703,11 @@ typedef struct {
} sock_opt_cache_t;

typedef struct {
int type; /* FD_CLOSED, FD_STDIO, FD_REGULAR, FD_DIR */
int host_fd; /* Underlying macOS file descriptor */
int linux_flags; /* Linux open flags (for CLOEXEC tracking) */
void *dir; /* DIR* for FD_DIR entries (NULL otherwise) */
int type; /* FD_CLOSED, FD_STDIO, FD_REGULAR, FD_DIR */
int host_fd; /* Underlying macOS file descriptor */
uint64_t generation; /* Bumped each time this guest fd slot is reused. */
int linux_flags; /* Linux open flags (for CLOEXEC tracking) */
void *dir; /* DIR* for FD_DIR entries (NULL otherwise) */
char proc_path[FD_VIRTUAL_PATH_MAX]; /* Virtual /proc dir root for *at */
int seals; /* F_SEAL_* bits (non-zero only for memfd_create fds) */
sock_opt_cache_t sock; /* Socket option cache (zeroed for non-sockets) */
Expand Down
15 changes: 12 additions & 3 deletions src/syscall/fdtable.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ pthread_mutex_t fd_lock = PTHREAD_MUTEX_INITIALIZER; /* Lock order: 3 */

/* FD table. */
fd_entry_t fd_table[FD_TABLE_SIZE];
static uint64_t fd_next_generation = 1;

/* RLIMIT_NOFILE tracking. */
/* Guest-side soft limit for RLIMIT_NOFILE. fd_alloc checks this.
Expand Down Expand Up @@ -77,6 +78,7 @@ static inline void fd_init_entry(int fd,
fd_bitmap_set_used(fd);
fd_table[fd].type = type;
fd_table[fd].host_fd = host_fd;
fd_table[fd].generation = fd_next_generation++;
fd_table[fd].linux_flags = 0;
fd_table[fd].dir = NULL;
fd_table[fd].proc_path[0] = '\0';
Expand Down Expand Up @@ -154,9 +156,16 @@ void fdtable_init(void)
memset(fd_free_bitmap, 0xFF, sizeof(fd_free_bitmap));

/* Pre-open stdin/stdout/stderr */
fd_table[0] = (fd_entry_t) {.type = FD_STDIO, .host_fd = STDIN_FILENO};
fd_table[1] = (fd_entry_t) {.type = FD_STDIO, .host_fd = STDOUT_FILENO};
fd_table[2] = (fd_entry_t) {.type = FD_STDIO, .host_fd = STDERR_FILENO};
fd_next_generation = 1;
fd_table[0] = (fd_entry_t) {.type = FD_STDIO,
.host_fd = STDIN_FILENO,
.generation = fd_next_generation++};
fd_table[1] = (fd_entry_t) {.type = FD_STDIO,
.host_fd = STDOUT_FILENO,
.generation = fd_next_generation++};
fd_table[2] = (fd_entry_t) {.type = FD_STDIO,
.host_fd = STDERR_FILENO,
.generation = fd_next_generation++};
fd_bitmap_set_used(0);
fd_bitmap_set_used(1);
fd_bitmap_set_used(2);
Expand Down
26 changes: 25 additions & 1 deletion src/syscall/net-abi.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,11 @@ int socket_small_int_normalize(int level, int optname, int value)
(optname == LINUX_SO_KEEPALIVE || optname == LINUX_SO_REUSEADDR ||
optname == LINUX_SO_ACCEPTCONN || optname == LINUX_SO_REUSEPORT ||
optname == LINUX_SO_BROADCAST || optname == LINUX_SO_DONTROUTE ||
optname == LINUX_SO_OOBINLINE)) ||
optname == LINUX_SO_OOBINLINE || optname == LINUX_SO_PASSCRED)) ||
(level == LINUX_IPPROTO_TCP && optname == LINUX_TCP_NODELAY) ||
(level == LINUX_IPPROTO_IP &&
(optname == LINUX_IP_HDRINCL || optname == LINUX_IP_PKTINFO ||
optname == LINUX_IP_RECVTTL || optname == LINUX_IP_RECVTOS)) ||
(level == LINUX_IPPROTO_IPV6 && optname == LINUX_IPV6_V6ONLY))
return value != 0;

Expand All @@ -46,6 +49,7 @@ int socket_opt_uses_small_int(int level, int optname)
case LINUX_SO_SNDBUF:
case LINUX_SO_TYPE:
case LINUX_SO_ERROR:
case LINUX_SO_PASSCRED:
return 1;
default:
return 0;
Expand All @@ -64,6 +68,20 @@ int socket_opt_uses_small_int(int level, int optname)
}
}

if (level == LINUX_IPPROTO_IP) {
switch (optname) {
case LINUX_IP_TOS:
case LINUX_IP_TTL:
case LINUX_IP_HDRINCL:
case LINUX_IP_PKTINFO:
case LINUX_IP_RECVTTL:
case LINUX_IP_RECVTOS:
return 1;
default:
return 0;
}
}

return level == LINUX_IPPROTO_IPV6 && optname == LINUX_IPV6_V6ONLY;
}

Expand Down Expand Up @@ -136,6 +154,12 @@ int translate_small_int_sockopt(int level,
}
}

if (level == LINUX_IPPROTO_IP) {
*mac_level = IPPROTO_IP;
*mac_optname = translate_ip_sockopt_to_mac(optname);
return *mac_optname >= 0;
}

if (level == LINUX_IPPROTO_IPV6 && optname == LINUX_IPV6_V6ONLY) {
*mac_level = IPPROTO_IPV6;
*mac_optname = IPV6_V6ONLY;
Expand Down
16 changes: 14 additions & 2 deletions src/syscall/net-msg.c
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,18 @@
/* Linux SCM_MAX_FD: maximum number of file descriptors in SCM_RIGHTS */
#define LINUX_SCM_MAX_FD 253

/* Report whether host_fd refers to an AF_UNIX socket.
 *
 * Linux only delivers SCM_CREDENTIALS on AF_UNIX sockets even when
 * SO_PASSCRED is set, so PASSCRED toggled on AF_INET / AF_INET6 must
 * stay a no-op. sys_recvmsg uses this predicate to gate credential
 * injection.
 *
 * Returns true only when getsockname() succeeds and reports AF_UNIX; any
 * getsockname() failure yields false.
 */
static bool host_socket_is_unix(int host_fd)
{
    /* Zero-fill so the family reads as AF_UNSPEC if getsockname() succeeds
     * without storing a usable address: POSIX leaves the stored value
     * unspecified for a socket that has not been bound to a local name,
     * and an indeterminate ss_family must never be mistaken for AF_UNIX.
     */
    struct sockaddr_storage ss = {0};
    socklen_t slen = sizeof(ss);

    if (getsockname(host_fd, (struct sockaddr *) &ss, &slen) != 0)
        return false;

    return ss.ss_family == AF_UNIX;
}

static int translate_scm_rights_fds(int *fds, size_t nfds)
{
if (nfds > LINUX_SCM_MAX_FD)
Expand Down Expand Up @@ -597,7 +609,7 @@ int64_t sys_recvmsg(guest_t *g, int fd, uint64_t msg_gva, int flags)
int passcred_val = 0;
if (net_socket_cached_int_get(fd, LINUX_SOL_SOCKET, LINUX_SO_PASSCRED,
&passcred_val) &&
passcred_val) {
passcred_val && host_socket_is_unix(host_ref.fd)) {
linux_ucred_t cred = {
.pid = (int32_t) proc_get_pid(),
.uid = proc_get_uid(),
Expand Down Expand Up @@ -655,7 +667,7 @@ int64_t sys_recvmsg(guest_t *g, int fd, uint64_t msg_gva, int flags)
int injected = 0, passcred_val = 0;
if (net_socket_cached_int_get(fd, LINUX_SOL_SOCKET, LINUX_SO_PASSCRED,
&passcred_val) &&
passcred_val) {
passcred_val && host_socket_is_unix(host_ref.fd)) {
linux_ucred_t cred = {
.pid = (int32_t) proc_get_pid(),
.uid = proc_get_uid(),
Expand Down
61 changes: 57 additions & 4 deletions src/syscall/net-sockopt.c
Original file line number Diff line number Diff line change
Expand Up @@ -124,8 +124,26 @@ static int net_sock_opt_index_for(int level, int optname)
}
if (level == LINUX_IPPROTO_IPV6 && optname == LINUX_IPV6_V6ONLY)
return SOCK_OPT_IPV6_V6ONLY;
if (level == LINUX_IPPROTO_IP && optname == LINUX_IP_MTU_DISCOVER)
return SOCK_OPT_IP_MTU_DISCOVER;
if (level == LINUX_IPPROTO_IP) {
switch (optname) {
case LINUX_IP_TOS:
return SOCK_OPT_IP_TOS;
case LINUX_IP_TTL:
return SOCK_OPT_IP_TTL;
case LINUX_IP_HDRINCL:
return SOCK_OPT_IP_HDRINCL;
case LINUX_IP_PKTINFO:
return SOCK_OPT_IP_PKTINFO;
case LINUX_IP_RECVTTL:
return SOCK_OPT_IP_RECVTTL;
case LINUX_IP_RECVTOS:
return SOCK_OPT_IP_RECVTOS;
case LINUX_IP_MTU_DISCOVER:
return SOCK_OPT_IP_MTU_DISCOVER;
default:
return -1;
}
}
return -1;
}

Expand All @@ -140,6 +158,34 @@ int net_socket_cached_int_get(int guest_fd, int level, int optname, int *value)
return net_sock_cache_get(guest_fd, idx, value);
}

/* Generation-checked lookup of a cached integer socket option.
 *
 * Consults the cache entry for (level, optname) on guest_fd, but only when
 * the fd slot currently has type FD_SOCKET and its generation field equals
 * the caller-supplied generation. value is handed to sock_opt_get() to
 * receive the cached integer.
 *
 * Returns 0 when the option is SOL_SOCKET/SO_ERROR, when the option has no
 * cache index, when guest_fd is outside [0, FD_TABLE_SIZE), when value is
 * NULL, when the slot is not a socket, or when the generation differs.
 * Otherwise returns the result of sock_opt_get(): reduced to 0/1 on the
 * locked path, passed through unchanged on the lock-free path.
 */
int net_socket_cached_int_get_if_generation(int guest_fd,
                                            uint64_t generation,
                                            int level,
                                            int optname,
                                            int *value)
{
    /* SO_ERROR is never answered from this cache. */
    if (level == LINUX_SOL_SOCKET && optname == LINUX_SO_ERROR)
        return 0;

    const int slot_idx = net_sock_opt_index_for(level, optname);
    if (slot_idx < 0)
        return 0;
    if (!RANGE_CHECK(guest_fd, 0, FD_TABLE_SIZE))
        return 0;
    if (!value)
        return 0;

    fd_entry_t *const slot = &fd_table[guest_fd];

    if (!thread_is_single_active()) {
        /* Inspect the slot while holding fd_lock. */
        pthread_mutex_lock(&fd_lock);
        bool hit = false;
        if (slot->type == FD_SOCKET && slot->generation == generation)
            hit = sock_opt_get(slot, slot_idx, value);
        pthread_mutex_unlock(&fd_lock);
        return hit;
    }

    /* thread_is_single_active() held: read the slot without fd_lock. */
    if (slot->type != FD_SOCKET || slot->generation != generation)
        return 0;
    return sock_opt_get(slot, slot_idx, value);
}

void net_socket_cached_int_set(int guest_fd, int level, int optname, int value)
{
if (level == LINUX_SOL_SOCKET && optname == LINUX_SO_ERROR)
Expand All @@ -155,7 +201,7 @@ void net_socket_cache_init_defaults(int guest_fd, int domain, int real_type)
static const int zero_opts[] = {
SOCK_OPT_KEEPALIVE, SOCK_OPT_REUSEADDR, SOCK_OPT_ACCEPTCONN,
SOCK_OPT_REUSEPORT, SOCK_OPT_BROADCAST, SOCK_OPT_DONTROUTE,
SOCK_OPT_OOBINLINE,
SOCK_OPT_OOBINLINE, SOCK_OPT_PASSCRED,
};

net_socket_cache_set_many_zero(guest_fd, zero_opts, ARRAY_SIZE(zero_opts));
Expand All @@ -168,12 +214,19 @@ void net_socket_cache_init_defaults(int guest_fd, int domain, int real_type)
net_socket_cache_set_index(guest_fd, SOCK_OPT_IPV6_V6ONLY, 0);
}

void net_socket_cache_init_accept(int guest_fd)
void net_socket_cache_init_accept(int guest_fd, int inherit_passcred)
{
static const int zero_opts[] = {
SOCK_OPT_ACCEPTCONN, SOCK_OPT_REUSEPORT, SOCK_OPT_BROADCAST,
SOCK_OPT_DONTROUTE, SOCK_OPT_OOBINLINE,
};

net_socket_cache_set_many_zero(guest_fd, zero_opts, ARRAY_SIZE(zero_opts));

/* AF_UNIX accept inherits SO_PASSCRED from the listener. For local
* connects the accept path receives the value captured when the
* connection was queued; otherwise it falls back to the listener value.
*/
net_socket_cache_set_index(guest_fd, SOCK_OPT_PASSCRED,
inherit_passcred ? 1 : 0);
}
9 changes: 8 additions & 1 deletion src/syscall/net-sockopt.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,16 @@

#pragma once

#include <stdint.h>

int net_socket_fd_is_valid(int guest_fd);
int net_socket_cached_int_get(int guest_fd, int level, int optname, int *value);
int net_socket_cached_int_get_if_generation(int guest_fd,
uint64_t generation,
int level,
int optname,
int *value);
void net_socket_cached_int_set(int guest_fd, int level, int optname, int value);
void net_socket_cache_set_index(int guest_fd, int idx, int value);
void net_socket_cache_init_defaults(int guest_fd, int domain, int real_type);
void net_socket_cache_init_accept(int guest_fd);
void net_socket_cache_init_accept(int guest_fd, int inherit_passcred);
Loading
Loading