/* This software was written by Dirk Engling It is considered beerware. Prost. Skol. Cheers or whatever. $Id$ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define IPC_PACKETSIZE 4096 #define MAGIC_EXIT_CODE 42 enum { IAM_DAEMON, IAM_CLIENT, IAM_FORKSLAVE }; static int g_uds; static int g_whoami = IAM_CLIENT; static struct pidfh * g_pidfilehandle; static int g_fork_slave_fd; static char g_ipc_packet[IPC_PACKETSIZE]; static int * const g_ipc_packet_int = (int*)g_ipc_packet; /* For house keeping a list of all processes we attach to jails (probes), with an initial vector size of 128. The vector never shrinks. */ #define PROBES_VECTOR_SIZE 128 static pid_t * g_probes; static size_t g_probes_size; typedef struct { int m_jid; int m_flags; char *m_commandline; char *m_proctitle; } daemon_task; /* Forward declarations */ static void term_handler( int signal ); static void kill_all_probes( void ); static int check_for_jail( int jid ); static int copy_daemontask( daemon_task ** out, daemon_task * const in ); static int add_task_to_kqueue( int kq, daemon_task * task_in ); static pid_t fork_and_jail( int jid, char * proctitle ); static void fork_and_execve( int kq, daemon_task * task ); static int fork_fork_slave( ); static void exerr( char * message ); static void warn( char * message ); static void usage( char * command ); /* This handler ensures that we clean up our probes if asked to terminate gracefully */ static void term_handler( int signal ) { if( signal == SIGTERM ) exit(0); } /* Report error through the appropriate notification channel. Currently this just writes to stderr, which hopefully still is there. */ static void exerr( char * message ) { switch( g_whoami ) { case IAM_DAEMON: syslog( LOG_ERR, "Error %s\n", message ); break; case IAM_CLIENT: fprintf( stderr, "Error %s\n", message ); break; case IAM_FORKSLAVE: /* TODO */ (void)message; break; } exit( 11 ); } /* Report a non-fatal situation */ static void warn( char * message ) { syslog( LOG_WARNING, "%s\n", message ); } /* Report syntax of command line arguments to the user */ static void usage( char * cmd ) { fprintf( stderr, "%s -D [-ppidfile] [-fipcsockpath]\n" "%s -c command -j jid [-t proctitle] [-r]\n", cmd, cmd ); exit( 1 ); } /* This fork slave is an extra process that is spawned very early so that we do not leak information into the jail via copied memory. For communication the fork slave keeps a bi-directional pipe open to the daemon. */ static void fork_slave( int master_fd ) { struct sigaction sa; /* explain why the user sees two processes in ps */ setproctitle( "fork slave" ); /* We do not care for the spawned process -- it is checked for in the daemons kqueue filter. So just ignore SIGCHLD */ memset( &sa, 0, sizeof( sa ) ); sigemptyset(&sa.sa_mask); sa.sa_flags = SA_NOCLDWAIT; if( sigaction(SIGCHLD, &sa, NULL) == -1 ) exerr( "when trying to enable auto reap" ); /* Wait for command from master */ while(1) { switch( read( master_fd, g_ipc_packet, sizeof(g_ipc_packet) ) ) { case -1: exerr( "reading commands from master's socket" ); case IPC_PACKETSIZE: /* Decode packet and throw a forked child */ *(pid_t*)g_ipc_packet = fork_and_jail( g_ipc_packet_int[0], g_ipc_packet + sizeof(int) ); if( write( master_fd, g_ipc_packet, sizeof(pid_t) ) != sizeof(pid_t) ) exerr( "replying to master" ); break; case 0: /* Remote end closed, bye */ exit(0); default: exerr( "ignoring corrupt command packet" ); break; } } } static int fork_fork_slave( ) { int sockets[2]; if (socketpair(AF_UNIX, SOCK_STREAM, 0, sockets) < 0) exerr("opening stream socket pair"); switch( fork() ) { case -1: exerr("forking fork slave"); break; case 0: /* I am child, close master's socket fd */ close( sockets[0] ); g_whoami = IAM_FORKSLAVE; pidfile_close( g_pidfilehandle ); g_pidfilehandle = NULL; fork_slave( sockets[1] ); /* Never returns */ exit(0); default: /* I am master, close child's socket fd */ close( sockets[1] ); return sockets[0]; } /* Should not happen*/ return -1; } /* Helper function to check if a jail id is valid */ static int check_for_jail( int jid ) { struct iovec iov[2]; iov[0].iov_base = "jid\0"; iov[0].iov_len = 4; iov[1].iov_base = &jid; iov[1].iov_len = sizeof(jid); if( jail_get( iov, 2, 0 ) != -1 ) return 0; return -1; } static pid_t fork_and_jail( int jid, char * proctitle ) { int sig; pid_t pid = fork(); if( !pid ) { sigset_t sigset; /* Set proctitle so that jail's pgrep -f can identify the process */ if( proctitle && *proctitle ) setproctitle( "%s", proctitle ); /* Throw ourself into the jail */ if( jail_attach( jid ) ) exerr( "when attaching to jail" ); /* wait for SIGHUP */ sigemptyset(&sigset); sigaddset(&sigset, SIGHUP); sigprocmask(SIG_BLOCK, &sigset, NULL); while( !sigwait( &sigset, &sig ) ) if( sig == SIGHUP ) exit( MAGIC_EXIT_CODE ); exit(0); } return pid; } static int copy_daemontask( daemon_task ** out, daemon_task * const in ) { daemon_task * t = (daemon_task *)malloc( sizeof( daemon_task ) ); *out = t; if( !t ) return -1; t->m_jid = in->m_jid; t->m_flags = in->m_flags; t->m_commandline = in->m_commandline ? strdup( in->m_commandline ): 0; t->m_proctitle = in->m_proctitle ? strdup( in->m_proctitle ) : 0; /* If all strings could be copied, return array */ if( ( !in->m_commandline || t->m_commandline ) && ( !in->m_proctitle || t->m_proctitle ) ) return 0; free( t->m_commandline ); free( t->m_proctitle ); free( t ); *out = 0; return -1; } static void fork_and_execve( int kq, daemon_task * t_in ) { char * shell = "/bin/sh"; char * envp[] = { "PATH=/bin:/sbin:/usr/bin:/usr/sbin", NULL }; pid_t pid; pid = fork(); switch( pid ) { case -1: warn("Failed forking command line process" ); break; case 0: /* start a new process group */ (void) setsid(); /* Execute command line provided by user */ if( execle(shell, shell, "-c", t_in->m_commandline, (char *)0, envp) == -1 ) _exit(0); /* Never reached */ break; default: /* If no respawn requested, just let the command finish */ if( !(t_in->m_flags & 0x01) ) return; /* else add process to our process watch list, so we get notified, once it finishes to be able to respawn. ("else" to open block) */ else { struct kevent ke; daemon_task * t; /* Try to take a copy of task struct. If this fails, then only respawn fails. */ if( copy_daemontask( &t, t_in ) ) return; /* Signal that this is a process that shall respawn the task in jail */ t->m_flags |= 0x02; memset( &ke, 0, sizeof ke ); EV_SET( &ke, pid, EVFILT_PROC, EV_ADD, NOTE_EXIT, 0, t ); if( kevent( kq, &ke, 1, NULL, 0, NULL ) == -1 ) { /* If adding the event fails, get rid of struct */ warn( "Can not put respawn watcher pid on the kqueue" ); free( t->m_commandline ); free( t->m_proctitle ); free( t ); } } break; } } static void kill_all_probes( void ) { size_t i; if( g_probes ) for( i = 0; i < g_probes_size; ++i ) if( g_probes[i] ) kill( g_probes[i], SIGTERM ); g_probes_size = 0; free( g_probes ); g_probes = 0; pidfile_remove( g_pidfilehandle ); } static int add_task_to_kqueue( int kq, daemon_task * t_in ) { struct kevent ke; daemon_task * t; pid_t pid; if( check_for_jail( t_in->m_jid ) ) { syslog( LOG_ERR, "Invalid jail id: %d", t_in->m_jid ); return -1; } /* Take a copy of the task structure */ if( copy_daemontask( &t, t_in ) ) return -1; /* Forge a command packet for fork slave and send it via control socket */ memset( g_ipc_packet, 0, IPC_PACKETSIZE ); g_ipc_packet_int[0] = t->m_jid; if( t->m_proctitle ) strncpy( g_ipc_packet + sizeof(int), t->m_proctitle, IPC_PACKETSIZE - sizeof(int) ); if( write( g_fork_slave_fd, g_ipc_packet, IPC_PACKETSIZE ) != IPC_PACKETSIZE ) exerr( "sending task to fork slave" ); if( read( g_fork_slave_fd, g_ipc_packet, sizeof(pid_t) ) < (ssize_t)sizeof(pid_t) ) exerr( "receiving pid from fork slave" ); /* Expect reply from fork slave */ pid = *(pid_t*)g_ipc_packet; /* Associate pid with command line to execute and add to our kqueue */ memset( &ke, 0, sizeof ke ); EV_SET( &ke, pid, EVFILT_PROC, EV_ADD, NOTE_EXIT, 0, t ); if( kevent( kq, &ke, 1, NULL, 0, NULL ) == 0 ) { size_t i; /* Account for new pid */ for( i = 0; i < g_probes_size; ++i ) if( !g_probes[i] ) { g_probes[i] = pid; return 0; } /* No space for pid entry => make room */ if( i == g_probes_size ) { size_t bytes = sizeof(pid_t) * g_probes_size; pid_t *probes = realloc( g_probes, 4 * bytes ); if( probes ) { /* Erase new memory */ memset( probes + g_probes_size, 0, 3 * bytes ); probes[g_probes_size] = pid; g_probes_size *= 4; g_probes = probes; return 0; } } /* If we added a kevent filter but failed to store the pid for our house keeping, remove the kqueuei filter again (and kill probe) */ EV_SET( &ke, pid, EVFILT_PROC, EV_DELETE, NOTE_EXIT, 0, t ); kevent( kq, &ke, 1, NULL, 0, NULL ); } /* Avoid an unused task in the jail. Kill it. */ warn( "Can not put pid on the kqueue. Killing task." ); kill( pid, SIGKILL ); free( t->m_commandline ); free( t->m_proctitle ); free( t ); return -1; } /* jaildaemon -D <-ppidfile> <-fipcsockpath> -c command -j jid -t proctitle <-r> */ int main( int argc, char **argv ) { int kq, i; int o_force_daemon = 0; int o_daemonize = 0, o_jid = -1, o_respawn = 0; char *o_command = NULL, *o_pidfile = NULL, *o_proctitle = NULL; char *o_uds_path = "/var/run/jaildaemon.pipe"; struct kevent ke; struct sockaddr_un addr; struct sigaction sa; size_t ipc_bytes = IPC_PACKETSIZE; /* If we are not started from root, there is not much we can do, neither access the unix domain socket.*/ if( getuid() != 0 ) exerr( "when starting. Need to run as root." ); i=1; while(i) { switch( getopt( argc, argv, "Drt:c:j:p:f:" ) ) { case -1: i=0; break; case 'D': o_daemonize = 1; break; case 'r': o_respawn = 1; break; case 't': o_proctitle = optarg; break; case 'c': o_command = optarg; break; case 'j': o_jid = strtol( optarg, 0, 0 ); break; case 'p': o_pidfile = optarg; break; case 'f': o_uds_path = optarg; break; case '?': usage( argv[0]); exit(0); break; } } /* Daemonize and start a fork slave while there is no file descriptors or initialized memory yet. Communicate with this slave via socketpair */ if( o_daemonize ) { g_pidfilehandle = pidfile_open(o_pidfile, 0600, NULL ); if (!g_pidfilehandle) { if (errno == EEXIST) exerr( "jaildaemon already running." ); /* If we cannot create pidfile from other reasons, only warn. */ warn( "Cannot open or create pidfile" ); } if( daemon(1,0) == -1 ) { pidfile_remove(g_pidfilehandle); exerr( "daemonzing" ); } pidfile_write(g_pidfilehandle); g_fork_slave_fd = fork_fork_slave( ); openlog( "jaildaemon", 0, LOG_DAEMON ); setlogmask(LOG_UPTO(LOG_INFO)); g_whoami = IAM_DAEMON; } else { /* Need a command line, and jid if not a daemon */ if( !o_command || o_jid <= 0 ) usage( argv[0] ); } /* Setup unix domain socket descriptors */ g_uds = socket(AF_UNIX, SOCK_DGRAM, 0); if( g_uds < 0 ) exerr( "Can not create control channel." ); if(1) { size_t packet_size = 2 * IPC_PACKETSIZE; socklen_t pss = sizeof(packet_size); /* Allow huge packets on our unix domain socket */ setsockopt( g_uds, SOL_SOCKET, SO_SNDBUF, &packet_size, pss ); setsockopt( g_uds, SOL_SOCKET, SO_RCVBUF, &packet_size, pss ); } memset(&addr, 0, sizeof(addr)); addr.sun_family = AF_UNIX; strncpy(addr.sun_path, o_uds_path, sizeof(addr.sun_path)-1); if( !o_daemonize ) { /* If we're not supposed to daemonize, just try to pipe the request to the daemon already running and exit Packed packet format: int m_flags ( 0x01 respawn, 0x02 executing, to be respawned ) int m_jid int m_commandline_length int m_proctitle_length char[] command_line \0 char[] proctitle \0 */ size_t o_command_len = strlen(o_command); size_t o_proctitle_len = o_proctitle ? strlen( o_proctitle ) : 0; char *text_off = (char*)(g_ipc_packet_int + 4); if( text_off + 2 + o_command_len + o_proctitle_len > g_ipc_packet + IPC_PACKETSIZE ) exerr( "Command line and proc title too long" ); g_ipc_packet_int[0] = o_respawn; g_ipc_packet_int[1] = o_jid; g_ipc_packet_int[2] = o_command_len; g_ipc_packet_int[3] = o_proctitle_len; memcpy( text_off, o_command, o_command_len + 1 ); if( o_proctitle_len ) { text_off += o_command_len + 1; strncpy( text_off, o_proctitle, o_proctitle_len + 1 ); } ipc_bytes = sendto( g_uds, g_ipc_packet, IPC_PACKETSIZE, 0, (struct sockaddr*)&addr, sizeof(addr) ); if( ipc_bytes != IPC_PACKETSIZE ) exerr( "sending command to daemon. Maybe it is not running?" ); exit(0); } /* Send test DGRAM through the unix domain socket. If this succeeds, there likely is another daemon already listening. You have to force the daemon to start in this case */ if( sendto( g_uds, g_ipc_packet, IPC_PACKETSIZE, 0, (struct sockaddr*)&addr, sizeof(addr) ) == 0 ) { /* TODO: Force not implemented yet */ if( !o_force_daemon ) exerr( "Found command channel. Refusing to overwrite a working one." " Another server may be running. Force with -f."); else warn( "Forcing start of daemon despite working command channel." ); } /* Create the unix domain socket to receive commands on */ unlink(o_uds_path); if (bind(g_uds, (struct sockaddr*)&addr, sizeof(addr)) == -1) exerr( "binding to command channel. Maybe another daemon is running?" ); /* We do not care for the spawned process -- it is checked for in our kqueue filter. So just ignore SIGCHLD */ memset( &sa, 0, sizeof( sa ) ); sa.sa_flags = SA_NOCLDWAIT; if( sigaction(SIGCHLD, &sa, NULL) == -1 ) exerr( "when trying to enable auto reap" ); /* When dying gracefully, this signal handler sends TERM signals to all probes */ sa.sa_handler = term_handler; if( sigaction(SIGTERM, &sa, NULL) == -1 ) exerr( "when trying to install TERM handler" ); /* Create our kqueue */ if( ( kq = kqueue( ) ) == -1 ) exerr( "when create kqueue" ); /* Add our command uds to our kevent list */ memset( &ke, 0, sizeof(ke) ); EV_SET( &ke, g_uds, EVFILT_READ, EV_ADD, 0, 0, 0); kevent( kq, &ke, 1, NULL, 0, NULL ); /* We want to be notified if our command uds is removed, so we can quit */ EV_SET( &ke, g_uds, EVFILT_VNODE, EV_ADD, NOTE_DELETE | NOTE_LINK, 0, 0); kevent( kq, &ke, 1, NULL, 0, NULL ); /* We want to be notified if the fork slave died. This is a good time to die, too */ EV_SET( &ke, g_fork_slave_fd, EVFILT_READ, EV_ADD, 0, 0, 0); kevent( kq, &ke, 1, NULL, 0, NULL ); /* Prepare probe pids list, initally 128 processes long, vector grows by factor 4, when exhausted */ g_probes = malloc( sizeof(pid_t) * PROBES_VECTOR_SIZE ); g_probes_size = PROBES_VECTOR_SIZE; if( !g_probes ) exerr( "allocating memory." ); memset( g_probes, 0, sizeof(pid_t) * PROBES_VECTOR_SIZE ); atexit( kill_all_probes ); /* If daemon was started with some initial script, fire it now -- this leaks some information in the command line to all jails and thus is disabled if( o_command ) { daemon_task task; task.m_jid = o_jid; task.m_flags = o_respawn ? 0x01 : 0x00; task.m_commandline = o_command; task.m_proctitle = o_proctitle; add_task_to_kqueue( kq, &task ); } */ /* Main loop */ while( 1 ) { memset( &ke, 0, sizeof(ke) ); switch( kevent( kq, NULL, 0, &ke, 1, NULL ) ) { case -1: if( errno == EINTR ) continue; exerr( "when reading from kqueue" ); case 0: continue; default: /* We should only see one event, because we asked for 1 */ break; } switch( ke.filter ) { case EVFILT_PROC: if( ke.fflags & NOTE_EXIT ) { size_t i; daemon_task * task = (daemon_task *)ke.udata; if( !task ) continue; /* If this task was watched to respawn a daemon in the jail, do it now */ if( task->m_flags & 0x02 ) { task->m_flags &= ~0x02; add_task_to_kqueue( kq, task ); /* If the process exited with the correct magic code, execute the associated command */ } else if( WEXITSTATUS(ke.data) == MAGIC_EXIT_CODE ) fork_and_execve( kq, task ); free( task->m_commandline ); free( task->m_proctitle ); free( task ); /* Remove process filter from kqueue */ EV_SET( &ke, ke.ident, EVFILT_PROC, EV_DELETE, NOTE_EXIT, 0, NULL ); kevent( kq, &ke, 1, NULL, 0, NULL ); /* Remove pid from our probes list */ for( i = 0; i < g_probes_size; ++i ) if( g_probes[i] == (pid_t)ke.ident ) g_probes[i] = 0; } break; case EVFILT_READ: if( (int)ke.ident == g_uds ) { char *text_off = (char*)(g_ipc_packet_int + 4); socklen_t fromlen; daemon_task task; /* Some data arrived at our admin pipe, parse the request. If the format is not recognized, throw away the complete request */ ipc_bytes = recvfrom(g_uds, g_ipc_packet, sizeof g_ipc_packet, 0, (struct sockaddr*)&addr, &fromlen); /* parse request, fail immediately for any packet not of size IPC_PACKETSIZE */ if( ipc_bytes != IPC_PACKETSIZE ) continue; task.m_flags = g_ipc_packet_int[0]; task.m_jid = g_ipc_packet_int[1]; task.m_commandline = text_off; text_off += g_ipc_packet_int[2]; /* Sanity check on string length, expect terminator */ if( text_off > (char *)( g_ipc_packet + IPC_PACKETSIZE ) || *text_off ) { warn( "Received invalid command packet" ); continue; } task.m_proctitle = g_ipc_packet_int[3] ? ++text_off : 0; text_off += g_ipc_packet_int[3]; /* Sanity check on string length, expect terminator */ if( text_off > (char *)(g_ipc_packet + IPC_PACKETSIZE) || *text_off ) { warn( "Received invalid command packet" ); continue; } /* Takes a copy of our task and all string members */ add_task_to_kqueue( kq, &task ); } else if( (int)ke.ident == g_fork_slave_fd ) { /* If we see activity on the socket to the fork slave in this place, that can only mean that the slave died (EOF), or that something strange is going on over there. For now we only warn, we may switch to exerr() in the future. */ if( ke.flags == EV_EOF ) exerr( "Fork slave died. We die, too." ); else warn( "Spurious packet from fork slave." ); } break; case EVFILT_VNODE: if( (int)ke.ident == g_uds && ke.fflags == NOTE_DELETE ) exerr( "Control channel was deleted. Quitting." ); break; default: break; } } }