/* This software was written by Dirk Engling It is considered beerware. Prost. Skol. Cheers or whatever. $Id$ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define IPC_PACKETSIZE 2048 #define MAGIC_EXIT_CODE 42 enum { IAM_DAEMON, IAM_CLIENT, IAM_FORKSLAVE }; enum { TASK_SINGLESHOT, TASK_RESPAWN, TASK_RESPAWN_IMMEDIATE, TASK_RESPAWNING }; static int g_uds; static int g_whoami = IAM_CLIENT; static int g_fork_slave_fd; static char g_ipc_packet[IPC_PACKETSIZE]; static int * const g_ipc_packet_int = (int*)g_ipc_packet; static struct pidfh * g_pidfilehandle; static char * g_uds_path = "/var/run/jaildaemon.pipe"; /* For house keeping a list of all processes we attach to jails (probes), with an initial vector size of 128. The vector never shrinks. */ #define PROBES_VECTOR_SIZE 128 static pid_t * g_probes; static size_t g_probes_size; typedef struct { int m_jid; int m_flags; uid_t m_uid; gid_t m_gid; char *m_commandline; char *m_proctitle; } daemon_task; /* Forward declarations */ static void term_handler( int signal ); static void kill_all_probes( void ); static void remove_files( void ); static int check_for_jail( int jid ); static int copy_daemontask( daemon_task ** out, daemon_task * const in ); static int add_task_to_kqueue( int kq, daemon_task * task_in ); static pid_t fork_and_jail( int jid, uid_t uid, gid_t gid, char * proctitle ); static void fork_and_execve( int kq, daemon_task * task ); static int fork_fork_slave( void ); static void exerr( char * message, ... ); static void usage( char * command ); /* This handler ensures that we clean up our probes if asked to terminate gracefully */ static void term_handler( int signal ) { if( signal == SIGTERM ) exit(0); } /* Report error through the appropriate notification channel. Currently this just writes to stderr, which hopefully still is there. */ static void exerr( char * message, ... ) { va_list args; va_start(args, message); switch( g_whoami ) { case IAM_DAEMON: vsyslog( LOG_ERR, message, args ); break; case IAM_CLIENT: verrx( 1, message, args ); /* Never returns */ break; case IAM_FORKSLAVE: /* TODO */ (void)message; (void)args; break; } exit( 1 ); } /* Report syntax of command line arguments to the user */ static void usage( char * cmd ) { fprintf( stderr, "%s -D [-p pidfile] [-f ipcsockpath]\n" "%s -c command -j jid [-t proctitle] [-rR] [-u uid] [-g gid]" " [-f ipcsockpath]\n", cmd, cmd ); exit( 1 ); } /* This fork slave is an extra process that is spawned very early so that we do not leak information into the jail via copied memory. For communication the fork slave keeps a bi-directional pipe open to the daemon. */ static void fork_slave( int master_fd ) { struct sigaction sa; /* explain why the user sees two processes in ps */ setproctitle( "fork slave" ); /* We do not care for the spawned process -- it is checked for in the daemons kqueue filter. So just ignore SIGCHLD */ memset( &sa, 0, sizeof( sa ) ); sigemptyset(&sa.sa_mask); sa.sa_flags = SA_NOCLDWAIT; if( sigaction(SIGCHLD, &sa, NULL) == -1 ) exerr( "Error: Can not enable auto reap." ); sigemptyset(&sa.sa_mask); sa.sa_handler = SIG_DFL; if( sigaction(SIGHUP, &sa, NULL) == -1 ) exerr( "Error: Can not un-ignore SIGHUP." ); /* Wait for command from master */ while(1) { switch( read( master_fd, g_ipc_packet, sizeof(g_ipc_packet) ) ) { case -1: exerr( "Error: Can not read command from master's socket." ); case IPC_PACKETSIZE: /* Decode packet and throw a forked child */ *(pid_t*)g_ipc_packet = fork_and_jail( g_ipc_packet_int[0], (uid_t)g_ipc_packet_int[1], (gid_t)g_ipc_packet_int[2], g_ipc_packet + 3 * sizeof(int) ); if( write( master_fd, g_ipc_packet, sizeof(pid_t) ) != sizeof(pid_t) ) exerr( "Error: Can not reply to master." ); break; case 0: /* Remote end closed, bye */ exit(0); default: exerr( "Error: Received corrupt command packet." ); break; } } } static int fork_fork_slave( void ) { int sockets[2]; if (socketpair(AF_UNIX, SOCK_STREAM, 0, sockets) < 0) exerr( "Error: Can not open stream socket pair." ); switch( fork() ) { case -1: pidfile_remove( g_pidfilehandle ); exerr( "Error: Can not fork fork slave." ); break; case 0: /* I am child, close master's socket fd */ close( sockets[0] ); /* Close IPC handle and wipe value */ close( g_uds ); g_uds = 0; /* Close pid file and wipe value */ pidfile_close( g_pidfilehandle ); g_pidfilehandle = NULL; g_whoami = IAM_FORKSLAVE; fork_slave( sockets[1] ); /* Never returns */ exit(0); default: /* I am master, close child's socket fd */ close( sockets[1] ); return sockets[0]; } /* Should not happen*/ return -1; } /* Helper function to check if a jail id is valid */ static int check_for_jail( int jid ) { struct iovec iov[2]; iov[0].iov_base = "jid\0"; iov[0].iov_len = 4; iov[1].iov_base = &jid; iov[1].iov_len = sizeof(jid); if( jail_get( iov, 2, 0 ) != -1 ) return 0; return -1; } static pid_t fork_and_jail( int jid, uid_t uid, gid_t gid, char * proctitle ) { int sig; pid_t pid = fork(); if( !pid ) { sigset_t sigset; /* Set proctitle so that jail's pgrep -f can identify the process */ if( proctitle && *proctitle ) setproctitle( "%s", proctitle ); else setproctitle( "PROBE" ); /* Throw ourself into the jail */ if( jail_attach( jid ) ) exerr( "Error: Can not attach process to jail %d.", jid ); /* If we're supposed to drop privileges, do it now */ setgid( gid ); setuid( uid ); /* wait for SIGHUP */ sigemptyset(&sigset); sigaddset(&sigset, SIGHUP); sigprocmask(SIG_BLOCK, &sigset, NULL); while( !sigwait( &sigset, &sig ) ) if( sig == SIGHUP ) exit( MAGIC_EXIT_CODE ); exit(0); } return pid; } static int copy_daemontask( daemon_task ** out, daemon_task * const in ) { daemon_task * t = (daemon_task *)malloc( sizeof( daemon_task ) ); *out = t; if( !t ) return -1; t->m_jid = in->m_jid; t->m_flags = in->m_flags; t->m_uid = in->m_uid; t->m_gid = in->m_gid; t->m_commandline = in->m_commandline ? strdup( in->m_commandline ) : 0; t->m_proctitle = in->m_proctitle ? strdup( in->m_proctitle ) : 0; /* If all strings could be copied, return array */ if( ( !in->m_commandline || t->m_commandline ) && ( !in->m_proctitle || t->m_proctitle ) ) return 0; free( t->m_commandline ); free( t->m_proctitle ); free( t ); *out = 0; return -1; } static void fork_and_execve( int kq, daemon_task * t_in ) { char * shell = "/bin/sh"; char * envp[] = { "PATH=/bin:/sbin:/usr/bin:/usr/sbin", NULL }; struct kevent ke; daemon_task * t; pid_t pid = fork(); switch( pid ) { case -1: warn("Failed forking command line process" ); break; case 0: /* start a new process group */ (void) setsid(); /* Execute command line provided by user */ if( execle(shell, shell, "-c", t_in->m_commandline, (char *)0, envp) == -1 ) _exit(0); /* Never reached */ break; default: /* If no respawn requested, just let the command finish */ switch( t_in->m_flags ) { case TASK_SINGLESHOT: return; case TASK_RESPAWN: /* Try to take a copy of task struct. If this fails, then only respawn fails. */ if( copy_daemontask( &t, t_in ) ) return; /* Signal that this is a process that shall respawn the task in jail */ t->m_flags = TASK_RESPAWNING; /* add process to our process watch list, so we get notified, once it finishes to be able to respawn. */ memset( &ke, 0, sizeof ke ); EV_SET( &ke, pid, EVFILT_PROC, EV_ADD, NOTE_EXIT, 0, t ); if( kevent( kq, &ke, 1, NULL, 0, NULL ) == -1 ) { /* If adding the event fails, get rid of struct */ warn( "Can not put respawn watcher pid on the kqueue" ); free( t->m_commandline ); free( t->m_proctitle ); free( t ); } break; case TASK_RESPAWN_IMMEDIATE: add_task_to_kqueue( kq, t_in ); break; } break; } } static void kill_all_probes( void ) { size_t i; if( g_probes ) for( i = 0; i < g_probes_size; ++i ) if( g_probes[i] ) kill( g_probes[i], SIGTERM ); g_probes_size = 0; free( g_probes ); g_probes = 0; } static void remove_files( void ) { pidfile_remove( g_pidfilehandle ); unlink(g_uds_path); } static int add_task_to_kqueue( int kq, daemon_task * t_in ) { struct kevent ke; daemon_task * t; pid_t pid; if( check_for_jail( t_in->m_jid ) ) { syslog( LOG_ERR, "Invalid jail id: %d", t_in->m_jid ); return -1; } /* Take a copy of the task structure */ if( copy_daemontask( &t, t_in ) ) return -1; /* Forge a command packet for fork slave and send it via control socket */ memset( g_ipc_packet, 0, IPC_PACKETSIZE ); g_ipc_packet_int[0] = t->m_jid; g_ipc_packet_int[1] = t->m_uid; g_ipc_packet_int[2] = t->m_gid; if( t->m_proctitle ) strncpy( g_ipc_packet + 3 * sizeof(int), t->m_proctitle, IPC_PACKETSIZE - 3 * sizeof(int) ); if( write( g_fork_slave_fd, g_ipc_packet, IPC_PACKETSIZE ) != IPC_PACKETSIZE ) exerr( "Error: Can not send task to fork slave." ); if( read( g_fork_slave_fd, g_ipc_packet, sizeof(pid_t) ) < (ssize_t)sizeof(pid_t) ) exerr( "Error: Can not receive pid from fork slave." ); /* Expect reply from fork slave */ pid = *(pid_t*)g_ipc_packet; /* Associate pid with command line to execute and add to our kqueue */ memset( &ke, 0, sizeof ke ); EV_SET( &ke, pid, EVFILT_PROC, EV_ADD, NOTE_EXIT, 0, t ); if( kevent( kq, &ke, 1, NULL, 0, NULL ) == 0 ) { size_t i; /* Account for new pid */ for( i = 0; i < g_probes_size; ++i ) if( !g_probes[i] ) { g_probes[i] = pid; /* SUCCESS */ return 0; } /* No space for pid entry => make room */ if( i == g_probes_size ) { size_t bytes = sizeof(pid_t) * g_probes_size; pid_t *probes = realloc( g_probes, 4 * bytes ); if( probes ) { /* Erase new memory */ memset( probes + g_probes_size, 0, 3 * bytes ); probes[g_probes_size] = pid; g_probes_size *= 4; g_probes = probes; /* SUCCESS */ return 0; } } /* FAIL branch */ /* If we added a kevent filter but failed to store the pid for our house keeping, remove the kqueuei filter again (and kill probe) */ EV_SET( &ke, pid, EVFILT_PROC, EV_DELETE, NOTE_EXIT, 0, t ); kevent( kq, &ke, 1, NULL, 0, NULL ); } /* Avoid an unused task in the jail. Kill it. */ warn( "Can not put pid on the kqueue. Killing task." ); kill( pid, SIGKILL ); free( t->m_commandline ); free( t->m_proctitle ); free( t ); return -1; } /* jaildaemon -D [-ppidfile] [-fipcsockpath] jaildaemon -c command -j jid [-t proctitle] [-rR] [-u uid] [-g gid] [-fipsockpath] */ int main( int argc, char **argv ) { pid_t second_pid; int kq, i; int o_force_daemon = 0; int o_daemonize = 0, o_jid = -1, o_respawn = TASK_SINGLESHOT; char *o_command = NULL, *o_pidfile = NULL, *o_proctitle = NULL; uid_t o_uid = 0; gid_t o_gid = 0; struct kevent ke; struct sockaddr_un addr; struct sigaction sa; size_t ipc_bytes = 2 * IPC_PACKETSIZE; /* init value for setsockopt */ /* If we are not started from root, there is not much we can do, neither access the unix domain socket.*/ if( getuid() != 0 ) exerr( "Error: Need to run as root." ); i=1; while(i) { switch( getopt( argc, argv, "DFrRt:c:j:p:u:g:f:" ) ) { case -1: i=0; break; case 'D': o_daemonize = 1; break; case 'r': o_respawn = TASK_RESPAWN; break; case 'R': o_respawn = TASK_RESPAWN_IMMEDIATE; break; case 't': o_proctitle = optarg; break; case 'c': o_command = optarg; break; case 'j': o_jid = jail_getid(optarg); break; case 'u': o_uid = strtol( optarg, 0, 0 ); break; case 'g': o_gid = strtol( optarg, 0, 0 ); break; case 'p': o_pidfile = optarg; break; case 'f': g_uds_path = optarg; break; case 'F': o_force_daemon = 1; break; case '?': usage( argv[0]); exit(0); break; } } /* Need a command line, and jid if not a daemon */ if( !o_daemonize && ( !o_command || o_jid <= 0 ) ) usage( argv[0] ); /* Setup unix domain socket descriptors */ if( ( g_uds = socket( AF_UNIX, SOCK_DGRAM, 0 ) ) < 0 ) exerr( "Error: Can not create control channel." ); /* Allow huge packets on our unix domain socket */ setsockopt( g_uds, SOL_SOCKET, SO_SNDBUF, &ipc_bytes, sizeof(ipc_bytes) ); setsockopt( g_uds, SOL_SOCKET, SO_RCVBUF, &ipc_bytes, sizeof(ipc_bytes) ); memset(&addr, 0, sizeof(addr)); addr.sun_family = AF_UNIX; strncpy(addr.sun_path, g_uds_path, sizeof(addr.sun_path)-1); if( !o_daemonize ) { /* In utility mode try to pipe the request to the daemon already running and exit Packed packet format: int m_flags: SINGLESHOT, RESPAWN, RESPAWN_IMMEDIATE, RESPAWNING int m_jid int m_uid int m_gid int m_commandline_length int m_proctitle_length char[] command_line \0 char[] proctitle \0 */ size_t o_command_len = strlen(o_command); size_t o_proctitle_len = o_proctitle ? strlen( o_proctitle ) : 0; char *text_off = (char*)(g_ipc_packet_int + 6); if( text_off + 2 + o_command_len + o_proctitle_len > g_ipc_packet + IPC_PACKETSIZE ) exerr( "Error: Command line and proc title are too long" ); g_ipc_packet_int[0] = o_respawn; g_ipc_packet_int[1] = o_jid; g_ipc_packet_int[2] = o_uid; g_ipc_packet_int[3] = o_gid; g_ipc_packet_int[4] = o_command_len; g_ipc_packet_int[5] = o_proctitle_len; memcpy( text_off, o_command, o_command_len + 1 ); if( o_proctitle_len ) { text_off += o_command_len + 1; strncpy( text_off, o_proctitle, o_proctitle_len + 1 ); } ipc_bytes = sendto( g_uds, g_ipc_packet, IPC_PACKETSIZE, 0, (struct sockaddr*)&addr, sizeof(addr) ); if( ipc_bytes != IPC_PACKETSIZE ) exerr( "Error: Can not send command to daemon." " Maybe it is not running?" ); exit(0); } /* The utility mode code finished with the exit(0) above. We're daemon. */ if( !( g_pidfilehandle = pidfile_open(o_pidfile, 0600, &second_pid ) ) ) { if (errno == EEXIST) exerr( "Error: %s already running (pid %d).", argv[0], (int)second_pid ); /* If we cannot create pidfile from other reasons, only warn. */ warn( "Cannot open or create pidfile" ); } /* Send test DGRAM through the unix domain socket. If this succeeds, there likely is another daemon already listening. You have to force the daemon to start in this case */ if( sendto( g_uds, g_ipc_packet, 0, 0, (struct sockaddr*)&addr, sizeof(addr) ) == 0 ) { if( !o_force_daemon ) exerr( "Error: Detected a working command channel on %s.\n" "Refusing to overwrite a working one. Another server may" " be running. Force with -F.", g_uds_path ); else warn( "Forcing start of daemon despite working command channel." ); } /* Daemonize and start a fork slave while there is no file descriptors or initialized memory yet. Communicate with this slave via socketpair */ if( daemon(1,0) == -1 ) { pidfile_remove(g_pidfilehandle); exerr( "Error: Can not daemonize" ); } pidfile_write(g_pidfilehandle); /* Spawn fork slave */ g_fork_slave_fd = fork_fork_slave( ); /* Register pid file remover after fork() so that fork slave wont remove our pid file, also unlink our pipe at exit */ atexit( remove_files ); /* Initialize syslog facilities */ openlog( "jaildaemon", 0, LOG_DAEMON ); setlogmask(LOG_UPTO(LOG_INFO)); /* From now we log through syslog */ g_whoami = IAM_DAEMON; /* Create the unix domain socket to receive commands on, N.B. error goes to syslog, now */ unlink(g_uds_path); if (bind(g_uds, (struct sockaddr*)&addr, sizeof(addr)) == -1) exerr( "Error: Can not create command channel." ); /* We do not care for the spawned process -- it is checked for in our kqueue filter. So just ignore SIGCHLD */ memset( &sa, 0, sizeof( sa ) ); sa.sa_flags = SA_NOCLDWAIT; if( sigaction(SIGCHLD, &sa, NULL) == -1 ) exerr( "Error: Can not enabling auto reap." ); /* When dying gracefully, this signal handler sends TERM signals to all probes */ sa.sa_handler = term_handler; if( sigaction(SIGTERM, &sa, NULL) == -1 ) exerr( "Error: Can not install TERM handler." ); /* Create our kqueue */ if( ( kq = kqueue( ) ) == -1 ) exerr( "Error: Can not create kqueue." ); /* Add our command uds to our kevent list */ memset( &ke, 0, sizeof(ke) ); EV_SET( &ke, g_uds, EVFILT_READ, EV_ADD, 0, 0, 0); kevent( kq, &ke, 1, NULL, 0, NULL ); /* We want to be notified if our command uds is removed, so we can quit */ EV_SET( &ke, g_uds, EVFILT_VNODE, EV_ADD, NOTE_DELETE | NOTE_LINK, 0, 0); kevent( kq, &ke, 1, NULL, 0, NULL ); /* We want to be notified if the fork slave died. This is a good time to die, too */ EV_SET( &ke, g_fork_slave_fd, EVFILT_READ, EV_ADD, 0, 0, 0); kevent( kq, &ke, 1, NULL, 0, NULL ); /* Prepare probe pids list, initally 128 processes long, vector grows by factor 4, when exhausted */ g_probes = malloc( sizeof(pid_t) * PROBES_VECTOR_SIZE ); g_probes_size = PROBES_VECTOR_SIZE; if( !g_probes ) exerr( "Error: Out of memory." ); memset( g_probes, 0, sizeof(pid_t) * PROBES_VECTOR_SIZE ); atexit( kill_all_probes ); /* Main loop */ while( 1 ) { memset( &ke, 0, sizeof(ke) ); switch( kevent( kq, NULL, 0, &ke, 1, NULL ) ) { case -1: if( errno == EINTR ) continue; exerr( "Error: Can not read from kqueue." ); case 0: continue; default: /* We should only see one event, because we asked for 1 */ break; } switch( ke.filter ) { case EVFILT_PROC: if( ke.fflags & NOTE_EXIT ) { size_t i; daemon_task * task = (daemon_task *)ke.udata; if( !task ) continue; /* If this task was watched to respawn a daemon in the jail, do it now */ if( task->m_flags == TASK_RESPAWNING ) { task->m_flags = TASK_RESPAWN; add_task_to_kqueue( kq, task ); /* If the process exited with the correct magic code, execute the associated command */ } else if( WEXITSTATUS(ke.data) == MAGIC_EXIT_CODE ) fork_and_execve( kq, task ); free( task->m_commandline ); free( task->m_proctitle ); free( task ); /* Remove process filter from kqueue */ EV_SET( &ke, ke.ident, EVFILT_PROC, EV_DELETE, NOTE_EXIT, 0, NULL ); kevent( kq, &ke, 1, NULL, 0, NULL ); /* Remove pid from our probes list */ for( i = 0; i < g_probes_size; ++i ) if( g_probes[i] == (pid_t)ke.ident ) g_probes[i] = 0; } break; case EVFILT_READ: if( (int)ke.ident == g_uds ) { char *text_off = (char*)(g_ipc_packet_int + 6); socklen_t fromlen; daemon_task task; /* Some data arrived at our admin pipe, parse the request. If the format is not recognized, throw away the complete request */ ipc_bytes = recvfrom(g_uds, g_ipc_packet, sizeof g_ipc_packet, 0, (struct sockaddr*)&addr, &fromlen); /* parse request, fail immediately for any packet not of size IPC_PACKETSIZE */ if( ipc_bytes != IPC_PACKETSIZE ) continue; task.m_flags = g_ipc_packet_int[0]; task.m_jid = g_ipc_packet_int[1]; task.m_uid = g_ipc_packet_int[2]; task.m_gid = g_ipc_packet_int[3]; task.m_commandline = text_off; text_off += g_ipc_packet_int[4]; /* Sanity check on string length, expect terminator */ if( text_off > (char *)( g_ipc_packet + IPC_PACKETSIZE ) || *text_off ) { warn( "Received invalid command packet" ); continue; } task.m_proctitle = g_ipc_packet_int[5] ? ++text_off : 0; text_off += g_ipc_packet_int[5]; /* Sanity check on string length, expect terminator */ if( text_off > (char *)(g_ipc_packet + IPC_PACKETSIZE) || *text_off ) { warn( "Received invalid command packet" ); continue; } /* Takes a copy of our task and all string members */ add_task_to_kqueue( kq, &task ); } else if( (int)ke.ident == g_fork_slave_fd ) { /* If we see activity on the socket to the fork slave in this place, that can only mean that the slave died (EOF), or that something strange is going on over there. For now we only warn, we may switch to exerr() in the future. */ if( ke.flags == EV_EOF ) exerr( "Error: Fork slave died. We die, too." ); else warn( "Spurious packet from fork slave." ); } break; case EVFILT_VNODE: if( (int)ke.ident == g_uds && ke.fflags == NOTE_DELETE ) exerr( "Error: Control channel was deleted. Quitting." ); break; default: break; } } }