diff --git a/README.md b/README.md index ff4dd6a..b1c8f9d 100644 --- a/README.md +++ b/README.md @@ -22,7 +22,7 @@ repo](https://github.com/snoyberg/docker-testing#readme). ### Usage -> pid1 [-e|--env ENV] [-u|--user USER] [-g|--group GROUP] [-w|--workdir DIR] [-t|--timeout TIMEOUT] [-s|--single] COMMAND [ARG1 ARG2 ... ARGN] +> pid1 [-e|--env ENV] [-u|--user USER] [-g|--group GROUP] [-w|--workdir DIR] [-t|--timeout TIMEOUT] COMMAND [ARG1 ARG2 ... ARGN] Where: * `-e`, `--env` `ENV` - Override environment variable from given name=value @@ -33,7 +33,15 @@ Where: executing COMMAND * `-w`, `--workdir` `DIR` - chdir to `DIR` before executing COMMAND * `-t`, `--timeout` `TIMEOUT` - timeout (in seconds) to wait for all child processes to exit -* `-s`, `--single` - flag if we should only send SIGTERM to the immediate child process + +`WARNING`: by default pid1 will first send the TERM signal to it's "immediate child" process. +In most scenarios that will be the only process running but in some cases that will be the +"main" process that could have spawned it's own children. In this scenario it's prudent to shutdown +the "main" process first, since usually it has mechanisms in place to shut down it's children. If +we were to shutdown a child process before "main" was shutdown it might try to restart it. +This is why, if the "main" process doesn't exit within `timeout` we will proceed to send the TERM +signal to all processes and wait **again** for `timeout` until we finally send the KILL signal to all +processes. This is a **breaking change since 0.1.3.0**. The recommended use case for this executable is to embed it in a Docker image. Assuming you've placed it at `/sbin/pid1`, the two commonly recommended usages diff --git a/app/Main.hs b/app/Main.hs index 724337a..3097b21 100644 --- a/app/Main.hs +++ b/app/Main.hs @@ -17,8 +17,7 @@ options defaultEnv = , Option ['u'] ["user"] (ReqArg setRunUser "USER") "run command as user" , Option ['g'] ["group"] (ReqArg setRunGroup "GROUP") "run command as group" , Option ['w'] ["workdir"] (ReqArg setRunWorkDir "DIR") "command working directory" - , Option ['t'] ["timeout"] (ReqArg (setRunExitTimeoutSec . read) "TIMEOUT") "timeout (in seconds) to wait for all child processes to exit" - , Option ['s'] ["single"] (NoArg (setRunSignalImmediateChildOnly True)) "Flag if we should only send SIGTERM to the immediate child process" ] + , Option ['t'] ["timeout"] (ReqArg (setRunExitTimeoutSec . read) "TIMEOUT") "timeout (in seconds) to wait for all child processes to exit" ] where optEnv env' kv = let kvp = fmap (drop 1) $ span (/= '=') kv in kvp:filter ((fst kvp /=) . fst) env' diff --git a/src/System/Process/PID1.hs b/src/System/Process/PID1.hs index 72e027e..83f0b45 100644 --- a/src/System/Process/PID1.hs +++ b/src/System/Process/PID1.hs @@ -7,12 +7,10 @@ module System.Process.PID1 , getRunGroup , getRunUser , getRunWorkDir - , getRunSignalImmediateChildOnly , run , runWithOptions , setRunEnv , setRunExitTimeoutSec - , setRunSignalImmediateChildOnly , setRunGroup , setRunUser , setRunWorkDir @@ -54,7 +52,6 @@ data RunOptions = RunOptions -- timeout (in seconds) to wait for all child processes to exit after -- receiving SIGTERM or SIGINT signal , runExitTimeoutSec :: Int - , runSignalImmediateChildOnly :: Bool } deriving Show -- | return default `RunOptions` @@ -66,8 +63,7 @@ defaultRunOptions = RunOptions , runUser = Nothing , runGroup = Nothing , runWorkDir = Nothing - , runExitTimeoutSec = 5 - , runSignalImmediateChildOnly = False } + , runExitTimeoutSec = 5 } -- | Get environment variable overrides for the given `RunOptions` -- @@ -131,19 +127,6 @@ getRunExitTimeoutSec = runExitTimeoutSec setRunExitTimeoutSec :: Int -> RunOptions -> RunOptions setRunExitTimeoutSec sec opts = opts { runExitTimeoutSec = sec } --- | Return boolean flag if we should only send SIGTERM to the immediate child process --- ---- @since 0.1.3.0 -getRunSignalImmediateChildOnly :: RunOptions -> Bool -getRunSignalImmediateChildOnly = runSignalImmediateChildOnly - --- | Set boolean flag if we should only send SIGTERM to the immediate child process --- --- @since 0.1.3.0 -setRunSignalImmediateChildOnly :: Bool -> RunOptions -> RunOptions -setRunSignalImmediateChildOnly x opts = opts { runSignalImmediateChildOnly = x } - - -- | Run the given command with specified arguments, with optional environment -- variable override (default is to use the current process's environment). -- @@ -185,16 +168,15 @@ runWithOptions opts cmd args = do for_ (runWorkDir opts) setCurrentDirectory let env' = runEnv opts timeout = runExitTimeoutSec opts - single = runSignalImmediateChildOnly opts -- check if we should act as pid1 or just exec the process myID <- getProcessID if myID == 1 - then runAsPID1 single cmd args env' timeout + then runAsPID1 cmd args env' timeout else executeFile cmd True args env' -- | Run as a child with signal handling and orphan reaping. -runAsPID1 :: Bool -> FilePath -> [String] -> Maybe [(String, String)] -> Int -> IO a -runAsPID1 single cmd args env' timeout = do +runAsPID1 :: FilePath -> [String] -> Maybe [(String, String)] -> Int -> IO a +runAsPID1 cmd args env' timeout = do -- Set up an MVar to indicate we're ready to start killing all -- children processes. Then start a thread waiting for that -- variable to be filled and do the actual killing. @@ -223,10 +205,7 @@ runAsPID1 single cmd args env' timeout = do _ <- forkIO $ do takeMVar killChildrenVar - if single then - killImmediateChild child timeout - else - killAllChildren timeout + killAllChildren child timeout -- Loop on reaping child processes reap startKilling child @@ -269,30 +248,21 @@ reap startKilling child = do startKilling | otherwise -> return () -killAllChildren :: Int -> IO () -killAllChildren timeout = do - -- Send all children processes the TERM signal - signalProcess sigTERM (-1) `catch` \e -> +killAllChildren :: CPid -> Int -> IO () +killAllChildren cid timeout = do + -- Send the direct child process the TERM signal + signalProcess sigTERM cid `catch` \e -> if isDoesNotExistError e then return () else throwIO e - -- Wait for `timeout` seconds. We don't need to put in any logic about - -- whether there are still child processes; if all children have - -- exited, then the reap loop will exit and our process will shut - -- down. + -- Wait for `timeout` seconds and allow the 'main' process to take care + -- of shutting down any child processes itself. threadDelay $ timeout * 1000 * 1000 - -- OK, some children didn't exit. Now time to get serious! - signalProcess sigKILL (-1) `catch` \e -> - if isDoesNotExistError e - then return () - else throwIO e - -killImmediateChild :: CPid -> Int -> IO () -killImmediateChild cid timeout = do - -- Send immediate child processes the TERM signal - signalProcess sigTERM cid `catch` \e -> + -- If the 'main' process did not handle shutting down the rest of the + -- child processes we will signal SIGTERM to them directly. + signalProcess sigTERM (-1) `catch` \e -> if isDoesNotExistError e then return () else throwIO e