sysmgr

a simplistic service supervisor (deprecated)
git clone git://git.ckyln.com/~cem/sysmgr.git
Log | Files | Refs | README | LICENSE

sysmgr (8885B)


      1 #!/bin/sh -e
      2 
      3 ################################################################################
      4 # A modular system-supervisor written in POSIX shell, designed with            #
      5 # Carbs Linux[1] and KISS Linux[2] in mind.                                    #
      6 #                                                                              #
      7 # [1]: https://carbslinux.org                                                  #
      8 # [2]: https://k1ss.org                                                        #
      9 #                                                                              #
     10 # Copyright (c) 2020 - Cem Keylan                                              #
     11 # See LICENSE for copyright information                                        #
     12 #                                                                              #
     13 # Please report bugs to <cem at ckyln dot com> or open an issue at             #
     14 # https://github.com/cemkeylan/sysmgr                                          #
     15 ################################################################################
     16 
     17 
     18 log()   { printf '%s\n' "$@" ;}
     19 out()   { log "$@" >&2 ;}
     20 error() { printf 'error: %s\n' "$@" >&2 ;}
     21 die()   { error "$@" "exiting..." ; exit 1;}
     22 
     23 usage() {
     24     # Define a different out function just for the usage function.
     25     out() {
     26         # This prints the synopsis, adds an empty line, prints arguments,
     27         # adds another empty line and prints version information.
     28         SYNOPSIS="$1"; shift
     29         log "Usage: $SYNOPSIS" "" "$@" "" "sysmgr-$version" >&2
     30     }
     31     case "${0##*/}" in
     32         sysmgr)   out "sysmgr" "See sysmgr(8) for detailed information." ;;
     33         runsyssv) out "runsyssv [service]" ;;
     34         svctl)    out "svctl [command] [service...]" \
     35                       "start/stop/restart  Start/stop/restart services" \
     36                       "kill                Send a SIGKILL to services" \
     37                       "once                Start services once" \
     38                       "status              Check service statuses" \
     39                       "up/down             Same as start/stop"
     40     esac
     41     exit 0
     42 }
     43 
     44 checkprocess() {
     45     # If no arguments are given, do not continue.
     46     # This may happen when getpid returns empty.
     47     [ "$1" ] || return 1
     48 
     49     # This function checks if the process is still alive and returns 0 or 1
     50     # accordingly. It checks the /proc directory first (which exists in most
     51     # Linux systems, but not all) and fallbacks to kill if it doesn't. There are
     52     # three reasons for preferring /proc over kill:
     53     #
     54     # 1: We don't need to spawn an extra process.
     55     # 2: We can check processes we don't own, which eliminates the requirement
     56     #    of being root.
     57     # 3: Checking the existence of a directory is much faster and more efficient
     58     #    than sending a signal to a process.
     59     #
     60     # However, this isn't the portable way, that's why we fallback to kill(1).
     61     [ -d "/proc/$1" ] || kill -0 "$1"
     62 }
     63 
     64 cleanup() {
     65     # Clean the service run directory so that it can be restarted. Do not remove
     66     # the run directory if lock file exists.
     67     rm -f -- "$RUNDIR/${service##*/}/pid" "${RUNDIR}/${service##*/}/syspid"
     68 
     69     [ -e "$RUNDIR/${service##*/}/lock" ] && return
     70     rm -rf -- "${RUNDIR:?}/${service##*/}"
     71 
     72 }
     73 
     74 term() {
     75     # This function is executed when the sysmgr receives an interrupt or a
     76     # hangup signal. It enters the termination state where it forwards SIGTERM
     77     # to every other runsyssv process that have their process ids in the RUNDIR.
     78 
     79     for process in "$RUNDIR"/*/syspid ; do
     80         read -r pid < "$process"
     81         kill -15 "$pid" 2>/dev/null
     82     done
     83 
     84     # Wait for the redirections to happen
     85     sleep 1
     86 
     87     # Remove the RUNDIR so we can do a fresh start when we are re-initiating the
     88     # program.
     89     rm -rf -- "${RUNDIR}"
     90 
     91     exit 0
     92 }
     93 
     94 getpid() {
     95     # This is a function to retrieve the pid from the RUNDIR
     96     unset pid
     97 
     98     [ -f "$RUNDIR/$1/${2:-pid}" ] ||
     99         { error "pid file for $1 could not be found" ; return 1 ;}
    100     read -r pid < "$RUNDIR/$1/${2:-pid}"
    101 }
    102 
    103 redirectsignal() {
    104 
    105     # We redirect signal that was sent to runsyssv so that those programs are
    106     # stopped with the exact kill command. Adding a lock file ensures that the
    107     # directory is not cleaned up.
    108     sig="$1"
    109 
    110     log "${sig:-TERM}" > "${RUNDIR:?}/${service##*/}/lock"
    111     kill "-${sig:-TERM}" "$svpid" 2>/dev/null
    112 
    113 }
    114 
    115 fn_sysmgr() {
    116     [ "$1" ] && usage
    117 
    118     # Start sanity checks. We first check that we have the "$SYSDIR" variable.
    119     # We then check whether the given SYSDIR exists, and has service files
    120     # installed.
    121     [ "$SYSDIR" ] || die "Please specify service directory"
    122     [ -d "$SYSDIR" ] || die "$SYSDIR does not exist."
    123     [ "$(ls -1 "$SYSDIR")" ] || error "No service file is found"
    124     mkdir -p "$RUNDIR" || die
    125 
    126     # Add pid to $RUNDIR before starting loops
    127     log "$$" > "$RUNDIR/pid"
    128 
    129     # We redirect signals to the 'term' function so that we send kill signals to
    130     # all sysmgr processes.
    131     trap term INT HUP QUIT ABRT TERM
    132 
    133     # Lots of loops here. The first while loop is to make sure that the sysmgr
    134     # does not exist. The for loop is to run every single service on the
    135     # $SYSDIR. We then fork the runsyssv function to the background. This
    136     # ensures that we don't have to wait until runsyssv has finished, which is a
    137     # program that is not supposed to exit.
    138     while sleep 1 ; do
    139         [ "$(ls -A "$SYSDIR" )" ] && for service in "$SYSDIR"/* ; do
    140             [ -x "$service" ] || error "$service is not an executable file"
    141             ! [ -d "$RUNDIR/${service##*/}" ] && runsyssv "$service" &
    142         done
    143     done
    144 }
    145 
    146 fn_runsyssv() {
    147 
    148     [ "$1" ] || usage
    149     case "$1" in -h|--help|help) usage ;; esac
    150 
    151     # Record service name in a variable
    152     service="$1"
    153 
    154     # This is the simplest way of checking whether a service is running (or
    155     # killed by the user with ctl, so that it does not run again).
    156     [ -e "$RUNDIR/${service##*/}" ] && exit 1
    157 
    158     # Create the run directory for the service where we will be adding the pid
    159     # value when we start the process.
    160     mkdir -p "$RUNDIR/${service##*/}"
    161 
    162     # Start the service script. If the service fails exit with failure code 1.
    163     # If the service exits without a failure (which it probably shouldn't) exit
    164     # with code 0.
    165     "$service" &
    166     svpid="$!"
    167     log "$svpid" > "$RUNDIR/${service##*/}/pid"
    168     log "$$"     > "$RUNDIR/${service##*/}/syspid"
    169 
    170     for sig in INT HUP QUIT ABRT TERM ; do
    171         # We want to trap every signal with their own value so that we kill the
    172         # service with the requested signal.
    173         # shellcheck disable=SC2064
    174         trap "redirectsignal $sig" $sig
    175     done
    176 
    177     trap "redirectsignal KILL" USR1
    178 
    179     # Wait until service goes down.
    180     wait
    181 
    182     # Do a cleanup when the service is killed.
    183     cleanup
    184 }
    185 
    186 fn_svctl() {
    187     # Check if the RUNDIR exists
    188     [ -d "$RUNDIR" ] || die "$RUNDIR could not be found, are you sure sysmgr is running?"
    189 
    190     # Check that there are at least two arguments, get the first argument into a
    191     # job variable.
    192     [ "$2" ] || usage; job="$1"; shift
    193     for service; do
    194 
    195         # We get the basename of the service, so that it allows a user to do
    196         # operations such as
    197         #
    198         #     'svctl status /var/sysmgr/*'
    199         #
    200         # and get a status list of all available services.
    201         service=${service##*/}
    202 
    203         # This will retrieve the process id from the service directory.
    204         getpid "$service" 2>/dev/null ||:
    205 
    206         case "$job" in
    207             stat|status)
    208                 if checkprocess "$pid"; then
    209                     out "$service: OK"
    210                 else
    211                     out "$service: DOWN"
    212                 fi
    213                 ;;
    214             restart)
    215                 fn_svctl kill "$service"
    216                 fn_svctl start "$service"
    217                 ;;
    218             kill)
    219                 log KILL > "$RUNDIR/$service/lock"
    220                 kill -USR1 "$pid"
    221                 ;;
    222             down|stop)
    223                 log TERM > "$RUNDIR/$service/lock"
    224                 kill -TERM "$pid"
    225                 ;;
    226             up|start)
    227                 checkprocess "$pid" || rm -rf -- "${RUNDIR:?}/$service" ;;
    228             once)
    229                 # This will place a lockfile upon start, so sysmgr will not
    230                 # attempt to restart it, if it goes down.
    231                 fn_svctl start "$service"
    232                 sleep 1
    233                 log once > "$RUNDIR/$service/lock"
    234                 ;;
    235             *)  usage ;;
    236         esac
    237     done
    238 }
    239 
    240 main() {
    241     RUNDIR=${RUNDIR:-/run/sysmgr} SYSDIR=${SYSDIR:-/var/sysmgr}
    242     version=0.3.0
    243 
    244     # Call the appropriate function depending on the name of the program.
    245     case "${0##*/}" in
    246         sysmgr|runsyssv|svctl) "fn_${0##*/}" "$@" ;;
    247         *) out "${0##*/} is not a sysmgr function" ; exit 1 ; esac
    248 }
    249 
    250 main "$@"