sysmgr

a simplistic service supervisor (deprecated)
git clone git://git.ckyln.com/~cem/sysmgr.git
Log | Files | Refs | README | LICENSE

sysmgr (8841B)


      1 #!/bin/sh -e
      2 
      3 ################################################################################
# A modular system-supervisor written in POSIX shell, designed with            #
      5 # Carbs Linux[1] and KISS Linux[2] in mind.                                    #
      6 #                                                                              #
      7 # [1]: https://carbslinux.org                                                  #
      8 # [2]: https://k1ss.org                                                        #
      9 #                                                                              #
     10 # Copyright (c) 2020 - Cem Keylan                                              #
     11 # Distributed under the terms of GNU GPLv3                                     #
     12 #                                                                              #
     13 # Please report bugs to <cem at ckyln dot com> or open an issue at             #
     14 # https://github.com/cemkeylan/sysmgr                                          #
     15 ################################################################################
     16 
     17 
     18 log()   { printf '%s\n' "$@" ;}
     19 out()   { log "$@" >&2 ;}
     20 error() { printf 'error: %s\n' "$@" >&2 ;}
     21 die()   { error "$@" "exiting..." ; exit 1;}
     22 
     23 usage() {
     24     # Define a different out function just for the usage function.
     25     out() {
     26         # This prints the synopsis, adds an empty line, prints arguments,
     27         # adds another empty line and prints version information.
     28         SYNOPSIS="$1"; shift
     29         log "Usage: $SYNOPSIS" "" "$@" "" "sysmgr-$version" >&2
     30     }
     31     case "${0##*/}" in
     32         sysmgr)   out "sysmgr" "See sysmgr(8) for detailed information." ;;
     33         runsyssv) out "runsyssv [service]" ;;
     34         svctl)    out "svctl [command] [service...]" \
     35                       "start/stop/restart  Start/stop/restart services" \
     36                       "kill                Send a SIGKILL to services" \
     37                       "once                Start services once" \
     38                       "status              Check service statuses" \
     39                       "up/down             Same as start/stop"
     40     esac
     41     exit 0
     42 }
     43 
     44 checkprocess() {
     45     # If no arguments are given, do not continue.
     46     # This may happen when getpid returns empty.
     47     [ "$1" ] || return 1
     48 
     49     # This function checks if the process is still alive and returns 0 or 1
     50     # accordingly. It checks the /proc directory first (which exists in most
     51     # Linux systems, but not all) and fallbacks to kill if it doesn't. There are
     52     # three reasons for preferring /proc over kill:
     53     #
     54     # 1: We don't need to spawn an extra process.
     55     # 2: We can check processes we don't own, which eliminates the requirement
     56     #    of being root.
     57     # 3: Checking the existence of a directory is much faster and more efficient
     58     #    than sending a signal to a process.
     59     #
     60     # However, this isn't the portable way, that's why we fallback to kill(1).
     61     [ -d "/proc/$1" ] || kill -0 "$1"
     62 }
     63 
     64 cleanup() {
     65 
     66     # Clean the service run directory so that it can be restarted. Do not remove
     67     # the run directory if lock file exists.
     68 
     69     rm -f -- "$RUNDIR/${service##*/}/pid" "${RUNDIR}/${service##*/}/syspid"
     70 
     71     [ -e "$RUNDIR/${service##*/}/lock" ] && return
     72     rm -rf -- "${RUNDIR:?}/${service##*/}"
     73 
     74 }
     75 
     76 term() {
     77 
     78     # This function is executed when the sysmgr receives an interrupt or a
     79     # hangup signal. It enters the termination state where it forwards SIGTERM
     80     # to every other runsyssv process that have their process ids in the RUNDIR.
     81 
     82     for process in "$RUNDIR"/*/syspid ; do
     83         read -r pid < "$process"
     84         kill -15 "$pid" 2>/dev/null
     85     done
     86 
     87     # Wait for the redirections to happen
     88     sleep 1
     89 
     90     # Remove the RUNDIR so we can do a fresh start when we are re-initiating the
     91     # program.
     92     rm -rf -- "${RUNDIR}"
     93 
     94     exit 0
     95 }
     96 
     97 getpid() {
     98     # This is a function to retrieve the pid from the RUNDIR
     99     unset pid
    100 
    101     [ -f "$RUNDIR/$1/${2:-pid}" ] ||
    102         { error "pid file for $1 could not be found" ; return 1 ;}
    103     read -r pid < "$RUNDIR/$1/${2:-pid}"
    104 }
    105 
    106 redirectsignal() {
    107 
    108     # We redirect signal that was sent to runsyssv so that those programs are
    109     # stopped with the exact kill command. Adding a lock file ensures that the
    110     # directory is not cleaned up.
    111     sig="$1"
    112 
    113     log "${sig:-TERM}" > "${RUNDIR:?}/${service##*/}/lock"
    114     kill "-${sig:-TERM}" "$svpid" 2>/dev/null
    115 
    116 }
    117 
    118 fn_sysmgr() {
    119     [ "$1" ] && usage
    120 
    121     # Start sanity checks. We first check that we have the "$SYSDIR" variable.
    122     # We then check whether the given SYSDIR exists, and has service files
    123     # installed.
    124     [ "$SYSDIR" ] || die "Please specify service directory"
    125     [ -d "$SYSDIR" ] || die "$SYSDIR does not exist."
    126     [ "$(ls -1 "$SYSDIR")" ] || error "No service file is found"
    127     mkdir -p "$RUNDIR" || die
    128 
    129     # Add pid to $RUNDIR before starting loops
    130     log "$$" > "$RUNDIR/pid"
    131 
    132     # We redirect signals to the 'term' function so that we send kill signals to
    133     # all sysmgr processes.
    134     trap term INT HUP QUIT ABRT TERM
    135 
    136     # Lots of loops here. The first while loop is to make sure that the sysmgr
    137     # does not exist. The for loop is to run every single service on the
    138     # $SYSDIR. We then fork the runsyssv function to the background. This
    139     # ensures that we don't have to wait until runsyssv has finished, which is a
    140     # program that is not supposed to exit.
    141     while sleep 1 ; do
    142         [ "$(ls -A "$SYSDIR" )" ] && for service in "$SYSDIR"/* ; do
    143             [ -x "$service" ] || error "$service is not an executable file"
    144             ! [ -d "$RUNDIR/${service##*/}" ] && runsyssv "$service" &
    145         done
    146     done
    147 }
    148 
    149 fn_runsyssv() {
    150 
    151     [ "$1" ] || usage
    152     case "$1" in -h|--help|help) usage ;; esac
    153 
    154     # Record service name in a variable
    155     service="$1"
    156 
    157     # This is the simplest way of checking whether a service is running (or
    158     # killed by the user with ctl, so that it does not run again).
    159     [ -e "$RUNDIR/${service##*/}" ] && exit 1
    160 
    161     # Create the run directory for the service where we will be adding the pid
    162     # value when we start the process.
    163     mkdir -p "$RUNDIR/${service##*/}"
    164 
    165     # Start the service script. If the service fails exit with failure code 1.
    166     # If the service exits without a failure (which it probably shouldn't) exit
    167     # with code 0.
    168     "$service" &
    169     svpid="$!"
    170     log "$svpid" > "$RUNDIR/${service##*/}/pid"
    171     log "$$"     > "$RUNDIR/${service##*/}/syspid"
    172 
    173     for sig in INT HUP QUIT ABRT TERM ; do
    174         # We want to trap every signal with their own value so that we kill the
    175         # service with the requested signal.
    176         # shellcheck disable=SC2064
    177         trap "redirectsignal $sig" $sig
    178     done
    179 
    180     # Wait until service goes down.
    181     wait
    182 
    183     # Do a cleanup when the service is killed.
    184     cleanup
    185 }
    186 
    187 fn_svctl() {
    188     # Check if the RUNDIR exists
    189     [ -d "$RUNDIR" ] || die "$RUNDIR could not be found, are you sure sysmgr is running?"
    190 
    191     # Check that there are at least two arguments, get the first argument into a
    192     # job variable.
    193     [ "$2" ] || usage; job="$1"; shift
    194     for service; do
    195 
    196         # We get the basename of the service, so that it allows a user to do
    197         # operations such as
    198         #
    199         #     'svctl status /var/sysmgr/*'
    200         #
    201         # and get a status list of all available services.
    202         service=${service##*/}
    203 
    204         # This will retrieve the process id from the service directory.
    205         getpid "$service" 2>/dev/null ||:
    206 
    207         case "$job" in
    208             stat|status)
    209                 if checkprocess "$pid"; then
    210                     out "$service: OK"
    211                 else
    212                     out "$service: DOWN"
    213                 fi
    214                 ;;
    215             restart)
    216                 fn_svctl kill "$service"
    217                 fn_svctl start "$service"
    218                 ;;
    219             kill)
    220                 log 9 > "$RUNDIR/$service/lock"
    221                 kill -9 "$pid"
    222                 ;;
    223             down|stop)
    224                 log 15 > "$RUNDIR/$service/lock"
    225                 kill -15 "$pid"
    226                 ;;
    227             up|start)
    228                 checkprocess "$pid" || rm -rf -- "${RUNDIR:?}/$service" ;;
    229             once)
    230                 # This will place a lockfile upon start, so sysmgr will not
    231                 # attempt to restart it, if it goes down.
    232                 fn_svctl start "$service"
    233                 sleep 1
    234                 log once > "$RUNDIR/$service/lock"
    235                 ;;
    236             *)  usage ;;
    237         esac
    238     done
    239 }
    240 
    241 main() {
    242     RUNDIR=${RUNDIR:-/run/sysmgr} SYSDIR=${SYSDIR:-/var/sysmgr}
    243     version=0.3.0
    244 
    245     # Call the appropriate function depending on the name of the program.
    246     case "${0##*/}" in
    247         sysmgr|runsyssv|svctl) "fn_${0##*/}" "$@" ;;
    248         *) out "${0##*/} is not a sysmgr function" ; exit 1 ; esac
    249 }
    250 
    251 main "$@"