sysmgr (8885B)
#!/bin/sh -e

################################################################################
# A modular system-supervisor written in POSIX shell written with              #
# Carbs Linux[1] and KISS Linux[2] in mind.                                    #
#                                                                              #
# [1]: https://carbslinux.org                                                  #
# [2]: https://k1ss.org                                                        #
#                                                                              #
# Copyright (c) 2020 - Cem Keylan                                              #
# See LICENSE for copyright information                                        #
#                                                                              #
# Please report bugs to <cem at ckyln dot com> or open an issue at             #
# https://github.com/cemkeylan/sysmgr                                          #
################################################################################

# Mirror the '-e' of the shebang so that errexit stays in force even when the
# script is started as 'sh <file>' and the shebang line is not honoured.
set -e

# log:   print every argument on its own line to stdout.
# out:   same as log, but on stderr.
# error: print every argument on its own line to stderr, prefixed 'error: '.
# die:   report the given messages through error and exit with status 1.
log()   { printf '%s\n' "$@" ;}
out()   { log "$@" >&2 ;}
error() { printf 'error: %s\n' "$@" >&2 ;}
die()   { error "$@" "exiting..." ; exit 1;}

usage() {
    # Print the usage text that matches the name this program was called with
    # (sysmgr, runsyssv or svctl) to stderr and exit with status 0.

    # Define a different out function just for the usage function. This is
    # safe because usage never returns to its caller.
    out() {
        # This prints the synopsis, adds an empty line, prints arguments,
        # adds another empty line and prints version information.
        SYNOPSIS="$1"; shift
        log "Usage: $SYNOPSIS" "" "$@" "" "sysmgr-$version" >&2
    }

    case "${0##*/}" in
        sysmgr)   out "sysmgr" "See sysmgr(8) for detailed information." ;;
        runsyssv) out "runsyssv [service]" ;;
        svctl)    out "svctl [command] [service...]" \
                      "start/stop/restart Start/stop/restart services" \
                      "kill Send a SIGKILL to services" \
                      "once Start services once" \
                      "status Check service statuses" \
                      "up/down Same as start/stop" ;;
    esac
    exit 0
}

checkprocess() {
    # Return 0 if the process with pid $1 is alive, non-zero otherwise.

    # If no arguments are given, do not continue.
    # This may happen when getpid returns empty.
    [ "$1" ] || return 1

    # The /proc directory is checked first (it exists on most Linux systems,
    # but not all) and kill(1) is the fallback. There are three reasons for
    # preferring /proc over kill:
    #
    # 1: We don't need to spawn an extra process.
    # 2: We can check processes we don't own, which eliminates the requirement
    #    of being root.
    # 3: Checking the existence of a directory is much faster and more
    #    efficient than sending a signal to a process.
    #
    # However, this isn't the portable way, that's why we fall back to kill(1).
    # Its diagnostics are discarded: "no such process" is an expected answer
    # here, not an error worth printing.
    [ -d "/proc/$1" ] || kill -0 "$1" 2>/dev/null
}

cleanup() {
    # Clean the run directory of $service so that it can be restarted. The pid
    # files are always removed; the directory itself is kept when a lock file
    # exists in it.
    svdir="${RUNDIR:?}/${service##*/}"

    rm -f -- "$svdir/pid" "$svdir/syspid"

    if [ -e "$svdir/lock" ]; then
        return 0
    fi
    rm -rf -- "$svdir"
}

term() {
    # This function is executed when sysmgr receives one of the signals it
    # traps. It forwards SIGTERM to every runsyssv process that has its
    # process id stored in the RUNDIR, removes the RUNDIR and exits.
    #
    # Every step in the loop is allowed to fail: with errexit active a single
    # missing file or already-dead process must not abort the shutdown.
    for process in "$RUNDIR"/*/syspid ; do
        # An unmatched glob is left as the literal pattern, skip it.
        [ -f "$process" ] || continue
        read -r pid < "$process" || [ "$pid" ] || continue
        kill -15 "$pid" 2>/dev/null || :
    done

    # Wait for the redirections to happen
    sleep 1

    # Remove the RUNDIR so we can do a fresh start when we are re-initiating
    # the program.
    rm -rf -- "${RUNDIR:?}"

    exit 0
}

getpid() {
    # Read a pid from the RUNDIR into the global variable $pid.
    #   $1: service name
    #   $2: name of the pid file to read (defaults to 'pid')
    # Returns non-zero (and leaves $pid unset) if the file does not exist.
    unset pid

    [ -f "$RUNDIR/$1/${2:-pid}" ] ||
        { error "pid file for $1 could not be found" ; return 1 ;}
    read -r pid < "$RUNDIR/$1/${2:-pid}"
}

redirectsignal() {
    # Forward the signal named by $1 (TERM when empty) to the supervised
    # service. The signal name is written to the lock file first; the lock
    # file ensures that the run directory is not cleaned up.
    signame="${1:-TERM}"

    log "$signame" > "${RUNDIR:?}/${service##*/}/lock"

    # The service may already be gone; that must not fail the trap action.
    kill "-$signame" "$svpid" 2>/dev/null || :
}

fn_sysmgr() {
    # Main supervisor loop: once per second, start a runsyssv for every entry
    # of $SYSDIR that has no run directory yet.
    [ "$1" ] && usage

    # Start sanity checks. We first check that we have the "$SYSDIR" variable.
    # We then check whether the given SYSDIR exists, and has service files
    # installed.
    [ "$SYSDIR" ]            || die "Please specify service directory"
    [ -d "$SYSDIR" ]         || die "$SYSDIR does not exist."
    [ "$(ls -1 "$SYSDIR")" ] || error "No service file is found"
    mkdir -p "$RUNDIR"       || die

    # Add pid to $RUNDIR before starting loops
    log "$$" > "$RUNDIR/pid"

    # We redirect signals to the 'term' function so that we send kill signals
    # to all runsyssv processes.
    trap term INT HUP QUIT ABRT TERM

    # The while loop makes sure that sysmgr does not exit. The for loop goes
    # over every single service in the $SYSDIR. runsyssv is forked to the
    # background so that we don't have to wait for it to finish; it is not
    # supposed to exit while its service is running.
    while sleep 1 ; do
        for service in "$SYSDIR"/* ; do
            # Nothing matched the glob (empty directory or dotfiles only).
            [ -e "$service" ] || continue

            # Report entries that cannot be executed and leave them alone
            # instead of trying to start them anyway.
            [ -x "$service" ] || {
                error "$service is not an executable file"
                continue
            }

            ! [ -d "$RUNDIR/${service##*/}" ] && runsyssv "$service" &
        done
    done
}

fn_runsyssv() {
    # Supervise a single service: start the executable given as $1, record
    # its pid and our own pid in the run directory, forward signals to it and
    # clean up once it is gone.
    [ "$1" ] || usage
    case "$1" in -h|--help|help) usage ;; esac

    # Record service name in a variable
    service="$1"
    svname="${service##*/}"

    # This is the simplest way of checking whether a service is running (or
    # killed by the user with svctl, so that it does not run again).
    [ -e "$RUNDIR/$svname" ] && exit 1

    # Create the run directory for the service where we will be adding the pid
    # value when we start the process.
    mkdir -p "$RUNDIR/$svname"

    # Start the service script in the background and store both pids.
    "$service" &
    svpid="$!"
    log "$svpid" > "$RUNDIR/$svname/pid"
    log "$$"     > "$RUNDIR/$svname/syspid"

    for sig in INT HUP QUIT ABRT TERM ; do
        # We want to trap every signal with their own value so that we kill
        # the service with the requested signal.
        # shellcheck disable=SC2064
        trap "redirectsignal $sig" "$sig"
    done

    # SIGKILL cannot be trapped, USR1 is used as its stand-in.
    trap 'redirectsignal KILL' USR1

    # Wait until the service goes down. A trapped signal makes wait return
    # early with a status above 128, which errexit would turn into an exit
    # before the cleanup below. Keep waiting until the service is really gone.
    while : ; do
        rc=0
        wait "$svpid" || rc=$?
        case "$rc" in
            # 0: the service exited cleanly.
            # 127: the shell no longer knows the pid, it has been reaped.
            0|127) break ;;
        esac
        # Any other status is either the service's own exit status or an
        # interrupted wait; only the latter leaves the process alive.
        kill -0 "$svpid" 2>/dev/null || break
    done

    # Do a cleanup when the service is killed.
    cleanup
}

fn_svctl() {
    # Control services: $1 is the command, every further argument a service.

    # Check if the RUNDIR exists
    [ -d "$RUNDIR" ] ||
        die "$RUNDIR could not be found, are you sure sysmgr is running?"

    # Check that there are at least two arguments, get the first argument into
    # a job variable.
    [ "$2" ] || usage; job="$1"; shift

    for service; do

        # We get the basename of the service, so that it allows a user to do
        # operations such as
        #
        #     'svctl status /var/sysmgr/*'
        #
        # and get a status list of all available services.
        service=${service%/}
        service=${service##*/}

        # An empty name would make the commands below operate on the RUNDIR
        # itself ('start' would remove it completely).
        [ "$service" ] || { error "invalid service name" ; continue ;}

        # This will retrieve the process id from the service directory.
        getpid "$service" 2>/dev/null ||:

        case "$job" in
            stat|status)
                if checkprocess "$pid"; then
                    out "$service: OK"
                else
                    out "$service: DOWN"
                fi
                ;;
            restart)
                # Only a running service needs to be brought down first.
                if checkprocess "$pid"; then
                    fn_svctl kill "$service"

                    # Give the process a few seconds to disappear, otherwise
                    # 'start' below still sees it alive and does nothing.
                    tries=0
                    while checkprocess "$pid" && [ "$tries" -lt 5 ]; do
                        sleep 1
                        tries=$((tries + 1))
                    done
                fi
                fn_svctl start "$service"

                # The recursive calls overwrite the global $job, restore it
                # for the remaining services of this invocation.
                job=restart
                ;;
            kill)
                # The pid file holds the pid of the service itself, so the
                # SIGKILL is delivered directly to it.
                log KILL > "$RUNDIR/$service/lock"
                kill -KILL "$pid"
                ;;
            down|stop)
                log TERM > "$RUNDIR/$service/lock"
                kill -TERM "$pid"
                ;;
            up|start)
                # Removing the run directory is what lets sysmgr start the
                # service again on its next pass.
                checkprocess "$pid" || rm -rf -- "${RUNDIR:?}/$service"
                ;;
            once)
                # This will place a lockfile upon start, so sysmgr will not
                # attempt to restart it, if it goes down.
                fn_svctl start "$service"
                sleep 1
                log once > "$RUNDIR/$service/lock"

                # See 'restart' for why $job has to be restored.
                job=once
                ;;
            *) usage ;;
        esac
    done
}

main() {
    # Set the defaults and dispatch on the name the program was called with.
    RUNDIR=${RUNDIR:-/run/sysmgr} SYSDIR=${SYSDIR:-/var/sysmgr}
    version=0.3.0

    # Call the appropriate function depending on the name of the program.
    case "${0##*/}" in
        sysmgr|runsyssv|svctl) "fn_${0##*/}" "$@" ;;
        *) out "${0##*/} is not a sysmgr function" ; exit 1 ;;
    esac
}

main "$@"