source: tags/ms_r16q3/SH/arb_launcher

Last change on this file was 14509, checked in by westram, 8 years ago
  • fix for #492
    • call pt- and name-servers as daemons, i.e.
      • do not forward any kill signals
      • remove them from the joblist
      • redirect their output into a logfile
    • this is only done if arb is called from launcher
      • trigger is envar ARB_SERVER_LOG (contains the name of the logfile used for daemon output)
      • logfile gets tailed to console; tail is killed when launcher terminates ⇒ doesn't block
  • Property svn:executable set to *
File size: 12.3 KB
Line 
1#!/bin/bash
2
3set -u
4
# Print a launcher status message to stdout.
#   $1 - message text
# The current value of ARB_LAUNCHER (empty when unset) is shown in the prefix.
trace() {
    local MSG="$1"
    local LEVEL="${ARB_LAUNCHER:-}"
    echo "[arb_launcher[$LEVEL]: $MSG]"
}
# Debug hook; produces no output unless the trace call below is enabled.
#   $1 - message text
debug() {
    local MSG="$1"
    # enable the following line to get debug output:
    # trace "DEBUG: $MSG"
}
14
# Write one command line into the launcher's named pipe.
#   $1 - path of the named pipe
#   $2 - command (or control word) to send
send_to_launcher() {
    local NAMED_PIPE="$1"
    local CMD="$2"

    debug "Sending '$CMD' to $NAMED_PIPE"
    # target is quoted: an unquoted target containing blanks is an
    # "ambiguous redirect" in bash
    printf '%s\n' "$CMD" >"$NAMED_PIPE"
    # NOTE(review): the reason for this delay is not evident from this file
    sleep 1
    debug "send_to_launcher terminates"
}
24
# Run a command, record abnormal termination and report the end of the
# command back to the launcher.
#   $1 - path of the named pipe
#   $2 - command line to execute
#   $3 - (optional) log directory; if empty or missing, no flag file is written
# Exit code 1 creates '<LOGDIR>/failed', any other non-zero exit code
# creates '<LOGDIR>/crashed'.
pipe_command() {
    local NAMED_PIPE="$1"
    local CMD="$2"
    local LOGDIR="${3:-}" # LOGDIR may be empty/undef -> dont signal crash

    trace "Starting '$CMD'.."
    # intentionally unquoted: CMD is a whole command line that has to be
    # split into command and arguments
    $CMD
    local EXITCODE=$?

    if [ "$EXITCODE" -eq 0 ]; then
        trace "'$CMD' has terminated with success"
    else
        trace "'$CMD' has terminated with error $EXITCODE"
        if [ -n "$LOGDIR" ]; then
            if [ "$EXITCODE" -eq 1 ]; then
                touch "$LOGDIR/failed"
            else
                touch "$LOGDIR/crashed"
            fi
        fi
    fi

    send_to_launcher "$NAMED_PIPE" 'cmd_terminated'
    debug "pipe_command terminates"
}
49
# Read a single line from the named pipe and print it to stdout.
#   $1 - path of the named pipe
# Prints an empty line if no complete line could be read.
read_line() {
    local NAMED_PIPE="$1"
    local LINE=""
    local ATTEMPT

    # -r keeps backslashes intact; default IFS still trims leading and
    # trailing blanks, so control words compare equal in the caller
    if read -r ATTEMPT <"$NAMED_PIPE"; then
        LINE=$ATTEMPT
    fi
    # quoted output: the received command must not be glob-expanded or
    # have its inner whitespace collapsed here
    printf '%s\n' "$LINE"
}
59
# Main loop of the launcher: reads lines from the named pipe and reacts to them.
#   $1 - path of the named pipe
#   $2 - (optional) log directory, handed on to pipe_command
# Recognized control words:
#   TERMINATE         - leave the loop at once
#   cmd_terminated    - a launched command has finished
#   allow_termination - drops the initial count that keeps the loop alive
# Every other line is started as a command in the background.
# After the loop all background jobs are awaited and the arb session is cleaned.
listen_pipe_unlogged() {
    local NAMED_PIPE="$1"
    local LOGDIR="${2:-}" # LOGDIR may be empty/undef -> dont log
    local RUNNING=1
    local STARTED=0
    local LINE
    # RUNNING is set to 1 (otherwise listen_pipe would terminate instantly)

    while (( RUNNING > 0 )); do
        LINE=$(read_line "$NAMED_PIPE" 2>/dev/null)
        [[ -n "$LINE" ]] || continue

        debug "'$NAMED_PIPE' received '$LINE'"
        case "$LINE" in
            TERMINATE)
                trace "Received request to TERMINATE"
                break
                ;;
            cmd_terminated)
                RUNNING=$(( RUNNING - 1 ))
                if (( RUNNING > 0 )); then
                    trace "Still have $RUNNING arb processes.."
                fi
                ;;
            allow_termination)
                RUNNING=$(( RUNNING - 1 ))
                ;;
            *)
                pipe_command "$NAMED_PIPE" "$LINE" "$LOGDIR" &
                RUNNING=$(( RUNNING + 1 ))
                STARTED=$(( STARTED + 1 ))
                debug "RUNNING=$RUNNING"
                debug "STARTED=$STARTED"
                ;;
        esac
    done

    if (( RUNNING == 0 )); then
        if (( STARTED > 0 )); then
            trace "All launched processes terminated"
        else
            trace "Nothing was ever launched"
        fi
    else
        trace "Still have $RUNNING arb-processes - terminating nevertheless"
    fi

    debug "listen_pipe_unlogged waits for subshells ..."
    wait

    trace "cleaning up arb session"
    arb_clean show_session
    arb_clean session

    debug "listen_pipe_unlogged terminates"
}
115
# List the shared libraries the central ARB binaries are linked against.
# Uses 'ldd' on Linux and 'otool -L' on Darwin; on any other system a
# marker line is printed instead of a library list.
# Reads ARBHOME.
shared_library_dependencies() {
    local LIST_DYNLIBS
    local BINARIES
    local binary

    case "$(uname)" in
        Linux)
            LIST_DYNLIBS="ldd"
            BINARIES="bin/arb_ntree lib/libARBDB.so lib/libCORE.so lib/libWINDOW.so"
            ;;
        Darwin)
            LIST_DYNLIBS="otool -L"
            # Darwin ARB links internal stuff static
            BINARIES="bin/arb_ntree"
            ;;
        *)
            LIST_DYNLIBS="echo UNSUPPORTED_OS "
            # BINARIES has to be defined here as well: the script runs
            # under 'set -u' and would abort on the unset variable
            BINARIES="bin/arb_ntree"
            ;;
    esac
    # both variables are intentionally unquoted (word lists)
    for binary in $BINARIES; do
        echo -e "Library dependencies for $ARBHOME/$binary:"
        $LIST_DYNLIBS "$ARBHOME/$binary"
    done
}
136
# Run a shell command line and frame its output with start/end markers.
#   $1 - tag used in the markers
#   $2 - command line, evaluated in the current shell
wrapped_info() {
    local TAG="$1"
    local CMD="$2"

    echo "--------------------"
    echo "[$TAG start]"
    # quoted, so the command line reaches eval unchanged (no word
    # splitting or pathname expansion beforehand)
    eval "$CMD"
    echo "[$TAG end]"
    echo ""
}
146
# Print a report about the system ARB is running on.
# Each section is produced by wrapped_info from a (tag, command line) pair.
# Reads ARBHOME.
collect_system_information() {
    local HEADER_LINE
    for HEADER_LINE in \
        "System information" \
        "" \
        "The information below has been collected by ARB." \
        "Please do not publish without being aware that it might contain personal information." \
        ""
    do
        echo "$HEADER_LINE"
    done

    # appended to commands whose output shall be reduced to ARB related lines
    local ARB_RELEVANT="| grep -i ARB"

    # alternating list: tag, command line, tag, command line, ...
    local -a SECTIONS=(
        "version"               "$ARBHOME/bin/arb_ntree --help"
        "environment"           "printenv $ARB_RELEVANT"
        "OS"                    "lsb_release -a"
        "kernel"                "uname -mrs ; uname -a ; cat /proc/version"
        "shared libraries"      "shared_library_dependencies"
        "disk"                  "df -h"
        "memory"                "free -m ; cat /proc/meminfo"
        "user limits"           "ulimit -a"
        "ARB processes"         "ps aux $ARB_RELEVANT"
        "KDE desktop version"   "konqueror --version"
        "Gnome desktop version" "gnome-panel --version"
        "CPU"                   "cat /proc/cpuinfo"
        "X server"              "xdpyinfo"
        # "X"                   "Y"
    )

    local IDX
    for (( IDX = 0; IDX < ${#SECTIONS[@]}; IDX += 2 )); do
        wrapped_info "${SECTIONS[IDX]}" "${SECTIONS[IDX+1]}"
    done
}
171
# Remove outdated session logs.
#   $1 - base directory of the logs
# Does nothing if the directory does not exist.
erase_old_logs() {
    local LOGBASE="$1"
    if [ -d "$LOGBASE" ]; then
        # remove files older than 7 days inside and below LOGBASE
        # (-cmin counts minutes since the last status change)
        local OLD=$(( 60 * 24 * 7 ))
        find "$LOGBASE" -type f -cmin +"$OLD" -exec rm {} +
        # remove empty directories inside and below LOGBASE
        # (options precede the tests; -depth handles children first, so a
        # directory emptied by this very run is removed as well)
        find "$LOGBASE" -mindepth 1 -depth -type d -empty -exec rmdir {} \;
    fi
}
182
# Wrapper around listen_pipe_unlogged which performs ARB session logging.
#   $1 - path of the named pipe
# Without ARB_PROP the unlogged variant is called directly.
# Otherwise a session directory '$ARB_PROP/logs/<timestamp>.<pid>' is created,
# the session is run with its output copied to 'run.log', the directory is
# archived as 'session.<id>.tgz' and ~/ARB_last_session.tgz is linked to it.
# Returns false if the session directory cannot be created.
listen_pipe() {
    local NAMED_PIPE="$1"

    if [ -z "${ARB_PROP:-}" ]; then
        # should never come here, if arb has been started via script 'arb'
        # (e.g. happens when arb_ntree was started from debugger and then 'start second database' has been called)
        listen_pipe_unlogged "$NAMED_PIPE"
    else
        local LOGBASE="$ARB_PROP/logs"
        local LOGDIRID
        LOGDIRID="$(date '+%Y%m%d_%H%M%S').$$"
        local LOGDIR="$LOGBASE/$LOGDIRID"
        local NTREE_STATUS=

        mkdir -p "$LOGDIR"

        if [ -d "$LOGDIR" ]; then
            local RUNLOG="$LOGDIR/run.log"
            local SERVERLOG="$LOGDIR/server.log"
            local SYSLOG="$LOGDIR/sys.info"
            local CRASHFLAG="$LOGDIR/crashed"
            local FAILFLAG="$LOGDIR/failed"

            # tell arb to start servers as logging daemons
            export ARB_SERVER_LOG="$SERVERLOG"
            echo "$(date) arb server.log created by arb_launcher" > "$SERVERLOG"

            # forward server output to launcher-tty (non-blocking)
            tail -f "$SERVERLOG" &
            local TAILPID=$!

            # system report and log cleanup run in the background
            ( ( collect_system_information 2>&1 ) > "$SYSLOG" ; erase_old_logs "$LOGBASE" ) &
            # the session itself; blocks until the pipe listener is done
            ( listen_pipe_unlogged "$NAMED_PIPE" "$LOGDIR" ) 2>&1 | tee "$RUNLOG"

            if [ -e "$CRASHFLAG" ]; then
                # only detects crashes of arb_ntree
                # (clients are not started via arb_launcher and they usually crash when server exits)
                NTREE_STATUS=crash
            elif [ -e "$FAILFLAG" ]; then
                NTREE_STATUS=fail
            fi

            if [ -n "$NTREE_STATUS" ]; then
                echo "abnormal termination (NTREE_STATUS='$NTREE_STATUS')" >> "$RUNLOG"
            else
                echo "normal termination" >> "$RUNLOG"
            fi

            local TARBALLNAME="session.$LOGDIRID.tgz"

            debug "killing tail on server-log (pid=$TAILPID)"
            kill "$TAILPID"

            echo "$(date) arb_launcher terminates now. leftover servers may continue logging into this file" >> "$SERVERLOG"
            echo "$(date) End of log (now archive into $LOGBASE/$TARBALLNAME)" >> "$RUNLOG"

            ( cd "$LOGBASE" ; tar -zcf "$TARBALLNAME" "$LOGDIRID" )
            rm -f "$RUNLOG" "$SYSLOG" "$CRASHFLAG" "$FAILFLAG"
            # server.log is not removed above (servers may still write to it),
            # so the directory normally is not empty and rmdir fails; this is
            # expected and therefore kept silent
            rmdir "$LOGDIR" 2>/dev/null

            local FULLTARBALL="$LOGBASE/$TARBALLNAME"
            echo ""
            echo "Session log has been stored in $FULLTARBALL"

            local LATESTLINK=~/ARB_last_session.tgz
            if [ -h "$LATESTLINK" ]; then
                rm "$LATESTLINK"
            fi
            if [ -e "$LATESTLINK" ]; then
                echo "$LATESTLINK already exists and is no symlink"
            else
                ( cd ~ ; ln -s "$FULLTARBALL" "$LATESTLINK" )
                echo "    and is also accessible via $LATESTLINK"
            fi

            if [ -n "$NTREE_STATUS" ]; then
                echo ""
                if [ "$NTREE_STATUS" = "crash" ]; then
                    echo "ARB crashed :-("
                    echo "To report this goto http://bugs.arb-home.de/wiki/BugReport"
                    echo "Please include the session log(s) mentioned above!"
                    echo ""
                else
                    echo "ARB terminated abnormally"
                fi
                # keep the message visible until the user confirms
                echo "[press ENTER]"
                read A
            fi

            true
        else
            echo "Error creating directory '$LOGDIR'"
            false
        fi
    fi
}
282
# Send a signal to a process and to all of its descendants.
#   $1 - pid of the root process
#   $2 - (optional) signal, with or without leading dash (default: TERM)
killtree() {
    local _pid="$1"
    local _sig="${2:-TERM}"
    # normalize: callers pass '-TERM', the default is given without dash;
    # a bare 'TERM' handed to kill would be taken for a process id
    _sig="${_sig#-}"

    debug "killtree pid=${_pid} with sig=${_sig} pid=$$"
    kill -STOP "${_pid}" # stop quickly forking parent from producing childs
    killchilds "${_pid}" "-${_sig}"
    kill "-${_sig}" "${_pid}"
}
# Apply killtree to every direct child of a process.
#   $1 - pid of the parent process
#   $2 - (optional) signal, handed on to killtree unchanged (default: TERM)
killchilds() {
    local _pid=$1
    local _sig=${2:-TERM}
    local _children

    debug "killchilds pid=${_pid} with sig=${_sig} pid=$$"
    # the child list is taken once, before any child gets signalled
    _children=$(ps -o pid --no-headers --ppid ${_pid})
    for _child in $_children
    do
        killtree ${_child} ${_sig}
    done
}
301
# Handler for SIGTERM: cleans the arb session, terminates all child
# processes of this shell and exits.
#   $1 - path of the named pipe (not used by the handler itself)
# Reads ARB_PID (for the message only).
term_handler() {
    local NAMED_PIPE="$1"

    trace "Killing ARB session for ARB_PID=$ARB_PID"
    arb_clean session

    debug "arb_clean done - now killing process tree"
    killchilds "$$" -TERM

    debug "killchilds done - exiting $$"
    exit
}
312
# Create the named pipe and serve it until the session ends.
#   $1 - path of the named pipe to create
#   $2 - pid of the process to terminate if the reader cannot be set up
# Exports ARB_LAUNCHER (0 on the first level, incremented otherwise) and
# installs handlers for INT, TERM and EXIT.
create_pipe_reader() {
    local NAMED_PIPE="$1"
    local PARENT_PID="$2"

    if [ -z "${ARB_LAUNCHER:-}" ]; then
        export ARB_LAUNCHER=0
    else
        export ARB_LAUNCHER=$(( ARB_LAUNCHER + 1 ))
    fi

    debug "Creating named pipe '$NAMED_PIPE'"

    # the trap strings are expanded now (NAMED_PIPE is a local), hence the
    # name is embedded in shell-quoted form
    local QUOTED_PIPE
    QUOTED_PIPE=$(printf '%q' "$NAMED_PIPE")

    # (i did not manage to recover from SIGINT w/o termination of listen_pipe)
    # => disable SIGINT handler
    trap '' INT
    trap "term_handler $QUOTED_PIPE" TERM
    trap "rm -f $QUOTED_PIPE" EXIT

    # Failures have to terminate PARENT_PID: otherwise the process that
    # waits for the pipe (or blocks writing into it) is never released.
    # (the exit status must not be masked by a trailing debug call)
    if mkfifo -m 600 "$NAMED_PIPE"; then
        if ! listen_pipe "$NAMED_PIPE"; then
            # listen_pipe has already reported its error
            kill "$PARENT_PID"
        fi
        debug "listen_pipe done"
    else
        echo "Error creating pipe '$NAMED_PIPE'"
        kill "$PARENT_PID"
    fi

    debug "Pipe reader for '$NAMED_PIPE' terminates.."
    rm -f "$NAMED_PIPE"
    debug "Pipe '$NAMED_PIPE' removed"
}
338
# Send the first command of a session, followed by the control word that
# permits the pipe reader to terminate once all commands have finished.
#   $1 - path of the named pipe
#   $2 - command to send
initial_send_to_launcher() {
    local NAMED_PIPE="$1"
    local CMD="$2"

    # pipe name is quoted so that it arrives as a single argument
    send_to_launcher "$NAMED_PIPE" "$CMD"

    # now allow pipe reader to terminate:
    send_to_launcher "$NAMED_PIPE" "allow_termination"
}
348
# Block until the named pipe exists; reports once per second while waiting.
#   $1 - path of the named pipe
wait_for_pipe() {
    local NAMED_PIPE=$1

    until [[ -p $NAMED_PIPE ]]; do
        echo "Waiting for '$NAMED_PIPE'.."
        sleep 1
    done
    debug "pipe is open"
}
359
# Print the path of the named pipe used for the current ARB_PID.
# The socket directory below HOME is created and restricted to its owner.
get_pipe_name() {
    local SOCKETDIR="${HOME}/.arb_tmp/sockets"

    mkdir -p "${SOCKETDIR}"
    chmod 0700 "${SOCKETDIR}"
    echo "${SOCKETDIR}/arb_launcher.${ARB_PID}"

    # instead of the above code, use the following to test a pipe-creation failure:
    # echo "/arb_launcher.$ARB_PID"
}
369
# Entry point: hand a command to the launcher of the current ARB session.
#   [--async]  - do not block in the initial call
#   $*         - shell command to run (or the control word TERMINATE)
# The first call for an ARB_PID creates the named pipe and serves it; later
# calls only write their command into the existing pipe.
# Returns false if ARB_PID is not set.
launcher() {
    local ASYNC=0
    # ${1:-} / ${ARB_PID:-}: the script runs under 'set -u'; plain
    # expansions would abort before the messages below could be shown
    if [ "${1:-}" = "--async" ]; then
        ASYNC=1
        shift
    fi
    local CMD="$*"

    if [ -z "${ARB_PID:-}" ]; then
        echo "Error: environment variable ARB_PID is unset. terminating.."
        false
    else
        if [ -z "${1:-}" ]; then
            echo "Usage: arb_launcher \"shellcommand\""
            echo ""
            echo "          runs 'shellcommand'"
            echo "          "
            echo "          The initial call to arb_launcher will block until 'shellcommand' terminates."
            echo ""
            echo "          Subsequent calls will not block. They are started from the context of the"
            echo "          initial call. The initial call will wait for all started commands."
            echo ""
            echo "       arb_launcher \"TERMINATE\""
            echo ""
            echo "          terminate the launcher without waiting for spawned commands."
            echo ""
        else
            debug "Using ARB_PID '$ARB_PID'"
            local NAMED_PIPE
            NAMED_PIPE=$(get_pipe_name)
            debug "Using NAMED_PIPE '$NAMED_PIPE'"

            if [[ ! -p "$NAMED_PIPE" ]]; then
                # the sender has to run in the background: it waits for the
                # pipe which is created by the reader started below
                ( wait_for_pipe "$NAMED_PIPE" ; initial_send_to_launcher "$NAMED_PIPE" "$CMD" ) &
                if (( ASYNC == 1 )); then
                    create_pipe_reader "$NAMED_PIPE" $$ &
                else
                    create_pipe_reader "$NAMED_PIPE" $$
                fi
            else
                debug "pipe already was open"
                send_to_launcher "$NAMED_PIPE" "$CMD"
            fi

            # if pipe-reader was started from current process
            # -> blocks until all launched processes have terminated
            if (( ASYNC == 0 )); then
                wait
            fi
        fi
    fi
}
421
# run the launcher and pass its status on to the caller
# (without this the script would always exit with the status of 'debug')
launcher "$@"
LAUNCHER_STATUS=$?
debug "arb_launcher exits!"
exit $LAUNCHER_STATUS
424
Note: See TracBrowser for help on using the repository browser.