================================================================================================================================
# Run one rsync-over-ssh transfer for server $id and record a failure.
#
# Reads:  $$config{RSYNC}, $$servers{$id}{filters}, $$servers{$id}{backupdir},
#         $$servers{$id}{hostname}, $src
# Writes: $errors{$id} ("rsync exit value N\n<captured output>") when rsync
#         ends with anything other than 0, 23 or 24 (see rsync(1), EXIT VALUES).
#
# NOTE: the trace excerpts that follow this listing were recorded with the
# previous waitpid(-1, WNOHANG) call.  They show the defect fixed here: thread
# 07 receives pid 29732 while waiting for 29709, and thread 15 later gets -1
# for 29732 because its child had already been collected.
my @cmd = (
    $$config{RSYNC},
    "--archive",
    "--delete",
    "--rsh=/usr/bin/ssh -o BatchMode=yes",
);
push @cmd, "--filter=$_" foreach @{$$servers{$id}{filters}};
push @cmd, $src;
push @cmd, $$servers{$id}{backupdir};

my $io_select = IO::Select->new();
my ($stdin, $stdout, $stderr) = (gensym, gensym, gensym);
my $pid = open3($stdin, $stdout, $stderr, @cmd);
$io_select->add($stdout, $stderr);
dprint("rsync $pid running for " . $$servers{$id}{hostname}, 3);

my $process_output = "";

# Per-handle tail of data that does not end in a newline yet, keyed by file
# descriptor, so a line split over two reads is never mixed with the other
# stream.
my %pending_for = (fileno($stdout) => "", fileno($stderr) => "");

# Collect whatever rsync has written so far without ever blocking.
# sysread() is used instead of <$handle>: a readline loop on a pipe blocks
# until EOF, which stalls the polling loop and can deadlock once the other
# pipe fills up.  Returns the number of handles that were ready.
my $drain_output = sub {
    my @ready = $io_select->can_read(0);
    foreach my $handle (@ready) {
        my $fd  = fileno($handle);
        my $got = sysread($handle, my $chunk, 65536);
        if ($got) {
            $pending_for{$fd} .= $chunk;
            # Same format as before: every complete line prefixed by a space.
            $process_output .= " $1"
                while $pending_for{$fd} =~ s/\A([^\n]*\n)//;
        }
        elsif (defined $got or not $!{EINTR}) {
            # EOF (0) or a hard read error: stop polling this handle,
            # otherwise select() would report it as ready forever.
            $io_select->remove($handle);
        }
    }
    return scalar @ready;
};

# Raw wait status of our own child; stays -1 if it could not be collected.
my $status = -1;
while (1) {
    1 while $drain_output->();
    dprint("waiting for rsync $pid, " . $$servers{$id}{hostname}, 4);

    # Wait for OUR child only.  Child processes belong to the whole process,
    # not to a thread, so waitpid(-1, ...) would collect the rsync started by
    # any other thread and hand us its status.
    my $wpid = waitpid($pid, WNOHANG);
    if ($wpid == $pid) {
        # Save $? right away; later calls may overwrite it.
        $status = $?;
        last;
    }
    if ($wpid == -1) {
        dprint("waitpid() could not reap rsync $pid: $!", 0);
        last;
    }
    sleep 1;
}

my $exitval;
if ($status == -1) {
    # No status available; report -1 instead of the result of (-1 >> 8).
    $exitval = -1;
}
elsif ($status & 127) {
    # Killed by a signal: ($status >> 8) would be 0 and look like success.
    $exitval = 128 + ($status & 127);
}
else {
    $exitval = $status >> 8;
}
dprint("rsync $pid for " . $$servers{$id}{hostname} . " exit value $exitval", 3);

# get residual output from process
1 while $drain_output->();
foreach my $fd (sort { $a <=> $b } keys %pending_for) {
    # Last line of a stream when it did not end in a newline.
    $process_output .= " $pending_for{$fd}" if length $pending_for{$fd};
}

# rsync(1): 23 = partial transfer due to error, 24 = source files vanished;
# both are tolerated here, everything else is recorded as a failure.
if (($exitval != 0) and ($exitval != 23) and ($exitval != 24)) {
    $errors{$id} = "rsync exit value $exitval\n$process_output";
}
================================================================================================================================
Format of these lines: [thread_id.line_count] seconds_since_start_of_main() debug_message

=== Expected output, which happens if the script runs with one thread:
[12.003] 188.556113 rsync 29332 running for host1.example.tld
[12.004] 188.556241 waiting for rsync 29332, host1.example.tld
[12.005] 188.556314 waitpid for rsync 29332 returned '0' (err: -1 -> exitcode: 72057594037927935)
[12.006] 189.556499 waiting for rsync 29332, host1.example.tld
[12.007] 189.556543 waitpid for rsync 29332 returned '0' (err: -1 -> exitcode: 72057594037927935)
[ ..last two lines repeated a lot.. ]
[12.588] 480.639933 waiting for rsync 29332, host1.example.tld
[12.589] 480.640027 waitpid for rsync 29332 returned '29332' (err: 0 -> exitcode: 0)
[12.590] 481.640248 rsync 29332 for host1.example.tld exit value 0 (CHLD_ERROR: 0)

Case 1) from the email:
=== Unexpected waitpid() returns, which happens if the script runs with multiple threads:
[07.003] 37.566046 rsync 29709 running for host1.example.tld
[07.004] 37.566134 waiting for rsync 29709, host1.example.tld
[07.005] 37.566185 waitpid for rsync 29709 returned '0' (err: -1 -> exitcode: 72057594037927935)
[07.006] 38.566389 waiting for rsync 29709, host1.example.tld
[07.007] 38.566485 waitpid for rsync 29709 returned '0' (err: -1 -> exitcode: 72057594037927935)
[ ..last two lines repeated a lot.. ]
[07.035] 52.572341 waitpid for rsync 29709 returned '29732' (err: 0 -> exitcode: 0)
[07.036] 53.572632 system waitpid() returned while process 29709 still active!
[07.037] 248.831477 failsafe waidpid() returned.
[07.038] 248.831605 rsync 29709 for host1.example.tld exit value 0 (CHLD_ERROR: 0)

Case 2) from the email:
=== Unexpected '-1' return value from waitpid() while child process ran fine:
[15.003] 51.977397 rsync 29732 running for host1.example.tld
[15.004] 51.977527 waiting for rsync 29732, host1.example.tld
[15.005] 51.977602 waitpid for rsync 29732 returned '0' (err: -1 -> exitcode: 72057594037927935)
[ ..last two lines repeated a lot.. ]
[15.420] 260.053841 waiting for rsync 29732, host1.example.tld
[15.421] 260.053933 waitpid for rsync 29732 returned '-1' (err: -1 -> exitcode: 72057594037927935)
[15.422] 261.054150 rsync 29732 for host1.example.tld exit value 72057594037927935 (CHLD_ERROR: -1)