SIGCHLD not delivered in a process tree

I am trying to create a process that manage some other process in the way that if a child die then the parent restart the process and the process that depend from it.

The problem is that I notice that if I create a tree structure of process when I restart a process in the middle of this structure I am unable to be signaled when new child process terminates.

I write an example; suppose that we have 3 process, grandparent, parent and child. Grandparent fork and start parent that fork and start child (I put the code at the end of this post). Now if I kill child everything works well, child is restarted correctly.

The problem occurs if I kill parent… The grandparent restart parent that restart child, but if I kill child the process remain in the Zombie state and the SIGCHLD is not delivered to the parent process.

In other words:

  • Start grandparent process and wait that all 3 processes have been up
  • Kill parent process and wait that grandparent restart parent that restart child
  • now kill child process, the process remain in the zombie state.

I’m not able to understand this behavior… I have read a tons of example and documentation about signal and wait, try to reset default handler before the fork in parent and grandparent, but nothing seem to work… Here is the code sample…

grandparent.cpp

#include <cstdio>
#include <string>
#include <cstring>

#include <stdlib.h>
#include <signal.h>
#include <wait.h>

using namespace std;

void startProcess(string processFile);
void childDieHandler(int sig, siginfo_t *child_info, void *context);

FILE            *logFile;
int         currentChildPid;

int main(int argc, char** argv)
{
    currentChildPid = 0;
    logFile = stdout;

    daemon(1,1);


    struct sigaction sa;
    bzero(&sa, sizeof(sa));
    sa.sa_sigaction = childDieHandler;
    sigemptyset(&sa.sa_mask);
    sa.sa_flags = SA_SIGINFO;
    sigaction(SIGCHLD, &sa, NULL);

    startProcess("parent");

    while(true) {
        sleep(60);
    }

    return 0;
}

void startProcess(string processFile)
{
    fprintf(logFile, "nGP:Starting new process %sn",processFile.c_str());
    // Get process field and start a new process via fork + execl
    int pid = fork();
    if (pid == -1){
        fprintf(logFile,"GP:*** FORK ERROR on process %s !!!n",processFile.c_str());
        fflush(logFile);
        return;
    }

    // New child process
    if (pid == 0) {

        string execString = get_current_dir_name()+(string)"/"+processFile;
        fprintf(logFile, "GP: %s n",execString.c_str());

    execl(execString.c_str(), processFile.c_str(), NULL);

        fprintf(logFile, "GP:*** ERROR on execv for process %sn",processFile.c_str());
        fflush(logFile);
        exit(1);
    } else {
        // Parent process
        fprintf(logFile, "GP:New process %s pid is %d .n", processFile.c_str(), pid);
        fflush(logFile);
    currentChildPid = pid;
        sleep(2);
    }
}

// Intercept a signal SIGCHLD
void childDieHandler(int sig, siginfo_t *child_info, void *context){
    int status;
    pid_t childPid;
    while((childPid = waitpid(-1,&status, WNOHANG)) > 0) {
        int pid = (int) childPid;
        fprintf(logFile,"GP:*** PROCESS KILLED [pid %d]n",pid);

    sigset_t set;
    sigpending(&set);
    if(sigismember(&set, SIGCHLD)){
        fprintf(logFile, "GP: SIGCHLD is pending or blocked!!!!n");
        fflush(logFile);
    }

        fflush(logFile);

        // identify exited process and then restart it
        if(currentChildPid == childPid){
        // kill any child 
        system("killall child");
        fprintf(logFile,"GP: Restarting parent process...n");
        fflush(logFile);
        startProcess("parent");
    }

    }

    fprintf(logFile,"GP:End of childDieHandler()... [%d]nn",(int)childPid);
    fflush(logFile);
}

parent.cpp

#include <cstdio>
#include <string>
#include <cstring>

#include <stdlib.h>
#include <signal.h>
#include <wait.h>

using namespace std;

void startProcess(string processFile);
void childDieHandler(int sig, siginfo_t *child_info, void *context);

FILE            *logFile;
int         currentChildPid;

int main(int argc, char** argv)
{
    currentChildPid = 0;
    logFile = stdout;

    struct sigaction sa;
    bzero(&sa, sizeof(sa));
    sa.sa_sigaction = childDieHandler;
    sigemptyset(&sa.sa_mask);
    sa.sa_flags = SA_SIGINFO;
    sigaction(SIGCHLD, &sa, NULL);

    startProcess("child");

    while(true) {
        sleep(60);
    }

    return 0;
}

void startProcess(string processFile)
{
    fprintf(logFile, "nP : Starting new process %sn",processFile.c_str());
    // Get process field and start a new process via fork + execl
    int pid = fork();
    if (pid == -1){
        fprintf(logFile,"P : *** FORK ERROR on process %s !!!n",processFile.c_str());
        fflush(logFile);
        return;
    }

    // New child process
    if (pid == 0) {
    string execString = get_current_dir_name()+(string)"/"+processFile;
        execl(execString.c_str(), processFile.c_str(), NULL);

        fprintf(logFile, "P : *** ERROR on execv for process %sn",processFile.c_str());
        fflush(logFile);
        exit(1);
    } else {
        // Parent process
        fprintf(logFile, "P : New process %s pid is %d .n", processFile.c_str(), pid);
        fflush(logFile);
    currentChildPid = pid;
        sleep(2);
    }
}

// Intercept a signal SIGCHLD
void childDieHandler(int sig, siginfo_t *child_info, void *context){
    int status;
    pid_t childPid;
    while((childPid = waitpid(-1,&status, WNOHANG)) > 0) {
        int pid = (int) childPid;
        fprintf(logFile,"P : *** PROCESS KILLED [pid %d]n",pid);

    sigset_t set;
    sigpending(&set);
    if(sigismember(&set, SIGCHLD)){
        fprintf(logFile, "P : SIGCHLD is pending or blocked!!!!n");
        fflush(logFile);
    }

        fflush(logFile);

    // identify exited process and then restart it
    if(currentChildPid == childPid){
        fprintf(logFile,"P :  Restarting child process...n");
        fflush(logFile);
        startProcess("child");
    }

    }

    fprintf(logFile,"P : End of childDieHandler()... [%d]nn",(int)childPid);
    fflush(logFile);
}

child.cpp

#include <cstdio>
#include <string>
#include <cstring>

int main(int argc, char** argv)
{
    printf("nC : I'm born...nn");

    while(true) {
        sleep(60);
    }

    return 0;
}

Answer

Well, I have a guess…

Inside the signal handler, the SIGCHLD signal is blocked (i.e., it is a member of the process’s signal mask).

So when the grandparent calls execl from inside the signal handler, the new parent starts up with SIGCHLD blocked. Thus it never sees the signal and never waits for the new child.

Try calling sigprocmask at the beginning of parent.cpp in order to (a) verify this theory and (b) unblock SIGCHLD.