Issue
I'm writing a very simple bash-like shell in C and am currently implementing pipes between commands (i.e. command1 | command2, which should run both commands at the same time with the stdout of the first one connected through a pipe with the stdin of the second one).
I've gotten to the point where something like
shell> echo test | cat | cat
correctly prints "test" to the terminal, but anything more complicated than that doesn't work. For example:
shell> ls -1 / | sort | rev
It's (as far as I can tell) equivalent to the previous one in terms of piping, yet this one fails and the other one succeeds.
I'm at a complete loss as to why this is because I've debugged both the main process and the children exhaustively and verified that the processes get launched with the correct connections both in the working and in the not working command.
Here's a simplified version of the code:
// Uncomment to use hardcoded input
// #define USE_HARDCODED_INPUT
#include <stdlib.h>
#include <string.h>
#include <stddef.h> // NULL
#include <errno.h> // ENOENT
#include <stdio.h> // setbuf, printf
#include <unistd.h> // exec, fork
#include <fcntl.h> // open
#include <sys/types.h> // wait
#include <sys/wait.h>
// Redirect the standard streams of the calling process.
//
// input, output, error: descriptors to install as stdin, stdout and stderr
// respectively. A value of 0 means "leave that stream untouched", so
// descriptor 0 itself cannot be used as a redirection source.
//
// Each non-zero descriptor is duplicated onto its standard slot and the
// original is then closed. A descriptor that already is its own target is
// skipped: dup2(fd, fd) does nothing, so closing fd afterwards would close
// the standard stream itself. A failing dup2() is reported with perror().
void set_process_FDs(int input, int output, int error)
{
    const int source[3] = { input, output, error };
    const int target[3] = { STDIN_FILENO, STDOUT_FILENO, STDERR_FILENO };

    // Same order as before: stdin first, then stdout, then stderr.
    for (int i = 0; i < 3; ++i)
    {
        if (source[i] == 0 || source[i] == target[i])
            continue;
        if (dup2(source[i], target[i]) == -1)
            perror("dup2()");
        close(source[i]);
    }
}
// Finish setting up a freshly forked child and replace it with the command.
//
// argv: NULL-terminated argument vector; argv[0] is looked up through PATH.
// input, output, error: descriptors handed to set_process_FDs(); 0 means
// "do not redirect this stream".
//
// Never returns: either execvp() replaces the process image, or the failure
// is reported and the child terminates with status 1.
void child_setup(char **argv, int input, int output, int error)
{
    if (input || output || error)
        set_process_FDs(input, output, error);

    execvp(argv[0], argv);

    // Only reached when execvp() failed.
    perror("exec()");
    // _exit() rather than exit(): this process is a fork of the shell, so
    // exit() would run the shell's atexit handlers and flush stdio buffers
    // that were copied from the parent.
    _exit(1);
}
// Fork a child that runs argv; for the last command of a pipeline the parent
// additionally blocks in wait().
//
// argv: NULL-terminated argument vector handed to child_setup().
// is_last: non-zero when this is the final command of the pipeline.
// input, output, error: descriptors forwarded to child_setup(); the caller
// passes 0 for "no redirection".
//
// Returns 1 in the parent once the child has been forked, 0 if fork() failed.
int launch_process(char **argv, int is_last,
int input, int output, int error)
{
int status;
pid_t pid = fork();
switch(pid)
{
case -1:
perror("fork()");
return 0;
case 0:
// Child. Only the three descriptors passed in are redirected and closed;
// every other descriptor inherited across fork() (such as the opposite end
// of each pipe) is still open when the command is exec'ed.
child_setup(argv, input, output, error);
// child_setup() ends in execvp() or exit(), so this line is not reached.
return 0;
default:
break;
}
// wait() returns as soon as any one child has terminated, which is not
// necessarily the last command of the pipeline. status is not inspected.
if (is_last)
wait(&status);
return 1;
}
// Launch every command of a pipeline, connecting each command's stdout to
// the stdin of the next one through a pipe.
//
// argvv: NULL-terminated array of NULL-terminated argument vectors.
//
// Returns the number of commands that were launched; the loop stops at the
// first command for which launch_process() reports failure.
int run_commands(char ***argvv)
{
int no_commands_ran = 0;
int argc;
char **argv = argvv[0];
int in_pipe[2]; // pipe feeding the current command (only [0] is set for the first one)
int out_pipe[2]; // pipe receiving the current command's output
for (int i=0; (argv = argvv[i]); ++i)
{
// A pipe is created for every command, the last one included; the return
// value of pipe() is not checked.
pipe(out_pipe);
if (i == 0)
in_pipe[0] = 0; // 0 = "keep stdin" for the first command; in_pipe[1] stays uninitialized
if (!argvv[i+1])
{
// Last command: discard the pipe just created and use 0 = "keep stdout".
// out_pipe[0] keeps the number of the descriptor that was just closed.
close(out_pipe[0]);
close(out_pipe[1]);
out_pipe[1] = 0;
}
// Counts the arguments of this command; argc is not used afterwards.
for (argc=0; argv[argc]; ++argc);
// Only the read end of the input pipe and the write end of the output pipe
// are handed to the child. On failure the loop is left with the pipe
// descriptors of this iteration still open in the parent.
if (!launch_process(argv, !argvv[i+1],
in_pipe[0], out_pipe[1], 0))
break;
// The parent closes both ends of the pipe that fed this command, but only
// after the child has been forked with them open.
if (i != 0)
{
close(in_pipe[0]);
close(in_pipe[1]);
}
// The output pipe of this command becomes the input pipe of the next one.
in_pipe[0] = out_pipe[0];
in_pipe[1] = out_pipe[1];
no_commands_ran = i + 1;
}
return no_commands_ran;
}
// Declared without a prototype, so the arguments passed in main() are not
// type-checked by the compiler.
extern int obtain_order(); // Obtains an order from stdin
// Shell entry point: either reads command lines through obtain_order() and
// runs them, or (with USE_HARDCODED_INPUT defined) runs two fixed pipelines
// once. Always returns 0.
int main(void)
{
char ***argvv = NULL;
int argvc;
char *filev[3] = {NULL, NULL, NULL};
int bg;
int ret;
setbuf(stdout, NULL); // Unbuffered
setbuf(stdin, NULL);
while (1)
{
#ifndef USE_HARDCODED_INPUT
printf("%s", "shell> "); // Prompt
// filev and bg are filled in by the parser but not used in this simplified
// version. NOTE(review): presumably redirection file names and a background
// flag; confirm against the parser.
ret = obtain_order(&argvv, filev, &bg);
if (ret == 0) // EOF
{
fprintf(stderr, "EOF\n");
break;
}
if (ret == -1)
continue; // Syntax error
// argvc is only compared against 0 below; run_commands() finds the end of
// the pipeline through the NULL terminator of argvv instead.
argvc = ret - 1; // Line
if (argvc == 0)
continue; // Empty line
if (!run_commands(argvv))
continue; // Error executing command
#else
argvc = 3;
// Each pipeline is a zero-filled array of 4 slots: 3 commands plus the NULL
// terminator. Each command is a zero-filled argument vector, so it is
// NULL-terminated as well. None of these allocations is checked or freed.
char ***argvv1 = calloc(4, sizeof(char*));
argvv1[0] = calloc(3, sizeof(char*));
argvv1[0][0] = strdup("echo");
argvv1[0][1] = strdup("test");
argvv1[1] = calloc(2, sizeof(char*));
argvv1[1][0] = strdup("cat");
argvv1[2] = calloc(2, sizeof(char*));
argvv1[2][0] = strdup("cat");
char ***argvv2 = calloc(4, sizeof(char*));
argvv2[0] = calloc(4, sizeof(char*));
argvv2[0][0] = strdup("ls");
argvv2[0][1] = strdup("-1");
argvv2[0][2] = strdup("/");
argvv2[1] = calloc(4, sizeof(char*));
argvv2[1][0] = strdup("sort");
argvv2[2] = calloc(4, sizeof(char*));
argvv2[2][0] = strdup("rev");
printf("%s", "shell> echo test | cat | cat\n");
// In this branch a `continue` restarts the while loop, which allocates and
// runs the hardcoded pipelines again.
if (!run_commands(argvv1))
continue; // Error executing command
usleep(500);
printf("%s", "shell> ls -1 / | sort | rev\n");
if (!run_commands(argvv2))
continue; // Error executing command
printf("%s", "\nNo more hardcoded commands to run\n");
break;
#endif
}
return 0;
}
obtain_order() is a function located in the parser, which is a simple Yacc parser. It just fills the vector of argvs called argvv with whatever was input in the shell. In case anyone wants to try the code and see the problem, simply uncomment the #define at the beginning to see the behaviour you'd get from typing the problematic commands manually.
Solution
To start, your parent process does not wait for all of its child processes to complete their execution.
This call to wait
does occur after the last child process has been spawned
if (is_last)
wait(&status);
but it does not necessarily wait for the last child process. That is to say, it will return when any one child process has completed execution (or an error occurs).
Properly waiting for all child processes to complete, at the end of run_commands
,
/* ... */
/* reap children */
pid_t pid;
int status;
while ((pid = wait(&status)) > 0)
if (WIFEXITED(status))
fprintf(stderr, "LOG: Child<%ld> process exited with status<%d>\n",
(long) pid,
WEXITSTATUS(status));
return no_commands_ran;
exposes the fact that children after the first are hanging, as wait
blocks execution of the parent program.
(After placing a few fprintf
statements. █ here indicates program is blocking.)
shell> echo test | cat | cat
LOG: Child<30607> (echo)
LOG: Child<30608> (cat)
LOG: Child<30609> (cat)
LOG: Child<30607> process exited with status <0>
█
Without waiting for all child processes, you are creating orphan processes.
As for why these processes fail to terminate, this is due to the fact that certain file descriptors are not being closed.
The call to launch_process
launch_process(argv, !argvv[i+1], in_pipe[0], out_pipe[1], 0)
ensures that in_pipe[0]
and out_pipe[1]
are closed in the child process, but leaks any valid file descriptors in_pipe[1]
or out_pipe[0]
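. With those leaked file descriptors still open in the child processes, every pipe keeps at least one open write end, so its reader never sees end-of-file: read() only returns 0 once all write ends of the pipe have been closed. The processes therefore block forever while they wait for more data to arrive.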
The quickest fix is to change launch_process
to accept both pipes
int launch_process(char **argv, int is_last,
int input[2], int output[2], int error);
pass both pipes
if (!launch_process(argv, !argvv[i+1], in_pipe, out_pipe, 0))
close the excess file descriptors
case 0:
close(input[1]);
close(output[0]);
child_setup(argv, input[0], output[1], error);
return 0;
remove
if (is_last)
wait(&status);
and add the previously shown wait
loop to the end of run_commands
.
Here is a complete example of a working version of your program, with minimal refactoring.
Compile with -DDEBUG
for some additional sleep time, in order to discover file descriptor leaks (there should not be any). Please read the extended comment in main
.
#define _POSIX_C_SOURCE 200809L
#define USE_HARDCODED_INPUT
#define DEBUG_SLEEP_TIME 20
#include <stdio.h>
#include <stdlib.h>
#include <sys/wait.h>
#include <time.h>
#include <unistd.h>
/*
 * Redirect the standard streams of the calling process.
 *
 * input, output, error: descriptors to install as stdin, stdout and stderr
 * respectively. A value of 0 means "leave that stream untouched", so
 * descriptor 0 itself cannot be used as a redirection source.
 *
 * Each non-zero descriptor is duplicated onto its standard slot and the
 * original is then closed. A descriptor that already is its own target is
 * skipped: dup2(fd, fd) does nothing, so closing fd afterwards would close
 * the standard stream itself. A failing dup2() is reported with perror().
 */
void set_process_FDs(int input, int output, int error)
{
    const int source[3] = { input, output, error };
    const int target[3] = { STDIN_FILENO, STDOUT_FILENO, STDERR_FILENO };

    /* same order as before: stdin first, then stdout, then stderr */
    for (int i = 0; i < 3; ++i) {
        if (source[i] == 0 || source[i] == target[i])
            continue;
        if (dup2(source[i], target[i]) == -1)
            perror("dup2()");
        close(source[i]);
    }
}
/*
 * Finish setting up a freshly forked child and replace it with the command.
 *
 * argv: NULL-terminated argument vector; argv[0] is looked up through PATH.
 * input, output, error: descriptors handed to set_process_FDs(); 0 means
 * "do not redirect this stream".
 *
 * Never returns: either execvp() replaces the process image, or the failure
 * is reported and the child terminates with EXIT_FAILURE.
 */
void child_setup(char **argv, int input, int output, int error)
{
    if (input || output || error)
        set_process_FDs(input, output, error);

#ifdef DEBUG
    /* a sleep here should allow time to inspect
     * `/proc/$PID/fd` for FD leaks, see `main` for details
     * if the child process hangs you will have ample time, regardless
     */
    sleep(DEBUG_SLEEP_TIME);
#endif

    execvp(argv[0], argv);

    /* only reached when execvp() failed */
    perror("exec()");
    /* _exit() rather than exit(): this process is a fork of the parent, so
     * exit() would flush a copy of whatever the parent still had buffered
     * on stdout (printing it a second time) and run its atexit handlers
     */
    _exit(EXIT_FAILURE);
}
/*
 * Fork one command of a pipeline.
 *
 * argv: NULL-terminated argument vector of the command.
 * is_last: accepted for interface compatibility, not used.
 * input: pipe feeding the command; the child reads from input[0].
 * output: pipe receiving the command's output; the child writes to output[1].
 * error: descriptor forwarded unchanged to child_setup().
 *
 * Returns 1 in the parent after a successful fork, 0 if fork() failed.
 * The child never returns to the caller.
 */
int launch_process(char **argv, int is_last,
                   int input[2], int output[2], int error)
{
    (void) is_last;

    const pid_t child = fork();

    if (child == -1) {
        perror("fork()");
        return 0;
    }

    if (child != 0)
        return 1;

    /* child: announce itself, then drop the pipe ends it must not keep,
     * the writer of its own input and the reader of its own output
     */
    fprintf(stderr, "LOG: Child<%ld> (%s)\n", (long) getpid(), argv[0]);
    close(input[1]);
    close(output[0]);
    child_setup(argv, input[0], output[1], error);

    /* child_setup() does not return */
    return 0;
}
/*
 * Run a pipeline and wait for every child to finish.
 *
 * argvv: NULL-terminated array of NULL-terminated argument vectors, one per
 * command. Each command's stdout is piped into the next command's stdin;
 * the first command keeps the caller's stdin and the last one the caller's
 * stdout.
 *
 * Returns the number of commands that were launched. Launching stops at the
 * first pipe() or launch_process() failure; children that were already
 * started are still reaped.
 *
 * Descriptor conventions: 0 in in_pipe[0] / out_pipe[1] means "do not
 * redirect", and -1 marks a pipe end that does not exist, so the close()
 * calls made on it in the child fail harmlessly instead of hitting an
 * arbitrary descriptor.
 */
int run_commands(char ***argvv)
{
    int no_commands_ran = 0;
    int in_pipe[2] = { 0, -1 };
    int out_pipe[2] = { -1, 0 };
    char **argv;

    for (int i = 0; (argv = argvv[i]); ++i) {
        const int is_last = !argvv[i + 1];
        int launched = 0;

        /* only a command with a successor needs an output pipe */
        out_pipe[0] = -1;
        out_pipe[1] = 0;
        if (!is_last && pipe(out_pipe) == -1) {
            perror("pipe()");
            out_pipe[0] = -1;
            out_pipe[1] = 0;
        } else {
            launched = launch_process(argv, is_last, in_pipe, out_pipe, 0);
        }

        /* the parent is done with the pipe that fed this command, whether
         * or not the launch succeeded
         */
        if (i != 0) {
            close(in_pipe[0]);
            close(in_pipe[1]);
        }

        if (!launched) {
            /* nobody will ever read from the pipe created for this command */
            if (out_pipe[0] != -1) {
                close(out_pipe[0]);
                close(out_pipe[1]);
            }
            break;
        }

        /* this command's output pipe feeds the next command */
        in_pipe[0] = out_pipe[0];
        in_pipe[1] = out_pipe[1];
        no_commands_ran = i + 1;
    }

    /* reap children */
    pid_t pid;
    int status;

    while ((pid = wait(&status)) > 0) {
        if (WIFEXITED(status))
            fprintf(stderr, "LOG: Child<%ld> process exited with status<%d>\n",
                    (long) pid,
                    WEXITSTATUS(status));
    }

    return no_commands_ran;
}
/*
 * Demo driver: with USE_HARDCODED_INPUT defined, runs the two example
 * pipelines one after the other. Returns EXIT_FAILURE as soon as a pipeline
 * launches no command at all, EXIT_SUCCESS otherwise.
 */
int main(void)
{
    fprintf(stderr, "LOG: Parent ID: <%ld>\n", (long) getpid());

#ifdef USE_HARDCODED_INPUT
    /* the pipelines are fixed, so plain NULL-terminated arrays replace the
     * unchecked, never-freed heap allocations
     */
    char *echo_argv[] = { "echo", "test", NULL };
    char *cat1_argv[] = { "cat", NULL };
    char *cat2_argv[] = { "cat", NULL };
    char **argvv1[] = { echo_argv, cat1_argv, cat2_argv, NULL };

    char *ls_argv[] = { "ls", "-1", "/", NULL };
    char *sort_argv[] = { "sort", NULL };
    char *rev_argv[] = { "rev", NULL };
    char **argvv2[] = { ls_argv, sort_argv, rev_argv, NULL };

    puts("shell> echo test | cat | cat");
    /* stdout is fully buffered when it is not a terminal; flush before
     * forking so this line is written ahead of the children's output
     */
    fflush(stdout);

    if (!run_commands(argvv1))
        return EXIT_FAILURE;

    /* usleep(500) replaced by its nanosleep() equivalent (500 us);
     * usleep is no longer part of POSIX.1-2008
     */
    nanosleep(&(struct timespec) { .tv_nsec = 500000L }, NULL);

    puts("shell> ls -1 / | sort | rev");
    fflush(stdout);

    if (!run_commands(argvv2))
        return EXIT_FAILURE;

    puts("No more hardcoded commands to run");
#endif

#ifdef DEBUG
    /* compile with -DDEBUG
     * placing a sleep here to provide time to discover
     * any file descriptor leaks
     * inspect `ls -l /proc/$PID/fd`
     * only the standard stream fds should exist (0, 1, 2) at
     * either debug sleep
     * see child_setup as well
     */
    sleep(DEBUG_SLEEP_TIME);
#endif

    return EXIT_SUCCESS;
}
Here is a cursory, annotated example of establishing a series of pipes and processes. It works similarly to your example, and might help to further showcase the order in which file descriptors must be opened, duplicated, and closed.
#define _POSIX_C_SOURCE 200809L
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <wait.h>
/* Returns 1 when fd is non-negative, that is, neither an "absent" marker
 * nor a -1 error return; returns 0 otherwise.
 */
int valid(int fd)
{
    return !(fd < 0);
}
/* these safe_* functions are a non-operation when passed negative values */
/* Closes fd and reports a failing close() with perror(); a negative fd is
 * ignored.
 */
void safe_close(int fd)
{
    if (!valid(fd))
        return;

    if (!valid(close(fd)))
        perror("close");
}
/* Duplicates old onto new and reports a failing dup2() with perror();
 * nothing happens when either descriptor is negative.
 */
void safe_dup2(int old, int new)
{
    if (!valid(old) || !valid(new))
        return;

    if (!valid(dup2(old, new)))
        perror("dup2");
}
/*
 * Run `length` commands as a pipeline and wait for all of them.
 *
 * args: one row per command, each row a NULL-terminated argument vector of
 * at most 8 slots.
 * length: number of rows in args.
 *
 * Command i reads from the pipe written by command i - 1; the first command
 * keeps the caller's stdin and the last one the caller's stdout. If pipe()
 * or fork() fails, the error is reported, no further command is started,
 * and the children already running are still reaped.
 */
void execute(char *args[][8], size_t length)
{
    /* read end feeding the next command; -1 while there is none */
    int from = -1;

    for (size_t i = 0; i < length; i++) {
        /* current-writer-to-next-reader pipe; stays { -1, -1 } for the last
         * command, which has no successor to talk to
         */
        int channel[2] = { -1, -1 };

        if (i + 1 < length && !valid(pipe(channel))) {
            perror("pipe");
            break;
        }

        const pid_t pid = fork();

        if (pid == -1) {
            perror("fork");
            safe_close(channel[0]);
            safe_close(channel[1]);
            break;
        }

        if (pid == 0) {
            /* duplicate previous reader to stdin in child */
            safe_dup2(from, STDIN_FILENO);
            /* close previous reader in child */
            safe_close(from);
            /* close next reader in current child */
            safe_close(channel[0]);
            /* duplicate current writer to stdout in child */
            safe_dup2(channel[1], STDOUT_FILENO);
            /* close current writer in child */
            safe_close(channel[1]);

            execvp(args[i][0], args[i]);
            perror("exec");
            /* _exit(): a failed child must not flush stdio buffers or run
             * atexit handlers inherited from the parent
             */
            _exit(EXIT_FAILURE);
        }

        /* close previous reader and current writer in parent; only the
         * child needs them from here on
         */
        safe_close(from);
        safe_close(channel[1]);

        /* keep current reader for the next command */
        from = channel[0];
    }

    /* close whatever reader is left in parent (after an early stop) */
    safe_close(from);

    /* reap children */
    pid_t pid;
    int status;

    while ((pid = wait(&status)) > 0) {
        if (WIFEXITED(status))
            fprintf(stderr, "LOG: Child<%ld> process exited with status<%d>\n",
                    (long) pid,
                    WEXITSTATUS(status));
    }
}
/* Runs the two example pipelines, one after the other. */
int main(void)
{
    /* slots left out of a row are zero-initialized, which gives every
     * argument vector its NULL terminator
     */
    char *echo_pipeline[][8] = {
        { "echo", "test" },
        { "cat" },
        { "cat", "-n" }
    };
    char *listing_pipeline[][8] = {
        { "ls", "-1", "/" },
        { "sort" },
        { "rev" }
    };

    execute(echo_pipeline, 3);
    execute(listing_pipeline, 3);

    return 0;
}
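Aside: As an edge case, 0 is a valid file descriptor. set_process_FDs is flawed in that it treats 0 as "no redirection": if STDIN_FILENO is closed and a new file descriptor is then acquired, that descriptor may be zero. In that case if (input), if (output) or if (error) evaluates to false and the requested redirection is silently skipped.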
Answered By - Oka Answer Checked By - Gilberto Lyons (WPSolving Admin)