pacemaker  2.1.2-ada5c3b36
Scalable High-Availability cluster resource manager
services_linux.c
Go to the documentation of this file.
1 /*
2  * Copyright 2010-2021 the Pacemaker project contributors
3  *
4  * The version control history for this file may have further details.
5  *
6  * This source code is licensed under the GNU Lesser General Public License
7  * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
8  */
9 
10 #include <crm_internal.h>
11 
12 #ifndef _GNU_SOURCE
13 # define _GNU_SOURCE
14 #endif
15 
16 #include <sys/types.h>
17 #include <sys/stat.h>
18 #include <sys/wait.h>
19 #include <errno.h>
20 #include <unistd.h>
21 #include <dirent.h>
22 #include <grp.h>
23 #include <string.h>
24 #include <sys/time.h>
25 #include <sys/resource.h>
26 
27 #include "crm/crm.h"
28 #include "crm/common/mainloop.h"
29 #include "crm/services.h"
30 #include "crm/services_internal.h"
31 
32 #include "services_private.h"
33 
34 static void close_pipe(int fildes[]);
35 
36 /* We have two alternative ways of handling SIGCHLD when synchronously waiting
37  * for spawned processes to complete. Both rely on polling a file descriptor to
38  * discover SIGCHLD events.
39  *
40  * If sys/signalfd.h is available (e.g. on Linux), we call signalfd() to
41  * generate the file descriptor. Otherwise, we use the "self-pipe trick"
42  * (opening a pipe and writing a byte to it when SIGCHLD is received).
43  */
44 #ifdef HAVE_SYS_SIGNALFD_H
45 
46 // signalfd() implementation
47 
48 #include <sys/signalfd.h>
49 
50 // Everything needed to manage SIGCHLD handling
51 struct sigchld_data_s {
52  sigset_t mask; // Signals to block now (including SIGCHLD)
53  sigset_t old_mask; // Previous set of blocked signals
54 };
55 
56 // Initialize SIGCHLD data and prepare for use
57 static bool
58 sigchld_setup(struct sigchld_data_s *data)
59 {
60  sigemptyset(&(data->mask));
61  sigaddset(&(data->mask), SIGCHLD);
62 
63  sigemptyset(&(data->old_mask));
64 
65  // Block SIGCHLD (saving previous set of blocked signals to restore later)
66  if (sigprocmask(SIG_BLOCK, &(data->mask), &(data->old_mask)) < 0) {
67  crm_err("Wait for child process completion failed: %s "
68  CRM_XS " source=sigprocmask", pcmk_strerror(errno));
69  return false;
70  }
71  return true;
72 }
73 
74 // Get a file descriptor suitable for polling for SIGCHLD events
75 static int
76 sigchld_open(struct sigchld_data_s *data)
77 {
78  int fd;
79 
80  CRM_CHECK(data != NULL, return -1);
81 
82  fd = signalfd(-1, &(data->mask), SFD_NONBLOCK);
83  if (fd < 0) {
84  crm_err("Wait for child process completion failed: %s "
85  CRM_XS " source=signalfd", pcmk_strerror(errno));
86  }
87  return fd;
88 }
89 
/* Close a file descriptor returned by sigchld_open().
 *
 * Negative values (the failure result of sigchld_open()) are ignored. Zero is
 * a valid descriptor number (signalfd() can return it when descriptor 0 is
 * not in use), so it is closed like any other descriptor.
 */
static void
sigchld_close(int fd)
{
    if (fd >= 0) {
        close(fd);
    }
}
98 
99 // Return true if SIGCHLD was received from polled fd
100 static bool
101 sigchld_received(int fd)
102 {
103  struct signalfd_siginfo fdsi;
104  ssize_t s;
105 
106  if (fd < 0) {
107  return false;
108  }
109  s = read(fd, &fdsi, sizeof(struct signalfd_siginfo));
110  if (s != sizeof(struct signalfd_siginfo)) {
111  crm_err("Wait for child process completion failed: %s "
112  CRM_XS " source=read", pcmk_strerror(errno));
113 
114  } else if (fdsi.ssi_signo == SIGCHLD) {
115  return true;
116  }
117  return false;
118 }
119 
120 // Do anything needed after done waiting for SIGCHLD
121 static void
122 sigchld_cleanup(struct sigchld_data_s *data)
123 {
124  // Restore the original set of blocked signals
125  if ((sigismember(&(data->old_mask), SIGCHLD) == 0)
126  && (sigprocmask(SIG_UNBLOCK, &(data->mask), NULL) < 0)) {
127  crm_warn("Could not clean up after child process completion: %s",
128  pcmk_strerror(errno));
129  }
130 }
131 
132 #else // HAVE_SYS_SIGNALFD_H not defined
133 
134 // Self-pipe implementation (see above for function descriptions)
135 
136 struct sigchld_data_s {
137  int pipe_fd[2]; // Pipe file descriptors
138  struct sigaction sa; // Signal handling info (with SIGCHLD)
139  struct sigaction old_sa; // Previous signal handling info
140 };
141 
142 // We need a global to use in the signal handler
143 volatile struct sigchld_data_s *last_sigchld_data = NULL;
144 
145 static void
146 sigchld_handler()
147 {
148  // We received a SIGCHLD, so trigger pipe polling
149  if ((last_sigchld_data != NULL)
150  && (last_sigchld_data->pipe_fd[1] >= 0)
151  && (write(last_sigchld_data->pipe_fd[1], "", 1) == -1)) {
152  crm_err("Wait for child process completion failed: %s "
153  CRM_XS " source=write", pcmk_strerror(errno));
154  }
155 }
156 
157 static bool
158 sigchld_setup(struct sigchld_data_s *data)
159 {
160  int rc;
161 
162  data->pipe_fd[0] = data->pipe_fd[1] = -1;
163 
164  if (pipe(data->pipe_fd) == -1) {
165  crm_err("Wait for child process completion failed: %s "
166  CRM_XS " source=pipe", pcmk_strerror(errno));
167  return false;
168  }
169 
170  rc = pcmk__set_nonblocking(data->pipe_fd[0]);
171  if (rc != pcmk_rc_ok) {
172  crm_warn("Could not set pipe input non-blocking: %s " CRM_XS " rc=%d",
173  pcmk_rc_str(rc), rc);
174  }
175  rc = pcmk__set_nonblocking(data->pipe_fd[1]);
176  if (rc != pcmk_rc_ok) {
177  crm_warn("Could not set pipe output non-blocking: %s " CRM_XS " rc=%d",
178  pcmk_rc_str(rc), rc);
179  }
180 
181  // Set SIGCHLD handler
182  data->sa.sa_handler = sigchld_handler;
183  data->sa.sa_flags = 0;
184  sigemptyset(&(data->sa.sa_mask));
185  if (sigaction(SIGCHLD, &(data->sa), &(data->old_sa)) < 0) {
186  crm_err("Wait for child process completion failed: %s "
187  CRM_XS " source=sigaction", pcmk_strerror(errno));
188  }
189 
190  // Remember data for use in signal handler
192  return true;
193 }
194 
195 static int
196 sigchld_open(struct sigchld_data_s *data)
197 {
198  CRM_CHECK(data != NULL, return -1);
199  return data->pipe_fd[0];
200 }
201 
/* Counterpart of sigchld_open() for the self-pipe implementation.
 *
 * Deliberately does nothing: both pipe ends are closed by sigchld_cleanup().
 */
static void
sigchld_close(int fd)
{
    (void) fd;
}
208 
/* Report whether SIGCHLD was signalled through the self-pipe.
 *
 * For a valid descriptor, every byte that can currently be read is consumed
 * and true is returned; a negative descriptor yields false.
 */
static bool
sigchld_received(int fd)
{
    char discard;

    if (fd < 0) {
        return false;
    }

    // Drain the pipe one byte at a time until read() stops delivering data
    for (;;) {
        if (read(fd, &discard, 1) != 1) {
            break;
        }
    }
    return true;
}
222 
223 static void
224 sigchld_cleanup(struct sigchld_data_s *data)
225 {
226  // Restore the previous SIGCHLD handler
227  if (sigaction(SIGCHLD, &(data->old_sa), NULL) < 0) {
228  crm_warn("Could not clean up after child process completion: %s",
229  pcmk_strerror(errno));
230  }
231 
232  close_pipe(data->pipe_fd);
233 }
234 
235 #endif
236 
/* Close both ends of a pipe.
 *
 * fildes holds the two descriptors of the pipe. Each non-negative entry is
 * closed and then set to -1, so calling this again on the same array is
 * harmless.
 */
static void
close_pipe(int fildes[])
{
    for (int end = 0; end < 2; end++) {
        if (fildes[end] < 0) {
            continue;   // Already closed or never opened
        }
        close(fildes[end]);
        fildes[end] = -1;
    }
}
255 
256 static gboolean
257 svc_read_output(int fd, svc_action_t * op, bool is_stderr)
258 {
259  char *data = NULL;
260  int rc = 0, len = 0;
261  char buf[500];
262  static const size_t buf_read_len = sizeof(buf) - 1;
263 
264 
265  if (fd < 0) {
266  crm_trace("No fd for %s", op->id);
267  return FALSE;
268  }
269 
270  if (is_stderr && op->stderr_data) {
271  len = strlen(op->stderr_data);
272  data = op->stderr_data;
273  crm_trace("Reading %s stderr into offset %d", op->id, len);
274 
275  } else if (is_stderr == FALSE && op->stdout_data) {
276  len = strlen(op->stdout_data);
277  data = op->stdout_data;
278  crm_trace("Reading %s stdout into offset %d", op->id, len);
279 
280  } else {
281  crm_trace("Reading %s %s into offset %d", op->id, is_stderr?"stderr":"stdout", len);
282  }
283 
284  do {
285  rc = read(fd, buf, buf_read_len);
286  if (rc > 0) {
287  buf[rc] = 0;
288  crm_trace("Got %d chars: %.80s", rc, buf);
289  data = pcmk__realloc(data, len + rc + 1);
290  len += sprintf(data + len, "%s", buf);
291 
292  } else if (errno != EINTR) {
293  /* error or EOF
294  * Cleanup happens in pipe_done()
295  */
296  rc = FALSE;
297  break;
298  }
299 
300  } while (rc == buf_read_len || rc < 0);
301 
302  if (is_stderr) {
303  op->stderr_data = data;
304  } else {
305  op->stdout_data = data;
306  }
307 
308  return rc;
309 }
310 
311 static int
312 dispatch_stdout(gpointer userdata)
313 {
314  svc_action_t *op = (svc_action_t *) userdata;
315 
316  return svc_read_output(op->opaque->stdout_fd, op, FALSE);
317 }
318 
319 static int
320 dispatch_stderr(gpointer userdata)
321 {
322  svc_action_t *op = (svc_action_t *) userdata;
323 
324  return svc_read_output(op->opaque->stderr_fd, op, TRUE);
325 }
326 
327 static void
328 pipe_out_done(gpointer user_data)
329 {
330  svc_action_t *op = (svc_action_t *) user_data;
331 
332  crm_trace("%p", op);
333 
334  op->opaque->stdout_gsource = NULL;
335  if (op->opaque->stdout_fd > STDOUT_FILENO) {
336  close(op->opaque->stdout_fd);
337  }
338  op->opaque->stdout_fd = -1;
339 }
340 
341 static void
342 pipe_err_done(gpointer user_data)
343 {
344  svc_action_t *op = (svc_action_t *) user_data;
345 
346  op->opaque->stderr_gsource = NULL;
347  if (op->opaque->stderr_fd > STDERR_FILENO) {
348  close(op->opaque->stderr_fd);
349  }
350  op->opaque->stderr_fd = -1;
351 }
352 
353 static struct mainloop_fd_callbacks stdout_callbacks = {
354  .dispatch = dispatch_stdout,
355  .destroy = pipe_out_done,
356 };
357 
358 static struct mainloop_fd_callbacks stderr_callbacks = {
359  .dispatch = dispatch_stderr,
360  .destroy = pipe_err_done,
361 };
362 
363 static void
364 set_ocf_env(const char *key, const char *value, gpointer user_data)
365 {
366  if (setenv(key, value, 1) != 0) {
367  crm_perror(LOG_ERR, "setenv failed for key:%s and value:%s", key, value);
368  }
369 }
370 
371 static void
372 set_ocf_env_with_prefix(gpointer key, gpointer value, gpointer user_data)
373 {
374  char buffer[500];
375 
376  snprintf(buffer, sizeof(buffer), strcmp(key, "OCF_CHECK_LEVEL") != 0 ? "OCF_RESKEY_%s" : "%s", (char *)key);
377  set_ocf_env(buffer, value, user_data);
378 }
379 
380 static void
381 set_alert_env(gpointer key, gpointer value, gpointer user_data)
382 {
383  int rc;
384 
385  if (value != NULL) {
386  rc = setenv(key, value, 1);
387  } else {
388  rc = unsetenv(key);
389  }
390 
391  if (rc < 0) {
392  crm_perror(LOG_ERR, "setenv %s=%s",
393  (char*)key, (value? (char*)value : ""));
394  } else {
395  crm_trace("setenv %s=%s", (char*)key, (value? (char*)value : ""));
396  }
397 }
398 
405 static void
406 add_action_env_vars(const svc_action_t *op)
407 {
408  void (*env_setter)(gpointer, gpointer, gpointer) = NULL;
409  if (op->agent == NULL) {
410  env_setter = set_alert_env; /* we deal with alert handler */
411 
412  } else if (pcmk__str_eq(op->standard, PCMK_RESOURCE_CLASS_OCF, pcmk__str_casei)) {
413  env_setter = set_ocf_env_with_prefix;
414  }
415 
416  if (env_setter != NULL && op->params != NULL) {
417  g_hash_table_foreach(op->params, env_setter, NULL);
418  }
419 
420  if (env_setter == NULL || env_setter == set_alert_env) {
421  return;
422  }
423 
424  set_ocf_env("OCF_RA_VERSION_MAJOR", PCMK_OCF_MAJOR_VERSION, NULL);
425  set_ocf_env("OCF_RA_VERSION_MINOR", PCMK_OCF_MINOR_VERSION, NULL);
426  set_ocf_env("OCF_ROOT", OCF_ROOT_DIR, NULL);
427  set_ocf_env("OCF_EXIT_REASON_PREFIX", PCMK_OCF_REASON_PREFIX, NULL);
428 
429  if (op->rsc) {
430  set_ocf_env("OCF_RESOURCE_INSTANCE", op->rsc, NULL);
431  }
432 
433  if (op->agent != NULL) {
434  set_ocf_env("OCF_RESOURCE_TYPE", op->agent, NULL);
435  }
436 
437  /* Notes: this is not added to specification yet. Sept 10,2004 */
438  if (op->provider != NULL) {
439  set_ocf_env("OCF_RESOURCE_PROVIDER", op->provider, NULL);
440  }
441 }
442 
443 static void
444 pipe_in_single_parameter(gpointer key, gpointer value, gpointer user_data)
445 {
446  svc_action_t *op = user_data;
447  char *buffer = crm_strdup_printf("%s=%s\n", (char *)key, (char *) value);
448  int ret, total = 0, len = strlen(buffer);
449 
450  do {
451  errno = 0;
452  ret = write(op->opaque->stdin_fd, buffer + total, len - total);
453  if (ret > 0) {
454  total += ret;
455  }
456 
457  } while ((errno == EINTR) && (total < len));
458  free(buffer);
459 }
460 
467 static void
468 pipe_in_action_stdin_parameters(const svc_action_t *op)
469 {
470  crm_debug("sending args");
471  if (op->params) {
472  g_hash_table_foreach(op->params, pipe_in_single_parameter, (gpointer) op);
473  }
474 }
475 
476 gboolean
478 {
479  svc_action_t *op = data;
480 
481  crm_debug("Scheduling another invocation of %s", op->id);
482 
483  /* Clean out the old result */
484  free(op->stdout_data);
485  op->stdout_data = NULL;
486  free(op->stderr_data);
487  op->stderr_data = NULL;
488  op->opaque->repeat_timer = 0;
489 
490  services_action_async(op, NULL);
491  return FALSE;
492 }
493 
512 int
514 {
515  CRM_CHECK((op != NULL) && !(op->synchronous), return EINVAL);
516 
517  if (op->interval_ms != 0) {
518  // Recurring operations must be either cancelled or rescheduled
519  if (op->cancel) {
522  } else {
523  op->opaque->repeat_timer = g_timeout_add(op->interval_ms,
525  (void *) op);
526  }
527  }
528 
529  if (op->opaque->callback != NULL) {
530  op->opaque->callback(op);
531  }
532 
533  // Stop tracking the operation (as in-flight or blocked)
534  op->pid = 0;
536 
537  if ((op->interval_ms != 0) && !(op->cancel)) {
538  // Do not free recurring actions (they will get freed when cancelled)
540  return EBUSY;
541  }
542 
544  return pcmk_rc_ok;
545 }
546 
547 static void
548 close_op_input(svc_action_t *op)
549 {
550  if (op->opaque->stdin_fd >= 0) {
551  close(op->opaque->stdin_fd);
552  }
553 }
554 
555 static void
556 finish_op_output(svc_action_t *op, bool is_stderr)
557 {
558  mainloop_io_t **source;
559  int fd;
560 
561  if (is_stderr) {
562  source = &(op->opaque->stderr_gsource);
563  fd = op->opaque->stderr_fd;
564  } else {
565  source = &(op->opaque->stdout_gsource);
566  fd = op->opaque->stdout_fd;
567  }
568 
569  if (op->synchronous || *source) {
570  crm_trace("Finish reading %s[%d] %s",
571  op->id, op->pid, (is_stderr? "stdout" : "stderr"));
572  svc_read_output(fd, op, is_stderr);
573  if (op->synchronous) {
574  close(fd);
575  } else {
576  mainloop_del_fd(*source);
577  *source = NULL;
578  }
579  }
580 }
581 
582 // Log an operation's stdout and stderr
583 static void
584 log_op_output(svc_action_t *op)
585 {
586  char *prefix = crm_strdup_printf("%s[%d] error output", op->id, op->pid);
587 
588  crm_log_output(LOG_NOTICE, prefix, op->stderr_data);
589  strcpy(prefix + strlen(prefix) - strlen("error output"), "output");
590  crm_log_output(LOG_DEBUG, prefix, op->stdout_data);
591  free(prefix);
592 }
593 
594 // Truncate exit reasons at this many characters
595 #define EXIT_REASON_MAX_LEN 128
596 
597 static void
598 parse_exit_reason_from_stderr(svc_action_t *op)
599 {
600  const char *reason_start = NULL;
601  const char *reason_end = NULL;
602  const int prefix_len = strlen(PCMK_OCF_REASON_PREFIX);
603 
604  if ((op->stderr_data == NULL) ||
605  // Only OCF agents have exit reasons in stderr
606  !pcmk__str_eq(op->standard, PCMK_RESOURCE_CLASS_OCF, pcmk__str_none)) {
607  return;
608  }
609 
610  // Find the last occurrence of the magic string indicating an exit reason
611  for (const char *cur = strstr(op->stderr_data, PCMK_OCF_REASON_PREFIX);
612  cur != NULL; cur = strstr(cur, PCMK_OCF_REASON_PREFIX)) {
613 
614  cur += prefix_len; // Skip over magic string
615  reason_start = cur;
616  }
617 
618  if ((reason_start == NULL) || (reason_start[0] == '\n')
619  || (reason_start[0] == '\0')) {
620  return; // No or empty exit reason
621  }
622 
623  // Exit reason goes to end of line (or end of output)
624  reason_end = strchr(reason_start, '\n');
625  if (reason_end == NULL) {
626  reason_end = reason_start + strlen(reason_start);
627  }
628 
629  // Limit size of exit reason to something reasonable
630  if (reason_end > (reason_start + EXIT_REASON_MAX_LEN)) {
631  reason_end = reason_start + EXIT_REASON_MAX_LEN;
632  }
633 
634  free(op->opaque->exit_reason);
635  op->opaque->exit_reason = strndup(reason_start, reason_end - reason_start);
636 }
637 
648 static void
649 async_action_complete(mainloop_child_t *p, pid_t pid, int core, int signo,
650  int exitcode)
651 {
653 
655  CRM_CHECK(op->pid == pid,
657  PCMK_EXEC_ERROR, "Bug in mainloop handling");
658  return);
659 
660  /* Depending on the priority the mainloop gives the stdout and stderr
661  * file descriptors, this function could be called before everything has
662  * been read from them, so force a final read now.
663  */
664  finish_op_output(op, true);
665  finish_op_output(op, false);
666 
667  close_op_input(op);
668 
669  if (signo == 0) {
670  crm_debug("%s[%d] exited with status %d", op->id, op->pid, exitcode);
671  services__set_result(op, exitcode, PCMK_EXEC_DONE, NULL);
672  log_op_output(op);
673  parse_exit_reason_from_stderr(op);
674 
675  } else if (mainloop_child_timeout(p)) {
676  crm_warn("%s[%d] timed out after %dms", op->id, op->pid, op->timeout);
678  "Process did not exit within specified timeout");
679 
680  } else if (op->cancel) {
681  /* If an in-flight recurring operation was killed because it was
682  * cancelled, don't treat that as a failure.
683  */
684  crm_info("%s[%d] terminated with signal %d (%s)",
685  op->id, op->pid, signo, strsignal(signo));
687 
688  } else {
689  crm_warn("%s[%d] terminated with signal %d (%s)",
690  op->id, op->pid, signo, strsignal(signo));
692  "Process interrupted by signal");
693  }
694 
696 }
697 
711 int
713 {
714  if ((op == NULL) || (op->standard == NULL)) {
715  return PCMK_OCF_UNKNOWN_ERROR;
716  }
717 
718  if (pcmk__str_eq(op->standard, PCMK_RESOURCE_CLASS_LSB, pcmk__str_casei)
719  && pcmk__str_eq(op->action, "status", pcmk__str_casei)) {
720 
722  }
723 
724 #if SUPPORT_NAGIOS
725  if (pcmk__str_eq(op->standard, PCMK_RESOURCE_CLASS_NAGIOS, pcmk__str_casei)) {
726  return NAGIOS_STATE_UNKNOWN;
727  }
728 #endif
729 
730  return PCMK_OCF_UNKNOWN_ERROR;
731 }
732 
746 int
748 {
749  if ((op == NULL) || (op->standard == NULL)) {
750  return PCMK_OCF_UNKNOWN_ERROR;
751  }
752 
753  if (pcmk__str_eq(op->standard, PCMK_RESOURCE_CLASS_LSB, pcmk__str_casei)
754  && pcmk__str_eq(op->action, "status", pcmk__str_casei)) {
755 
757  }
758 
759 #if SUPPORT_NAGIOS
760  if (pcmk__str_eq(op->standard, PCMK_RESOURCE_CLASS_NAGIOS, pcmk__str_casei)) {
761  return NAGIOS_STATE_UNKNOWN;
762  }
763 #endif
764 
765  return PCMK_OCF_NOT_INSTALLED;
766 }
767 
781 int
783 {
784  if ((op == NULL) || (op->standard == NULL)) {
785  return PCMK_OCF_UNKNOWN_ERROR;
786  }
787 
788  if (pcmk__str_eq(op->standard, PCMK_RESOURCE_CLASS_LSB, pcmk__str_casei)
789  && pcmk__str_eq(op->action, "status", pcmk__str_casei)) {
790 
792  }
793 
794 #if SUPPORT_NAGIOS
795  if (pcmk__str_eq(op->standard, PCMK_RESOURCE_CLASS_NAGIOS, pcmk__str_casei)) {
797  }
798 #endif
799 
801 }
802 
817 int
819 {
820  if ((op == NULL) || (op->standard == NULL)) {
821  return PCMK_OCF_UNKNOWN_ERROR;
822  }
823 
824  if (pcmk__str_eq(op->standard, PCMK_RESOURCE_CLASS_LSB, pcmk__str_casei)
825  && pcmk__str_eq(op->action, "status", pcmk__str_casei)) {
826 
828  }
829 
830 #if SUPPORT_NAGIOS
831  if (pcmk__str_eq(op->standard, PCMK_RESOURCE_CLASS_NAGIOS, pcmk__str_casei)) {
832  return NAGIOS_STATE_UNKNOWN;
833  }
834 #endif
835 
837 }
838 
839 
849 void
851 {
852  switch (error) { /* see execve(2), stat(2) and fork(2) */
853  case ENOENT: /* No such file or directory */
854  case EISDIR: /* Is a directory */
855  case ENOTDIR: /* Path component is not a directory */
856  case EINVAL: /* Invalid executable format */
857  case ENOEXEC: /* Invalid executable format */
860  break;
861  case EACCES: /* permission denied (various errors) */
862  case EPERM: /* permission denied (various errors) */
864  PCMK_EXEC_ERROR, pcmk_rc_str(error));
865  break;
866  default:
868  PCMK_EXEC_ERROR, pcmk_rc_str(error));
869  }
870 }
871 
880 static void
881 exit_child(svc_action_t *op, int exit_status, const char *exit_reason)
882 {
883  if ((op != NULL) && (exit_reason != NULL)
884  && pcmk__str_eq(op->standard, PCMK_RESOURCE_CLASS_OCF,
885  pcmk__str_none)) {
886  fprintf(stderr, PCMK_OCF_REASON_PREFIX "%s\n", exit_reason);
887  }
888  _exit(exit_status);
889 }
890 
891 static void
892 action_launch_child(svc_action_t *op)
893 {
894  int rc;
895 
896  /* SIGPIPE is ignored (which is different from signal blocking) by the gnutls library.
897  * Depending on the libqb version in use, libqb may set SIGPIPE to be ignored as well.
898  * We do not want this to be inherited by the child process. By resetting this the signal
899  * to the default behavior, we avoid some potential odd problems that occur during OCF
900  * scripts when SIGPIPE is ignored by the environment. */
901  signal(SIGPIPE, SIG_DFL);
902 
903 #if defined(HAVE_SCHED_SETSCHEDULER)
904  if (sched_getscheduler(0) != SCHED_OTHER) {
905  struct sched_param sp;
906 
907  memset(&sp, 0, sizeof(sp));
908  sp.sched_priority = 0;
909 
910  if (sched_setscheduler(0, SCHED_OTHER, &sp) == -1) {
911  crm_warn("Could not reset scheduling policy for %s", op->id);
912  }
913  }
914 #endif
915  if (setpriority(PRIO_PROCESS, 0, 0) == -1) {
916  crm_warn("Could not reset process priority for %s", op->id);
917  }
918 
919  /* Man: The call setpgrp() is equivalent to setpgid(0,0)
920  * _and_ compiles on BSD variants too
921  * need to investigate if it works the same too.
922  */
923  setpgid(0, 0);
924 
926 
927  /* It would be nice if errors in this function could be reported as
928  * execution status (for example, PCMK_EXEC_NO_SECRETS for the secrets error
929  * below) instead of exit status. However, we've already forked, so
930  * exit status is all we have. At least for OCF actions, we can output an
931  * exit reason for the parent to parse.
932  */
933 
934 #if SUPPORT_CIBSECRETS
936  if (rc != pcmk_rc_ok) {
937  if (pcmk__str_eq(op->action, "stop", pcmk__str_casei)) {
938  crm_info("Proceeding with stop operation for %s "
939  "despite being unable to load CIB secrets (%s)",
940  op->rsc, pcmk_rc_str(rc));
941  } else {
942  crm_err("Considering %s unconfigured "
943  "because unable to load CIB secrets: %s",
944  op->rsc, pcmk_rc_str(rc));
945  exit_child(op, services__configuration_error(op, false),
946  "Unable to load CIB secrets");
947  }
948  }
949 #endif
950 
951  add_action_env_vars(op);
952 
953  /* Become the desired user */
954  if (op->opaque->uid && (geteuid() == 0)) {
955 
956  // If requested, set effective group
957  if (op->opaque->gid && (setgid(op->opaque->gid) < 0)) {
958  crm_err("Considering %s unauthorized because could not set "
959  "child group to %d: %s",
960  op->id, op->opaque->gid, strerror(errno));
961  exit_child(op, services__authorization_error(op),
962  "Could not set group for child process");
963  }
964 
965  // Erase supplementary group list
966  // (We could do initgroups() if we kept a copy of the username)
967  if (setgroups(0, NULL) < 0) {
968  crm_err("Considering %s unauthorized because could not "
969  "clear supplementary groups: %s", op->id, strerror(errno));
970  exit_child(op, services__authorization_error(op),
971  "Could not clear supplementary groups for child process");
972  }
973 
974  // Set effective user
975  if (setuid(op->opaque->uid) < 0) {
976  crm_err("Considering %s unauthorized because could not set user "
977  "to %d: %s", op->id, op->opaque->uid, strerror(errno));
978  exit_child(op, services__authorization_error(op),
979  "Could not set user for child process");
980  }
981  }
982 
983  // Execute the agent (doesn't return if successful)
984  execvp(op->opaque->exec, op->opaque->args);
985 
986  // An earlier stat() should have avoided most possible errors
987  rc = errno;
989  crm_err("Unable to execute %s: %s", op->id, strerror(rc));
990  exit_child(op, op->rc, "Child process was unable to execute file");
991 }
992 
1000 static void
1001 wait_for_sync_result(svc_action_t *op, struct sigchld_data_s *data)
1002 {
1003  int status = 0;
1004  int timeout = op->timeout;
1005  time_t start = time(NULL);
1006  struct pollfd fds[3];
1007  int wait_rc = 0;
1008  const char *wait_reason = NULL;
1009 
1010  fds[0].fd = op->opaque->stdout_fd;
1011  fds[0].events = POLLIN;
1012  fds[0].revents = 0;
1013 
1014  fds[1].fd = op->opaque->stderr_fd;
1015  fds[1].events = POLLIN;
1016  fds[1].revents = 0;
1017 
1018  fds[2].fd = sigchld_open(data);
1019  fds[2].events = POLLIN;
1020  fds[2].revents = 0;
1021 
1022  crm_trace("Waiting for %s[%d]", op->id, op->pid);
1023  do {
1024  int poll_rc = poll(fds, 3, timeout);
1025 
1026  wait_reason = NULL;
1027 
1028  if (poll_rc > 0) {
1029  if (fds[0].revents & POLLIN) {
1030  svc_read_output(op->opaque->stdout_fd, op, FALSE);
1031  }
1032 
1033  if (fds[1].revents & POLLIN) {
1034  svc_read_output(op->opaque->stderr_fd, op, TRUE);
1035  }
1036 
1037  if ((fds[2].revents & POLLIN) && sigchld_received(fds[2].fd)) {
1038  wait_rc = waitpid(op->pid, &status, WNOHANG);
1039 
1040  if ((wait_rc > 0) || ((wait_rc < 0) && (errno == ECHILD))) {
1041  // Child process exited or doesn't exist
1042  break;
1043 
1044  } else if (wait_rc < 0) {
1045  wait_reason = pcmk_rc_str(errno);
1046  crm_warn("Wait for completion of %s[%d] failed: %s "
1047  CRM_XS " source=waitpid",
1048  op->id, op->pid, wait_reason);
1049  wait_rc = 0; // Act as if process is still running
1050  }
1051  }
1052 
1053  } else if (poll_rc == 0) {
1054  // Poll timed out with no descriptors ready
1055  timeout = 0;
1056  break;
1057 
1058  } else if ((poll_rc < 0) && (errno != EINTR)) {
1059  wait_reason = pcmk_rc_str(errno);
1060  crm_err("Wait for completion of %s[%d] failed: %s "
1061  CRM_XS " source=poll", op->id, op->pid, wait_reason);
1062  break;
1063  }
1064 
1065  timeout = op->timeout - (time(NULL) - start) * 1000;
1066 
1067  } while ((op->timeout < 0 || timeout > 0));
1068 
1069  crm_trace("Stopped waiting for %s[%d]", op->id, op->pid);
1070  finish_op_output(op, true);
1071  finish_op_output(op, false);
1072  close_op_input(op);
1073  sigchld_close(fds[2].fd);
1074 
1075  if (wait_rc <= 0) {
1076 
1077  if ((op->timeout > 0) && (timeout <= 0)) {
1080  "Process did not exit within specified timeout");
1081  crm_warn("%s[%d] timed out after %dms",
1082  op->id, op->pid, op->timeout);
1083 
1084  } else {
1086  PCMK_EXEC_ERROR, wait_reason);
1087  }
1088 
1089  /* If only child hasn't been successfully waited for, yet.
1090  This is to limit killing wrong target a bit more. */
1091  if ((wait_rc == 0) && (waitpid(op->pid, &status, WNOHANG) == 0)) {
1092  if (kill(op->pid, SIGKILL)) {
1093  crm_warn("Could not kill rogue child %s[%d]: %s",
1094  op->id, op->pid, pcmk_strerror(errno));
1095  }
1096  /* Safe to skip WNOHANG here as we sent non-ignorable signal. */
1097  while ((waitpid(op->pid, &status, 0) == (pid_t) -1)
1098  && (errno == EINTR)) {
1099  /* keep waiting */;
1100  }
1101  }
1102 
1103  } else if (WIFEXITED(status)) {
1104  services__set_result(op, WEXITSTATUS(status), PCMK_EXEC_DONE, NULL);
1105  parse_exit_reason_from_stderr(op);
1106  crm_info("%s[%d] exited with status %d", op->id, op->pid, op->rc);
1107 
1108  } else if (WIFSIGNALED(status)) {
1109  int signo = WTERMSIG(status);
1110 
1112  "Process interrupted by signal");
1113  crm_err("%s[%d] terminated with signal %d (%s)",
1114  op->id, op->pid, signo, strsignal(signo));
1115 
1116 #ifdef WCOREDUMP
1117  if (WCOREDUMP(status)) {
1118  crm_warn("%s[%d] dumped core", op->id, op->pid);
1119  }
1120 #endif
1121 
1122  } else {
1123  // Shouldn't be possible to get here
1125  "Unable to wait for child to complete");
1126  }
1127 }
1128 
1145 int
1147 {
1148  int stdout_fd[2];
1149  int stderr_fd[2];
1150  int stdin_fd[2] = {-1, -1};
1151  int rc;
1152  struct stat st;
1153  struct sigchld_data_s data;
1154 
1155  // Catch common failure conditions early
1156  if (stat(op->opaque->exec, &st) != 0) {
1157  rc = errno;
1158  crm_warn("Cannot execute '%s': %s " CRM_XS " stat rc=%d",
1159  op->opaque->exec, pcmk_strerror(rc), rc);
1161  goto done;
1162  }
1163 
1164  if (pipe(stdout_fd) < 0) {
1165  rc = errno;
1166  crm_err("Cannot execute '%s': %s " CRM_XS " pipe(stdout) rc=%d",
1167  op->opaque->exec, pcmk_strerror(rc), rc);
1169  goto done;
1170  }
1171 
1172  if (pipe(stderr_fd) < 0) {
1173  rc = errno;
1174 
1175  close_pipe(stdout_fd);
1176 
1177  crm_err("Cannot execute '%s': %s " CRM_XS " pipe(stderr) rc=%d",
1178  op->opaque->exec, pcmk_strerror(rc), rc);
1180  goto done;
1181  }
1182 
1184  if (pipe(stdin_fd) < 0) {
1185  rc = errno;
1186 
1187  close_pipe(stdout_fd);
1188  close_pipe(stderr_fd);
1189 
1190  crm_err("Cannot execute '%s': %s " CRM_XS " pipe(stdin) rc=%d",
1191  op->opaque->exec, pcmk_strerror(rc), rc);
1193  goto done;
1194  }
1195  }
1196 
1197  if (op->synchronous && !sigchld_setup(&data)) {
1198  close_pipe(stdin_fd);
1199  close_pipe(stdout_fd);
1200  close_pipe(stderr_fd);
1201  sigchld_cleanup(&data);
1203  "Could not manage signals for child process");
1204  goto done;
1205  }
1206 
1207  op->pid = fork();
1208  switch (op->pid) {
1209  case -1:
1210  rc = errno;
1211  close_pipe(stdin_fd);
1212  close_pipe(stdout_fd);
1213  close_pipe(stderr_fd);
1214 
1215  crm_err("Cannot execute '%s': %s " CRM_XS " fork rc=%d",
1216  op->opaque->exec, pcmk_strerror(rc), rc);
1218  if (op->synchronous) {
1219  sigchld_cleanup(&data);
1220  }
1221  goto done;
1222  break;
1223 
1224  case 0: /* Child */
1225  close(stdout_fd[0]);
1226  close(stderr_fd[0]);
1227  if (stdin_fd[1] >= 0) {
1228  close(stdin_fd[1]);
1229  }
1230  if (STDOUT_FILENO != stdout_fd[1]) {
1231  if (dup2(stdout_fd[1], STDOUT_FILENO) != STDOUT_FILENO) {
1232  crm_warn("Can't redirect output from '%s': %s "
1233  CRM_XS " errno=%d",
1234  op->opaque->exec, pcmk_strerror(errno), errno);
1235  }
1236  close(stdout_fd[1]);
1237  }
1238  if (STDERR_FILENO != stderr_fd[1]) {
1239  if (dup2(stderr_fd[1], STDERR_FILENO) != STDERR_FILENO) {
1240  crm_warn("Can't redirect error output from '%s': %s "
1241  CRM_XS " errno=%d",
1242  op->opaque->exec, pcmk_strerror(errno), errno);
1243  }
1244  close(stderr_fd[1]);
1245  }
1246  if ((stdin_fd[0] >= 0) &&
1247  (STDIN_FILENO != stdin_fd[0])) {
1248  if (dup2(stdin_fd[0], STDIN_FILENO) != STDIN_FILENO) {
1249  crm_warn("Can't redirect input to '%s': %s "
1250  CRM_XS " errno=%d",
1251  op->opaque->exec, pcmk_strerror(errno), errno);
1252  }
1253  close(stdin_fd[0]);
1254  }
1255 
1256  if (op->synchronous) {
1257  sigchld_cleanup(&data);
1258  }
1259 
1260  action_launch_child(op);
1261  CRM_ASSERT(0); /* action_launch_child is effectively noreturn */
1262  }
1263 
1264  /* Only the parent reaches here */
1265  close(stdout_fd[1]);
1266  close(stderr_fd[1]);
1267  if (stdin_fd[0] >= 0) {
1268  close(stdin_fd[0]);
1269  }
1270 
1271  op->opaque->stdout_fd = stdout_fd[0];
1273  if (rc != pcmk_rc_ok) {
1274  crm_warn("Could not set '%s' output non-blocking: %s "
1275  CRM_XS " rc=%d",
1276  op->opaque->exec, pcmk_rc_str(rc), rc);
1277  }
1278 
1279  op->opaque->stderr_fd = stderr_fd[0];
1281  if (rc != pcmk_rc_ok) {
1282  crm_warn("Could not set '%s' error output non-blocking: %s "
1283  CRM_XS " rc=%d",
1284  op->opaque->exec, pcmk_rc_str(rc), rc);
1285  }
1286 
1287  op->opaque->stdin_fd = stdin_fd[1];
1288  if (op->opaque->stdin_fd >= 0) {
1289  // using buffer behind non-blocking-fd here - that could be improved
1290  // as long as no other standard uses stdin_fd assume stonith
1292  if (rc != pcmk_rc_ok) {
1293  crm_warn("Could not set '%s' input non-blocking: %s "
1294  CRM_XS " fd=%d,rc=%d", op->opaque->exec,
1295  pcmk_rc_str(rc), op->opaque->stdin_fd, rc);
1296  }
1297  pipe_in_action_stdin_parameters(op);
1298  // as long as we are handling parameters directly in here just close
1299  close(op->opaque->stdin_fd);
1300  op->opaque->stdin_fd = -1;
1301  }
1302 
1303  // after fds are setup properly and before we plug anything into mainloop
1304  if (op->opaque->fork_callback) {
1305  op->opaque->fork_callback(op);
1306  }
1307 
1308  if (op->synchronous) {
1309  wait_for_sync_result(op, &data);
1310  sigchld_cleanup(&data);
1311  goto done;
1312  }
1313 
1314  crm_trace("Waiting async for '%s'[%d]", op->opaque->exec, op->pid);
1315  mainloop_child_add_with_flags(op->pid, op->timeout, op->id, op,
1317  async_action_complete);
1318 
1320  G_PRIORITY_LOW,
1321  op->opaque->stdout_fd, op,
1322  &stdout_callbacks);
1324  G_PRIORITY_LOW,
1325  op->opaque->stderr_fd, op,
1326  &stderr_callbacks);
1328  return pcmk_rc_ok;
1329 
1330 done:
1331  if (op->synchronous) {
1332  return (op->rc == PCMK_OCF_OK)? pcmk_rc_ok : pcmk_rc_error;
1333  } else {
1334  return services__finalize_async_op(op);
1335  }
1336 }
1337 
1338 GList *
1339 services_os_get_single_directory_list(const char *root, gboolean files, gboolean executable)
1340 {
1341  GList *list = NULL;
1342  struct dirent **namelist;
1343  int entries = 0, lpc = 0;
1344  char buffer[PATH_MAX];
1345 
1346  entries = scandir(root, &namelist, NULL, alphasort);
1347  if (entries <= 0) {
1348  return list;
1349  }
1350 
1351  for (lpc = 0; lpc < entries; lpc++) {
1352  struct stat sb;
1353 
1354  if ('.' == namelist[lpc]->d_name[0]) {
1355  free(namelist[lpc]);
1356  continue;
1357  }
1358 
1359  snprintf(buffer, sizeof(buffer), "%s/%s", root, namelist[lpc]->d_name);
1360 
1361  if (stat(buffer, &sb)) {
1362  continue;
1363  }
1364 
1365  if (S_ISDIR(sb.st_mode)) {
1366  if (files) {
1367  free(namelist[lpc]);
1368  continue;
1369  }
1370 
1371  } else if (S_ISREG(sb.st_mode)) {
1372  if (files == FALSE) {
1373  free(namelist[lpc]);
1374  continue;
1375 
1376  } else if (executable
1377  && (sb.st_mode & S_IXUSR) == 0
1378  && (sb.st_mode & S_IXGRP) == 0 && (sb.st_mode & S_IXOTH) == 0) {
1379  free(namelist[lpc]);
1380  continue;
1381  }
1382  }
1383 
1384  list = g_list_append(list, strdup(namelist[lpc]->d_name));
1385 
1386  free(namelist[lpc]);
1387  }
1388 
1389  free(namelist);
1390  return list;
1391 }
1392 
1393 GList *
1394 services_os_get_directory_list(const char *root, gboolean files, gboolean executable)
1395 {
1396  GList *result = NULL;
1397  char *dirs = strdup(root);
1398  char *dir = NULL;
1399 
1400  if (pcmk__str_empty(dirs)) {
1401  free(dirs);
1402  return result;
1403  }
1404 
1405  for (dir = strtok(dirs, ":"); dir != NULL; dir = strtok(NULL, ":")) {
1406  GList *tmp = services_os_get_single_directory_list(dir, files, executable);
1407 
1408  if (tmp) {
1409  result = g_list_concat(result, tmp);
1410  }
1411  }
1412 
1413  free(dirs);
1414 
1415  return result;
1416 }
Services API.
int rc
Exit status of action (set by library upon completion)
Definition: services.h:153
#define CRM_CHECK(expr, failure_action)
Definition: logging.h:225
void(* callback)(svc_action_t *op)
A dumping ground.
const char * pcmk_strerror(int rc)
Definition: results.c:58
char data[0]
Definition: cpg.c:55
void services_action_free(svc_action_t *op)
Definition: services.c:580
guint interval_ms
Action interval for recurring resource actions, otherwise 0.
Definition: services.h:133
mainloop_io_t * mainloop_add_fd(const char *name, int priority, int fd, void *userdata, struct mainloop_fd_callbacks *callbacks)
Definition: mainloop.c:975
char * standard
Resource standard for resource actions, otherwise NULL.
Definition: services.h:136
#define crm_log_output(level, prefix, output)
Definition: logging.h:124
char * id
Definition: services.h:124
mainloop_io_t * stderr_gsource
int pcmk__substitute_secrets(const char *rsc_id, GHashTable *params)
Definition: cib_secrets.c:96
int alphasort(const void *dirent1, const void *dirent2)
gboolean recurring_action_timer(gpointer data)
struct mainloop_io_s mainloop_io_t
Definition: mainloop.h:32
void pcmk__close_fds_in_child(bool)
Definition: io.c:558
void mainloop_child_add_with_flags(pid_t pid, int timeout, const char *desc, void *userdata, enum mainloop_child_flags, void(*callback)(mainloop_child_t *p, pid_t pid, int core, int signo, int exitcode))
Definition: mainloop.c:1271
struct mainloop_child_s mainloop_child_t
Definition: mainloop.h:33
GList * services_os_get_directory_list(const char *root, gboolean files, gboolean executable)
char * rsc
XML ID of resource being executed for resource actions, otherwise NULL.
Definition: services.h:127
int timeout
Action timeout (in milliseconds)
Definition: services.h:144
Action did not complete in time.
Definition: results.h:310
const char * pcmk_rc_str(int rc)
Get a user-friendly description of a return code.
Definition: results.c:432
char * strerror(int errnum)
Wrappers for and extensions to glib mainloop.
Action was cancelled.
Definition: results.h:309
char * strndup(const char *str, size_t len)
int services__generic_error(svc_action_t *op)
void services_action_cleanup(svc_action_t *op)
Definition: services.c:496
int(* dispatch)(gpointer userdata)
Dispatch function for mainloop file descriptor with data ready.
Definition: mainloop.h:137
volatile struct sigchld_data_s * last_sigchld_data
enum svc_action_flags flags
Flag group of enum svc_action_flags.
Definition: services.h:174
#define crm_warn(fmt, args...)
Definition: logging.h:358
void services__set_cancelled(svc_action_t *action)
Definition: services.c:1290
#define PCMK_RESOURCE_CLASS_OCF
Definition: services.h:39
gboolean cancel_recurring_action(svc_action_t *op)
Definition: services.c:633
stonith_t * st
Definition: pcmk_fence.c:28
int rc
Definition: pcmk_fence.c:35
uint32_t pid
Definition: cpg.c:46
svc_action_private_t * opaque
This field should be treated as internal to Pacemaker.
Definition: services.h:180
#define crm_debug(fmt, args...)
Definition: logging.h:362
void * mainloop_child_userdata(mainloop_child_t *child)
Definition: mainloop.c:1053
Parameter invalid (in local context)
Definition: results.h:164
char * stdout_data
Action stdout (set by library)
Definition: services.h:176
Parameter invalid (inherently)
Definition: results.h:168
GHashTable * params
Definition: services.h:151
#define crm_trace(fmt, args...)
Definition: logging.h:363
#define PCMK_OCF_MAJOR_VERSION
Definition: agents.h:40
int setenv(const char *name, const char *value, int why)
int services__finalize_async_op(svc_action_t *op)
char * crm_strdup_printf(char const *format,...) G_GNUC_PRINTF(1
GList * services_os_get_single_directory_list(const char *root, gboolean files, gboolean executable)
Object for executing external actions.
Definition: services.h:120
#define pcmk_is_set(g, f)
Convenience alias for pcmk_all_flags_set(), to check single flag.
Definition: util.h:114
Insufficient privileges.
Definition: results.h:166
void services__set_result(svc_action_t *action, int agent_status, enum pcmk_exec_status exec_status, const char *exit_reason)
Definition: services.c:1264
char * agent
Resource agent name for resource actions, otherwise NULL.
Definition: services.h:142
int synchronous
Definition: services.h:171
Action completed, result is known.
Definition: results.h:308
#define PCMK_OCF_MINOR_VERSION
Definition: agents.h:41
Dependencies not available locally.
Definition: results.h:167
#define PCMK_OCF_REASON_PREFIX
Definition: services.h:51
Unspecified error.
Definition: results.h:163
#define EXIT_REASON_MAX_LEN
#define CRM_XS
Definition: logging.h:54
char * args[MAX_ARGC]
void services_add_inflight_op(svc_action_t *op)
Definition: services.c:830
void services_untrack_op(svc_action_t *op)
Definition: services.c:851
char * action
Name of action being executed for resource actions, otherwise NULL.
Definition: services.h:130
#define PCMK_RESOURCE_CLASS_NAGIOS
Definition: services.h:44
#define PCMK_RESOURCE_CLASS_LSB
Definition: services.h:41
#define crm_perror(level, fmt, args...)
Send a system error message to both the log and stderr.
Definition: logging.h:308
int services__not_installed_error(svc_action_t *op)
#define crm_err(fmt, args...)
Definition: logging.h:357
int services__configuration_error(svc_action_t *op, bool is_fatal)
#define CRM_ASSERT(expr)
Definition: results.h:42
Success.
Definition: results.h:162
void mainloop_clear_child_userdata(mainloop_child_t *child)
Definition: mainloop.c:1059
void services__handle_exec_error(svc_action_t *op, int error)
mainloop_io_t * stdout_gsource
Agent or dependency not available locally.
Definition: results.h:315
void mainloop_del_fd(mainloop_io_t *client)
Definition: mainloop.c:1019
uint32_t pcmk_get_ra_caps(const char *standard)
Get capabilities of a resource agent standard.
Definition: agents.c:31
gboolean services_action_async(svc_action_t *op, void(*action_callback)(svc_action_t *))
Definition: services.c:894
unsigned int timeout
Definition: pcmk_fence.c:32
char * provider
Resource provider for resource actions that require it, otherwise NULL.
Definition: services.h:139
void(* fork_callback)(svc_action_t *op)
Execution failed, may be retried.
Definition: results.h:312
#define crm_info(fmt, args...)
Definition: logging.h:360
int pcmk__set_nonblocking(int fd)
Definition: io.c:517
int services__execute_file(svc_action_t *op)
#define OCF_ROOT_DIR
Definition: config.h:478
int services__authorization_error(svc_action_t *op)
int mainloop_child_timeout(mainloop_child_t *child)
Definition: mainloop.c:1047
char * stderr_data
Action stderr (set by library)
Definition: services.h:175