root/daemons/execd/execd_commands.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. time_is_set
  2. get_current_time
  3. time_diff_ms
  4. cmd_original_times
  5. action_matches
  6. log_finished
  7. log_execute
  8. normalize_action_name
  9. build_rsc_from_xml
  10. create_lrmd_cmd
  11. stop_recurring_timer
  12. free_lrmd_cmd
  13. stonith_recurring_op_helper
  14. start_recurring_timer
  15. start_delay_helper
  16. find_duplicate_action
  17. merge_recurring_duplicate
  18. schedule_lrmd_cmd
  19. create_lrmd_reply
  20. send_client_notify
  21. send_cmd_complete_notify
  22. send_generic_notify
  23. cmd_reset
  24. cmd_finalize
  25. notify_one_client
  26. notify_of_new_client
  27. client_disconnect_cleanup
  28. action_complete
  29. stonith_action_complete
  30. lrmd_stonith_callback
  31. stonith_connection_failed
  32. execd_stonith_start
  33. execd_stonith_stop
  34. execd_stonith_monitor
  35. execute_stonith_action
  36. execute_nonstonith_action
  37. execute_resource_action
  38. free_rsc
  39. process_lrmd_signon
  40. process_lrmd_rsc_register
  41. process_lrmd_get_rsc_info
  42. process_lrmd_rsc_unregister
  43. process_lrmd_rsc_exec
  44. cancel_op
  45. cancel_all_recurring
  46. process_lrmd_rsc_cancel
  47. add_recurring_op_xml
  48. process_lrmd_get_recurring
  49. process_lrmd_message

   1 /*
   2  * Copyright 2012-2022 the Pacemaker project contributors
   3  *
   4  * The version control history for this file may have further details.
   5  *
   6  * This source code is licensed under the GNU Lesser General Public License
   7  * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
   8  */
   9 
  10 #include <crm_internal.h>
  11 #include <crm/fencing/internal.h>
  12 
  13 #include <glib.h>
  14 
  15 // Check whether we have a high-resolution monotonic clock
  16 #undef PCMK__TIME_USE_CGT
  17 #if HAVE_DECL_CLOCK_MONOTONIC && defined(CLOCK_MONOTONIC)
  18 #  define PCMK__TIME_USE_CGT
  19 #  include <time.h>  /* clock_gettime */
  20 #endif
  21 
  22 #include <unistd.h>
  23 
  24 #include <crm/crm.h>
  25 #include <crm/fencing/internal.h>
  26 #include <crm/services.h>
  27 #include <crm/services_internal.h>
  28 #include <crm/common/mainloop.h>
  29 #include <crm/common/ipc.h>
  30 #include <crm/common/ipc_internal.h>
  31 #include <crm/msg_xml.h>
  32 
  33 #include "pacemaker-execd.h"
  34 
  35 GHashTable *rsc_list = NULL;   // Known resources: resource ID string -> lrmd_rsc_t *
  36 
  37 typedef struct lrmd_cmd_s {         // One executor command (a requested resource action)
  38     int timeout;                    // Current timeout (from F_LRMD_TIMEOUT in the request)
  39     guint interval_ms;              // Recurrence interval in ms (0 if not recurring)
  40     int start_delay;                // Delay handed to g_timeout_add() before first run
  41     int timeout_orig;               // Value of timeout when the command was created
  42 
  43     int call_id;                    // Call ID from the request (F_LRMD_CALLID)
  44 
  45     int call_opts;                  // Call option flags from the request (F_LRMD_CALLOPTS)
  46     /* Timer ids, must be removed on cmd destruction. */
  47     int delay_id;                   // GLib source ID of start-delay timer (0 if none)
  48     int stonith_recurring_id;       // GLib source ID of recurring timer (0 if none)
  49 
  50     int rsc_deleted;                // Set to 1 at finalization if resource is unknown
  51 
  52     int service_flags;              // Flags such as SVC_ACTION_LEAVE_GROUP
  53 
  54     char *client_id;                // ID of the client that requested the command
  55     char *origin;                   // F_LRMD_ORIGIN value from the request
  56     char *rsc_id;                   // ID of the resource to act on
  57     char *action;                   // Requested action name
  58     char *real_action;              // If set, reported to clients in place of action
  59     char *userdata_str;             // F_LRMD_RSC_USERDATA_STR, echoed in notifications
  60 
  61     pcmk__action_result_t result;   // Result of the most recent execution
  62 
  63     /* We can track operation queue time and run time, to be saved with the CIB
  64      * resource history (and displayed in cluster status). We need
  65      * high-resolution monotonic time for this purpose, so we use
  66      * clock_gettime(CLOCK_MONOTONIC, ...) (if available, otherwise this feature
  67      * is disabled).
  68      *
  69      * However, we also need epoch timestamps for recording the time the command
  70      * last ran and the time its return value last changed, for use in time
  71      * displays (as opposed to interval calculations). We keep time_t values for
  72      * this purpose.
  73      *
  74      * The last run time is used for both purposes, so we keep redundant
  75      * monotonic and epoch values for this. Technically the two could represent
  76      * different times, but since time_t has only second resolution and the
  77      * values are used for distinct purposes, that is not significant.
  78      */
  79 #ifdef PCMK__TIME_USE_CGT
  80     /* Recurring and systemd operations may involve more than one executor
  81      * command per operation, so they need info about the original and the most
  82      * recent.
  83      */
  84     struct timespec t_first_run;    // When op first ran
  85     struct timespec t_run;          // When op most recently ran
  86     struct timespec t_first_queue;  // When op was first queued
  87     struct timespec t_queue;        // When op was most recently queued
  88 #endif
  89     time_t epoch_last_run;          // Epoch timestamp of when op last ran
  90     time_t epoch_rcchange;          // Epoch timestamp of when rc last changed
  91 
  92     bool first_notify_sent;         // Whether a result notification has been sent yet
  93     int last_notify_rc;             // Exit status sent in the last notification
  94     int last_notify_op_status;      // Execution status sent in the last notification
  95     int last_pid;                   // PID of the most recent execution (0 if none)
  96 
  97     GHashTable *params;             // Parameters built from the request by xml2list()
  98 } lrmd_cmd_t;
  99 
 100 static void cmd_finalize(lrmd_cmd_t * cmd, lrmd_rsc_t * rsc);
 101 static gboolean execute_resource_action(gpointer user_data);
 102 static void cancel_all_recurring(lrmd_rsc_t * rsc, const char *client_id);
 103 
 104 #ifdef PCMK__TIME_USE_CGT
 105 
 /*!
  * \internal
  * \brief Report whether a timespec holds a real (nonzero) time
  *
  * \param[in] timespec  Time to inspect (may be NULL)
  *
  * \return true if \p timespec is non-NULL and at least one of its fields is
  *         nonzero, otherwise false
  */
 static inline bool
 time_is_set(const struct timespec *timespec)
 {
     if (timespec == NULL) {
         return false;
     }
     return (timespec->tv_sec != 0) || (timespec->tv_nsec != 0);
 }
 120 
 121 /*
 122  * \internal
 123  * \brief Set a timespec (and its original if unset) to the current time
 124  *
 125  * \param[out] t_current  Where to store current time
 126  * \param[out] t_orig     Where to copy t_current if unset
 127  */
 128 static void
 129 get_current_time(struct timespec *t_current, struct timespec *t_orig)
     /* [previous][next][first][last][top][bottom][index][help] */
 130 {
 131     clock_gettime(CLOCK_MONOTONIC, t_current);
 132     if ((t_orig != NULL) && !time_is_set(t_orig)) {
 133         *t_orig = *t_current;
 134     }
 135 }
 136 
 137 /*!
 138  * \internal
 139  * \brief Return difference between two times in milliseconds
 140  *
 141  * \param[in] now  More recent time (or NULL to use current time)
 142  * \param[in] old  Earlier time
 143  *
 144  * \return milliseconds difference (or 0 if old is NULL or unset)
 145  *
 146  * \note Can overflow on 32bit machines when the differences is around
 147  *       24 days or more.
 148  */
 149 static int
 150 time_diff_ms(const struct timespec *now, const struct timespec *old)
     /* [previous][next][first][last][top][bottom][index][help] */
 151 {
 152     int diff_ms = 0;
 153 
 154     if (time_is_set(old)) {
 155         struct timespec local_now = { 0, };
 156 
 157         if (now == NULL) {
 158             clock_gettime(CLOCK_MONOTONIC, &local_now);
 159             now = &local_now;
 160         }
 161         diff_ms = (now->tv_sec - old->tv_sec) * 1000
 162                   + (now->tv_nsec - old->tv_nsec) / 1000000;
 163     }
 164     return diff_ms;
 165 }
 166 
 167 /*!
 168  * \internal
 169  * \brief Reset a command's operation times to their original values.
 170  *
 171  * Reset a command's run and queued timestamps to the timestamps of the original
 172  * command, so we report the entire time since then and not just the time since
 173  * the most recent command (for recurring and systemd operations).
 174  *
 175  * \param[in,out] cmd  Executor command object to reset
 176  *
 177  * \note It's not obvious what the queued time should be for a systemd
 178  *       start/stop operation, which might go like this:
 179  *         initial command queued 5ms, runs 3s
 180  *         monitor command queued 10ms, runs 10s
 181  *         monitor command queued 10ms, runs 10s
 182  *       Is the queued time for that operation 5ms, 10ms or 25ms? The current
 183  *       implementation will report 5ms. If it's 25ms, then we need to
 184  *       subtract 20ms from the total exec time so as not to count it twice.
 185  *       We can implement that later if it matters to anyone ...
 186  */
 187 static void
 188 cmd_original_times(lrmd_cmd_t * cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
 189 {
 190     cmd->t_run = cmd->t_first_run;
 191     cmd->t_queue = cmd->t_first_queue;
 192 }
 193 #endif
 194 
 195 static inline bool
 196 action_matches(const lrmd_cmd_t *cmd, const char *action, guint interval_ms)
     /* [previous][next][first][last][top][bottom][index][help] */
 197 {
 198     return (cmd->interval_ms == interval_ms)
 199            && pcmk__str_eq(cmd->action, action, pcmk__str_casei);
 200 }
 201 
 202 /*!
 203  * \internal
 204  * \brief Log the result of an asynchronous command
 205  *
 206  * \param[in] cmd            Command to log result for
 207  * \param[in] exec_time_ms   Execution time in milliseconds, if known
 208  * \param[in] queue_time_ms  Queue time in milliseconds, if known
 209  */
 210 static void
 211 log_finished(const lrmd_cmd_t *cmd, int exec_time_ms, int queue_time_ms)
     /* [previous][next][first][last][top][bottom][index][help] */
 212 {
 213     int log_level = LOG_INFO;
 214     GString *str = g_string_sized_new(100); // reasonable starting size
 215 
 216     if (pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) {
 217         log_level = LOG_DEBUG;
 218     }
 219 
 220     g_string_append_printf(str, "%s %s (call %d",
 221                            cmd->rsc_id, cmd->action, cmd->call_id);
 222     if (cmd->last_pid != 0) {
 223         g_string_append_printf(str, ", PID %d", cmd->last_pid);
 224     }
 225     if (cmd->result.execution_status == PCMK_EXEC_DONE) {
 226         g_string_append_printf(str, ") exited with status %d",
 227                                cmd->result.exit_status);
 228     } else {
 229         pcmk__g_strcat(str, ") could not be executed: ",
 230                        pcmk_exec_status_str(cmd->result.execution_status),
 231                        NULL);
 232     }
 233     if (cmd->result.exit_reason != NULL) {
 234         pcmk__g_strcat(str, " (", cmd->result.exit_reason, ")", NULL);
 235     }
 236 
 237 #ifdef PCMK__TIME_USE_CGT
 238     pcmk__g_strcat(str, " (execution time ",
 239                    pcmk__readable_interval(exec_time_ms), NULL);
 240     if (queue_time_ms > 0) {
 241         pcmk__g_strcat(str, " after being queued ",
 242                        pcmk__readable_interval(queue_time_ms), NULL);
 243     }
 244     g_string_append_c(str, ')');
 245 #endif
 246 
 247     do_crm_log(log_level, "%s", str->str);
 248     g_string_free(str, TRUE);
 249 }
 250 
 251 static void
 252 log_execute(lrmd_cmd_t * cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
 253 {
 254     int log_level = LOG_INFO;
 255 
 256     if (pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) {
 257         log_level = LOG_DEBUG;
 258     }
 259 
 260     do_crm_log(log_level, "executing - rsc:%s action:%s call_id:%d",
 261                cmd->rsc_id, cmd->action, cmd->call_id);
 262 }
 263 
 264 static const char *
 265 normalize_action_name(lrmd_rsc_t * rsc, const char *action)
     /* [previous][next][first][last][top][bottom][index][help] */
 266 {
 267     if (pcmk__str_eq(action, "monitor", pcmk__str_casei) &&
 268         pcmk_is_set(pcmk_get_ra_caps(rsc->class), pcmk_ra_cap_status)) {
 269         return "status";
 270     }
 271     return action;
 272 }
 273 
 274 static lrmd_rsc_t *
 275 build_rsc_from_xml(xmlNode * msg)
     /* [previous][next][first][last][top][bottom][index][help] */
 276 {
 277     xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, msg, LOG_ERR);
 278     lrmd_rsc_t *rsc = NULL;
 279 
 280     rsc = calloc(1, sizeof(lrmd_rsc_t));
 281 
 282     crm_element_value_int(msg, F_LRMD_CALLOPTS, &rsc->call_opts);
 283 
 284     rsc->rsc_id = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ID);
 285     rsc->class = crm_element_value_copy(rsc_xml, F_LRMD_CLASS);
 286     rsc->provider = crm_element_value_copy(rsc_xml, F_LRMD_PROVIDER);
 287     rsc->type = crm_element_value_copy(rsc_xml, F_LRMD_TYPE);
 288     rsc->work = mainloop_add_trigger(G_PRIORITY_HIGH, execute_resource_action,
 289                                      rsc);
 290 
 291     // Initialize fence device probes (to return "not running")
 292     pcmk__set_result(&rsc->fence_probe_result, CRM_EX_ERROR,
 293                      PCMK_EXEC_NO_FENCE_DEVICE, NULL);
 294     return rsc;
 295 }
 296 
 297 static lrmd_cmd_t *
 298 create_lrmd_cmd(xmlNode *msg, pcmk__client_t *client)
     /* [previous][next][first][last][top][bottom][index][help] */
 299 {
 300     int call_options = 0;
 301     xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, msg, LOG_ERR);
 302     lrmd_cmd_t *cmd = NULL;
 303 
 304     cmd = calloc(1, sizeof(lrmd_cmd_t));
 305 
 306     crm_element_value_int(msg, F_LRMD_CALLOPTS, &call_options);
 307     cmd->call_opts = call_options;
 308     cmd->client_id = strdup(client->id);
 309 
 310     crm_element_value_int(msg, F_LRMD_CALLID, &cmd->call_id);
 311     crm_element_value_ms(rsc_xml, F_LRMD_RSC_INTERVAL, &cmd->interval_ms);
 312     crm_element_value_int(rsc_xml, F_LRMD_TIMEOUT, &cmd->timeout);
 313     crm_element_value_int(rsc_xml, F_LRMD_RSC_START_DELAY, &cmd->start_delay);
 314     cmd->timeout_orig = cmd->timeout;
 315 
 316     cmd->origin = crm_element_value_copy(rsc_xml, F_LRMD_ORIGIN);
 317     cmd->action = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ACTION);
 318     cmd->userdata_str = crm_element_value_copy(rsc_xml, F_LRMD_RSC_USERDATA_STR);
 319     cmd->rsc_id = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ID);
 320 
 321     cmd->params = xml2list(rsc_xml);
 322 
 323     if (pcmk__str_eq(g_hash_table_lookup(cmd->params, "CRM_meta_on_fail"), "block", pcmk__str_casei)) {
 324         crm_debug("Setting flag to leave pid group on timeout and "
 325                   "only kill action pid for " PCMK__OP_FMT,
 326                   cmd->rsc_id, cmd->action, cmd->interval_ms);
 327         cmd->service_flags = pcmk__set_flags_as(__func__, __LINE__,
 328                                                 LOG_TRACE, "Action",
 329                                                 cmd->action, 0,
 330                                                 SVC_ACTION_LEAVE_GROUP,
 331                                                 "SVC_ACTION_LEAVE_GROUP");
 332     }
 333     return cmd;
 334 }
 335 
 336 static void
 337 stop_recurring_timer(lrmd_cmd_t *cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
 338 {
 339     if (cmd) {
 340         if (cmd->stonith_recurring_id) {
 341             g_source_remove(cmd->stonith_recurring_id);
 342         }
 343         cmd->stonith_recurring_id = 0;
 344     }
 345 }
 346 
 347 static void
 348 free_lrmd_cmd(lrmd_cmd_t * cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
 349 {
 350     stop_recurring_timer(cmd);
 351     if (cmd->delay_id) {
 352         g_source_remove(cmd->delay_id);
 353     }
 354     if (cmd->params) {
 355         g_hash_table_destroy(cmd->params);
 356     }
 357     pcmk__reset_result(&(cmd->result));
 358     free(cmd->origin);
 359     free(cmd->action);
 360     free(cmd->real_action);
 361     free(cmd->userdata_str);
 362     free(cmd->rsc_id);
 363     free(cmd->client_id);
 364     free(cmd);
 365 }
 366 
 367 static gboolean
 368 stonith_recurring_op_helper(gpointer data)
     /* [previous][next][first][last][top][bottom][index][help] */
 369 {
 370     lrmd_cmd_t *cmd = data;
 371     lrmd_rsc_t *rsc;
 372 
 373     cmd->stonith_recurring_id = 0;
 374 
 375     if (!cmd->rsc_id) {
 376         return FALSE;
 377     }
 378 
 379     rsc = g_hash_table_lookup(rsc_list, cmd->rsc_id);
 380 
 381     CRM_ASSERT(rsc != NULL);
 382     /* take it out of recurring_ops list, and put it in the pending ops
 383      * to be executed */
 384     rsc->recurring_ops = g_list_remove(rsc->recurring_ops, cmd);
 385     rsc->pending_ops = g_list_append(rsc->pending_ops, cmd);
 386 #ifdef PCMK__TIME_USE_CGT
 387     get_current_time(&(cmd->t_queue), &(cmd->t_first_queue));
 388 #endif
 389     mainloop_set_trigger(rsc->work);
 390 
 391     return FALSE;
 392 }
 393 
 394 static inline void
 395 start_recurring_timer(lrmd_cmd_t *cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
 396 {
 397     if (cmd && (cmd->interval_ms > 0)) {
 398         cmd->stonith_recurring_id = g_timeout_add(cmd->interval_ms,
 399                                                   stonith_recurring_op_helper,
 400                                                   cmd);
 401     }
 402 }
 403 
 404 static gboolean
 405 start_delay_helper(gpointer data)
     /* [previous][next][first][last][top][bottom][index][help] */
 406 {
 407     lrmd_cmd_t *cmd = data;
 408     lrmd_rsc_t *rsc = NULL;
 409 
 410     cmd->delay_id = 0;
 411     rsc = cmd->rsc_id ? g_hash_table_lookup(rsc_list, cmd->rsc_id) : NULL;
 412 
 413     if (rsc) {
 414         mainloop_set_trigger(rsc->work);
 415     }
 416 
 417     return FALSE;
 418 }
 419 
 420 /*!
 421  * \internal
 422  * \brief Check whether a list already contains the equivalent of a given action
 423  *
 424  * \param[in] action_list  List to search
 425  * \param[in] cmd          Action to search for
 426  */
 427 static lrmd_cmd_t *
 428 find_duplicate_action(const GList *action_list, const lrmd_cmd_t *cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
 429 {
 430     for (const GList *item = action_list; item != NULL; item = item->next) {
 431         lrmd_cmd_t *dup = item->data;
 432 
 433         if (action_matches(cmd, dup->action, dup->interval_ms)) {
 434             return dup;
 435         }
 436     }
 437     return NULL;
 438 }
 439 
 440 static bool
 441 merge_recurring_duplicate(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
 442 {
 443     lrmd_cmd_t * dup = NULL;
 444     bool dup_pending = true;
 445 
 446     if (cmd->interval_ms == 0) {
 447         return false;
 448     }
 449 
 450     // Search for a duplicate of this action (in-flight or not)
 451     dup = find_duplicate_action(rsc->pending_ops, cmd);
 452     if (dup == NULL) {
 453         dup_pending = false;
 454         dup = find_duplicate_action(rsc->recurring_ops, cmd);
 455         if (dup == NULL) {
 456             return false;
 457         }
 458     }
 459 
 460     /* Do not merge fencing monitors marked for cancellation, so we can reply to
 461      * the cancellation separately.
 462      */
 463     if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH,
 464                      pcmk__str_casei)
 465         && (dup->result.execution_status == PCMK_EXEC_CANCELLED)) {
 466         return false;
 467     }
 468 
 469     /* This should not occur. If it does, we need to investigate how something
 470      * like this is possible in the controller.
 471      */
 472     crm_warn("Duplicate recurring op entry detected (" PCMK__OP_FMT
 473              "), merging with previous op entry",
 474              rsc->rsc_id, normalize_action_name(rsc, dup->action),
 475              dup->interval_ms);
 476 
 477     // Merge new action's call ID and user data into existing action
 478     dup->first_notify_sent = false;
 479     free(dup->userdata_str);
 480     dup->userdata_str = cmd->userdata_str;
 481     cmd->userdata_str = NULL;
 482     dup->call_id = cmd->call_id;
 483     free_lrmd_cmd(cmd);
 484     cmd = NULL;
 485 
 486     /* If dup is not pending, that means it has already executed at least once
 487      * and is waiting in the interval. In that case, stop waiting and initiate
 488      * a new instance now.
 489      */
 490     if (!dup_pending) {
 491         if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH,
 492                          pcmk__str_casei)) {
 493             stop_recurring_timer(dup);
 494             stonith_recurring_op_helper(dup);
 495         } else {
 496             services_action_kick(rsc->rsc_id,
 497                                  normalize_action_name(rsc, dup->action),
 498                                  dup->interval_ms);
 499         }
 500     }
 501     return true;
 502 }
 503 
 504 static void
 505 schedule_lrmd_cmd(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
 506 {
 507     CRM_CHECK(cmd != NULL, return);
 508     CRM_CHECK(rsc != NULL, return);
 509 
 510     crm_trace("Scheduling %s on %s", cmd->action, rsc->rsc_id);
 511 
 512     if (merge_recurring_duplicate(rsc, cmd)) {
 513         // Equivalent of cmd has already been scheduled
 514         return;
 515     }
 516 
 517     /* The controller expects the executor to automatically cancel
 518      * recurring operations before a resource stops.
 519      */
 520     if (pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) {
 521         cancel_all_recurring(rsc, NULL);
 522     }
 523 
 524     rsc->pending_ops = g_list_append(rsc->pending_ops, cmd);
 525 #ifdef PCMK__TIME_USE_CGT
 526     get_current_time(&(cmd->t_queue), &(cmd->t_first_queue));
 527 #endif
 528     mainloop_set_trigger(rsc->work);
 529 
 530     if (cmd->start_delay) {
 531         cmd->delay_id = g_timeout_add(cmd->start_delay, start_delay_helper, cmd);
 532     }
 533 }
 534 
 535 static xmlNode *
 536 create_lrmd_reply(const char *origin, int rc, int call_id)
     /* [previous][next][first][last][top][bottom][index][help] */
 537 {
 538     xmlNode *reply = create_xml_node(NULL, T_LRMD_REPLY);
 539 
 540     crm_xml_add(reply, F_LRMD_ORIGIN, origin);
 541     crm_xml_add_int(reply, F_LRMD_RC, rc);
 542     crm_xml_add_int(reply, F_LRMD_CALLID, call_id);
 543     return reply;
 544 }
 545 
 546 static void
 547 send_client_notify(gpointer key, gpointer value, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 548 {
 549     xmlNode *update_msg = user_data;
 550     pcmk__client_t *client = value;
 551     int rc;
 552     int log_level = LOG_WARNING;
 553     const char *msg = NULL;
 554 
 555     CRM_CHECK(client != NULL, return);
 556     if (client->name == NULL) {
 557         crm_trace("Skipping notification to client without name");
 558         return;
 559     }
 560     if (pcmk_is_set(client->flags, pcmk__client_to_proxy)) {
 561         /* We only want to notify clients of the executor IPC API. If we are
 562          * running as Pacemaker Remote, we may have clients proxied to other
 563          * IPC services in the cluster, so skip those.
 564          */
 565         crm_trace("Skipping executor API notification to client %s",
 566                   pcmk__client_name(client));
 567         return;
 568     }
 569 
 570     rc = lrmd_server_send_notify(client, update_msg);
 571     if (rc == pcmk_rc_ok) {
 572         return;
 573     }
 574 
 575     switch (rc) {
 576         case ENOTCONN:
 577         case EPIPE: // Client exited without waiting for notification
 578             log_level = LOG_INFO;
 579             msg = "Disconnected";
 580             break;
 581 
 582         default:
 583             msg = pcmk_rc_str(rc);
 584             break;
 585     }
 586     do_crm_log(log_level, "Could not notify client %s: %s " CRM_XS " rc=%d",
 587                pcmk__client_name(client), msg, rc);
 588 }
 589 
 590 static void
 591 send_cmd_complete_notify(lrmd_cmd_t * cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
 592 {
 593     xmlNode *notify = NULL;
 594     int exec_time = 0;
 595     int queue_time = 0;
 596 
 597 #ifdef PCMK__TIME_USE_CGT
 598     exec_time = time_diff_ms(NULL, &(cmd->t_run));
 599     queue_time = time_diff_ms(&cmd->t_run, &(cmd->t_queue));
 600 #endif
 601     log_finished(cmd, exec_time, queue_time);
 602 
 603     /* If the originator requested to be notified only for changes in recurring
 604      * operation results, skip the notification if the result hasn't changed.
 605      */
 606     if (cmd->first_notify_sent
 607         && pcmk_is_set(cmd->call_opts, lrmd_opt_notify_changes_only)
 608         && (cmd->last_notify_rc == cmd->result.exit_status)
 609         && (cmd->last_notify_op_status == cmd->result.execution_status)) {
 610         return;
 611     }
 612 
 613     cmd->first_notify_sent = true;
 614     cmd->last_notify_rc = cmd->result.exit_status;
 615     cmd->last_notify_op_status = cmd->result.execution_status;
 616 
 617     notify = create_xml_node(NULL, T_LRMD_NOTIFY);
 618 
 619     crm_xml_add(notify, F_LRMD_ORIGIN, __func__);
 620     crm_xml_add_int(notify, F_LRMD_TIMEOUT, cmd->timeout);
 621     crm_xml_add_ms(notify, F_LRMD_RSC_INTERVAL, cmd->interval_ms);
 622     crm_xml_add_int(notify, F_LRMD_RSC_START_DELAY, cmd->start_delay);
 623     crm_xml_add_int(notify, F_LRMD_EXEC_RC, cmd->result.exit_status);
 624     crm_xml_add_int(notify, F_LRMD_OP_STATUS, cmd->result.execution_status);
 625     crm_xml_add_int(notify, F_LRMD_CALLID, cmd->call_id);
 626     crm_xml_add_int(notify, F_LRMD_RSC_DELETED, cmd->rsc_deleted);
 627 
 628     crm_xml_add_ll(notify, F_LRMD_RSC_RUN_TIME,
 629                    (long long) cmd->epoch_last_run);
 630     crm_xml_add_ll(notify, F_LRMD_RSC_RCCHANGE_TIME,
 631                    (long long) cmd->epoch_rcchange);
 632 #ifdef PCMK__TIME_USE_CGT
 633     crm_xml_add_int(notify, F_LRMD_RSC_EXEC_TIME, exec_time);
 634     crm_xml_add_int(notify, F_LRMD_RSC_QUEUE_TIME, queue_time);
 635 #endif
 636 
 637     crm_xml_add(notify, F_LRMD_OPERATION, LRMD_OP_RSC_EXEC);
 638     crm_xml_add(notify, F_LRMD_RSC_ID, cmd->rsc_id);
 639     if(cmd->real_action) {
 640         crm_xml_add(notify, F_LRMD_RSC_ACTION, cmd->real_action);
 641     } else {
 642         crm_xml_add(notify, F_LRMD_RSC_ACTION, cmd->action);
 643     }
 644     crm_xml_add(notify, F_LRMD_RSC_USERDATA_STR, cmd->userdata_str);
 645     crm_xml_add(notify, F_LRMD_RSC_EXIT_REASON, cmd->result.exit_reason);
 646 
 647     if (cmd->result.action_stderr != NULL) {
 648         crm_xml_add(notify, F_LRMD_RSC_OUTPUT, cmd->result.action_stderr);
 649 
 650     } else if (cmd->result.action_stdout != NULL) {
 651         crm_xml_add(notify, F_LRMD_RSC_OUTPUT, cmd->result.action_stdout);
 652     }
 653 
 654     if (cmd->params) {
 655         char *key = NULL;
 656         char *value = NULL;
 657         GHashTableIter iter;
 658 
 659         xmlNode *args = create_xml_node(notify, XML_TAG_ATTRS);
 660 
 661         g_hash_table_iter_init(&iter, cmd->params);
 662         while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) {
 663             hash2smartfield((gpointer) key, (gpointer) value, args);
 664         }
 665     }
 666     if ((cmd->client_id != NULL)
 667         && pcmk_is_set(cmd->call_opts, lrmd_opt_notify_orig_only)) {
 668 
 669         pcmk__client_t *client = pcmk__find_client_by_id(cmd->client_id);
 670 
 671         if (client != NULL) {
 672             send_client_notify(client->id, client, notify);
 673         }
 674     } else {
 675         pcmk__foreach_ipc_client(send_client_notify, notify);
 676     }
 677 
 678     free_xml(notify);
 679 }
 680 
 681 static void
 682 send_generic_notify(int rc, xmlNode * request)
     /* [previous][next][first][last][top][bottom][index][help] */
 683 {
 684     if (pcmk__ipc_client_count() != 0) {
 685         int call_id = 0;
 686         xmlNode *notify = NULL;
 687         xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR);
 688         const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
 689         const char *op = crm_element_value(request, F_LRMD_OPERATION);
 690 
 691         crm_element_value_int(request, F_LRMD_CALLID, &call_id);
 692 
 693         notify = create_xml_node(NULL, T_LRMD_NOTIFY);
 694         crm_xml_add(notify, F_LRMD_ORIGIN, __func__);
 695         crm_xml_add_int(notify, F_LRMD_RC, rc);
 696         crm_xml_add_int(notify, F_LRMD_CALLID, call_id);
 697         crm_xml_add(notify, F_LRMD_OPERATION, op);
 698         crm_xml_add(notify, F_LRMD_RSC_ID, rsc_id);
 699 
 700         pcmk__foreach_ipc_client(send_client_notify, notify);
 701 
 702         free_xml(notify);
 703     }
 704 }
 705 
 706 static void
 707 cmd_reset(lrmd_cmd_t * cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
 708 {
 709     cmd->last_pid = 0;
 710 #ifdef PCMK__TIME_USE_CGT
 711     memset(&cmd->t_run, 0, sizeof(cmd->t_run));
 712     memset(&cmd->t_queue, 0, sizeof(cmd->t_queue));
 713 #endif
 714     cmd->epoch_last_run = 0;
 715 
 716     pcmk__reset_result(&(cmd->result));
 717     cmd->result.execution_status = PCMK_EXEC_DONE;
 718 }
 719 
 720 static void
 721 cmd_finalize(lrmd_cmd_t * cmd, lrmd_rsc_t * rsc)
     /* [previous][next][first][last][top][bottom][index][help] */
 722 {
 723     crm_trace("Resource operation rsc:%s action:%s completed (%p %p)", cmd->rsc_id, cmd->action,
 724               rsc ? rsc->active : NULL, cmd);
 725 
 726     if (rsc && (rsc->active == cmd)) {
 727         rsc->active = NULL;
 728         mainloop_set_trigger(rsc->work);
 729     }
 730 
 731     if (!rsc) {
 732         cmd->rsc_deleted = 1;
 733     }
 734 
 735     /* reset original timeout so client notification has correct information */
 736     cmd->timeout = cmd->timeout_orig;
 737 
 738     send_cmd_complete_notify(cmd);
 739 
 740     if ((cmd->interval_ms != 0)
 741         && (cmd->result.execution_status == PCMK_EXEC_CANCELLED)) {
 742 
 743         if (rsc) {
 744             rsc->recurring_ops = g_list_remove(rsc->recurring_ops, cmd);
 745             rsc->pending_ops = g_list_remove(rsc->pending_ops, cmd);
 746         }
 747         free_lrmd_cmd(cmd);
 748     } else if (cmd->interval_ms == 0) {
 749         if (rsc) {
 750             rsc->pending_ops = g_list_remove(rsc->pending_ops, cmd);
 751         }
 752         free_lrmd_cmd(cmd);
 753     } else {
 754         /* Clear all the values pertaining just to the last iteration of a recurring op. */
 755         cmd_reset(cmd);
 756     }
 757 }
 758 
 759 struct notify_new_client_data {   // User data handed to notify_one_client()
 760     xmlNode *notify;              // Notification to send to each other client
 761     pcmk__client_t *new_client;   // Newly connected client (not notified itself)
 762 };
 763 
 764 static void
 765 notify_one_client(gpointer key, gpointer value, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 766 {
 767     pcmk__client_t *client = value;
 768     struct notify_new_client_data *data = user_data;
 769 
 770     if (!pcmk__str_eq(client->id, data->new_client->id, pcmk__str_casei)) {
 771         send_client_notify(key, (gpointer) client, (gpointer) data->notify);
 772     }
 773 }
 774 
 775 void
 776 notify_of_new_client(pcmk__client_t *new_client)
     /* [previous][next][first][last][top][bottom][index][help] */
 777 {
 778     struct notify_new_client_data data;
 779 
 780     data.new_client = new_client;
 781     data.notify = create_xml_node(NULL, T_LRMD_NOTIFY);
 782     crm_xml_add(data.notify, F_LRMD_ORIGIN, __func__);
 783     crm_xml_add(data.notify, F_LRMD_OPERATION, LRMD_OP_NEW_CLIENT);
 784     pcmk__foreach_ipc_client(notify_one_client, &data);
 785     free_xml(data.notify);
 786 }
 787 
 788 void
 789 client_disconnect_cleanup(const char *client_id)
     /* [previous][next][first][last][top][bottom][index][help] */
 790 {
 791     GHashTableIter iter;
 792     lrmd_rsc_t *rsc = NULL;
 793     char *key = NULL;
 794 
 795     g_hash_table_iter_init(&iter, rsc_list);
 796     while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & rsc)) {
 797         if (pcmk_all_flags_set(rsc->call_opts, lrmd_opt_drop_recurring)) {
 798             /* This client is disconnecting, drop any recurring operations
 799              * it may have initiated on the resource */
 800             cancel_all_recurring(rsc, client_id);
 801         }
 802     }
 803 }
 804 
 /*!
  * \internal
  * \brief Handle completion of a resource agent action run by the services
  *        library
  *
  * This is the callback passed to services_action_async(). It records the
  * action's result in the associated command and then either:
  * - re-schedules the command (only when built with PCMK__TIME_USE_CGT) for
  *   systemd start/stop actions that need a follow-up monitor, or for failed
  *   nagios start actions, as long as time remains in the original timeout; or
  * - attaches the action's output to the result and finalizes the command.
  *
  * \param[in,out] action  Completed action; action->cb_data must point to the
  *                        lrmd_cmd_t that requested it
  */
 805 static void
 806 action_complete(svc_action_t * action)
 807 {
 808     lrmd_rsc_t *rsc;
 809     lrmd_cmd_t *cmd = action->cb_data;
 810     enum ocf_exitcode code;
 811 
 812 #ifdef PCMK__TIME_USE_CGT
 813     const char *rclass = NULL;
 814     bool goagain = false;
 815 #endif
 816 
 817     if (!cmd) {
 818         crm_err("Completed executor action (%s) does not match any known operations",
 819                 action->id);
 820         return;
 821     }
 822 
 823 #ifdef PCMK__TIME_USE_CGT
         // Note the time whenever the raw return code differs from the stored one
 824     if (cmd->result.exit_status != action->rc) {
 825         cmd->epoch_rcchange = time(NULL);
 826     }
 827 #endif
 828 
 829     cmd->last_pid = action->pid;
 830 
 831     // Cast variable instead of function return to keep compilers happy
 832     code = services_result2ocf(action->standard, cmd->action, action->rc);
 833     pcmk__set_result(&(cmd->result), (int) code,
 834                      action->status, services__exit_reason(action));
 835 
         // rsc stays NULL if the command has no resource ID or the lookup fails
 836     rsc = cmd->rsc_id ? g_hash_table_lookup(rsc_list, cmd->rsc_id) : NULL;
 837 
 838 #ifdef PCMK__TIME_USE_CGT
         // For the "service" class, use the class the service actually resolves to
 839     if (rsc && pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_SERVICE, pcmk__str_casei)) {
 840         rclass = resources_find_service_class(rsc->type);
 841     } else if(rsc) {
 842         rclass = rsc->class;
 843     }
 844 
 845     if (pcmk__str_eq(rclass, PCMK_RESOURCE_CLASS_SYSTEMD, pcmk__str_casei)) {
 846         if (pcmk__result_ok(&(cmd->result))
 847             && pcmk__strcase_any_of(cmd->action, "start", "stop", NULL)) {
 848             /* systemd returns from start and stop actions after the action
 849              * begins, not after it completes. We have to jump through a few
 850              * hoops so that we don't report 'complete' to the rest of pacemaker
 851              * until it's actually done.
 852              */
 853             goagain = true;
                 // Remember the requested action and run "monitor" in its place
 854             cmd->real_action = cmd->action;
 855             cmd->action = strdup("monitor");
 856 
 857         } else if (cmd->real_action != NULL) {
 858             // This is follow-up monitor to check whether start/stop completed
 859             if (cmd->result.execution_status == PCMK_EXEC_PENDING) {
 860                 goagain = true;
 861 
 862             } else if (pcmk__result_ok(&(cmd->result))
 863                        && pcmk__str_eq(cmd->real_action, "stop", pcmk__str_casei)) {
                     // Monitor still succeeds after a stop, so keep checking
 864                 goagain = true;
 865 
 866             } else {
 867                 int time_sum = time_diff_ms(NULL, &(cmd->t_first_run));
 868                 int timeout_left = cmd->timeout_orig - time_sum;
 869 
 870                 crm_debug("%s systemd %s is now complete (elapsed=%dms, "
 871                           "remaining=%dms): %s (%d)",
 872                           cmd->rsc_id, cmd->real_action, time_sum, timeout_left,
 873                           services_ocf_exitcode_str(cmd->result.exit_status),
 874                           cmd->result.exit_status);
 875                 cmd_original_times(cmd);
 876 
 877                 // Monitors may return "not running", but start/stop shouldn't
 878                 if ((cmd->result.execution_status == PCMK_EXEC_DONE)
 879                     && (cmd->result.exit_status == PCMK_OCF_NOT_RUNNING)) {
 880 
 881                     if (pcmk__str_eq(cmd->real_action, "start", pcmk__str_casei)) {
 882                         cmd->result.exit_status = PCMK_OCF_UNKNOWN_ERROR;
 883                     } else if (pcmk__str_eq(cmd->real_action, "stop", pcmk__str_casei)) {
 884                         cmd->result.exit_status = PCMK_OCF_OK;
 885                     }
 886                 }
 887             }
 888         }
 889     }
 890 #endif
 891 
 892 #if SUPPORT_NAGIOS
 893     if (rsc && pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_NAGIOS, pcmk__str_casei)) {
 894         if (action_matches(cmd, "monitor", 0)
 895             && pcmk__result_ok(&(cmd->result))) {
 896             /* Successfully executed --version for the nagios plugin */
 897             cmd->result.exit_status = PCMK_OCF_NOT_RUNNING;
 898 
 899         } else if (pcmk__str_eq(cmd->action, "start", pcmk__str_casei)
 900                    && !pcmk__result_ok(&(cmd->result))) {
                 // A failed nagios start is retried only when timing support exists
 901 #ifdef PCMK__TIME_USE_CGT
 902             goagain = true;
 903 #endif
 904         }
 905     }
 906 #endif
 907 
 908 #ifdef PCMK__TIME_USE_CGT
 909     if (goagain) {
 910         int time_sum = time_diff_ms(NULL, &(cmd->t_first_run));
 911         int timeout_left = cmd->timeout_orig - time_sum;
             // Default to waiting a tenth of the original timeout before retrying
 912         int delay = cmd->timeout_orig / 10;
 913 
             // If that would use up the remaining time, wait half of what is left
 914         if(delay >= timeout_left && timeout_left > 20) {
 915             delay = timeout_left/2;
 916         }
 917 
             // Never wait more than 2000ms between attempts
 918         delay = QB_MIN(2000, delay);
 919         if (delay < timeout_left) {
 920             cmd->start_delay = delay;
 921             cmd->timeout = timeout_left;
 922 
 923             if (pcmk__result_ok(&(cmd->result))) {
 924                 crm_debug("%s %s may still be in progress: re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)",
 925                           cmd->rsc_id, cmd->real_action, time_sum, timeout_left, delay);
 926 
 927             } else if (cmd->result.execution_status == PCMK_EXEC_PENDING) {
 928                 crm_info("%s %s is still in progress: re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)",
 929                          cmd->rsc_id, cmd->action, time_sum, timeout_left, delay);
 930 
 931             } else {
 932                 crm_notice("%s %s failed '%s' (%d): re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)",
 933                            cmd->rsc_id, cmd->action,
 934                            services_ocf_exitcode_str(cmd->result.exit_status),
 935                            cmd->result.exit_status, time_sum, timeout_left,
 936                            delay);
 937             }
 938 
 939             cmd_reset(cmd);
                 // Clear the active marker before handing the command back to the scheduler
 940             if(rsc) {
 941                 rsc->active = NULL;
 942             }
 943             schedule_lrmd_cmd(rsc, cmd);
 944 
 945             /* Don't finalize cmd, we're not done with it yet */
 946             return;
 947 
 948         } else {
 949             crm_notice("Giving up on %s %s (rc=%d): timeout (elapsed=%dms, remaining=%dms)",
 950                        cmd->rsc_id,
 951                        (cmd->real_action? cmd->real_action : cmd->action),
 952                        cmd->result.exit_status, time_sum, timeout_left);
 953             pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
 954                              PCMK_EXEC_TIMEOUT,
 955                              "Investigate reason for timeout, and adjust "
 956                              "configured operation timeout if necessary");
 957             cmd_original_times(cmd);
 958         }
 959     }
 960 #endif
 961 
         // Attach the action's stdout/stderr to the result, then finalize
 962     pcmk__set_result_output(&(cmd->result), services__grab_stdout(action),
 963                             services__grab_stderr(action));
 964     cmd_finalize(cmd, rsc);
 965 }
 966 
 967 /*!
 968  * \internal
 969  * \brief Process the result of a fence device action (start, stop, or monitor)
 970  *
      * Maps the fencer's result onto the result codes executor clients expect,
      * stores it in \p cmd, updates the resource's cached probe result after a
      * successful start or stop, restarts the recurring timer if appropriate, and
      * finalizes the command.
      *
 971  * \param[in,out] cmd               Fence device action that completed
 972  * \param[in]     exit_status       Fencer API exit status for action
 973  * \param[in]     execution_status  Fencer API execution status for action
 974  * \param[in]     exit_reason       Human-friendly detail, if action failed
      *
      * \note This always ends by calling cmd_finalize() on \p cmd, so callers
      *       should not use \p cmd afterward.
 975  */
 976 static void
 977 stonith_action_complete(lrmd_cmd_t *cmd, int exit_status,
 978                         enum pcmk_exec_status execution_status,
 979                         const char *exit_reason)
 980 {
 981     // This can be NULL if resource was removed before command completed
 982     lrmd_rsc_t *rsc = g_hash_table_lookup(rsc_list, cmd->rsc_id);
 983 
 984     // Simplify fencer exit status to uniform exit status
 985     if (exit_status != CRM_EX_OK) {
 986         exit_status = PCMK_OCF_UNKNOWN_ERROR;
 987     }
 988 
 989     if (cmd->result.execution_status == PCMK_EXEC_CANCELLED) {
 990         /* An in-flight fence action was cancelled. The execution status is
 991          * already correct, so don't overwrite it.
 992          */
 993         execution_status = PCMK_EXEC_CANCELLED;
 994 
 995     } else {
 996         /* Some execution status codes have specific meanings for the fencer
 997          * that executor clients may not expect, so map them to a simple error
 998          * status.
 999          */
1000         switch (execution_status) {
1001             case PCMK_EXEC_NOT_CONNECTED:
1002             case PCMK_EXEC_INVALID:
1003                 execution_status = PCMK_EXEC_ERROR;
1004                 break;
1005 
1006             case PCMK_EXEC_NO_FENCE_DEVICE:
1007                 /* This should be possible only for probes in practice, but
1008                  * interpret for all actions to be safe.
1009                  */
1010                 if (pcmk__str_eq(cmd->action, CRMD_ACTION_STATUS,
1011                                  pcmk__str_none)) {
1012                     exit_status = PCMK_OCF_NOT_RUNNING;
1013 
1014                 } else if (pcmk__str_eq(cmd->action, CRMD_ACTION_STOP,
1015                                         pcmk__str_none)) {
1016                     exit_status = PCMK_OCF_OK;
1017 
1018                 } else {
1019                     exit_status = PCMK_OCF_NOT_INSTALLED;
1020                 }
1021                 execution_status = PCMK_EXEC_ERROR;
1022                 break;
1023 
1024             case PCMK_EXEC_NOT_SUPPORTED:
                     // Only the exit status is remapped; execution status is kept
1025                 exit_status = PCMK_OCF_UNIMPLEMENT_FEATURE;
1026                 break;
1027 
1028             default:
1029                 break;
1030         }
1031     }
1032 
1033     pcmk__set_result(&cmd->result, exit_status, execution_status, exit_reason);
1034 
1035     // Certain successful actions change the known state of the resource
1036     if ((rsc != NULL) && pcmk__result_ok(&(cmd->result))) {
1037 
1038         if (pcmk__str_eq(cmd->action, "start", pcmk__str_casei)) {
1039             pcmk__set_result(&rsc->fence_probe_result, CRM_EX_OK,
1040                              PCMK_EXEC_DONE, NULL); // "running"
1041 
1042         } else if (pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) {
1043             pcmk__set_result(&rsc->fence_probe_result, CRM_EX_ERROR,
1044                              PCMK_EXEC_NO_FENCE_DEVICE, NULL); // "not running"
1045         }
1046     }
1047 
1048     /* The recurring timer should not be running at this point in any case, but
1049      * as a failsafe, stop it if it is.
1050      */
1051     stop_recurring_timer(cmd);
1052 
1053     /* Reschedule this command if appropriate. If a recurring command is *not*
1054      * rescheduled, its status must be PCMK_EXEC_CANCELLED, otherwise it will
1055      * not be removed from recurring_ops by cmd_finalize().
1056      */
1057     if (rsc && (cmd->interval_ms > 0)
1058         && (cmd->result.execution_status != PCMK_EXEC_CANCELLED)) {
1059         start_recurring_timer(cmd);
1060     }
1061 
1062     cmd_finalize(cmd, rsc);
1063 }
1064 
1065 static void
1066 lrmd_stonith_callback(stonith_t * stonith, stonith_callback_data_t * data)
     /* [previous][next][first][last][top][bottom][index][help] */
1067 {
1068     if ((data == NULL) || (data->userdata == NULL)) {
1069         crm_err("Ignoring fence action result: "
1070                 "Invalid callback arguments (bug?)");
1071     } else {
1072         stonith_action_complete((lrmd_cmd_t *) data->userdata,
1073                                 stonith__exit_status(data),
1074                                 stonith__execution_status(data),
1075                                 stonith__exit_reason(data));
1076     }
1077 }
1078 
1079 void
1080 stonith_connection_failed(void)
     /* [previous][next][first][last][top][bottom][index][help] */
1081 {
1082     GHashTableIter iter;
1083     lrmd_rsc_t *rsc = NULL;
1084 
1085     crm_warn("Connection to fencer lost (any pending operations for "
1086              "fence devices will be considered failed)");
1087 
1088     g_hash_table_iter_init(&iter, rsc_list);
1089     while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &rsc)) {
1090         if (!pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH,
1091                           pcmk__str_none)) {
1092             continue;
1093         }
1094 
1095         /* If we registered this fence device, we don't know whether the
1096          * fencer still has the registration or not. Cause future probes to
1097          * return an error until the resource is stopped or started
1098          * successfully. This is especially important if the controller also
1099          * went away (possibly due to a cluster layer restart) and won't
1100          * receive our client notification of any monitors finalized below.
1101          */
1102         if (rsc->fence_probe_result.execution_status == PCMK_EXEC_DONE) {
1103             pcmk__set_result(&rsc->fence_probe_result, CRM_EX_ERROR,
1104                              PCMK_EXEC_NOT_CONNECTED,
1105                              "Lost connection to fencer");
1106         }
1107 
1108         // Consider any active, pending, or recurring operations as failed
1109 
1110         for (GList *op = rsc->recurring_ops; op != NULL; op = op->next) {
1111             lrmd_cmd_t *cmd = op->data;
1112 
1113             /* This won't free a recurring op but instead restart its timer.
1114              * If cmd is rsc->active, this will set rsc->active to NULL, so we
1115              * don't have to worry about finalizing it a second time below.
1116              */
1117             stonith_action_complete(cmd,
1118                                     CRM_EX_ERROR, PCMK_EXEC_NOT_CONNECTED,
1119                                     "Lost connection to fencer");
1120         }
1121 
1122         if (rsc->active != NULL) {
1123             rsc->pending_ops = g_list_prepend(rsc->pending_ops, rsc->active);
1124         }
1125         while (rsc->pending_ops != NULL) {
1126             // This will free the op and remove it from rsc->pending_ops
1127             stonith_action_complete((lrmd_cmd_t *) rsc->pending_ops->data,
1128                                     CRM_EX_ERROR, PCMK_EXEC_NOT_CONNECTED,
1129                                     "Lost connection to fencer");
1130         }
1131     }
1132 }
1133 
1134 /*!
1135  * \internal
1136  * \brief Execute a stonith resource "start" action
1137  *
1138  * Start a stonith resource by registering it with the fencer.
1139  * (Stonith agents don't have a start command.)
1140  *
1141  * \param[in,out] stonith_api  Connection to fencer
1142  * \param[in]     rsc          Stonith resource to start
1143  * \param[in]     cmd          Start command to execute
1144  *
1145  * \return pcmk_ok on success, -errno otherwise
1146  */
1147 static int
1148 execd_stonith_start(stonith_t *stonith_api, const lrmd_rsc_t *rsc,
     /* [previous][next][first][last][top][bottom][index][help] */
1149                     const lrmd_cmd_t *cmd)
1150 {
1151     char *key = NULL;
1152     char *value = NULL;
1153     stonith_key_value_t *device_params = NULL;
1154     int rc = pcmk_ok;
1155 
1156     // Convert command parameters to stonith API key/values
1157     if (cmd->params) {
1158         GHashTableIter iter;
1159 
1160         g_hash_table_iter_init(&iter, cmd->params);
1161         while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) {
1162             device_params = stonith_key_value_add(device_params, key, value);
1163         }
1164     }
1165 
1166     /* The fencer will automatically register devices via CIB notifications
1167      * when the CIB changes, but to avoid a possible race condition between
1168      * the fencer receiving the notification and the executor requesting that
1169      * resource, the executor registers the device as well. The fencer knows how
1170      * to handle duplicate registrations.
1171      */
1172     rc = stonith_api->cmds->register_device(stonith_api, st_opt_sync_call,
1173                                             cmd->rsc_id, rsc->provider,
1174                                             rsc->type, device_params);
1175 
1176     stonith_key_value_freeall(device_params, 1, 1);
1177     return rc;
1178 }
1179 
1180 /*!
1181  * \internal
1182  * \brief Execute a stonith resource "stop" action
1183  *
1184  * Stop a stonith resource by unregistering it with the fencer.
1185  * (Stonith agents don't have a stop command.)
1186  *
1187  * \param[in,out] stonith_api  Connection to fencer
1188  * \param[in]     rsc          Stonith resource to stop
1189  *
1190  * \return pcmk_ok on success, -errno otherwise
1191  */
1192 static inline int
1193 execd_stonith_stop(stonith_t *stonith_api, const lrmd_rsc_t *rsc)
     /* [previous][next][first][last][top][bottom][index][help] */
1194 {
1195     /* @TODO Failure would indicate a problem communicating with fencer;
1196      * perhaps we should try reconnecting and retrying a few times?
1197      */
1198     return stonith_api->cmds->remove_device(stonith_api, st_opt_sync_call,
1199                                             rsc->rsc_id);
1200 }
1201 
1202 /*!
1203  * \internal
1204  * \brief Initiate a stonith resource agent recurring "monitor" action
1205  *
1206  * \param[in,out] stonith_api  Connection to fencer
1207  * \param[in,out] rsc          Stonith resource to monitor
1208  * \param[in]     cmd          Monitor command being executed
1209  *
1210  * \return pcmk_ok if monitor was successfully initiated, -errno otherwise
1211  */
1212 static inline int
1213 execd_stonith_monitor(stonith_t *stonith_api, lrmd_rsc_t *rsc, lrmd_cmd_t *cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
1214 {
1215     int rc = stonith_api->cmds->monitor(stonith_api, 0, cmd->rsc_id,
1216                                         cmd->timeout / 1000);
1217 
1218     rc = stonith_api->cmds->register_callback(stonith_api, rc, 0, 0, cmd,
1219                                               "lrmd_stonith_callback",
1220                                               lrmd_stonith_callback);
1221     if (rc == TRUE) {
1222         rsc->active = cmd;
1223         rc = pcmk_ok;
1224     } else {
1225         rc = -pcmk_err_generic;
1226     }
1227     return rc;
1228 }
1229 
1230 static void
1231 execute_stonith_action(lrmd_rsc_t *rsc, lrmd_cmd_t *cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
1232 {
1233     int rc = 0;
1234     bool do_monitor = FALSE;
1235 
1236     stonith_t *stonith_api = get_stonith_connection();
1237 
1238     if (pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)
1239         && (cmd->interval_ms == 0)) {
1240         // Probes don't require a fencer connection
1241         stonith_action_complete(cmd, rsc->fence_probe_result.exit_status,
1242                                 rsc->fence_probe_result.execution_status,
1243                                 rsc->fence_probe_result.exit_reason);
1244         return;
1245 
1246     } else if (stonith_api == NULL) {
1247         stonith_action_complete(cmd, PCMK_OCF_UNKNOWN_ERROR,
1248                                 PCMK_EXEC_NOT_CONNECTED,
1249                                 "No connection to fencer");
1250         return;
1251 
1252     } else if (pcmk__str_eq(cmd->action, "start", pcmk__str_casei)) {
1253         rc = execd_stonith_start(stonith_api, rsc, cmd);
1254         if (rc == pcmk_ok) {
1255             do_monitor = TRUE;
1256         }
1257 
1258     } else if (pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) {
1259         rc = execd_stonith_stop(stonith_api, rsc);
1260 
1261     } else if (pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) {
1262         do_monitor = TRUE;
1263 
1264     } else {
1265         stonith_action_complete(cmd, PCMK_OCF_UNIMPLEMENT_FEATURE,
1266                                 PCMK_EXEC_ERROR,
1267                                 "Invalid fence device action (bug?)");
1268         return;
1269     }
1270 
1271     if (do_monitor) {
1272         rc = execd_stonith_monitor(stonith_api, rsc, cmd);
1273         if (rc == pcmk_ok) {
1274             // Don't clean up yet, we will find out result of the monitor later
1275             return;
1276         }
1277     }
1278 
1279     stonith_action_complete(cmd,
1280                             ((rc == pcmk_ok)? CRM_EX_OK : CRM_EX_ERROR),
1281                             stonith__legacy2status(rc),
1282                             ((rc == -pcmk_err_generic)? NULL : pcmk_strerror(rc)));
1283 }
1284 
/*!
 * \internal
 * \brief Execute a resource action through the services library
 *
 * Creates a services action from the command (using a copy of the command's
 * parameters) and starts it asynchronously with action_complete() as the
 * completion callback. If the action cannot be created, or it already has a
 * result right after creation, the command is finalized immediately. When
 * built with SUPPORT_NAGIOS, a "stop" of a nagios resource is reported as
 * successful without running anything.
 *
 * \param[in,out] rsc  Resource the action is for (must not be NULL)
 * \param[in,out] cmd  Command to execute (must not be NULL)
 */
1285 static void
1286 execute_nonstonith_action(lrmd_rsc_t *rsc, lrmd_cmd_t *cmd)
1287 {
1288     svc_action_t *action = NULL;
1289     GHashTable *params_copy = NULL;
1290 
1291     CRM_ASSERT(rsc);
1292     CRM_ASSERT(cmd);
1293 
1294     crm_trace("Creating action, resource:%s action:%s class:%s provider:%s agent:%s",
1295               rsc->rsc_id, cmd->action, rsc->class, rsc->provider, rsc->type);
1296 
1297 #if SUPPORT_NAGIOS
1298     /* Recurring operations are cancelled anyway for a stop operation */
1299     if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_NAGIOS, pcmk__str_casei)
1300         && pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) {
1301 
1302         cmd->result.exit_status = PCMK_OCF_OK;
1303         cmd_finalize(cmd, rsc);
1304         return;
1305     }
1306 #endif
1307 
         // The action gets its own copy of the parameters, not cmd->params itself
1308     params_copy = pcmk__str_table_dup(cmd->params);
1309 
1310     action = services__create_resource_action(rsc->rsc_id, rsc->class, rsc->provider,
1311                                      rsc->type,
1312                                      normalize_action_name(rsc, cmd->action),
1313                                      cmd->interval_ms, cmd->timeout,
1314                                      params_copy, cmd->service_flags);
1315 
         // A NULL action is reported to the client as an out-of-memory error
1316     if (action == NULL) {
1317         pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
1318                          PCMK_EXEC_ERROR, strerror(ENOMEM));
1319         cmd_finalize(cmd, rsc);
1320         return;
1321     }
1322 
         // Any rc other than PCMK_OCF_UNKNOWN means a result is already available
1323     if (action->rc != PCMK_OCF_UNKNOWN) {
1324         pcmk__set_result(&(cmd->result), action->rc, action->status,
1325                          services__exit_reason(action));
1326         services_action_free(action);
1327         cmd_finalize(cmd, rsc);
1328         return;
1329     }
1330 
         // Lets action_complete() find the command when the action finishes
1331     action->cb_data = cmd;
1332 
1333     if (services_action_async(action, action_complete)) {
1334         /* The services library has taken responsibility for the action. It
1335          * could be pending, blocked, or merged into a duplicate recurring
1336          * action, in which case the action callback (action_complete())
1337          * will be called when the action completes, otherwise the callback has
1338          * already been called.
1339          *
1340          * action_complete() calls cmd_finalize() which can free cmd, so cmd
1341          * cannot be used here.
1342          */
1343     } else {
1344         /* This is a recurring action that is not being cancelled and could not
1345          * be initiated. It has been rescheduled, and the action callback
1346          * (action_complete()) has been called, which in this case has already
1347          * called cmd_finalize(), which in this case should only reset (not
1348          * free) cmd.
1349          */
1350 
             /* NOTE(review): cmd is written to after the callback has run; this is
              * only safe if cmd really was reset rather than freed, as the comment
              * above assumes.
              */
1351         pcmk__set_result(&(cmd->result), action->rc, action->status,
1352                          services__exit_reason(action));
1353         services_action_free(action);
1354     }
1355 }
1356 
1357 static gboolean
1358 execute_resource_action(gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
1359 {
1360     lrmd_rsc_t *rsc = (lrmd_rsc_t *) user_data;
1361     lrmd_cmd_t *cmd = NULL;
1362 
1363     CRM_CHECK(rsc != NULL, return FALSE);
1364 
1365     if (rsc->active) {
1366         crm_trace("%s is still active", rsc->rsc_id);
1367         return TRUE;
1368     }
1369 
1370     if (rsc->pending_ops) {
1371         GList *first = rsc->pending_ops;
1372 
1373         cmd = first->data;
1374         if (cmd->delay_id) {
1375             crm_trace
1376                 ("Command %s %s was asked to run too early, waiting for start_delay timeout of %dms",
1377                  cmd->rsc_id, cmd->action, cmd->start_delay);
1378             return TRUE;
1379         }
1380         rsc->pending_ops = g_list_remove_link(rsc->pending_ops, first);
1381         g_list_free_1(first);
1382 
1383 #ifdef PCMK__TIME_USE_CGT
1384         get_current_time(&(cmd->t_run), &(cmd->t_first_run));
1385 #endif
1386         cmd->epoch_last_run = time(NULL);
1387     }
1388 
1389     if (!cmd) {
1390         crm_trace("Nothing further to do for %s", rsc->rsc_id);
1391         return TRUE;
1392     }
1393 
1394     rsc->active = cmd;          /* only one op at a time for a rsc */
1395     if (cmd->interval_ms) {
1396         rsc->recurring_ops = g_list_append(rsc->recurring_ops, cmd);
1397     }
1398 
1399     log_execute(cmd);
1400 
1401     if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) {
1402         execute_stonith_action(rsc, cmd);
1403     } else {
1404         execute_nonstonith_action(rsc, cmd);
1405     }
1406 
1407     return TRUE;
1408 }
1409 
/*!
 * \internal
 * \brief Free a resource entry, cancelling its pending and recurring commands
 *
 * Pending commands are finalized as cancelled. Recurring commands of fence
 * device resources are marked cancelled and finalized unless they are
 * currently in flight; recurring commands of other resources are cancelled
 * through the services library.
 *
 * \param[in,out] data  Resource to free (lrmd_rsc_t *)
 */
1410 void
1411 free_rsc(gpointer data)
1412 {
1413     GList *gIter = NULL;
1414     lrmd_rsc_t *rsc = data;
1415     int is_stonith = pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH,
1416                                   pcmk__str_casei);
1417 
1418     gIter = rsc->pending_ops;
1419     while (gIter != NULL) {
             // Grab the successor first; finalizing a cancelled command frees it
1420         GList *next = gIter->next;
1421         lrmd_cmd_t *cmd = gIter->data;
1422 
1423         /* command was never executed */
1424         cmd->result.execution_status = PCMK_EXEC_CANCELLED;
             // rsc is passed as NULL, so the lists being walked are not modified
1425         cmd_finalize(cmd, NULL);
1426 
1427         gIter = next;
1428     }
1429     /* frees list, but not list elements. */
1430     g_list_free(rsc->pending_ops);
1431 
1432     gIter = rsc->recurring_ops;
1433     while (gIter != NULL) {
1434         GList *next = gIter->next;
1435         lrmd_cmd_t *cmd = gIter->data;
1436 
1437         if (is_stonith) {
1438             cmd->result.execution_status = PCMK_EXEC_CANCELLED;
1439             /* If a stonith command is in-flight, just mark it as cancelled;
1440              * it is not safe to finalize/free the cmd until the stonith api
1441              * says it has either completed or timed out.
1442              */
1443             if (rsc->active != cmd) {
1444                 cmd_finalize(cmd, NULL);
1445             }
1446         } else {
1447             /* This command is already handed off to service library,
1448              * let service library cancel it and tell us via the callback
1449              * when it is cancelled. The rsc can be safely destroyed
1450              * even if we are waiting for the cancel result */
1451             services_action_cancel(rsc->rsc_id,
1452                                    normalize_action_name(rsc, cmd->action),
1453                                    cmd->interval_ms);
1454         }
1455 
1456         gIter = next;
1457     }
1458     /* frees list, but not list elements. */
1459     g_list_free(rsc->recurring_ops);
1460 
         // Release the resource's own strings and its work trigger
1461     free(rsc->rsc_id);
1462     free(rsc->class);
1463     free(rsc->provider);
1464     free(rsc->type);
1465     mainloop_destroy_trigger(rsc->work);
1466 
1467     free(rsc);
1468 }
1469 
1470 static int
1471 process_lrmd_signon(pcmk__client_t *client, xmlNode *request, int call_id,
     /* [previous][next][first][last][top][bottom][index][help] */
1472                     xmlNode **reply)
1473 {
1474     int rc = pcmk_ok;
1475     const char *protocol_version = crm_element_value(request, F_LRMD_PROTOCOL_VERSION);
1476 
1477     if (compare_version(protocol_version, LRMD_MIN_PROTOCOL_VERSION) < 0) {
1478         crm_err("Cluster API version must be greater than or equal to %s, not %s",
1479                 LRMD_MIN_PROTOCOL_VERSION, protocol_version);
1480         rc = -EPROTO;
1481     }
1482 
1483     if (pcmk__xe_attr_is_true(request, F_LRMD_IS_IPC_PROVIDER)) {
1484 #ifdef PCMK__COMPILE_REMOTE
1485         if ((client->remote != NULL)
1486             && pcmk_is_set(client->flags,
1487                            pcmk__client_tls_handshake_complete)) {
1488 
1489             // This is a remote connection from a cluster node's controller
1490             ipc_proxy_add_provider(client);
1491         } else {
1492             rc = -EACCES;
1493         }
1494 #else
1495         rc = -EPROTONOSUPPORT;
1496 #endif
1497     }
1498 
1499     *reply = create_lrmd_reply(__func__, rc, call_id);
1500     crm_xml_add(*reply, F_LRMD_OPERATION, CRM_OP_REGISTER);
1501     crm_xml_add(*reply, F_LRMD_CLIENTID, client->id);
1502     crm_xml_add(*reply, F_LRMD_PROTOCOL_VERSION, LRMD_PROTOCOL_VERSION);
1503 
1504     return rc;
1505 }
1506 
1507 static int
1508 process_lrmd_rsc_register(pcmk__client_t *client, uint32_t id, xmlNode *request)
     /* [previous][next][first][last][top][bottom][index][help] */
1509 {
1510     int rc = pcmk_ok;
1511     lrmd_rsc_t *rsc = build_rsc_from_xml(request);
1512     lrmd_rsc_t *dup = g_hash_table_lookup(rsc_list, rsc->rsc_id);
1513 
1514     if (dup &&
1515         pcmk__str_eq(rsc->class, dup->class, pcmk__str_casei) &&
1516         pcmk__str_eq(rsc->provider, dup->provider, pcmk__str_casei) && pcmk__str_eq(rsc->type, dup->type, pcmk__str_casei)) {
1517 
1518         crm_notice("Ignoring duplicate registration of '%s'", rsc->rsc_id);
1519         free_rsc(rsc);
1520         return rc;
1521     }
1522 
1523     g_hash_table_replace(rsc_list, rsc->rsc_id, rsc);
1524     crm_info("Cached agent information for '%s'", rsc->rsc_id);
1525     return rc;
1526 }
1527 
1528 static xmlNode *
1529 process_lrmd_get_rsc_info(xmlNode *request, int call_id)
     /* [previous][next][first][last][top][bottom][index][help] */
1530 {
1531     int rc = pcmk_ok;
1532     xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR);
1533     const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
1534     xmlNode *reply = NULL;
1535     lrmd_rsc_t *rsc = NULL;
1536 
1537     if (rsc_id == NULL) {
1538         rc = -ENODEV;
1539     } else {
1540         rsc = g_hash_table_lookup(rsc_list, rsc_id);
1541         if (rsc == NULL) {
1542             crm_info("Agent information for '%s' not in cache", rsc_id);
1543             rc = -ENODEV;
1544         }
1545     }
1546 
1547     reply = create_lrmd_reply(__func__, rc, call_id);
1548     if (rsc) {
1549         crm_xml_add(reply, F_LRMD_RSC_ID, rsc->rsc_id);
1550         crm_xml_add(reply, F_LRMD_CLASS, rsc->class);
1551         crm_xml_add(reply, F_LRMD_PROVIDER, rsc->provider);
1552         crm_xml_add(reply, F_LRMD_TYPE, rsc->type);
1553     }
1554     return reply;
1555 }
1556 
1557 static int
1558 process_lrmd_rsc_unregister(pcmk__client_t *client, uint32_t id,
     /* [previous][next][first][last][top][bottom][index][help] */
1559                             xmlNode *request)
1560 {
1561     int rc = pcmk_ok;
1562     lrmd_rsc_t *rsc = NULL;
1563     xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR);
1564     const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
1565 
1566     if (!rsc_id) {
1567         return -ENODEV;
1568     }
1569 
1570     rsc = g_hash_table_lookup(rsc_list, rsc_id);
1571     if (rsc == NULL) {
1572         crm_info("Ignoring unregistration of resource '%s', which is not registered",
1573                  rsc_id);
1574         return pcmk_ok;
1575     }
1576 
1577     if (rsc->active) {
1578         /* let the caller know there are still active ops on this rsc to watch for */
1579         crm_trace("Operation (%p) still in progress for unregistered resource %s",
1580                   rsc->active, rsc_id);
1581         rc = -EINPROGRESS;
1582     }
1583 
1584     g_hash_table_remove(rsc_list, rsc_id);
1585 
1586     return rc;
1587 }
1588 
1589 static int
1590 process_lrmd_rsc_exec(pcmk__client_t *client, uint32_t id, xmlNode *request)
     /* [previous][next][first][last][top][bottom][index][help] */
1591 {
1592     lrmd_rsc_t *rsc = NULL;
1593     lrmd_cmd_t *cmd = NULL;
1594     xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR);
1595     const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
1596     int call_id;
1597 
1598     if (!rsc_id) {
1599         return -EINVAL;
1600     }
1601     if (!(rsc = g_hash_table_lookup(rsc_list, rsc_id))) {
1602         crm_info("Resource '%s' not found (%d active resources)",
1603                  rsc_id, g_hash_table_size(rsc_list));
1604         return -ENODEV;
1605     }
1606 
1607     cmd = create_lrmd_cmd(request, client);
1608     call_id = cmd->call_id;
1609 
1610     /* Don't reference cmd after handing it off to be scheduled.
1611      * The cmd could get merged and freed. */
1612     schedule_lrmd_cmd(rsc, cmd);
1613 
1614     return call_id;
1615 }
1616 
1617 static int
1618 cancel_op(const char *rsc_id, const char *action, guint interval_ms)
     /* [previous][next][first][last][top][bottom][index][help] */
1619 {
1620     GList *gIter = NULL;
1621     lrmd_rsc_t *rsc = g_hash_table_lookup(rsc_list, rsc_id);
1622 
1623     /* How to cancel an action.
1624      * 1. Check pending ops list, if it hasn't been handed off
1625      *    to the service library or stonith recurring list remove
1626      *    it there and that will stop it.
1627      * 2. If it isn't in the pending ops list, then it's either a
1628      *    recurring op in the stonith recurring list, or the service
1629      *    library's recurring list.  Stop it there
1630      * 3. If not found in any lists, then this operation has either
1631      *    been executed already and is not a recurring operation, or
1632      *    never existed.
1633      */
1634     if (!rsc) {
1635         return -ENODEV;
1636     }
1637 
1638     for (gIter = rsc->pending_ops; gIter != NULL; gIter = gIter->next) {
1639         lrmd_cmd_t *cmd = gIter->data;
1640 
1641         if (action_matches(cmd, action, interval_ms)) {
1642             cmd->result.execution_status = PCMK_EXEC_CANCELLED;
1643             cmd_finalize(cmd, rsc);
1644             return pcmk_ok;
1645         }
1646     }
1647 
1648     if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) {
1649         /* The service library does not handle stonith operations.
1650          * We have to handle recurring stonith operations ourselves. */
1651         for (gIter = rsc->recurring_ops; gIter != NULL; gIter = gIter->next) {
1652             lrmd_cmd_t *cmd = gIter->data;
1653 
1654             if (action_matches(cmd, action, interval_ms)) {
1655                 cmd->result.execution_status = PCMK_EXEC_CANCELLED;
1656                 if (rsc->active != cmd) {
1657                     cmd_finalize(cmd, rsc);
1658                 }
1659                 return pcmk_ok;
1660             }
1661         }
1662     } else if (services_action_cancel(rsc_id,
1663                                       normalize_action_name(rsc, action),
1664                                       interval_ms) == TRUE) {
1665         /* The service library will tell the action_complete callback function
1666          * this action was cancelled, which will destroy the cmd and remove
1667          * it from the recurring_op list. Do not do that in this function
1668          * if the service library says it cancelled it. */
1669         return pcmk_ok;
1670     }
1671 
1672     return -EOPNOTSUPP;
1673 }
1674 
1675 static void
1676 cancel_all_recurring(lrmd_rsc_t * rsc, const char *client_id)
     /* [previous][next][first][last][top][bottom][index][help] */
1677 {
1678     GList *cmd_list = NULL;
1679     GList *cmd_iter = NULL;
1680 
1681     /* Notice a copy of each list is created when concat is called.
1682      * This prevents odd behavior from occurring when the cmd_list
1683      * is iterated through later on.  It is possible the cancel_op
1684      * function may end up modifying the recurring_ops and pending_ops
1685      * lists.  If we did not copy those lists, our cmd_list iteration
1686      * could get messed up.*/
1687     if (rsc->recurring_ops) {
1688         cmd_list = g_list_concat(cmd_list, g_list_copy(rsc->recurring_ops));
1689     }
1690     if (rsc->pending_ops) {
1691         cmd_list = g_list_concat(cmd_list, g_list_copy(rsc->pending_ops));
1692     }
1693     if (!cmd_list) {
1694         return;
1695     }
1696 
1697     for (cmd_iter = cmd_list; cmd_iter; cmd_iter = cmd_iter->next) {
1698         lrmd_cmd_t *cmd = cmd_iter->data;
1699 
1700         if (cmd->interval_ms == 0) {
1701             continue;
1702         }
1703 
1704         if (client_id && !pcmk__str_eq(cmd->client_id, client_id, pcmk__str_casei)) {
1705             continue;
1706         }
1707 
1708         cancel_op(rsc->rsc_id, cmd->action, cmd->interval_ms);
1709     }
1710     /* frees only the copied list data, not the cmds */
1711     g_list_free(cmd_list);
1712 }
1713 
1714 static int
1715 process_lrmd_rsc_cancel(pcmk__client_t *client, uint32_t id, xmlNode *request)
     /* [previous][next][first][last][top][bottom][index][help] */
1716 {
1717     xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR);
1718     const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
1719     const char *action = crm_element_value(rsc_xml, F_LRMD_RSC_ACTION);
1720     guint interval_ms = 0;
1721 
1722     crm_element_value_ms(rsc_xml, F_LRMD_RSC_INTERVAL, &interval_ms);
1723 
1724     if (!rsc_id || !action) {
1725         return -EINVAL;
1726     }
1727 
1728     return cancel_op(rsc_id, action, interval_ms);
1729 }
1730 
1731 static void
1732 add_recurring_op_xml(xmlNode *reply, lrmd_rsc_t *rsc)
     /* [previous][next][first][last][top][bottom][index][help] */
1733 {
1734     xmlNode *rsc_xml = create_xml_node(reply, F_LRMD_RSC);
1735 
1736     crm_xml_add(rsc_xml, F_LRMD_RSC_ID, rsc->rsc_id);
1737     for (GList *item = rsc->recurring_ops; item != NULL; item = item->next) {
1738         lrmd_cmd_t *cmd = item->data;
1739         xmlNode *op_xml = create_xml_node(rsc_xml, T_LRMD_RSC_OP);
1740 
1741         crm_xml_add(op_xml, F_LRMD_RSC_ACTION,
1742                     (cmd->real_action? cmd->real_action : cmd->action));
1743         crm_xml_add_ms(op_xml, F_LRMD_RSC_INTERVAL, cmd->interval_ms);
1744         crm_xml_add_int(op_xml, F_LRMD_TIMEOUT, cmd->timeout_orig);
1745     }
1746 }
1747 
1748 static xmlNode *
1749 process_lrmd_get_recurring(xmlNode *request, int call_id)
     /* [previous][next][first][last][top][bottom][index][help] */
1750 {
1751     int rc = pcmk_ok;
1752     const char *rsc_id = NULL;
1753     lrmd_rsc_t *rsc = NULL;
1754     xmlNode *reply = NULL;
1755     xmlNode *rsc_xml = NULL;
1756 
1757     // Resource ID is optional
1758     rsc_xml = first_named_child(request, F_LRMD_CALLDATA);
1759     if (rsc_xml) {
1760         rsc_xml = first_named_child(rsc_xml, F_LRMD_RSC);
1761     }
1762     if (rsc_xml) {
1763         rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
1764     }
1765 
1766     // If resource ID is specified, resource must exist
1767     if (rsc_id != NULL) {
1768         rsc = g_hash_table_lookup(rsc_list, rsc_id);
1769         if (rsc == NULL) {
1770             crm_info("Resource '%s' not found (%d active resources)",
1771                      rsc_id, g_hash_table_size(rsc_list));
1772             rc = -ENODEV;
1773         }
1774     }
1775 
1776     reply = create_lrmd_reply(__func__, rc, call_id);
1777 
1778     // If resource ID is not specified, check all resources
1779     if (rsc_id == NULL) {
1780         GHashTableIter iter;
1781         char *key = NULL;
1782 
1783         g_hash_table_iter_init(&iter, rsc_list);
1784         while (g_hash_table_iter_next(&iter, (gpointer *) &key,
1785                                       (gpointer *) &rsc)) {
1786             add_recurring_op_xml(reply, rsc);
1787         }
1788     } else if (rsc) {
1789         add_recurring_op_xml(reply, rsc);
1790     }
1791     return reply;
1792 }
1793 
1794 void
1795 process_lrmd_message(pcmk__client_t *client, uint32_t id, xmlNode *request)
     /* [previous][next][first][last][top][bottom][index][help] */
1796 {
1797     int rc = pcmk_ok;
1798     int call_id = 0;
1799     const char *op = crm_element_value(request, F_LRMD_OPERATION);
1800     int do_reply = 0;
1801     int do_notify = 0;
1802     xmlNode *reply = NULL;
1803 
1804     /* Certain IPC commands may be done only by privileged users (i.e. root or
1805      * hacluster), because they would otherwise provide a means of bypassing
1806      * ACLs.
1807      */
1808     bool allowed = pcmk_is_set(client->flags, pcmk__client_privileged);
1809 
1810     crm_trace("Processing %s operation from %s", op, client->id);
1811     crm_element_value_int(request, F_LRMD_CALLID, &call_id);
1812 
1813     if (pcmk__str_eq(op, CRM_OP_IPC_FWD, pcmk__str_none)) {
1814 #ifdef PCMK__COMPILE_REMOTE
1815         if (allowed) {
1816             ipc_proxy_forward_client(client, request);
1817         } else {
1818             rc = -EACCES;
1819         }
1820 #else
1821         rc = -EPROTONOSUPPORT;
1822 #endif
1823         do_reply = 1;
1824     } else if (pcmk__str_eq(op, CRM_OP_REGISTER, pcmk__str_none)) {
1825         rc = process_lrmd_signon(client, request, call_id, &reply);
1826         do_reply = 1;
1827     } else if (pcmk__str_eq(op, LRMD_OP_RSC_REG, pcmk__str_none)) {
1828         if (allowed) {
1829             rc = process_lrmd_rsc_register(client, id, request);
1830             do_notify = 1;
1831         } else {
1832             rc = -EACCES;
1833         }
1834         do_reply = 1;
1835     } else if (pcmk__str_eq(op, LRMD_OP_RSC_INFO, pcmk__str_none)) {
1836         if (allowed) {
1837             reply = process_lrmd_get_rsc_info(request, call_id);
1838         } else {
1839             rc = -EACCES;
1840         }
1841         do_reply = 1;
1842     } else if (pcmk__str_eq(op, LRMD_OP_RSC_UNREG, pcmk__str_none)) {
1843         if (allowed) {
1844             rc = process_lrmd_rsc_unregister(client, id, request);
1845             /* don't notify anyone about failed un-registers */
1846             if (rc == pcmk_ok || rc == -EINPROGRESS) {
1847                 do_notify = 1;
1848             }
1849         } else {
1850             rc = -EACCES;
1851         }
1852         do_reply = 1;
1853     } else if (pcmk__str_eq(op, LRMD_OP_RSC_EXEC, pcmk__str_none)) {
1854         if (allowed) {
1855             rc = process_lrmd_rsc_exec(client, id, request);
1856         } else {
1857             rc = -EACCES;
1858         }
1859         do_reply = 1;
1860     } else if (pcmk__str_eq(op, LRMD_OP_RSC_CANCEL, pcmk__str_none)) {
1861         if (allowed) {
1862             rc = process_lrmd_rsc_cancel(client, id, request);
1863         } else {
1864             rc = -EACCES;
1865         }
1866         do_reply = 1;
1867     } else if (pcmk__str_eq(op, LRMD_OP_POKE, pcmk__str_none)) {
1868         do_notify = 1;
1869         do_reply = 1;
1870     } else if (pcmk__str_eq(op, LRMD_OP_CHECK, pcmk__str_none)) {
1871         if (allowed) {
1872             xmlNode *data = get_message_xml(request, F_LRMD_CALLDATA);
1873 
1874             CRM_LOG_ASSERT(data != NULL);
1875             pcmk__valid_sbd_timeout(crm_element_value(data, F_LRMD_WATCHDOG));
1876         } else {
1877             rc = -EACCES;
1878         }
1879     } else if (pcmk__str_eq(op, LRMD_OP_ALERT_EXEC, pcmk__str_none)) {
1880         if (allowed) {
1881             rc = process_lrmd_alert_exec(client, id, request);
1882         } else {
1883             rc = -EACCES;
1884         }
1885         do_reply = 1;
1886     } else if (pcmk__str_eq(op, LRMD_OP_GET_RECURRING, pcmk__str_none)) {
1887         if (allowed) {
1888             reply = process_lrmd_get_recurring(request, call_id);
1889         } else {
1890             rc = -EACCES;
1891         }
1892         do_reply = 1;
1893     } else {
1894         rc = -EOPNOTSUPP;
1895         do_reply = 1;
1896         crm_err("Unknown IPC request '%s' from client %s",
1897                 op, pcmk__client_name(client));
1898     }
1899 
1900     if (rc == -EACCES) {
1901         crm_warn("Rejecting IPC request '%s' from unprivileged client %s",
1902                  op, pcmk__client_name(client));
1903     }
1904 
1905     crm_debug("Processed %s operation from %s: rc=%d, reply=%d, notify=%d",
1906               op, client->id, rc, do_reply, do_notify);
1907 
1908     if (do_reply) {
1909         int send_rc = pcmk_rc_ok;
1910 
1911         if (reply == NULL) {
1912             reply = create_lrmd_reply(__func__, rc, call_id);
1913         }
1914         send_rc = lrmd_server_send_reply(client, id, reply);
1915         free_xml(reply);
1916         if (send_rc != pcmk_rc_ok) {
1917             crm_warn("Reply to client %s failed: %s " CRM_XS " rc=%d",
1918                      pcmk__client_name(client), pcmk_rc_str(send_rc), send_rc);
1919         }
1920     }
1921 
1922     if (do_notify) {
1923         send_generic_notify(rc, request);
1924     }
1925 }

/* [previous][next][first][last][top][bottom][index][help] */