pacemaker  1.1.14-70404b0
Scalable High-Availability cluster resource manager
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
cluster.c
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
3  *
4  * This library is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * This library is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12  * Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with this library; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17  */
18 
19 #include <crm_internal.h>
20 #include <dlfcn.h>
21 
22 #include <stdio.h>
23 #include <unistd.h>
24 #include <string.h>
25 #include <stdlib.h>
26 #include <time.h>
27 #include <sys/param.h>
28 #include <sys/types.h>
29 #include <sys/utsname.h>
30 
31 #include <crm/crm.h>
32 #include <crm/msg_xml.h>
33 
34 #include <crm/common/ipc.h>
35 #include <crm/cluster/internal.h>
36 
37 CRM_TRACE_INIT_DATA(cluster);
38 
39 #if SUPPORT_HEARTBEAT
40 void *hb_library = NULL;
41 #endif
42 
/*!
 * \internal
 * \brief Ask heartbeat for the UUID of a node, given its name
 *
 * \param[in] uname  Node name to look up (may be NULL)
 *
 * \return Newly allocated UUID string that the caller must free, or NULL if
 *         heartbeat support is not compiled in, there is no heartbeat
 *         connection, \p uname is NULL, the lookup fails, memory cannot be
 *         allocated, or heartbeat reports the all-zero UUID
 */
static char *
get_heartbeat_uuid(const char *uname)
{
    char *uuid_calc = NULL;

#if SUPPORT_HEARTBEAT
    cl_uuid_t uuid_raw;
    const char *unknown = "00000000-0000-0000-0000-000000000000";

    if (heartbeat_cluster == NULL) {
        crm_warn("No connection to heartbeat, using uuid=uname");
        return NULL;
    } else if (uname == NULL) {
        return NULL;
    }

    /* Nothing has been allocated yet, so there is nothing to release here */
    if (heartbeat_cluster->llc_ops->get_uuid_by_name(heartbeat_cluster, uname, &uuid_raw) ==
        HA_FAIL) {
        crm_err("get_uuid_by_name() call failed for host %s", uname);
        return NULL;
    }

    uuid_calc = calloc(1, 50);
    if (uuid_calc == NULL) {
        /* Do not hand a NULL buffer to cl_uuid_unparse() */
        crm_err("Could not allocate memory for the UUID of %s", uname);
        return NULL;
    }
    cl_uuid_unparse(&uuid_raw, uuid_calc);

    /* The all-zero UUID is treated as "no UUID available" */
    if (safe_str_eq(uuid_calc, unknown)) {
        crm_warn("Could not calculate UUID for %s", uname);
        free(uuid_calc);
        return NULL;
    }
#endif
    return uuid_calc;
}
77 
78 static gboolean
79 uname_is_uuid(void)
80 {
81  static const char *uuid_pref = NULL;
82 
83  if (uuid_pref == NULL) {
84  uuid_pref = getenv("PCMK_uname_is_uuid");
85  }
86 
87  if (uuid_pref == NULL) {
88  /* true is legacy mode */
89  uuid_pref = "false";
90  }
91 
92  return crm_is_true(uuid_pref);
93 }
94 
/*!
 * \brief Derive a corosync node id from a UUID string when none is known yet
 *
 * \param[in] id    Node id already known, or 0 if unknown
 * \param[in] uuid  UUID string to parse when \p id is 0
 *
 * \return \p id unchanged if it is non-zero, if unames are used as UUIDs, or
 *         if this is not a corosync cluster; otherwise \p uuid parsed as an
 *         integer (with "0" as the default text)
 */
int
get_corosync_id(int id, const char *uuid)
{
    if (id != 0) {
        return id;
    }
    if (uname_is_uuid()) {
        return id;
    }
    if (!is_corosync_cluster()) {
        return id;
    }
    return crm_atoi(uuid, "0");
}
104 
105 char *
107 {
108  if(node == NULL) {
109  return NULL;
110 
111  } else if (!uname_is_uuid() && is_corosync_cluster()) {
112  if (node->id > 0) {
113  int len = 32;
114  char *buffer = NULL;
115 
116  buffer = calloc(1, (len + 1));
117  if (buffer != NULL) {
118  snprintf(buffer, len, "%u", node->id);
119  }
120 
121  return buffer;
122 
123  } else {
124  crm_info("Node %s is not yet known by corosync", node->uname);
125  }
126 
127  } else if (node->uname != NULL) {
128  return strdup(node->uname);
129  }
130 
131  return NULL;
132 }
133 
134 const char *
136 {
137  char *uuid = NULL;
139 
140  /* avoid blocking heartbeat calls where possible */
141  if(peer == NULL) {
142  return NULL;
143 
144  } else if (peer->uuid) {
145  return peer->uuid;
146  }
147 
148  switch (type) {
150  uuid = get_corosync_uuid(peer);
151  break;
152 
153  case pcmk_cluster_cman:
155  if (peer->uname) {
156  uuid = strdup(peer->uname);
157  }
158  break;
159 
161  uuid = get_heartbeat_uuid(peer->uname);
162  break;
163 
166  crm_err("Unsupported cluster type");
167  break;
168  }
169 
170  peer->uuid = uuid;
171  return peer->uuid;
172 }
173 
174 gboolean
176 {
178 
179  crm_notice("Connecting to cluster infrastructure: %s", name_for_cluster_type(type));
180 #if SUPPORT_COROSYNC
181  if (is_openais_cluster()) {
182  crm_peer_init();
183  return init_cs_connection(cluster);
184  }
185 #endif
186 
187 #if SUPPORT_HEARTBEAT
188  if (is_heartbeat_cluster()) {
189  int rv;
190 
191  /* coverity[var_deref_op] False positive */
192  if (cluster->hb_conn == NULL) {
193  /* No object passed in, create a new one. */
194  ll_cluster_t *(*new_cluster) (const char *llctype) =
195  find_library_function(&hb_library, HEARTBEAT_LIBRARY, "ll_cluster_new", 1);
196 
197  cluster->hb_conn = (*new_cluster) ("heartbeat");
198  /* dlclose(handle); */
199 
200  } else {
201  /* Object passed in. Disconnect first, then reconnect below. */
202  cluster->hb_conn->llc_ops->signoff(cluster->hb_conn, FALSE);
203  }
204 
205  /* make sure we are disconnected first with the old object, if any. */
206  if (heartbeat_cluster && heartbeat_cluster != cluster->hb_conn) {
207  heartbeat_cluster->llc_ops->signoff(heartbeat_cluster, FALSE);
208  }
209 
210  CRM_ASSERT(cluster->hb_conn != NULL);
211  heartbeat_cluster = cluster->hb_conn;
212 
213  rv = register_heartbeat_conn(cluster);
214  if (rv) {
215  /* we'll benefit from a bigger queue length on heartbeat side.
216  * Otherwise, if peers send messages faster than we can consume
217  * them right now, heartbeat messaging layer will kick us out once
218  * its (small) default queue fills up :(
219  * If we fail to adjust the sendq length, that's not yet fatal, though.
220  */
221  if (HA_OK != heartbeat_cluster->llc_ops->set_sendq_len(heartbeat_cluster, 1024)) {
222  crm_warn("Cannot set sendq length: %s",
223  heartbeat_cluster->llc_ops->errmsg(heartbeat_cluster));
224  }
225  }
226  return rv;
227  }
228 #endif
229  crm_info("Unsupported cluster stack: %s", getenv("HA_cluster_type"));
230  return FALSE;
231 }
232 
233 void
235 {
237  const char *type_str = name_for_cluster_type(type);
238 
239  crm_info("Disconnecting from cluster infrastructure: %s", type_str);
240 #if SUPPORT_COROSYNC
241  if (is_openais_cluster()) {
243  terminate_cs_connection(cluster);
244  crm_info("Disconnected from %s", type_str);
245  return;
246  }
247 #endif
248 
249 #if SUPPORT_HEARTBEAT
250  if (is_heartbeat_cluster()) {
251  if (cluster == NULL) {
252  crm_info("No cluster connection");
253  return;
254 
255  } else if (cluster->hb_conn) {
256  cluster->hb_conn->llc_ops->signoff(cluster->hb_conn, TRUE);
257  cluster->hb_conn = NULL;
258  crm_info("Disconnected from %s", type_str);
259  return;
260 
261  } else {
262  crm_info("No %s connection", type_str);
263  return;
264  }
265  }
266 #endif
267  crm_info("Unsupported cluster stack: %s", getenv("HA_cluster_type"));
268 }
269 
270 gboolean
271 send_cluster_message(crm_node_t * node, enum crm_ais_msg_types service, xmlNode * data,
272  gboolean ordered)
273 {
274 
275 #if SUPPORT_COROSYNC
276  if (is_openais_cluster()) {
277  return send_cluster_message_cs(data, FALSE, node, service);
278  }
279 #endif
280 #if SUPPORT_HEARTBEAT
281  if (is_heartbeat_cluster()) {
282  return send_ha_message(heartbeat_cluster, data, node ? node->uname : NULL, ordered);
283  }
284 #endif
285  return FALSE;
286 }
287 
/*!
 * \brief Get the name of the local node
 *
 * The name is obtained with get_node_name(0) and kept in a function-local
 * static, so a successful lookup is performed only once. A NULL result is not
 * remembered and the lookup is retried on the next call.
 *
 * \return The remembered local node name, or NULL if it could not be obtained
 */
const char *
get_local_node_name(void)
{
    static char *name = NULL;

    if (name == NULL) {
        name = get_node_name(0);
    }
    return name;
}
299 
300 char *
302 {
303  char *name = NULL;
304  const char *isolation_host = NULL;
305  enum cluster_type_e stack;
306 
307  if (nodeid == 0) {
308  isolation_host = getenv("OCF_RESKEY_"CRM_META"_isolation_host");
309  if (isolation_host) {
310  return strdup(isolation_host);
311  }
312  }
313 
314  stack = get_cluster_type();
315  switch (stack) {
317  break;
318 
319 #if SUPPORT_PLUGIN
321  name = classic_node_name(nodeid);
322  break;
323 #else
324 # if SUPPORT_COROSYNC
326  name = corosync_node_name(0, nodeid);
327  break;
328 # endif
329 #endif
330 
331 #if SUPPORT_CMAN
332  case pcmk_cluster_cman:
333  name = cman_node_name(nodeid);
334  break;
335 #endif
336 
337  default:
338  crm_err("Unknown cluster type: %s (%d)", name_for_cluster_type(stack), stack);
339  }
340 
341  if(name == NULL && nodeid == 0) {
342  struct utsname res;
343  int rc = uname(&res);
344 
345  if (rc == 0) {
346  crm_notice("Defaulting to uname -n for the local %s node name",
347  name_for_cluster_type(stack));
348  name = strdup(res.nodename);
349  }
350 
351  if (name == NULL) {
352  crm_err("Could not obtain the local %s node name", name_for_cluster_type(stack));
354  }
355  }
356 
357  if (name == NULL) {
358  crm_notice("Could not obtain a node name for %s nodeid %u",
359  name_for_cluster_type(stack), nodeid);
360  }
361  return name;
362 }
363 
364 const char *
365 crm_peer_uname(const char *uuid)
366 {
367  GHashTableIter iter;
368  crm_node_t *node = NULL;
369 
370  CRM_CHECK(uuid != NULL, return NULL);
371 
372  /* remote nodes have the same uname and uuid */
373  if (g_hash_table_lookup(crm_remote_peer_cache, uuid)) {
374  return uuid;
375  }
376 
377  /* avoid blocking calls where possible */
378  g_hash_table_iter_init(&iter, crm_peer_cache);
379  while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
380  if(node->uuid && strcasecmp(node->uuid, uuid) == 0) {
381  if(node->uname) {
382  return node->uname;
383  }
384  break;
385  }
386  }
387 
388 #if SUPPORT_COROSYNC
389  if (is_openais_cluster()) {
390  if (uname_is_uuid() == FALSE && is_corosync_cluster()) {
391  uint32_t id = crm_int_helper(uuid, NULL);
392  if(id != 0) {
393  node = crm_find_peer(id, NULL);
394  } else {
395  crm_err("Invalid node id: %s", uuid);
396  }
397 
398  } else {
399  node = crm_find_peer(0, uuid);
400  }
401 
402  if (node) {
403  crm_info("Setting uuid for node %s[%u] to '%s'", node->uname, node->id, uuid);
404  node->uuid = strdup(uuid);
405  if(node->uname) {
406  return node->uname;
407  }
408  }
409  return NULL;
410  }
411 #endif
412 
413 #if SUPPORT_HEARTBEAT
414  if (is_heartbeat_cluster()) {
415  if (heartbeat_cluster != NULL) {
416  cl_uuid_t uuid_raw;
417  char *uuid_copy = strdup(uuid);
418  char *uname = malloc(MAX_NAME);
419 
420  cl_uuid_parse(uuid_copy, &uuid_raw);
421 
422  if (heartbeat_cluster->llc_ops->get_name_by_uuid(heartbeat_cluster, &uuid_raw, uname,
423  MAX_NAME) == HA_FAIL) {
424  crm_err("Could not calculate uname for %s", uuid);
425  } else {
426  node = crm_get_peer(0, uname);
427  }
428 
429  free(uuid_copy);
430  free(uname);
431  }
432 
433  if (node) {
434  crm_info("Setting uuid for node %s to '%s'", node->uname, uuid);
435  node->uuid = strdup(uuid);
436  if(node->uname) {
437  return node->uname;
438  }
439  }
440  return NULL;
441  }
442 #endif
443 
444  return NULL;
445 }
446 
447 void
448 set_uuid(xmlNode *xml, const char *attr, crm_node_t *node)
449 {
450  const char *uuid_calc = crm_peer_uuid(node);
451 
452  crm_xml_add(xml, attr, uuid_calc);
453  return;
454 }
455 
456 const char *
458 {
459  switch (type) {
461  return "classic openais (with plugin)";
462  case pcmk_cluster_cman:
463  return "cman";
465  return "corosync";
467  return "heartbeat";
469  return "unknown";
471  return "invalid";
472  }
473  crm_err("Invalid cluster type: %d", type);
474  return "invalid";
475 }
476 
477 /* Do not expose these two */
479 static enum cluster_type_e cluster_type = pcmk_cluster_unknown;
480 
481 int
483 {
484  if (cluster_type == pcmk_cluster_unknown) {
485  crm_info("Cluster type set to: %s", name_for_cluster_type(type));
486  cluster_type = type;
487  return 0;
488 
489  } else if (cluster_type == type) {
490  return 0;
491 
492  } else if (pcmk_cluster_unknown == type) {
493  cluster_type = type;
494  return 0;
495  }
496 
497  crm_err("Cluster type already set to %s, ignoring %s",
498  name_for_cluster_type(cluster_type), name_for_cluster_type(type));
499  return -1;
500 }
501 enum cluster_type_e
503 {
504  bool detected = FALSE;
505  const char *cluster = NULL;
506 
507  /* Return the previous calculation, if any */
508  if (cluster_type != pcmk_cluster_unknown) {
509  return cluster_type;
510  }
511 
512  cluster = getenv("HA_cluster_type");
513 
514 #if SUPPORT_HEARTBEAT
515  /* If nothing is defined in the environment, try heartbeat (if supported) */
516  if(cluster == NULL) {
517  ll_cluster_t *hb;
518  ll_cluster_t *(*new_cluster) (const char *llctype) = find_library_function(
519  &hb_library, HEARTBEAT_LIBRARY, "ll_cluster_new", 1);
520 
521  hb = (*new_cluster) ("heartbeat");
522 
523  crm_debug("Testing with Heartbeat");
524  /*
525  * Test as "casual" client (clientid == NULL; will be replaced by
526  * current pid). We are trying to detect if we can communicate with
527  * heartbeat, not if we can register as some specific service.
528  * Otherwise all but one of several concurrent invocations will get
529  * HA_FAIL because of:
530  * WARN: duplicate client add request
531  * ERROR: api_process_registration_msg: cannot add client()
532  * and then likely fail :(
533  */
534  if (hb->llc_ops->signon(hb, NULL) == HA_OK) {
535  hb->llc_ops->signoff(hb, FALSE);
536 
537  cluster_type = pcmk_cluster_heartbeat;
538  detected = TRUE;
539  goto done;
540  }
541  }
542 #endif
543 
544 #if SUPPORT_COROSYNC
545  /* If nothing is defined in the environment, try corosync (if supported) */
546  if(cluster == NULL) {
547  crm_debug("Testing with Corosync");
548  cluster_type = find_corosync_variant();
549  if (cluster_type != pcmk_cluster_unknown) {
550  detected = TRUE;
551  goto done;
552  }
553  }
554 #endif
555 
556  /* Something was defined in the environment, test it against what we support */
557  crm_info("Verifying cluster type: '%s'", cluster?cluster:"-unspecified-");
558  if (cluster == NULL) {
559 
560 #if SUPPORT_HEARTBEAT
561  } else if (safe_str_eq(cluster, "heartbeat")) {
562  cluster_type = pcmk_cluster_heartbeat;
563 #endif
564 
565 #if SUPPORT_COROSYNC
566  } else if (safe_str_eq(cluster, "openais")
567  || safe_str_eq(cluster, "classic openais (with plugin)")) {
568  cluster_type = pcmk_cluster_classic_ais;
569 
570  } else if (safe_str_eq(cluster, "corosync")) {
571  cluster_type = pcmk_cluster_corosync;
572 #endif
573 
574 #if SUPPORT_CMAN
575  } else if (safe_str_eq(cluster, "cman")) {
576  cluster_type = pcmk_cluster_cman;
577 #endif
578 
579  } else {
580  cluster_type = pcmk_cluster_invalid;
581  goto done; /* Keep the compiler happy when no stacks are supported */
582  }
583 
584  done:
585  if (cluster_type == pcmk_cluster_unknown) {
586  crm_notice("Could not determine the current cluster type");
587 
588  } else if (cluster_type == pcmk_cluster_invalid) {
589  crm_notice("This installation does not support the '%s' cluster infrastructure: terminating.",
590  cluster);
592 
593  } else {
594  crm_info("%s an active '%s' cluster", detected?"Detected":"Assuming", name_for_cluster_type(cluster_type));
595  }
596 
597  return cluster_type;
598 }
599 
600 gboolean
602 {
604 }
605 
606 gboolean
608 {
610 }
611 
612 gboolean
614 {
616 }
617 
618 gboolean
620 {
622 
623  if (type == pcmk_cluster_classic_ais) {
624  return TRUE;
625  } else if (type == pcmk_cluster_corosync) {
626  return TRUE;
627  } else if (type == pcmk_cluster_cman) {
628  return TRUE;
629  }
630  return FALSE;
631 }
632 
633 gboolean
635 {
637 }
638 
639 gboolean
640 node_name_is_valid(const char *key, const char *name)
641 {
642  int octet;
643 
644  if (name == NULL) {
645  crm_trace("%s is empty", key);
646  return FALSE;
647 
648  } else if (sscanf(name, "%d.%d.%d.%d", &octet, &octet, &octet, &octet) == 4) {
649  crm_trace("%s contains an ipv4 address, ignoring: %s", key, name);
650  return FALSE;
651 
652  } else if (strstr(name, ":") != NULL) {
653  crm_trace("%s contains an ipv6 address, ignoring: %s", key, name);
654  return FALSE;
655  }
656  crm_trace("%s is valid", key);
657  return TRUE;
658 }
void crm_peer_destroy(void)
Definition: membership.c:274
#define CRM_CHECK(expr, failure_action)
Definition: logging.h:164
gboolean is_classic_ais_cluster(void)
Definition: cluster.c:613
A dumping ground.
void * find_library_function(void **handle, const char *lib, const char *fn, int fatal)
#define crm_notice(fmt, args...)
Definition: logging.h:250
crm_ais_msg_types
Definition: cluster.h:125
gboolean send_cluster_message(crm_node_t *node, enum crm_ais_msg_types service, xmlNode *data, gboolean ordered)
Definition: cluster.c:271
char * corosync_node_name(uint64_t cmap_handle, uint32_t nodeid)
Definition: corosync.c:52
uint32_t id
Definition: cluster.h:70
enum cluster_type_e get_cluster_type(void)
Definition: cluster.c:502
char * uuid
Definition: cluster.h:80
void terminate_cs_connection(crm_cluster_t *cluster)
Definition: corosync.c:140
const char * crm_peer_uuid(crm_node_t *peer)
Definition: cluster.c:135
void crm_peer_init(void)
Definition: membership.c:262
GHashTable * crm_remote_peer_cache
Definition: membership.c:36
crm_node_t * crm_get_peer(unsigned int id, const char *uname)
Definition: membership.c:519
void crm_cluster_disconnect(crm_cluster_t *cluster)
Definition: cluster.c:234
char * get_corosync_uuid(crm_node_t *node)
Definition: cluster.c:106
const char * get_local_node_name(void)
Definition: cluster.c:289
gboolean init_cs_connection(crm_cluster_t *cluster)
Definition: corosync.c:300
gboolean is_corosync_cluster(void)
Definition: cluster.c:607
char uname[MAX_NAME]
Definition: internal.h:53
#define crm_warn(fmt, args...)
Definition: logging.h:249
#define crm_atoi(text, default_text)
Definition: util.h:87
gboolean node_name_is_valid(const char *key, const char *name)
Definition: cluster.c:640
char * get_node_name(uint32_t nodeid)
Definition: cluster.c:301
uint32_t id
Definition: internal.h:48
#define crm_debug(fmt, args...)
Definition: logging.h:253
cluster_type_e
Definition: cluster.h:206
const char * crm_peer_uname(const char *uuid)
Definition: cluster.c:365
const char * name_for_cluster_type(enum cluster_type_e type)
Definition: cluster.c:457
#define crm_trace(fmt, args...)
Definition: logging.h:254
int get_corosync_id(int id, const char *uuid)
Definition: cluster.c:96
CRM_TRACE_INIT_DATA(cluster)
gboolean is_openais_cluster(void)
Definition: cluster.c:619
enum cluster_type_e find_corosync_variant(void)
Definition: corosync.c:428
gboolean crm_cluster_connect(crm_cluster_t *cluster)
Definition: cluster.c:175
gboolean is_cman_cluster(void)
Definition: cluster.c:601
int set_cluster_type(enum cluster_type_e type)
Definition: cluster.c:482
#define MAX_NAME
Definition: crm.h:44
#define DAEMON_RESPAWN_STOP
Definition: crm.h:67
const char * crm_xml_add(xmlNode *node, const char *name, const char *value)
Definition: xml.c:2695
#define CRM_META
Definition: crm.h:55
#define crm_err(fmt, args...)
Definition: logging.h:248
gboolean send_cluster_message_cs(xmlNode *msg, gboolean local, crm_node_t *node, enum crm_ais_msg_types dest)
Definition: cpg.c:506
#define uint32_t
Definition: stdint.in.h:158
#define CRM_ASSERT(expr)
Definition: error.h:35
char data[0]
Definition: internal.h:58
int crm_exit(int rc)
Definition: utils.c:87
Wrappers for and extensions to libqb IPC.
void set_uuid(xmlNode *xml, const char *attr, crm_node_t *node)
Definition: cluster.c:448
char * uname
Definition: cluster.h:79
gboolean crm_is_true(const char *s)
Definition: utils.c:683
#define safe_str_eq(a, b)
Definition: util.h:74
crm_node_t * crm_find_peer(unsigned int id, const char *uname)
Definition: membership.c:383
long long crm_int_helper(const char *text, char **end_text)
Definition: utils.c:597
GHashTable * crm_peer_cache
Definition: membership.c:35
#define crm_info(fmt, args...)
Definition: logging.h:251
gboolean is_heartbeat_cluster(void)
Definition: cluster.c:634
enum crm_ais_msg_types type
Definition: internal.h:51