X-Git-Url: https://git.8kb.co.uk/?p=slony-i%2Fslony_failover;a=blobdiff_plain;f=slony_failover.pl;h=f1eab2c3954522299b6b77fbe17c69d34c7daaea;hp=672a1a4966c9c645a5179024b7a7e7f7049caad5;hb=146c773e63a572450722aaa917dc96ced7319f7d;hpb=7d3a4b0e0f5a3eb5f20051c8a9b1a80b71df4016 diff --git a/slony_failover.pl b/slony_failover.pl old mode 100644 new mode 100755 index 672a1a4..f1eab2c --- a/slony_failover.pl +++ b/slony_failover.pl @@ -1,9 +1,9 @@ #!/usr/bin/perl # Script: failover.pl -# Copyright: 08/04/2012: v1.0.1 Glyn Astill +# Copyright: 08/04/2012: v1.0.2 Glyn Astill # Requires: Perl 5.10.1+, Data::UUID, File::Slurp -# PostgreSQL 9.0+ Slony-I 2.0+ +# PostgreSQL 9.0+ Slony-I 1.2+ / 2.0+ # # This script is a command-line utility to manage switchover and failover # of replication sets in Slony-I clusters. @@ -36,7 +36,7 @@ use Config qw/%Config/; use constant false => 0; use constant true => 1; -my $g_script_version = '1.0.1'; +my $g_script_version = '1.0.3'; my $g_debug = false; my $g_pidfile = '/var/run/slony_failover.pid'; my $g_pid_written = false; @@ -55,6 +55,7 @@ my $g_use_try_blocks = false; my $g_lockset_method = 'multiple'; my $g_logfile = 'failover.log'; my $g_input; +my $g_silence_notice = false; my $g_reason; my $g_script; my $g_node_from; @@ -88,20 +89,24 @@ my $g_autofailover_retry = 2; my $g_autofailover_retry_sleep = 1000; my $g_autofailover_provs = false; my $g_autofailover_config_any = true; +my $g_autofailover_perspective_sleep = 20000; +my $g_autofailover_majority_only = false; +my $g_autofailover_is_quorum = false; my @g_unresponsive; +my %g_unresponsive_subonly; my %g_backups; my $g_pid = $$; my %message = ( 'en' => { - 'usage' => q{-h -p -db -c -u -P (Password option not recommended; use pgpass instead)}, + 'usage' => q{-h -p -db -cl -u -P -f (Password option not recommended; use pgpass instead)}, 'title' => q{Slony-I failover script version $1}, - 'cluster_fixed' => q{Aborting failover action: all origin nodes now responsive}, + 'cluster_fixed' => q{Aborting failover action: all origin/provider nodes now responsive}, 'cluster_failed' => q{Found $1 failed nodes, sleeping for $2ms before retry $3 of $4}, 'load_cluster' => q{Getting a list of database nodes...}, 'load_cluster_fail' => q{Unable to read cluster configuration $1}, - 'load_cluster_success' => q{Loaded Slony-I v$1 cluster "$2" with $3 nodes read from node at $4:$5/$6}, + 'load_cluster_success' => q{I Loaded Slony-I v$1 cluster "$2" with $3 nodes read from node at $4:$5/$6}, 'lag_detail' => q{Current node lag information from configuration node:}, 'script_settings' => q{Using $1 batches of lock set, $2 FAILOVER and $3}, 'generated_script' => q{Generated script "$1"}, @@ -110,7 +115,7 @@ my %message = ( 'autofailover_init_pol' => q{Polling every $1ms}, 'autofailover_init_ret' => q{Failed nodes will be retried $1 times with $2ms sleep}, 'autofailover_init_set' => q{Failed forwarding providers $1 be failed over}, - 'autofailover_load_cluster' => q{$1 Slony-I v$2 cluster "$3" with $4 nodes}, + 'autofailover_load_cluster' => q{$1 Slony-I v$2 cluster "$3" with $4 nodes read from node $5}, 'autofailover_proceed' => q{Proceeding with failover:}, 'autofailover_detail' => q{Failed node: $1, Backup node: $2}, 'autofailover_halt' => q{Unable to perform any failover for $1 failed nodes}, @@ -120,10 +125,22 @@ my %message = ( 'autofailover_promote_found' => q{Using previously found most up to date subscriber to all sets ($1) on unresponsive node $2}, 'autofailover_promote_skip' => q{No failover required for unresponsive node $1 as it is neither the origin or an active forwarder of any sets}, 'autofailover_promote_fail' => q{Could not find suitable backup node for promotion}, - 'autofailover_node_detail' => q{Node $1 is $2 and provides sets $3 at $4 lag}, - 'autofailover_promote_best' => q{Best node for promotion is node $1 seq = $2}, - 'autofailover_unresponsive' => q{Detected unresponsive provider node: $1}, + 'autofailover_node_detail' => q{Node $1 is $2 subscribed to ($3) node $4 and provides sets $5 at $6 seconds lag (on event $7)}, + 'autofailover_promote_best' => q{Best node for promotion is node $1 lag = $2 seconds (event $3)}, + 'autofailover_promote_unsuitable' => q{Node $1 is unsuitable for promotion}, + 'autofailover_unresponsive' => q{Detected unresponsive origin node: $1}, + 'autofailover_unresponsive_prov' => q{Detected unresponsive provider node: $1}, 'autofailover_unresponsive_subonly'=> q{Detected unresponsive subscriber only node: $1}, + 'autofailover_recovery_subonly' => q{Detected recovery of previously unresponsive subscriber only node: $1}, + 'autofailover_pspec_check_fail' => q{Failed to connect to node $1: $2}, + 'autofailover_pspec_check' => q{Getting objective judgement from other nodes, apparent unresponsive nodes are : $1 (Failed nodes = $2 of $3)}, + 'autofailover_pspec_check_sleep' => q{Sleeping for $1 ms}, + 'autofailover_pspec_check_data' => q{$1: Node $2 says lag from node $3 -> $4 is $5 seconds}, + 'autofailover_pspec_check_true' => q{All detected failed nodes confirmed as lagging by other nodes}, + 'autofailover_pspec_check_false' => q{Not all nodes confirmed as lagging}, + 'autofailover_pspec_check_unknown' => q{Unable to confirm lag status of all nodes}, + 'autofailover_split_check' => q{Surviving nodes ($1 of $2) are the majority}, + 'autofailover_split_check_fail' => q{Surviving nodes ($1) are not the majority}, 'interactive_head_id' => q{ID}, 'interactive_head_name' => q{Name}, 'interactive_head_status' => q{Status}, @@ -139,6 +156,7 @@ my %message = ( 'interactive_write_script' => q{Writing a script to $1 node $2 to $3}, 'interactive_check_nodes' => q{Checking availability of database nodes...}, 'interactive_continue' => q{Do you wish to continue [y/n]?}, + 'interactive_drop_nodes' => q{Do you want to also drop the failed nodes from the slony configuration [y/n]?}, 'interactive_preserve' => q{Preserve subscription paths to follow the origin node (choose no if unsure) [y/n]?}, 'interactive_aliases' => q{Generate aliases based on sl_node/set comments in parentheses (choose no if unsure) [y/n]?}, 'interactive_summary' => q{Summary of nodes to be passed to failover:}, @@ -155,6 +173,9 @@ my %message = ( 'wrn_node_unavailable' => q{WARNING: Node $1 unavailable}, 'wrn_req_unavailable' => q{WARNING: Old origin node ($1) is available, however $2 subscribers are unavailable}, 'wrn_not_tested' => q{WARNING: Script not tested with Slony-I v$1}, + 'wrn_failover_issues' => q{WARNING: Slony-I v$1 may struggle to failover correctly with multiple failed nodes (affects v2.0-2.1)}, + 'note_autofail_fwd_only' => q{NOTICE: Slony versions prior to 2.2 cannot initiate failover from only failed forwarding providers}, + 'note_fail_sub_only' => q{NOTICE: Slony versions prior to 2.2 cannot failover subscriber only nodes, reverting to failover_offline_subscriber_only = false}, 'note_multiple_try' => q{NOTICE: Cannot lock multiple sets within try blocks in version $1 dropping back to single sets}, 'note_reshape_cluster' => q{NOTICE: Either drop the failed subscribers or bring them back up, then retry to MOVE SET}, 'dbg_generic' => q{DEBUG: $1}, @@ -197,9 +218,9 @@ my %message = ( 'exit' => q{Exited by $1} }, 'fr' => { - 'usage' => q{-h -p -db -c -u -P (Option mot de passe pas recommandé; utiliser pgpass place)}, + 'usage' => q{-h -p -db -cl -u -P -f (Option mot de passe pas recommandé; utiliser pgpass place)}, 'title' => q{Slony-I failover (basculement) version de script $1}, - 'cluster_fixed' => q{Abandon de l'action de basculement: tous les noeuds d'origine maintenant sensible}, + 'cluster_fixed' => q{Abandon de l'action de basculement: tous les noeuds d'origine / de fournisseurs maintenant sensible}, 'cluster_failed' => q{Trouvé $1 échoué noeuds, couchage pour $2 ms avant réessayer $3 de $4}, 'load_cluster' => q{Obtenir une liste de noeuds de base de donnees...}, 'load_cluster_fail' => q{Impossible de lire la configuration du cluster $1}, @@ -212,7 +233,7 @@ my %message = ( 'autofailover_init_pol' => q{Vérifier toutes les $1ms}, 'autofailover_init_ret' => q{Noeuds défaillants seront rejugés $1 fois avec $2 ms sommeil}, 'autofailover_init_set' => q{Fournisseurs d'expédition échoué $1 être échoué sur}, - 'autofailover_load_cluster' => q{$1 Slony-I v$2 grappe "$3" avec $4 noeuds}, + 'autofailover_load_cluster' => q{$1 Slony-I v$2 grappe "$3" avec $4 noeuds lire à noeud $5}, 'autofailover_proceed' => q{De procéder à failover:}, 'autofailover_detail' => q{Noeud défaillant: $1, noeud de sauvegarde: $2}, 'autofailover_halt' => q{Noeuds Impossible d'effectuer une failover pour $1 échoué}, @@ -222,10 +243,22 @@ my %message = ( 'autofailover_promote_found' => q{Utilisation précédemment trouvé plus à jour abonné à tous les jeux ($1) sur le noeud ne répond pas $2}, 'autofailover_promote_skip' => q{Pas de failover requis pour le noeud ne répond pas $1 car il n'est ni l'origine ou un transitaire active de tous les jeux}, 'autofailover_promote_fail' => q{Impossible de trouver le noeud de sauvegarde approprié pour la promotion}, - 'autofailover_node_detail' => q{Noeud $1 est $2 et fournit des ensembles $3 à $4 retard}, - 'autofailover_promote_best' => q{Meilleur noeud pour la promotion est noeud $1 suivants = $2}, - 'autofailover_unresponsive' => q{Noeud ne répond pas détecté: $1}, + 'autofailover_node_detail' => q{Noeud $1 est souscrit à $2 ($3) noeud $4 et fournit des ensembles de $5 à retard $6 secondes (en cas d'événement $7)}, + 'autofailover_promote_best' => q{Meilleur noeud pour la promotion est noeud $1 décalage = $2 secondes (événement $3)}, + 'autofailover_promote_unsuitable' => q{Noeud $1 est inadapté pour la promotion}, + 'autofailover_unresponsive' => q{Noeud d'origine ne répond pas détecté: $1}, + 'autofailover_unresponsive_prov' => q{Noeud fournisseur ne répond pas détecté: $1}, 'autofailover_unresponsive_subonly'=> q{Abonné ne répond pas détecté seulement de noeud: $1}, + 'autofailover_recovery_subonly' => q{Recouvrement détecté de l'abonné ne répond pas seulement auparavant de noeud: $1}, + 'autofailover_pspec_check_fail' => q{Impossible de se connecter au noeud $1: $2}, + 'autofailover_pspec_check' => q{Obtenir un jugement objectif à partir d'autres noeuds, les noeuds qui ne répondent pas apparentes sont : $1 (Noeuds défaillants = $2 de $3)}, + 'autofailover_pspec_check_sleep' => q{Dormir pour $1 ms}, + 'autofailover_pspec_check_data' => q{$1: Noeud $2 dit décalage de $3 -> $4 noeud est $5 secondes}, + 'autofailover_pspec_check_true' => q{Tous les noeuds détectés pas confirmés comme à la traîne par d'autres noeuds}, + 'autofailover_pspec_check_false' => q{Pas tous les noeuds confirmé retard}, + 'autofailover_pspec_check_unknown' => q{Impossible de confirmer le statut de latence de tous les noeuds}, + 'autofailover_split_check' => q{Autres noeuds ($1 sur $2) sont la majorité}, + 'autofailover_split_check_fail' => q{Autres noeuds ($1) ne sont pas la majorité}, 'interactive_head_name' => q{Nom}, 'interactive_head_status' => q{Statut}, 'interactive_head_providers' => q{Fournisseur IDs}, @@ -234,6 +267,7 @@ my %message = ( 'interactive_detail_3' => q{Abonnements: }, 'interactive_choose_node' => q{S'il vous plaît choisissez le noeud à déplacer tous les ensembles $1:}, 'interactive_confirm' => q{Vous avez choisi de passer ensembles $1 noeud $2 ($3). Est-ce correct [o/n]? }, + 'interactive_drop_nodes' => q{Voulez-vous laisser tomber aussi les noeuds défaillants de la configuration de slony [o/n]?}, 'interactive_action' => q{Meilleur plan d'action est le plus susceptible de faire une $1. Voulez-vous continuer [o/n]?}, 'interactive_surrender' => q{Uable pour déterminer le meilleur plan d'action}, 'interactive_write_script' => q{Rédaction d'un script à $1 $2 noeud à $3}, @@ -255,6 +289,9 @@ my %message = ( 'wrn_node_unavailable' => q{ATTENTION: Noeud $1 disponible}, 'wrn_req_unavailable' => q{ATTENTION: Noeud Old origine ($1) est disponible, mais $2 abonnés ne sont pas disponibles}, 'wrn_not_tested' => q{ATTENTION: Script pas testé avec Slony-I v$1}, + 'wrn_failover_issues' => q{ATTENTION: Slony-I v$1 peut lutter pour basculer correctement avec plusieurs nœuds défaillants (affecte v2.0-2.1)}, + 'note_autofail_fwd_only' => q{AVIS: Versions antérieures à la 2.2 Slony ne peuvent pas initier le basculement de seulement échoué transmettre fournisseurs}, + 'note_fail_sub_only' => q{AVIS: Versions antérieures à la 2.2 Slony ne peuvent pas basculer abonnes seuls les noeuds, revenant à failover_offile_subscriber_only = false}, 'note_multiple_try' => q{AVIS: Vous ne pouvez pas verrouiller plusieurs ensembles dans des blocs try dans la version $1 de retomber à des jeux simples}, 'note_reshape_cluster' => q{AVIS: Vous devez supprimer les abonnés défaillants ou les ramener, puis réessayez à MOVE SET}, 'err_generic' => q{ERREUR: $1}, @@ -272,7 +309,7 @@ my %message = ( 'err_cluster_empty' => q{ERREUR: Groupe chargé contient pas de noeuds}, 'err_cluster_offline' => q{ERREUR: Groupe chargé contient pas de noeuds accessibles}, 'err_cluster_lone' => q{ERRRUE: Groupe chargé ne contient que 1 noeud}, - 'err_not_origin' => q{ERREUR: Nœud $1 n'est pas à l'origine de tous les jeux}, + 'err_not_origin' => q{ERREUR: Noeud $1 n'est pas à l'origine de tous les jeux}, 'err_not_provider' => q{ERREUR: Noeud $1 n'est pas un fournisseur de tous les jeux}, 'err_not_provider_sets' => q{ERREUR: Noeud $1 ne fournit pas les ensembles nécessaires: le besoin ($2), mais fournit ($3)}, 'err_no_configuration' => q{ERREUR: Impossible de lire la configuration pour le noeud $1}, @@ -413,9 +450,6 @@ if ($g_node_count <= 0) { printlogln($g_prefix,$g_logfile,$g_log_prefix,lookupMsg('err_cluster_empty')); cleanExit(3, "system"); } -elsif (substr($g_version,0,1) < 2) { - printlogln($g_prefix,$g_logfile,$g_log_prefix,lookupMsg('wrn_not_tested', $g_version)); -} else { printlogln($g_prefix,$g_logfile,$g_log_prefix,lookupMsg('load_cluster_success', $g_version, $g_clname, $g_node_count, $g_dbhost, $g_dbport, $g_dbname)); printlogln($g_prefix,$g_logfile,$g_log_prefix,lookupMsg('script_settings', $g_lockset_method, $g_failover_method, uc($g_resubscribe_method))); @@ -515,6 +549,12 @@ if ($g_failover) { printlogln($g_prefix,$g_logfile,$g_log_prefix,"\t" . lookupMsg('interactive_failover_detail_3')); printlogln($g_prefix,$g_logfile,$g_log_prefix,"\t" . lookupMsg('interactive_failover_detail_4')); + printlog($g_prefix,$g_logfile,$g_log_prefix,lookupMsg('interactive_drop_nodes')); + $g_input = <>; + if ($g_input ~~ /^[Y|O]$/i) { + $g_drop_failed = true; + } + printlog($g_prefix,$g_logfile,$g_log_prefix,lookupMsg('interactive_reason')); $g_reason = <>; printlog($g_prefix,$g_logfile,$g_log_prefix,lookupMsg('interactive_continue')); @@ -749,6 +789,7 @@ sub loadCluster { $sth->execute(); while (my @node = $sth->fetchrow) { + #printlogln($prefix,$logfile,$log_prefix,lookupMsg('dbg_generic', join(' - ', @node))); push(@g_cluster, \@node); } @@ -763,9 +804,9 @@ sub loadCluster { die lookupMsg('err_pgsql_connect'); } else { - if (substr($version,0,1) < 2) { - printlogln($prefix,$logfile,$log_prefix,lookupMsg('wrn_not_tested', $version)); - } + #if (substr($version,0,1) < 2) { + # printlogln($prefix,$logfile,$log_prefix,lookupMsg('wrn_not_tested', $version)); + #} if (($g_use_try_blocks) && ($g_lockset_method eq 'multiple') && (substr($version,0,3) <= 9.9)) { # It's currently not possible to lock multiple sets at a time within a try block (v2.2.2), leave the logic in and set a high version number for now. printlogln($prefix,$logfile,$log_prefix, lookupMsg('note_multiple_try', $version)); @@ -775,6 +816,20 @@ sub loadCluster { $g_failover_method = 'new'; $g_resubscribe_method = 'resubscribe'; } + else { + unless ($g_silence_notice) { + if ((substr($version,0,3) >= 2.0) && (substr($version,0,3) < 2.2)) { + printlogln($prefix,$logfile,$log_prefix,lookupMsg('wrn_failover_issues', $version)); + } + printlogln($prefix,$logfile,$log_prefix,lookupMsg('note_autofail_fwd_only')); + $g_silence_notice = true; + } + if ($g_fail_subonly) { + printlogln($prefix,$logfile,$log_prefix,lookupMsg('note_fail_sub_only')); + $g_fail_subonly = false; + } + } + } return (scalar(@g_cluster), $version); @@ -1001,7 +1056,7 @@ sub writePreamble { } elsif (!$g_fail_subonly) { foreach my $unresponsive (@g_unresponsive) { - if (($_->[0] == $unresponsive->[0]) && !defined($_->[9])) { + if (($_->[0] == $unresponsive->[0]) && !defined($_->[9]) && ($g_failover_method eq 'new')) { $line_prefix = "# (Node $_->[0] unavailable subscriber only) "; } } @@ -1189,7 +1244,7 @@ sub writeMoveSet { @subsets = (split(',', $setlist)) ; if ($g_debug) { - printlogln($prefix,$logfile,$log_prefix,lookupMsg('dbg_resubscribe', $_->[1], $_->[0]), $other_subs->[0], $other_subs->[4], $setlist, $setlist_name, $node, $node_name); + printlogln($prefix,$logfile,$log_prefix,lookupMsg('dbg_resubscribe', $_->[1], $_->[0], $other_subs->[0], $other_subs->[4], $setlist, $setlist_name, $node, $node_name)); } if ($_->[0] ~~ @subsets) { @@ -1264,7 +1319,7 @@ sub writeMoveSet { print SLONFILE ($line_prefix . "ECHO 'Issuing resubscribe for provider $to -> receiver $other_subs->[0]';\n"); print SLONFILE ($line_prefix . - "SUBSCRIBE NODE ( ORIGIN = $to, PROVIDER = $to, RECEIVER = $other_subs->[0] );\n"); + "RESUBSCRIBE NODE ( ORIGIN = $to, PROVIDER = $to, RECEIVER = $other_subs->[0] );\n"); } } else { @@ -1319,6 +1374,16 @@ sub writeFailover { my $event_node; my ($year, $month, $day, $hour, $min, $sec) = (localtime(time))[5,4,3,2,1,0]; my $filetime = sprintf ("%02d_%02d_%04d_%02d:%02d:%02d", $day, $month+1, $year+1900, $hour, $min, $sec); + my $sets = false; + + my $subprov_idx; + my @subprov_name; + my ($node, $setlist); + my ($node_name, $setlist_name); + my @subsets; + my @subsets_name; + my $set_idx; + my @dropped; if (defined($from) && defined($to)) { $filename = $prefix . "/" . $clname . "-failover_from_" . $from . "_to_" . $to . "_on_" . $filetime . ".scr"; @@ -1327,7 +1392,14 @@ sub writeFailover { $filename = $prefix . "/" . $clname . "-autofailover_on_" . $filetime . ".scr"; } - unless (writePreamble($filename, $dbconninfo, $clname, $dbuser, $dbpass, false, $aliases, $prefix, $logfile, $log_prefix, false)) { + if ($g_failover_method ne 'new') { + # For pre 2.2 failover with multiple nodes, we attempt to resubscribe sets and drop other failed providers; + # This will never work as well as 2.2+ failover behaviour (infact failover may not work as all in 2.0/2.1 with multiple failed nodes) + # We also need to define the sets in the preamble for this. + $sets = true; + } + + unless (writePreamble($filename, $dbconninfo, $clname, $dbuser, $dbpass, $sets, $aliases, $prefix, $logfile, $log_prefix, false)) { printlogln($prefix,$logfile,$log_prefix,lookupMsg('err_incomplete_preamble')); } @@ -1338,17 +1410,85 @@ sub writeFailover { printlogln($prefix,$logfile,$log_prefix,lookupMsg('dbg_failover_method',$g_failover_method)); } + # If we are on pre 2.2 we need to drop failed subscriber nodes first regardless + if ($g_failover_method ne 'new') { + foreach (@g_failed) { + if (!defined($_->[3])) { + foreach my $backup (@g_cluster) { + if ($backup->[0] == $g_backups{$_->[0]}) { # this backup node candidate is in the list of suitable nodes for {failed node} + foreach my $subscriber (@g_cluster) { + if (defined($subscriber->[1]) && $subscriber->[1] == $_->[0] && $subscriber->[0] != $backup->[0]) { + # mess here needs cleaning up + @subprov_name = (split(';', $subscriber->[8])); + $subprov_idx = 0; + foreach my $subprov (split(';', $subscriber->[5])) { + ($node, $setlist) = (split('->', $subprov)) ; + ($node_name, $setlist_name) = (split('->', $subprov_name[$subprov_idx])) ; + $subprov_idx++; + $node =~ s/n//g; + + if ($node == $_->[0]) { + if ($aliases) { + print SLONFILE ("ECHO 'Resubscribing all sets on receiver $subscriber->[4] provided by other failed node $_->[4] to backup node $backup->[4]';\n"); + } + else { + print SLONFILE ("ECHO 'Resubscribing all sets on receiver $subscriber->[0] provided by other failed node $_->[0] to backup node $backup->[0]';\n"); + } + $setlist =~ s/(\)|\(|s)//g; + @subsets = (split(',', $setlist)); + $setlist_name =~ s/(\)|\()//g; + @subsets_name = (split(',', $setlist_name)); + + $set_idx = 0; + foreach my $subset (@subsets) { + if ($aliases) { + print SLONFILE ("SUBSCRIBE SET (ID = \@$subsets_name[$set_idx], PROVIDER = \@$backup->[4], RECEIVER = \@$subscriber->[4], FORWARD = YES);\n"); + print SLONFILE ("WAIT FOR EVENT (ORIGIN = \@$backup->[4], CONFIRMED = \@$subscriber->[4], WAIT ON = \@$backup->[4]);\n"); + } + else { + print SLONFILE ("SUBSCRIBE SET (ID = $subset, PROVIDER = $backup->[0], RECEIVER = $subscriber->[0], FORWARD = YES);\n"); + print SLONFILE ("WAIT FOR EVENT (ORIGIN = $backup->[0], CONFIRMED = $subscriber->[0], WAIT ON = $backup->[0]);\n"); + } + $set_idx++; + } + print SLONFILE ("\n"); + } + } + + if ($aliases) { + print SLONFILE ("ECHO 'Dropping other failed node $_->[4] ($_->[0])';\n"); + print SLONFILE ("DROP NODE (ID = \@$_->[4], EVENT NODE = \@$backup->[4]);\n\n"); + } + else { + print SLONFILE ("ECHO 'Dropping other failed node $_->[0]';\n"); + print SLONFILE ("DROP NODE (ID = $_->[0], EVENT NODE = $backup->[0]);\n\n"); + } + push(@dropped, $_->[0]); + } + else { + # The node is failed, but there are no downstream subscribers + } + } + last; + } + } + } + } + } + foreach (@g_failed) { - foreach my $backup (@g_cluster) { - if ($backup->[0] == $g_backups{$_->[0]}) { - ## Here we have both details of the backup node and the failed node - if ($aliases) { - print SLONFILE ("ECHO 'Failing over slony cluster from $_->[4] (id $_->[0]) to $backup->[4] (id $backup->[0])';\n"); + if (($g_failover_method eq 'new') || defined($_->[3])) { + foreach my $backup (@g_cluster) { + if ($backup->[0] == $g_backups{$_->[0]}) { + ## Here we have both details of the backup node and the failed node + if ($aliases) { + print SLONFILE ("ECHO 'Failing over slony cluster from $_->[4] (id $_->[0]) to $backup->[4] (id $backup->[0])';\n"); + } + else { + print SLONFILE ("ECHO 'Failing over slony cluster from node $_->[0] to node $backup->[0]';\n"); + } + last; } - else { - print SLONFILE ("ECHO 'Failing over slony cluster from node $_->[0] to node $backup->[0]';\n"); - } - last; } } } @@ -1356,47 +1496,46 @@ sub writeFailover { print SLONFILE ("FAILOVER (\n\t"); $written = 0; foreach (@g_failed) { - foreach my $backup (@g_cluster) { - - if ($backup->[0] == $g_backups{$_->[0]}) { - ## Here we have both details of the backup node and the failed node - if ($g_failover_method eq 'new') { - if( $written != 0 ) { - print SLONFILE (",\n\t"); + if (($g_failover_method eq 'new') || defined($_->[3])) { + foreach my $backup (@g_cluster) { + if ($backup->[0] == $g_backups{$_->[0]}) { + ## Here we have both details of the backup node and the failed node + if ($g_failover_method eq 'new') { + if( $written != 0 ) { + print SLONFILE (",\n\t"); + } + print SLONFILE ("NODE = ("); } - print SLONFILE ("NODE = ("); - } - else { - if( $written != 0 ) { - print SLONFILE ("\n);\nFAILOVER (\n\t"); + else { + if( $written != 0 ) { + print SLONFILE ("\n);\nFAILOVER (\n\t"); + } } + if ($aliases) { + print SLONFILE ("ID = \@$_->[4], BACKUP NODE = \@$backup->[4]"); + } + else { + print SLONFILE ("ID = $_->[0], BACKUP NODE = $backup->[0]"); + } + if ($g_failover_method eq 'new') { + print SLONFILE (")"); + } + last; } - if ($aliases) { - print SLONFILE ("ID = \@$_->[4], BACKUP NODE = \@$backup->[4]"); - } - else { - print SLONFILE ("ID = $_->[0], BACKUP NODE = $backup->[0]"); - } - if ($g_failover_method eq 'new') { - print SLONFILE (")"); - } - last; } + $written++; } - $written++; } - print SLONFILE ("\n);\n"); + print SLONFILE ("\n);\n\n"); if ($g_drop_failed) { - - if (($g_failover_method eq 'new') && (scalar(@g_failed) > 1)) { foreach (@g_failed) { if ($aliases) { - print SLONFILE ("ECHO 'Dropping node $_->[4] ($_->[0])';\n"); + print SLONFILE ("ECHO 'Dropping failed node $_->[4] ($_->[0])';\n"); } else { - print SLONFILE ("ECHO 'Dropping node $_->[0]';\n"); + print SLONFILE ("ECHO 'Dropping failed node $_->[0]';\n"); } } @@ -1419,7 +1558,7 @@ sub writeFailover { if( $written != 0 ) { print SLONFILE (","); } - ## Don;t bother being pissy and trying to define array values + ## Don't bother trying to define array values #if ($aliases) { # print SLONFILE "\@$_->[4]"; #} @@ -1428,14 +1567,14 @@ sub writeFailover { #} $written++; } - else { + elsif (($g_failover_method eq 'new') || defined($_->[3]) || !($_->[0] ~~ @dropped)) { if ($aliases) { - print SLONFILE ("ECHO 'Dropping node $_->[4] ($_->[0])';\n"); - print SLONFILE ("DROP NODE (ID = \@$_->[4], EVENT NODE = \@$backup->[4]);\n"); + print SLONFILE ("ECHO 'Dropping failed node $_->[4] ($_->[0])';\n"); + print SLONFILE ("DROP NODE (ID = \@$_->[4], EVENT NODE = \@$backup->[4]);\n\n"); } else { - print SLONFILE ("ECHO 'Dropping node $_->[0]';\n"); - print SLONFILE ("DROP NODE (ID = $_->[0], EVENT NODE = $backup->[0]);\n"); + print SLONFILE ("ECHO 'Dropping failed node $_->[0]';\n"); + print SLONFILE ("DROP NODE (ID = $_->[0], EVENT NODE = $backup->[0]);\n\n"); } } last; @@ -1487,6 +1626,13 @@ sub lookupMsg { return $text; } +sub qtrim { + my $string = shift; + $string =~ s/^('|")+//; + $string =~ s/('|")+$//; + return $string; +} + sub trim($) { my $string = shift; $string =~ s/^\s+//; @@ -1704,109 +1850,121 @@ sub getConfig { my $cfgfile = shift; my @fields; my $success = false; + my $value; if (open(CFGFILE, "<", $cfgfile)) { foreach () { chomp $_; for ($_) { s/\r//; - s/\#.*//; + #s/\#.*//; + s/#(?=(?:(?:[^']|[^"]*+'){2})*+[^']|[^"]*+\z).*//; } if (length(trim($_))) { @fields = split('=', $_, 2); given(lc($fields[0])) { + $value = qtrim(trim($fields[1])); when(/\blang\b/i) { - $g_lang = trim($fields[1]); + $g_lang = $value; } when(/\bslony_database_host\b/i) { - $g_dbhost = trim($fields[1]); + $g_dbhost = $value; } when(/\bslony_database_port\b/i) { - $g_dbport = $fields[1]; + $g_dbport = checkInteger($value); } when(/\bslony_database_name\b/i) { - $g_dbname = trim($fields[1]); + $g_dbname = $value; } when(/\bslony_database_user\b/i) { - $g_dbuser = trim($fields[1]); + $g_dbuser = $value; } when(/\bslony_database_password\b/i) { - $g_dbpass = trim($fields[1]); + $g_dbpass = $value; } when(/\bslony_cluster_name\b/i) { - $g_clname = trim($fields[1]); + $g_clname = $value; } when(/\benable_debugging\b/i) { - $g_debug = checkBoolean(trim($fields[1])); + $g_debug = checkBoolean($value); } when(/\bprefix_directory\b/i) { - $g_prefix = trim($fields[1]); + $g_prefix = $value; } when(/\bseparate_working_directory\b/i) { - $g_separate_working = checkBoolean(trim($fields[1])); + $g_separate_working = checkBoolean($value); } when(/\bpid_filename\b/i) { - $g_pidfile = trim($fields[1]); + $g_pidfile = $value; } when(/\bfailover_offline_subscriber_only\b/i) { - $g_fail_subonly = checkBoolean(trim($fields[1])); + $g_fail_subonly = checkBoolean($value); } when(/\bdrop_failed_nodes\b/i) { - $g_drop_failed = checkBoolean(trim($fields[1])); + $g_drop_failed = checkBoolean($value); } when(/\blog_line_prefix\b/i) { - $g_log_prefix = trim($fields[1]); + $g_log_prefix = $value; } when(/\blog_filename\b/i) { - $g_logfile = trim($fields[1]); + $g_logfile = $value; } when(/\blog_to_postgresql\b/i) { - $g_log_to_db = checkBoolean(trim($fields[1])); + $g_log_to_db = checkBoolean($value); } when(/\blog_database_host\b/i) { - $g_logdb_host = trim($fields[1]); + $g_logdb_host = $value; } when(/\blog_database_port\b/i) { - $g_logdb_port = $fields[1]; + $g_logdb_port = checkInteger($value); } when(/\blog_database_name\b/i) { - $g_logdb_name = trim($fields[1]); + $g_logdb_name = $value; } when(/\blog_database_user\b/i) { - $g_logdb_user = trim($fields[1]); + $g_logdb_user = $value; } when(/\blog_database_password\b/i) { - $g_logdb_pass = trim($fields[1]); + $g_logdb_pass = $value; } when(/\benable_try_blocks\b/i) { - $g_use_try_blocks = checkBoolean(trim($fields[1])); + $g_use_try_blocks = checkBoolean($value); } when(/\bpull_aliases_from_comments\b/i) { - $g_use_comment_aliases = checkBoolean(trim($fields[1])); + $g_use_comment_aliases = checkBoolean($value); } when(/\bslonik_path\b/i) { - $g_slonikpath = trim($fields[1]); + $g_slonikpath = $value; } when(/\blockset_method\b/i) { - $g_lockset_method = trim($fields[1]); + $g_lockset_method = $value; } when(/\benable_autofailover\b/i) { - $g_autofailover = checkBoolean(trim($fields[1])); + $g_autofailover = checkBoolean($value); } when(/\bautofailover_poll_interval\b/i) { - $g_autofailover_poll_interval = checkInteger(trim($fields[1])); + $g_autofailover_poll_interval = checkInteger($value); } when(/\bautofailover_node_retry\b/i) { - $g_autofailover_retry = checkInteger(trim($fields[1])); + $g_autofailover_retry = checkInteger($value); } when(/\bautofailover_sleep_time\b/i) { - $g_autofailover_retry_sleep = checkInteger(trim($fields[1])); + $g_autofailover_retry_sleep = checkInteger($value); } when(/\bautofailover_forwarding_providers\b/i) { - $g_autofailover_provs = checkBoolean(trim($fields[1])); + $g_autofailover_provs = checkBoolean($value); } when(/\bautofailover_config_any_node\b/i) { - $g_autofailover_config_any = checkBoolean(trim($fields[1])); + $g_autofailover_config_any = checkBoolean($value); + } + when(/\bautofailover_perspective_sleep_time\b/i) { + $g_autofailover_perspective_sleep = checkInteger($value); + } + when(/\bautofailover_majority_only\b/i) { + $g_autofailover_majority_only = checkBoolean($value); + } + when(/\bautofailover_is_quorum\b/i) { + $g_autofailover_is_quorum = checkBoolean($value); } } } @@ -1883,6 +2041,7 @@ sub autoFailover { my $actions; my $current_retry; my $cluster_loaded; + my $cluster_loaded_from; my @cluster; my $node_count; my $version; @@ -1905,6 +2064,7 @@ sub autoFailover { @cluster = @g_cluster; die lookupMsg('err_cluster_lone') if ($node_count == 1); $cluster_loaded = true; + $cluster_loaded_from = 'conninfo specified in config'; }; if ($@) { printlogln($prefix,$logfile,$log_prefix, lookupMsg('load_cluster_fail', 'from supplied configuration')); @@ -1923,6 +2083,7 @@ sub autoFailover { @cluster = @g_cluster; die lookupMsg('err_cluster_lone') if ($node_count == 1); $cluster_loaded = true; + $cluster_loaded_from = $_->[0]; }; if ($@) { printlogln($prefix,$logfile,$log_prefix, lookupMsg('load_cluster_fail', 'from node ' . $_->[0] . ': trying next node')); @@ -1936,7 +2097,7 @@ sub autoFailover { } if ($cluster_loaded) { - printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_load_cluster', (!defined($cluster_time) ? "Loaded" : "Reloaded"), $version, $clname, $node_count)); + printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_load_cluster', (!defined($cluster_time) ? "Loaded" : "Reloaded"), $version, $clname, $node_count, $cluster_loaded_from)); $cluster_time = time(); } else { @@ -1965,22 +2126,25 @@ sub autoFailover { } } if ($failed > 0) { - $actions = findBackup($clname, $dbuser, $dbpass, $prefix, $logfile, $log_prefix); - if ($actions > 0) { - printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_proceed')); - foreach my $failed ( keys %g_backups ) { - printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_detail', $failed, $g_backups{$failed})); - } - $g_script = writeFailover($prefix, $dbconninfo, $clname, $dbuser, $dbpass, undef, undef, $g_subs_follow_origin, $g_use_comment_aliases, $logfile, $log_prefix); - unless (runSlonik($g_script, $prefix, $logfile, $log_prefix)) { - printlogln($prefix,$logfile,$log_prefix,lookupMsg('err_execute_fail', 'slonik script', $g_script)); - } - $cluster_loaded = false; - #print "SCRIPT: $g_script\n"; - #exit(0); - } - else { - printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_halt', $failed)); + if ((!$g_autofailover_majority_only || checkSplit($prefix, $logfile, $log_prefix)) && (($g_autofailover_perspective_sleep <= 0) || checkPerspective($clname, $dbuser, $dbpass, $prefix, $logfile, $log_prefix))) { + $actions = findBackup($clname, $dbuser, $dbpass, $prefix, $logfile, $log_prefix); + if ($actions > 0) { + printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_proceed')); + foreach my $failed ( keys %g_backups ) { + printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_detail', $failed, $g_backups{$failed})); + } + $g_script = writeFailover($prefix, $dbconninfo, $clname, $dbuser, $dbpass, undef, undef, $g_subs_follow_origin, $g_use_comment_aliases, $logfile, $log_prefix); + unless (runSlonik($g_script, $prefix, $logfile, $log_prefix)) { + printlogln($prefix,$logfile,$log_prefix,lookupMsg('err_execute_fail', 'slonik script', $g_script)); + } + $cluster_loaded = false; + + #print "SCRIPT: $g_script\n"; + #exit(0); + } + else { + printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_halt', $failed)); + } } } usleep($g_autofailover_poll_interval * 1000); @@ -1992,6 +2156,165 @@ sub autoFailover { } } +sub checkSplit { + my $prefix = shift; + my $logfile = shift; + my $log_prefix = shift; + + my $majority = false; + my $failed = scalar(@g_unresponsive); + my $survivers = (scalar(@g_cluster) - scalar(@g_unresponsive)); + + if ($survivers > $failed) { + $majority = true; + printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_split_check', $survivers, ($survivers+$failed))); + } + elsif (($survivers == $failed) && $g_autofailover_is_quorum) { + $majority = true; + printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_split_check', ($survivers . '+quorum'), ($survivers+$failed))); + } + else { + printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_split_check_fail', $survivers)); + } + + return $majority; +} + +# Check each nodes perspective of the failure to try to ensure the issue isn't that this script just can't connect to the origin/provider +# The idea here is just to wait for a short period of time and see if the lag time for the nodes has increased by the same amount. +sub checkPerspective { + my $clname = shift; + my $dbuser = shift; + my $dbpass = shift; + my $prefix = shift; + my $logfile = shift; + my $log_prefix = shift; + + my $dsn; + my $dbh; + my $sth; + my $query; + my $qw_clname; + my $param_on; + my $agreed = false; + my @unresponsive_ids; + my $lag_idx; + my $lag_confirmed; + my @lag_info1; + my @lag_info2; + my $bad = 0; + + foreach (@g_unresponsive) { + push(@unresponsive_ids, $_->[0]); + } + printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_pspec_check', join(", ", @unresponsive_ids), scalar(@g_unresponsive), scalar(@g_cluster))); + + foreach (@g_cluster) { + unless ($_->[0] ~~ @unresponsive_ids) { + $dsn = "DBI:Pg:$_->[2];"; + eval { + $dbh = DBI->connect($dsn, $dbuser, $dbpass, {RaiseError => 1}); + $qw_clname = $dbh->quote_identifier("_" . $clname); + + $query = "SELECT a.st_origin, a.st_received, extract(epoch from a.st_lag_time)::integer + FROM _test_replication.sl_status a + INNER JOIN _test_replication.sl_node b on a.st_origin = b.no_id + INNER JOIN _test_replication.sl_node c on a.st_received = c.no_id + WHERE a.st_received IN (" . substr('?, ' x scalar(@unresponsive_ids), 0, -2) . ") ORDER BY a.st_origin, a.st_received;"; + + $sth = $dbh->prepare($query); + + $param_on = 1; + foreach (@unresponsive_ids) { + $sth->bind_param($param_on, $_); + $param_on++; + } + $sth->execute(); + + while (my @node_lag = $sth->fetchrow) { + printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_pspec_check_data', 'Check1', $_->[0], $node_lag[0], $node_lag[1], $node_lag[2])); + push(@lag_info1, \@node_lag); + } + + $sth->finish; + $dbh->disconnect(); + }; + if ($@) { + printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_pspec_check_fail', $_->[0], $@)); + $bad++; + } + } + } + + if ($bad == 0) { + printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_pspec_check_sleep', $g_autofailover_perspective_sleep)); + usleep($g_autofailover_perspective_sleep * 1000); + + foreach (@g_cluster) { + unless ($_->[0] ~~ @unresponsive_ids) { + $dsn = "DBI:Pg:$_->[2];"; + eval { + $dbh = DBI->connect($dsn, $dbuser, $dbpass, {RaiseError => 1}); + $qw_clname = $dbh->quote_identifier("_" . $clname); + + $query = "SELECT a.st_origin, a.st_received, extract(epoch from a.st_lag_time)::integer + FROM _test_replication.sl_status a + INNER JOIN _test_replication.sl_node b on a.st_origin = b.no_id + INNER JOIN _test_replication.sl_node c on a.st_received = c.no_id + WHERE a.st_received IN (" . substr('?, ' x scalar(@unresponsive_ids), 0, -2) . ") ORDER BY a.st_origin, a.st_received;"; + + $sth = $dbh->prepare($query); + + $param_on = 1; + foreach (@unresponsive_ids) { + $sth->bind_param($param_on, $_); + $param_on++; + } + $sth->execute(); + + while (my @node_lag = $sth->fetchrow) { + printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_pspec_check_data', 'Check2', $_->[0], $node_lag[0], $node_lag[1], $node_lag[2])); + push(@lag_info2, \@node_lag); + } + + $sth->finish; + $dbh->disconnect(); + }; + if ($@) { + printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_pspec_check_fail', $_->[0], $@)); + $bad++; + } + } + } + + $lag_idx = 0; + $lag_confirmed = 0; + foreach (@lag_info1) { + if ($g_debug) { + printlogln($prefix,$logfile,$log_prefix,lookupMsg('dbg_generic', ("Node $_->[0] lag between checks on node $_->[1] is " . ($lag_info2[$lag_idx]->[2]-$_->[2]) . " seconds"))); + } + + if ((($lag_info2[$lag_idx]->[2]-$_->[2])*1000) >= $g_autofailover_perspective_sleep) { + $lag_confirmed++; + } + $lag_idx++; + } + } + + if ($bad > 0) { + printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_pspec_check_unknown')); + } + elsif ($lag_idx == $lag_confirmed) { + printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_pspec_check_true')); + $agreed = true; + } + else { + printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_pspec_check_false')); + } + + return $agreed; +} + sub checkFailed { my $clname = shift; my $dbuser = shift; @@ -2037,6 +2360,11 @@ sub checkFailed { $sth->finish; $dbh->disconnect(); + + if (exists($g_unresponsive_subonly{$_->[0]})) { + printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_recovery_subonly', $_->[0])); + delete $g_unresponsive_subonly{$_->[0]}; + } }; if ($@) { if ($g_debug) { @@ -2044,11 +2372,21 @@ sub checkFailed { } push(@g_unresponsive, \@$_); if ((defined($_->[3])) || ($g_autofailover_provs && defined($_->[9]))) { - printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_unresponsive', $_->[0])); - $prov_failed++; + if (defined($_->[3])) { + printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_unresponsive', $_->[0])); + } + else { + printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_unresponsive_prov', $_->[0])); + } + unless ($g_failover_method ne 'new' && !defined($_->[3])) { + $prov_failed++; + } } else { - printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_unresponsive_subonly', $_->[0])); + if (!exists($g_unresponsive_subonly{$_->[0]})) { + printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_unresponsive_subonly', $_->[0])); + $g_unresponsive_subonly{$_->[0]} = true; + } if ($g_fail_subonly) { $subonly_failed++; } @@ -2083,11 +2421,14 @@ sub findBackup { my $query; my $qw_clname; my $result_count = 0; - my $lowest_lag; + my $lowest_lag_time; + my $latest_last_event; my $best_node_id; my $best_node_is_direct; + my $best_node_can_forward; my @sets_from; my @sets_to; + my @sets_to_prov; my %backup_for_set_chosen; undef %g_backups; @@ -2098,7 +2439,10 @@ sub findBackup { printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_promote_find', ($_->[9] // "none"), $_->[0])); undef $best_node_id; - $lowest_lag = (1<<$Config{ivsize}*8-1)-1; + $lowest_lag_time = (1<<$Config{ivsize}*8-1)-1; + $latest_last_event = 0; + $best_node_is_direct = false; + $best_node_can_forward = false; if (defined($_->[9]) && (exists $backup_for_set_chosen{$_->[9]})) { $best_node_id = $backup_for_set_chosen{$_->[9]}; @@ -2110,44 +2454,116 @@ sub findBackup { if ($g_debug) { printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_check_sub',$subscriber->[0])); } - + + # Here the strings containing the sets are converted to arrays + # Origin / Forwarded sets + undef @sets_to_prov; + if (defined($subscriber->[10])) { + @sets_to_prov = split(',',$subscriber->[10]); + } + if (defined($subscriber->[3])) { + if (@sets_to_prov) { + @sets_to_prov = (@sets_to_prov, split(',',$subscriber->[3])); + } + else { + @sets_to_prov = split(',',$subscriber->[3]); + } + } + if (!defined($subscriber->[10]) && !defined($subscriber->[3])) { + @sets_to_prov = (0); + } + + # Origin / Subscribed sets. + undef @sets_to; + if (defined($subscriber->[7])) { + @sets_to = split(',',$subscriber->[7]); + } + if (defined($subscriber->[3])) { + if (@sets_to) { + @sets_to = (@sets_to, split(',',$subscriber->[3])); + } + else { + @sets_to = split(',',$subscriber->[3]); + } + } + if (!defined($subscriber->[7]) && !defined($subscriber->[3])) { + @sets_to = (0); + } + + # Sets provided by the failed node. + undef @sets_from; + if (defined($_->[9])) { + @sets_from = split(',',$_->[9]); + } + else { + @sets_from = (0); + @sets_to = (0); + } + $dsn = "DBI:Pg:$subscriber->[2]"; eval { $dbh = DBI->connect($dsn, $dbuser, $dbpass, {RaiseError => 1}); $qw_clname = $dbh->quote_identifier("_" . $clname); - $query = "SELECT extract(epoch from a.st_lag_time), (a.st_received = ?) AS direct - FROM $qw_clname.sl_status a - INNER JOIN $qw_clname.sl_subscribe b ON b.sub_provider = a.st_received AND b.sub_receiver = a.st_origin - WHERE b.sub_active - GROUP BY a.st_lag_time, a.st_received;"; + #print "\tNODE " . $subscriber->[0] . ") SETS TO = " . join(',', @sets_to) . " SETS FROM = " . join(',', @sets_from) . " SETS TO PROV = " . join(',', @sets_to_prov) . "\n"; + + if (defined($subscriber->[3]) && checkProvidesAllSets(\@sets_from, \@sets_to)) { + $query = "SELECT 0, ev_seqno, (ev_origin = ?) + FROM $qw_clname.sl_event + WHERE ev_origin = $qw_clname.getlocalnodeid(?) + ORDER BY ev_seqno DESC LIMIT 1"; + } + else { + $query = "SELECT extract(epoch from (current_timestamp-a.con_timestamp)), a.con_seqno, (a.con_origin = ?) AS direct + FROM $qw_clname.sl_confirm a + INNER JOIN $qw_clname.sl_event b on b.ev_seqno = a.con_seqno AND a.con_origin = b.ev_origin + INNER JOIN $qw_clname.sl_subscribe c ON c.sub_provider = a.con_origin AND c.sub_receiver = a.con_received + WHERE c.sub_active AND a.con_received = $qw_clname.getlocalnodeid(?) + ORDER BY a.con_seqno DESC LIMIT 1;"; + } $sth = $dbh->prepare($query); $sth->bind_param(1, $_->[0]); + $sth->bind_param(2, "_" . $clname); $sth->execute(); while (my @subinfo = $sth->fetchrow) { - - undef @sets_from; - if (defined($_->[9])) { - printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_node_detail', $subscriber->[0], ($subinfo[1] ? "a direct subscriber" : "an indirect subscriber"), $subscriber->[7], $subinfo[0])); - @sets_from = split(',',$_->[9]); - @sets_to = split(',',$subscriber->[7]); - } - elsif ($g_fail_subonly) { - # Subscriber only node will have no active sets forwarding sets to check - printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_node_detail', $subscriber->[0], "suitable backup for this subscriber only node" , $subscriber->[7], $subinfo[0])); - @sets_from = (0); - @sets_to = (0); + # If the failed provider node isn't an origin for any sets, we classify any direct subscribers to it as indeirect + # because they are indirect to the origin. + if ($subinfo[2] && defined($_->[9]) && !defined($_->[3])) { + $subinfo[2] = false; } - if ((checkProvidesAllSets(\@sets_from, \@sets_to)) && (($subinfo[0] < $lowest_lag) || (!$best_node_is_direct && $subinfo[1]))) { - printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_promote_best', $subscriber->[0], $subinfo[0])); + printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_node_detail', + $subscriber->[0], + ($subinfo[2]?"directly":"indirectly"), + (defined($_->[3])?"origin":(defined($_->[9])?"provider":"subscriber only")), + $_->[0], + (defined($subscriber->[10])?$subscriber->[10]:(defined($subscriber->[3])?$subscriber->[3]:"")), + $subinfo[0], $subinfo[1])); + + # If select this node as the backup node if: + # 1) The node is a subscriber to all sets on the failed node + # 2) In order of preference: + # The node is one of the direct subscribers to the failed node on the most recent event and is a forwarding provider + # OR + # The node is one of the direct subscribers to the failed node on the most recent event and is not a forwarding provider + # OR + # The node is an indirect subscriber to the failed node with the lowest lag time + if (!checkProvidesAllSets(\@sets_from, \@sets_to)) { + printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_promote_unsuitable', $subscriber->[0])); + } + elsif (($subinfo[2] && (!$best_node_is_direct || $subinfo[1] > $latest_last_event || (!$best_node_can_forward && checkProvidesAllSets(\@sets_from, \@sets_to_prov) && $subinfo[1] == $latest_last_event))) + || (!$best_node_is_direct && !$subinfo[2] && $subinfo[0] < $lowest_lag_time)) { + printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_promote_best', $subscriber->[0], $subinfo[0], $subinfo[1])); $best_node_id = $subscriber->[0]; - $lowest_lag = $subinfo[0]; - $best_node_is_direct = $subinfo[1]; + $lowest_lag_time = $subinfo[0]; + $latest_last_event = $subinfo[1]; + $best_node_is_direct = $subinfo[2]; + $best_node_can_forward = checkProvidesAllSets(\@sets_from, \@sets_to_prov); } + } }; if ($@) { @@ -2167,7 +2583,7 @@ sub findBackup { } } else { - printlog($prefix,$logfile,$log_prefix,lookupMsg('autofailover_promote_fail')); + printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_promote_fail')); } } else {