X-Git-Url: https://git.8kb.co.uk/?p=slony-i%2Fslony_failover;a=blobdiff_plain;f=slony_failover.pl;h=298efe3f00ed10ec15486cc9a109eab21787689a;hp=a6020fdf676ff0b4c1f72d2efa72241a916d8103;hb=20aaac18d413cff3a0e46405d40346cd2b6fe19b;hpb=483f9f14eb37560d3ca04ab2e834a03a3882a35d diff --git a/slony_failover.pl b/slony_failover.pl index a6020fd..298efe3 100755 --- a/slony_failover.pl +++ b/slony_failover.pl @@ -23,6 +23,7 @@ use strict; use warnings; +use experimental 'smartmatch'; use DBI; use Getopt::Long qw/GetOptions/; use Data::UUID; @@ -36,7 +37,7 @@ use Config qw/%Config/; use constant false => 0; use constant true => 1; -my $g_script_version = '1.0.2'; +my $g_script_version = '1.0.3'; my $g_debug = false; my $g_pidfile = '/var/run/slony_failover.pid'; my $g_pid_written = false; @@ -93,19 +94,20 @@ my $g_autofailover_perspective_sleep = 20000; my $g_autofailover_majority_only = false; my $g_autofailover_is_quorum = false; my @g_unresponsive; +my %g_unresponsive_subonly; my %g_backups; my $g_pid = $$; - +# Hash containing messages used by lookupMsg() my %message = ( 'en' => { 'usage' => q{-h -p -db -cl -u -P -f (Password option not recommended; use pgpass instead)}, 'title' => q{Slony-I failover script version $1}, - 'cluster_fixed' => q{Aborting failover action: all origin nodes now responsive}, + 'cluster_fixed' => q{Aborting failover action: all origin/provider nodes now responsive}, 'cluster_failed' => q{Found $1 failed nodes, sleeping for $2ms before retry $3 of $4}, 'load_cluster' => q{Getting a list of database nodes...}, 'load_cluster_fail' => q{Unable to read cluster configuration $1}, - 'load_cluster_success' => q{Loaded Slony-I v$1 cluster "$2" with $3 nodes read from node at $4:$5/$6}, + 'load_cluster_success' => q{I Loaded Slony-I v$1 cluster "$2" with $3 nodes read from node at $4:$5/$6}, 'lag_detail' => q{Current node lag information from configuration node:}, 'script_settings' => q{Using $1 batches of lock set, $2 FAILOVER and $3}, 'generated_script' 
=> q{Generated script "$1"}, @@ -114,7 +116,7 @@ my %message = ( 'autofailover_init_pol' => q{Polling every $1ms}, 'autofailover_init_ret' => q{Failed nodes will be retried $1 times with $2ms sleep}, 'autofailover_init_set' => q{Failed forwarding providers $1 be failed over}, - 'autofailover_load_cluster' => q{$1 Slony-I v$2 cluster "$3" with $4 nodes}, + 'autofailover_load_cluster' => q{$1 Slony-I v$2 cluster "$3" with $4 nodes read from node $5}, 'autofailover_proceed' => q{Proceeding with failover:}, 'autofailover_detail' => q{Failed node: $1, Backup node: $2}, 'autofailover_halt' => q{Unable to perform any failover for $1 failed nodes}, @@ -124,11 +126,13 @@ my %message = ( 'autofailover_promote_found' => q{Using previously found most up to date subscriber to all sets ($1) on unresponsive node $2}, 'autofailover_promote_skip' => q{No failover required for unresponsive node $1 as it is neither the origin or an active forwarder of any sets}, 'autofailover_promote_fail' => q{Could not find suitable backup node for promotion}, - 'autofailover_node_detail' => q{Node $1 is $2 subscribed to ($3) node $4 and provides sets $5 at $6 lag ($7 events)}, - 'autofailover_node_detail_subonly' => q{Node $1 is $2 subscribed to ($3) node $4 and is a subscriber only at $5 lag ($6 events)}, - 'autofailover_promote_best' => q{Best node for promotion is node $1 seq = $2 ($3 events)}, - 'autofailover_unresponsive' => q{Detected unresponsive provider node: $1}, + 'autofailover_node_detail' => q{Node $1 is $2 subscribed to ($3) node $4 and provides sets $5 at $6 seconds lag (on event $7)}, + 'autofailover_promote_best' => q{Best node for promotion is node $1 lag = $2 seconds (event $3)}, + 'autofailover_promote_unsuitable' => q{Node $1 is unsuitable for promotion}, + 'autofailover_unresponsive' => q{Detected unresponsive origin node: $1}, + 'autofailover_unresponsive_prov' => q{Detected unresponsive provider node: $1}, 'autofailover_unresponsive_subonly'=> q{Detected unresponsive 
subscriber only node: $1}, + 'autofailover_recovery_subonly' => q{Detected recovery of previously unresponsive subscriber only node: $1}, 'autofailover_pspec_check_fail' => q{Failed to connect to node $1: $2}, 'autofailover_pspec_check' => q{Getting objective judgement from other nodes, apparent unresponsive nodes are : $1 (Failed nodes = $2 of $3)}, 'autofailover_pspec_check_sleep' => q{Sleeping for $1 ms}, @@ -217,7 +221,7 @@ my %message = ( 'fr' => { 'usage' => q{-h -p -db -cl -u -P -f (Option mot de passe pas recommandé; utiliser pgpass place)}, 'title' => q{Slony-I failover (basculement) version de script $1}, - 'cluster_fixed' => q{Abandon de l'action de basculement: tous les noeuds d'origine maintenant sensible}, + 'cluster_fixed' => q{Abandon de l'action de basculement: tous les noeuds d'origine / de fournisseurs maintenant sensible}, 'cluster_failed' => q{Trouvé $1 échoué noeuds, couchage pour $2 ms avant réessayer $3 de $4}, 'load_cluster' => q{Obtenir une liste de noeuds de base de donnees...}, 'load_cluster_fail' => q{Impossible de lire la configuration du cluster $1}, @@ -230,7 +234,7 @@ my %message = ( 'autofailover_init_pol' => q{Vérifier toutes les $1ms}, 'autofailover_init_ret' => q{Noeuds défaillants seront rejugés $1 fois avec $2 ms sommeil}, 'autofailover_init_set' => q{Fournisseurs d'expédition échoué $1 être échoué sur}, - 'autofailover_load_cluster' => q{$1 Slony-I v$2 grappe "$3" avec $4 noeuds}, + 'autofailover_load_cluster' => q{$1 Slony-I v$2 grappe "$3" avec $4 noeuds lire à noeud $5}, 'autofailover_proceed' => q{De procéder à failover:}, 'autofailover_detail' => q{Noeud défaillant: $1, noeud de sauvegarde: $2}, 'autofailover_halt' => q{Noeuds Impossible d'effectuer une failover pour $1 échoué}, @@ -240,11 +244,13 @@ my %message = ( 'autofailover_promote_found' => q{Utilisation précédemment trouvé plus à jour abonné à tous les jeux ($1) sur le noeud ne répond pas $2}, 'autofailover_promote_skip' => q{Pas de failover requis pour le noeud 
ne répond pas $1 car il n'est ni l'origine ou un transitaire active de tous les jeux}, 'autofailover_promote_fail' => q{Impossible de trouver le noeud de sauvegarde approprié pour la promotion}, - 'autofailover_node_detail' => q{Noeud $1 est souscrit à $2 ($3) noeud $4 et fournit des ensembles de $5 à retard $6 ($7 événements)}, - 'autofailover_node_detail_subonly' => q{Noeud $1 est souscrit à $2 ($3) et le noeud $4 est un abonné à retard $5 ($6 événements)}, - 'autofailover_promote_best' => q{Meilleur noeud pour la promotion est noeud $1 suivants = $2 ($3 événements)}, - 'autofailover_unresponsive' => q{Noeud ne répond pas détecté: $1}, + 'autofailover_node_detail' => q{Noeud $1 est souscrit à $2 ($3) noeud $4 et fournit des ensembles de $5 à retard $6 secondes (en cas d'événement $7)}, + 'autofailover_promote_best' => q{Meilleur noeud pour la promotion est noeud $1 décalage = $2 secondes (événement $3)}, + 'autofailover_promote_unsuitable' => q{Noeud $1 est inadapté pour la promotion}, + 'autofailover_unresponsive' => q{Noeud d'origine ne répond pas détecté: $1}, + 'autofailover_unresponsive_prov' => q{Noeud fournisseur ne répond pas détecté: $1}, 'autofailover_unresponsive_subonly'=> q{Abonné ne répond pas détecté seulement de noeud: $1}, + 'autofailover_recovery_subonly' => q{Recouvrement détecté de l'abonné ne répond pas seulement auparavant de noeud: $1}, 'autofailover_pspec_check_fail' => q{Impossible de se connecter au noeud $1: $2}, 'autofailover_pspec_check' => q{Obtenir un jugement objectif à partir d'autres noeuds, les noeuds qui ne répondent pas apparentes sont : $1 (Noeuds défaillants = $2 de $3)}, 'autofailover_pspec_check_sleep' => q{Dormir pour $1 ms}, @@ -359,7 +365,7 @@ else { } } -# Fill in any missing values with defaults or display message and die +# Display message and die if any of the required configuration variables are missing if (!defined($g_dbname)) { println(lookupMsg('err_no_database')); die lookupMsg('usage'); @@ -603,6 +609,7 @@ 
cleanExit(0, "script completion"); ########################################################################################################################################### +# Display exit message, insert log file into database if requested, delete any pid files and exit with the requested code sub cleanExit { my $exit_code = shift; my $type = shift; @@ -622,10 +629,12 @@ sub cleanExit { exit($exit_code); } +# Exit on caught signal sub sigExit { cleanExit(100,'signal'); } +# Check we can reach each node in the cluster and that it contains the Slony schema sub checkNodes { my $clname = shift; my $dbuser = shift; @@ -712,6 +721,18 @@ sub checkNodes { return ($result_count, $critical_count); } +# Load information on all nodes in the Slony cluster into global @g_cluster: +# 0) no_id = Node id of this node +# 1) no_provs = Comma separated list of all provider node ids +# 2) no_conninfo = Conninfo as recorded in sl_path +# 3) origin_sets = Comma separated list of set ids originating on this node +# 4) no_name = Node name; this is extracted from text between parentheses in sl_node.no_comment +# 5) no_sub_tree = Text representation of subscriptions in the form n->(s, ..) +# 6) no_status = Text representing the state of the node; either ACTIVE,INACTIVE or FAILED +# 7) sub_sets = Comma separated list of all set ids this node is subscribed to +# 8) no_sub_tree_name = As per no_sub_tree but holds textual names extracted from sl_node.no_comment +# 9) prov_sets_active = Comma separated list of all set ids this node is actively forwarding +# 10) prov_sets = Comma separated list of all set ids this node is subscribed to and able to forward sub loadCluster { my $dbconninfo = shift; my $clname = shift; @@ -748,14 +769,14 @@ sub loadCluster { SELECT a.no_id, b.sub_provider AS no_prov, COALESCE(c.pa_conninfo,(SELECT pa_conninfo FROM $qw_clname.sl_path WHERE pa_server = $qw_clname.getlocalnodeid(?) 
LIMIT 1)) AS no_conninfo, array_to_string(array(SELECT set_id FROM $qw_clname.sl_set WHERE set_origin = a.no_id ORDER BY set_id),',') AS origin_sets, - string_agg(CASE WHEN b.sub_receiver = a.no_id AND b.sub_forward AND b.sub_active THEN b.sub_set::text END, ',' ORDER BY b.sub_set) AS prov_sets, + string_agg(CASE WHEN b.sub_receiver = a.no_id AND b.sub_forward AND b.sub_active THEN b.sub_set::text END, ',' ORDER BY b.sub_set) AS sub_sets, coalesce(trim(regexp_replace(substring(a.no_comment from E'\\\\((.+)\\\\)'), '[^0-9A-Za-z]','_','g')), 'node' || a.no_id) AS no_name, 'n' || b.sub_provider || '->(' || string_agg(CASE WHEN b.sub_receiver = a.no_id THEN 's' || b.sub_set END,',' ORDER BY b.sub_set,',') || ')' AS sub_tree, coalesce(trim(regexp_replace(substring(d.no_comment from E'\\\\((.+)\\\\)'), '[^0-9A-Za-z]','_','g')), 'node' || b.sub_provider, '') || '->(' || string_agg(CASE WHEN b.sub_receiver = a.no_id THEN coalesce(trim(regexp_replace(e.set_comment, '[^0-9A-Za-z]', '_', 'g')), 'set' || b.sub_set) END,',' ORDER BY b.sub_set) || ')' AS sub_tree_name, CASE " . ((substr($version,0,3) >= 2.2) ? "WHEN a.no_failed THEN 'FAILED' " : "") . "WHEN a.no_active THEN 'ACTIVE' ELSE 'INACTIVE' END AS no_status, array_to_string(array(SELECT DISTINCT sub_set::text FROM $qw_clname.sl_subscribe WHERE sub_provider = a.no_id AND sub_active ORDER BY sub_set),',') AS prov_sets_active, - string_agg(CASE WHEN b.sub_receiver = a.no_id THEN b.sub_set::text END,',' ORDER BY b.sub_set,',') AS sub_sets + string_agg(CASE WHEN b.sub_receiver = a.no_id THEN b.sub_set::text END,',' ORDER BY b.sub_set,',') AS prov_sets FROM $qw_clname.sl_node a LEFT OUTER JOIN $qw_clname.sl_subscribe b ON a.no_id = b.sub_receiver LEFT OUTER JOIN $qw_clname.sl_path c ON c.pa_server = a.no_id AND c.pa_client = $qw_clname.getlocalnodeid(?) 
@@ -771,10 +792,10 @@ sub loadCluster { no_name, nullif(string_agg(sub_tree, ';' ORDER BY sub_tree),'') AS no_sub_tree, no_status, - nullif(string_agg(prov_sets::text, ',' ORDER BY prov_sets),'') AS prov_sets, + nullif(string_agg(sub_sets::text, ',' ORDER BY prov_sets),'') AS sub_sets, nullif(string_agg(sub_tree_name, ';' ORDER BY sub_tree_name),'') AS no_sub_tree_name, nullif(string_agg(prov_sets_active::text, ',' ORDER BY prov_sets_active),'') AS prov_sets_active, - nullif(string_agg(sub_sets::text, ',' ORDER BY sub_sets),'') AS no_subs + nullif(string_agg(prov_sets::text, ',' ORDER BY sub_sets),'') AS prov_sets FROM z GROUP BY no_id, no_conninfo, no_name, no_status"; $sth = $dbh->prepare($query); @@ -784,6 +805,7 @@ sub loadCluster { $sth->execute(); while (my @node = $sth->fetchrow) { + #printlogln($prefix,$logfile,$log_prefix,lookupMsg('dbg_generic', join(' - ', @node))); push(@g_cluster, \@node); } @@ -798,9 +820,9 @@ sub loadCluster { die lookupMsg('err_pgsql_connect'); } else { - if (substr($version,0,1) < 2) { - printlogln($prefix,$logfile,$log_prefix,lookupMsg('wrn_not_tested', $version)); - } + #if (substr($version,0,1) < 2) { + # printlogln($prefix,$logfile,$log_prefix,lookupMsg('wrn_not_tested', $version)); + #} if (($g_use_try_blocks) && ($g_lockset_method eq 'multiple') && (substr($version,0,3) <= 9.9)) { # It's currently not possible to lock multiple sets at a time within a try block (v2.2.2), leave the logic in and set a high version number for now. 
printlogln($prefix,$logfile,$log_prefix, lookupMsg('note_multiple_try', $version)); @@ -829,6 +851,7 @@ sub loadCluster { return (scalar(@g_cluster), $version); } +# Load all sets originating on a node into global @g_sets sub loadSets { my $dbconninfo = shift; my $clname = shift; @@ -875,6 +898,8 @@ sub loadSets { return scalar(@g_sets); } +# Load information regarding replication lag from sl_status into @g_lags +# If loading from a node that is not the intended origin then this information might not be that accurate/useful sub loadLag { my $dbconninfo = shift; my $clname = shift; @@ -923,6 +948,7 @@ sub loadLag { return scalar(@g_lags); } +# Prompt user for nodes to and from in interactive mode and do some checking sub chooseNode { my $type = shift; my $prefix = shift; @@ -1014,6 +1040,7 @@ sub chooseNode { return $choice; } +# Write a slonik preamble section using information pulled into @g_cluster and @g_sets by loadCluster() and loadSets() functions sub writePreamble { my $filename = shift; my $dbconninfo = shift; @@ -1084,6 +1111,7 @@ sub writePreamble { return $success; } +# Write slonik commands to move sets sub writeMoveSet { my $prefix = shift; my $dbconninfo = shift; @@ -1351,6 +1379,7 @@ sub writeMoveSet { return $filename; } +# Write slonik commands to failover sets sub writeFailover { my $prefix = shift; my $dbconninfo = shift; @@ -1593,6 +1622,7 @@ sub writeFailover { } +# Used to return informational text from the %message hashes, pretty much entirely stolen from check_postgres (http://bucardo.org) sub lookupMsg { my $name = shift || '?'; my $line_call; @@ -1620,6 +1650,7 @@ sub lookupMsg { return $text; } +# Trim quotes off a string sub qtrim { my $string = shift; $string =~ s/^('|")+//; @@ -1627,6 +1658,7 @@ sub qtrim { return $string; } +# Trim a string sub trim($) { my $string = shift; $string =~ s/^\s+//; @@ -1634,10 +1666,12 @@ sub trim($) { return $string; } +# Print command with a linefeed sub println { print ((@_ ? 
join($/, @_) : $_), $/); } +# Print to stdout and the logfile, doing some replacements along the way for logging sub printlog { my $prefix = shift; my $logfile_name = shift; @@ -1681,10 +1715,12 @@ sub printlog { } } +# Printlog command with a linefeed sub printlogln { printlog ($_[0], $_[1], $_[2], $_[3] . $/); } +# Insert details of any action into a database table sub logDB { my $dbconninfo = shift; my $dbuser = shift; @@ -1748,6 +1784,7 @@ sub logDB { return true; } +# Returns a uuid used for the failover script directory sub getUUID { my $date_string = shift; my $g_ug = new Data::UUID; @@ -1756,6 +1793,7 @@ sub getUUID { return $g_uuid_str; } +# Write out a PID file sub writePID { my $prefix = shift; my $logfile = shift; @@ -1785,6 +1823,7 @@ sub writePID { return $success; } +# Remove the PID file sub removePID { my $prefix = shift; my $logfile = shift; @@ -1817,6 +1856,7 @@ sub removePID { return $success; } +# Check all sets from an originating node are contained in the list provided by another node sub checkProvidesAllSets { my ($originSets, $providerSets) = @_; my %test_hash; @@ -1827,6 +1867,7 @@ sub checkProvidesAllSets { return !%test_hash; # return false if any keys are left in the hash } +# Check any sets from an originating node are contained in the list subscribed to by another node sub checkSubscribesAnySets { my ($originSets, $subscriberSets) = @_; my $before; @@ -1840,6 +1881,7 @@ sub checkSubscribesAnySets { return ($before != $after); # return false if no keys were removed from the hash } +# Read configuration details from a configuration file sub getConfig { my $cfgfile = shift; my @fields; @@ -1974,6 +2016,7 @@ sub getConfig { return $success; } +# Interpret a textual representation of a boolean value sub checkBoolean { my $text = shift; my $value = undef; @@ -1986,6 +2029,7 @@ sub checkBoolean { return $value; } +# Check if a text value is a valid integer sub checkInteger { my $integer = shift; my $value = undef; @@ -1996,7 +2040,7 @@ 
sub checkInteger { return $value; } - +# Run a slonik command and capture all output via autoflushing channel sub runSlonik { my $script = shift; my $prefix = shift; @@ -2021,6 +2065,7 @@ sub runSlonik { return $success; } +# Experimental logic to watch the cluster status and perform an automatic failover sub autoFailover { my $dbconninfo = shift; my $clname = shift; @@ -2035,6 +2080,7 @@ sub autoFailover { my $actions; my $current_retry; my $cluster_loaded; + my $cluster_loaded_from; my @cluster; my $node_count; my $version; @@ -2057,6 +2103,7 @@ sub autoFailover { @cluster = @g_cluster; die lookupMsg('err_cluster_lone') if ($node_count == 1); $cluster_loaded = true; + $cluster_loaded_from = 'conninfo specified in config'; }; if ($@) { printlogln($prefix,$logfile,$log_prefix, lookupMsg('load_cluster_fail', 'from supplied configuration')); @@ -2075,6 +2122,7 @@ sub autoFailover { @cluster = @g_cluster; die lookupMsg('err_cluster_lone') if ($node_count == 1); $cluster_loaded = true; + $cluster_loaded_from = $_->[0]; }; if ($@) { printlogln($prefix,$logfile,$log_prefix, lookupMsg('load_cluster_fail', 'from node ' . $_->[0] . ': trying next node')); @@ -2088,7 +2136,7 @@ sub autoFailover { } if ($cluster_loaded) { - printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_load_cluster', (!defined($cluster_time) ? "Loaded" : "Reloaded"), $version, $clname, $node_count)); + printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_load_cluster', (!defined($cluster_time) ? 
"Loaded" : "Reloaded"), $version, $clname, $node_count, $cluster_loaded_from)); $cluster_time = time(); } else { @@ -2129,6 +2177,7 @@ sub autoFailover { printlogln($prefix,$logfile,$log_prefix,lookupMsg('err_execute_fail', 'slonik script', $g_script)); } $cluster_loaded = false; + #print "SCRIPT: $g_script\n"; #exit(0); } @@ -2146,6 +2195,7 @@ sub autoFailover { } } +# Count of failed and live nodes to perform very basic split-brain check sub checkSplit { my $prefix = shift; my $logfile = shift; @@ -2171,7 +2221,7 @@ sub checkSplit { } # Check each nodes perspective of the failure to try to ensure the issue isn't that this script just can't connect to the origin/provider -# The idea here is just to wait for a short period of time and see if the lag time for the nodes has increased by the same amount. +# The idea here is just to wait for a short period of time and see if the lag time for the nodes has increased by the same amount sub checkPerspective { my $clname = shift; my $dbuser = shift; @@ -2305,6 +2355,7 @@ sub checkPerspective { return $agreed; } +# Check if any nodes have failed by connecting and probing the Slony schema sub checkFailed { my $clname = shift; my $dbuser = shift; @@ -2350,6 +2401,11 @@ sub checkFailed { $sth->finish; $dbh->disconnect(); + + if (exists($g_unresponsive_subonly{$_->[0]})) { + printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_recovery_subonly', $_->[0])); + delete $g_unresponsive_subonly{$_->[0]}; + } }; if ($@) { if ($g_debug) { @@ -2357,13 +2413,21 @@ sub checkFailed { } push(@g_unresponsive, \@$_); if ((defined($_->[3])) || ($g_autofailover_provs && defined($_->[9]))) { - printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_unresponsive', $_->[0])); + if (defined($_->[3])) { + printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_unresponsive', $_->[0])); + } + else { + printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_unresponsive_prov', $_->[0])); + } unless 
($g_failover_method ne 'new' && !defined($_->[3])) { $prov_failed++; } } else { - printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_unresponsive_subonly', $_->[0])); + if (!exists($g_unresponsive_subonly{$_->[0]})) { + printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_unresponsive_subonly', $_->[0])); + $g_unresponsive_subonly{$_->[0]} = true; + } if ($g_fail_subonly) { $subonly_failed++; } @@ -2384,6 +2448,7 @@ sub checkFailed { } } +# Attempt to try and find the most suitable backup node for a failed node sub findBackup { my $clname = shift; my $dbuser = shift; @@ -2399,11 +2464,13 @@ sub findBackup { my $qw_clname; my $result_count = 0; my $lowest_lag_time; - my $lowest_lag_events; + my $latest_last_event; my $best_node_id; my $best_node_is_direct; + my $best_node_can_forward; my @sets_from; my @sets_to; + my @sets_to_prov; my %backup_for_set_chosen; undef %g_backups; @@ -2415,7 +2482,9 @@ sub findBackup { undef $best_node_id; $lowest_lag_time = (1<<$Config{ivsize}*8-1)-1; - $lowest_lag_events = $lowest_lag_time; + $latest_last_event = 0; + $best_node_is_direct = false; + $best_node_can_forward = false; if (defined($_->[9]) && (exists $backup_for_set_chosen{$_->[9]})) { $best_node_id = $backup_for_set_chosen{$_->[9]}; @@ -2427,45 +2496,116 @@ sub findBackup { if ($g_debug) { printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_check_sub',$subscriber->[0])); } - + + # Here the strings containing the sets are converted to arrays + # Origin / Forwarded sets + undef @sets_to_prov; + if (defined($subscriber->[10])) { + @sets_to_prov = split(',',$subscriber->[10]); + } + if (defined($subscriber->[3])) { + if (@sets_to_prov) { + @sets_to_prov = (@sets_to_prov, split(',',$subscriber->[3])); + } + else { + @sets_to_prov = split(',',$subscriber->[3]); + } + } + if (!defined($subscriber->[10]) && !defined($subscriber->[3])) { + @sets_to_prov = (0); + } + + # Origin / Subscribed sets. 
+ undef @sets_to; + if (defined($subscriber->[7])) { + @sets_to = split(',',$subscriber->[7]); + } + if (defined($subscriber->[3])) { + if (@sets_to) { + @sets_to = (@sets_to, split(',',$subscriber->[3])); + } + else { + @sets_to = split(',',$subscriber->[3]); + } + } + if (!defined($subscriber->[7]) && !defined($subscriber->[3])) { + @sets_to = (0); + } + + # Sets provided by the failed node. + undef @sets_from; + if (defined($_->[9])) { + @sets_from = split(',',$_->[9]); + } + else { + @sets_from = (0); + @sets_to = (0); + } + $dsn = "DBI:Pg:$subscriber->[2]"; eval { $dbh = DBI->connect($dsn, $dbuser, $dbpass, {RaiseError => 1}); $qw_clname = $dbh->quote_identifier("_" . $clname); - $query = "SELECT extract(epoch from a.st_lag_time), a.st_lag_num_events, (a.st_received = ?) AS direct - FROM $qw_clname.sl_status a - INNER JOIN $qw_clname.sl_subscribe b ON b.sub_provider = a.st_received AND b.sub_receiver = a.st_origin - WHERE b.sub_active - GROUP BY a.st_lag_time, a.st_lag_num_events, a.st_received;"; + #print "\tNODE " . $subscriber->[0] . ") SETS TO = " . join(',', @sets_to) . " SETS FROM = " . join(',', @sets_from) . " SETS TO PROV = " . join(',', @sets_to_prov) . "\n"; + + if (defined($subscriber->[3]) && checkProvidesAllSets(\@sets_from, \@sets_to)) { + $query = "SELECT 0, ev_seqno, (ev_origin = ?) + FROM $qw_clname.sl_event + WHERE ev_origin = $qw_clname.getlocalnodeid(?) + ORDER BY ev_seqno DESC LIMIT 1"; + } + else { + $query = "SELECT extract(epoch from (current_timestamp-a.con_timestamp)), a.con_seqno, (a.con_origin = ?) AS direct + FROM $qw_clname.sl_confirm a + INNER JOIN $qw_clname.sl_event b on b.ev_seqno = a.con_seqno AND a.con_origin = b.ev_origin + INNER JOIN $qw_clname.sl_subscribe c ON c.sub_provider = a.con_origin AND c.sub_receiver = a.con_received + WHERE c.sub_active AND a.con_received = $qw_clname.getlocalnodeid(?) 
+ ORDER BY a.con_seqno DESC LIMIT 1;"; + } $sth = $dbh->prepare($query); $sth->bind_param(1, $_->[0]); + $sth->bind_param(2, "_" . $clname); $sth->execute(); while (my @subinfo = $sth->fetchrow) { - - undef @sets_from; - if (defined($_->[9])) { - printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_node_detail', $subscriber->[0], ($subinfo[2]?"directly":"indirectly"), (defined($_->[3])?"origin":"provider"), $_->[0], $subscriber->[7], $subinfo[0], $subinfo[1])); - @sets_from = split(',',$_->[9]); - @sets_to = split(',',$subscriber->[7]); - } - elsif ($g_fail_subonly) { - # Subscriber only node will have no active sets forwarding sets to check - printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_node_detail_subonly', $subscriber->[0], ($subinfo[2]?"directly":"indirectly"), (defined($_->[3])?"origin":"provider"), $_->[0], $subinfo[0], $subinfo[1])); - @sets_from = (0); - @sets_to = (0); + # If the failed provider node isn't an origin for any sets, we classify any direct subscribers to it as indirect + # because they are indirect to the origin. 
+ if ($subinfo[2] && defined($_->[9]) && !defined($_->[3])) { + $subinfo[2] = false; } - if ((checkProvidesAllSets(\@sets_from, \@sets_to)) && (($subinfo[0] < $lowest_lag_time && ($subinfo[2] || !defined($best_node_id))) || (!$best_node_is_direct && $subinfo[2]))) { + printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_node_detail', + $subscriber->[0], + ($subinfo[2]?"directly":"indirectly"), + (defined($_->[3])?"origin":(defined($_->[9])?"provider":"subscriber only")), + $_->[0], + (defined($subscriber->[10])?$subscriber->[10]:(defined($subscriber->[3])?$subscriber->[3]:"")), + $subinfo[0], $subinfo[1])); + + # Select this node as the backup node if: + # 1) The node is a subscriber to all sets on the failed node + # 2) In order of preference: + # The node is one of the direct subscribers to the failed node on the most recent event and is a forwarding provider + # OR + # The node is one of the direct subscribers to the failed node on the most recent event and is not a forwarding provider + # OR + # The node is an indirect subscriber to the failed node with the lowest lag time + if (!checkProvidesAllSets(\@sets_from, \@sets_to)) { + printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_promote_unsuitable', $subscriber->[0])); + } + elsif (($subinfo[2] && (!$best_node_is_direct || $subinfo[1] > $latest_last_event || (!$best_node_can_forward && checkProvidesAllSets(\@sets_from, \@sets_to_prov) && $subinfo[1] == $latest_last_event))) + || (!$best_node_is_direct && !$subinfo[2] && $subinfo[0] < $lowest_lag_time)) { printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_promote_best', $subscriber->[0], $subinfo[0], $subinfo[1])); $best_node_id = $subscriber->[0]; $lowest_lag_time = $subinfo[0]; - $lowest_lag_events = $subinfo[1]; - $best_node_is_direct = $subinfo[2]; + $latest_last_event = $subinfo[1]; + $best_node_is_direct = $subinfo[2]; + $best_node_can_forward = checkProvidesAllSets(\@sets_from, \@sets_to_prov); } + } }; if ($@) { @@ 
-2485,7 +2625,7 @@ sub findBackup { } } else { - printlog($prefix,$logfile,$log_prefix,lookupMsg('autofailover_promote_fail')); + printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_promote_fail')); } } else {