#!/usr/bin/perl
# Script: failover.pl
-# Copyright: 08/04/2012: v1.0.1 Glyn Astill <glyn@8kb.co.uk>
+# Copyright: 08/04/2012: v1.0.3 Glyn Astill <glyn@8kb.co.uk>
# Requires: Perl 5.10.1+, Data::UUID, File::Slurp
-# PostgreSQL 9.0+ Slony-I 2.0+
+# PostgreSQL 9.0+ Slony-I 1.2+ / 2.0+
#
# This script is a command-line utility to manage switchover and failover
# of replication sets in Slony-I clusters.
use strict;
use warnings;
+use experimental 'smartmatch';
use DBI;
use Getopt::Long qw/GetOptions/;
use Data::UUID;
use constant false => 0;
use constant true => 1;
-my $g_script_version = '1.0.1';
+my $g_script_version = '1.0.3';
my $g_debug = false;
my $g_pidfile = '/var/run/slony_failover.pid';
my $g_pid_written = false;
my $g_lockset_method = 'multiple';
my $g_logfile = 'failover.log';
my $g_input;
+my $g_silence_notice = false;
my $g_reason;
my $g_script;
my $g_node_from;
my $g_autofailover_retry_sleep = 1000;
my $g_autofailover_provs = false;
my $g_autofailover_config_any = true;
+my $g_autofailover_perspective_sleep = 20000;
+my $g_autofailover_majority_only = false;
+my $g_autofailover_is_quorum = false;
my @g_unresponsive;
+my %g_unresponsive_subonly;
my %g_backups;
my $g_pid = $$;
-
+# Hash containing messages used by lookupMsg()
my %message = (
'en' => {
- 'usage' => q{-h <host> -p <port> -db <database> -c <cluster name> -u <username> -P <password> (Password option not recommended; use pgpass instead)},
+ 'usage' => q{-h <host> -p <port> -db <database> -cl <cluster name> -u <username> -P <password> -f <config file> (Password option not recommended; use pgpass instead)},
'title' => q{Slony-I failover script version $1},
- 'cluster_fixed' => q{Aborting failover action: all origin nodes now responsive},
+ 'cluster_fixed' => q{Aborting failover action: all origin/provider nodes now responsive},
'cluster_failed' => q{Found $1 failed nodes, sleeping for $2ms before retry $3 of $4},
'load_cluster' => q{Getting a list of database nodes...},
'load_cluster_fail' => q{Unable to read cluster configuration $1},
- 'load_cluster_success' => q{Loaded Slony-I v$1 cluster "$2" with $3 nodes read from node at $4:$5/$6},
+ # $1 = Slony-I version, $2 = cluster name, $3 = node count, $4:$5/$6 = host:port/database
+ 'load_cluster_success' => q{Loaded Slony-I v$1 cluster "$2" with $3 nodes read from node at $4:$5/$6},
'lag_detail' => q{Current node lag information from configuration node:},
'script_settings' => q{Using $1 batches of lock set, $2 FAILOVER and $3},
'generated_script' => q{Generated script "$1"},
'autofailover_init_pol' => q{Polling every $1ms},
'autofailover_init_ret' => q{Failed nodes will be retried $1 times with $2ms sleep},
'autofailover_init_set' => q{Failed forwarding providers $1 be failed over},
- 'autofailover_load_cluster' => q{$1 Slony-I v$2 cluster "$3" with $4 nodes},
+ 'autofailover_load_cluster' => q{$1 Slony-I v$2 cluster "$3" with $4 nodes read from node $5},
'autofailover_proceed' => q{Proceeding with failover:},
'autofailover_detail' => q{Failed node: $1, Backup node: $2},
'autofailover_halt' => q{Unable to perform any failover for $1 failed nodes},
'autofailover_promote_found' => q{Using previously found most up to date subscriber to all sets ($1) on unresponsive node $2},
'autofailover_promote_skip' => q{No failover required for unresponsive node $1 as it is neither the origin or an active forwarder of any sets},
'autofailover_promote_fail' => q{Could not find suitable backup node for promotion},
- 'autofailover_node_detail' => q{Node $1 is $2 and provides sets $3 at $4 lag},
- 'autofailover_promote_best' => q{Best node for promotion is node $1 seq = $2},
- 'autofailover_unresponsive' => q{Detected unresponsive provider node: $1},
+ 'autofailover_node_detail' => q{Node $1 is $2 subscribed to ($3) node $4 and provides sets $5 at $6 seconds lag (on event $7)},
+ 'autofailover_promote_best' => q{Best node for promotion is node $1 lag = $2 seconds (event $3)},
+ 'autofailover_promote_unsuitable' => q{Node $1 is unsuitable for promotion},
+ 'autofailover_unresponsive' => q{Detected unresponsive origin node: $1},
+ 'autofailover_unresponsive_prov' => q{Detected unresponsive provider node: $1},
'autofailover_unresponsive_subonly'=> q{Detected unresponsive subscriber only node: $1},
+ 'autofailover_recovery_subonly' => q{Detected recovery of previously unresponsive subscriber only node: $1},
+ 'autofailover_pspec_check_fail' => q{Failed to connect to node $1: $2},
+ 'autofailover_pspec_check' => q{Getting objective judgement from other nodes, apparent unresponsive nodes are : $1 (Failed nodes = $2 of $3)},
+ 'autofailover_pspec_check_sleep' => q{Sleeping for $1 ms},
+ 'autofailover_pspec_check_data' => q{$1: Node $2 says lag from node $3 -> $4 is $5 seconds},
+ 'autofailover_pspec_check_true' => q{All detected failed nodes confirmed as lagging by other nodes},
+ 'autofailover_pspec_check_false' => q{Not all nodes confirmed as lagging},
+ 'autofailover_pspec_check_unknown' => q{Unable to confirm lag status of all nodes},
+ 'autofailover_split_check' => q{Surviving nodes ($1 of $2) are the majority},
+ 'autofailover_split_check_fail' => q{Surviving nodes ($1) are not the majority},
'interactive_head_id' => q{ID},
'interactive_head_name' => q{Name},
'interactive_head_status' => q{Status},
'interactive_write_script' => q{Writing a script to $1 node $2 to $3},
'interactive_check_nodes' => q{Checking availability of database nodes...},
'interactive_continue' => q{Do you wish to continue [y/n]?},
+ 'interactive_drop_nodes' => q{Do you want to also drop the failed nodes from the slony configuration [y/n]?},
'interactive_preserve' => q{Preserve subscription paths to follow the origin node (choose no if unsure) [y/n]?},
'interactive_aliases' => q{Generate aliases based on sl_node/set comments in parentheses (choose no if unsure) [y/n]?},
'interactive_summary' => q{Summary of nodes to be passed to failover:},
'wrn_node_unavailable' => q{WARNING: Node $1 unavailable},
'wrn_req_unavailable' => q{WARNING: Old origin node ($1) is available, however $2 subscribers are unavailable},
'wrn_not_tested' => q{WARNING: Script not tested with Slony-I v$1},
+ 'wrn_failover_issues' => q{WARNING: Slony-I v$1 may struggle to failover correctly with multiple failed nodes (affects v2.0-2.1)},
+ 'note_autofail_fwd_only' => q{NOTICE: Slony versions prior to 2.2 cannot initiate failover from only failed forwarding providers},
+ 'note_fail_sub_only' => q{NOTICE: Slony versions prior to 2.2 cannot failover subscriber only nodes, reverting to failover_offline_subscriber_only = false},
'note_multiple_try' => q{NOTICE: Cannot lock multiple sets within try blocks in version $1 dropping back to single sets},
'note_reshape_cluster' => q{NOTICE: Either drop the failed subscribers or bring them back up, then retry to MOVE SET},
'dbg_generic' => q{DEBUG: $1},
'exit' => q{Exited by $1}
},
'fr' => {
- 'usage' => q{-h <host> -p <port> -db <database> -c <cluster name> -u <username> -P <password> (Option mot de passe pas recommandé; utiliser pgpass place)},
+ 'usage' => q{-h <host> -p <port> -db <database> -cl <cluster name> -u <username> -P <password> -f <config file> (Option mot de passe pas recommandé; utiliser pgpass place)},
'title' => q{Slony-I failover (basculement) version de script $1},
- 'cluster_fixed' => q{Abandon de l'action de basculement: tous les noeuds d'origine maintenant sensible},
+ 'cluster_fixed' => q{Abandon de l'action de basculement: tous les noeuds d'origine / de fournisseurs maintenant sensible},
'cluster_failed' => q{Trouvé $1 échoué noeuds, couchage pour $2 ms avant réessayer $3 de $4},
'load_cluster' => q{Obtenir une liste de noeuds de base de donnees...},
'load_cluster_fail' => q{Impossible de lire la configuration du cluster $1},
'autofailover_init_pol' => q{Vérifier toutes les $1ms},
'autofailover_init_ret' => q{Noeuds défaillants seront rejugés $1 fois avec $2 ms sommeil},
'autofailover_init_set' => q{Fournisseurs d'expédition échoué $1 être échoué sur},
- 'autofailover_load_cluster' => q{$1 Slony-I v$2 grappe "$3" avec $4 noeuds},
+ 'autofailover_load_cluster' => q{$1 Slony-I v$2 grappe "$3" avec $4 noeuds lire à noeud $5},
'autofailover_proceed' => q{De procéder à failover:},
'autofailover_detail' => q{Noeud défaillant: $1, noeud de sauvegarde: $2},
'autofailover_halt' => q{Noeuds Impossible d'effectuer une failover pour $1 échoué},
'autofailover_promote_found' => q{Utilisation précédemment trouvé plus à jour abonné à tous les jeux ($1) sur le noeud ne répond pas $2},
'autofailover_promote_skip' => q{Pas de failover requis pour le noeud ne répond pas $1 car il n'est ni l'origine ou un transitaire active de tous les jeux},
'autofailover_promote_fail' => q{Impossible de trouver le noeud de sauvegarde approprié pour la promotion},
- 'autofailover_node_detail' => q{Noeud $1 est $2 et fournit des ensembles $3 à $4 retard},
- 'autofailover_promote_best' => q{Meilleur noeud pour la promotion est noeud $1 suivants = $2},
- 'autofailover_unresponsive' => q{Noeud ne répond pas détecté: $1},
+ 'autofailover_node_detail' => q{Noeud $1 est souscrit à $2 ($3) noeud $4 et fournit des ensembles de $5 à retard $6 secondes (en cas d'événement $7)},
+ 'autofailover_promote_best' => q{Meilleur noeud pour la promotion est noeud $1 décalage = $2 secondes (événement $3)},
+ 'autofailover_promote_unsuitable' => q{Noeud $1 est inadapté pour la promotion},
+ 'autofailover_unresponsive' => q{Noeud d'origine ne répond pas détecté: $1},
+ 'autofailover_unresponsive_prov' => q{Noeud fournisseur ne répond pas détecté: $1},
'autofailover_unresponsive_subonly'=> q{Abonné ne répond pas détecté seulement de noeud: $1},
+ 'autofailover_recovery_subonly' => q{Recouvrement détecté de l'abonné ne répond pas seulement auparavant de noeud: $1},
+ 'autofailover_pspec_check_fail' => q{Impossible de se connecter au noeud $1: $2},
+ 'autofailover_pspec_check' => q{Obtenir un jugement objectif à partir d'autres noeuds, les noeuds qui ne répondent pas apparentes sont : $1 (Noeuds défaillants = $2 de $3)},
+ 'autofailover_pspec_check_sleep' => q{Dormir pour $1 ms},
+ 'autofailover_pspec_check_data' => q{$1: Noeud $2 dit décalage de $3 -> $4 noeud est $5 secondes},
+ # French counterpart of "All detected failed nodes confirmed as lagging by other nodes"
+ 'autofailover_pspec_check_true' => q{Tous les noeuds défaillants détectés sont confirmés comme à la traîne par d'autres noeuds},
+ 'autofailover_pspec_check_false' => q{Pas tous les noeuds confirmé retard},
+ 'autofailover_pspec_check_unknown' => q{Impossible de confirmer le statut de latence de tous les noeuds},
+ 'autofailover_split_check' => q{Autres noeuds ($1 sur $2) sont la majorité},
+ 'autofailover_split_check_fail' => q{Autres noeuds ($1) ne sont pas la majorité},
'interactive_head_name' => q{Nom},
'interactive_head_status' => q{Statut},
'interactive_head_providers' => q{Fournisseur IDs},
'interactive_detail_3' => q{Abonnements: },
'interactive_choose_node' => q{S'il vous plaît choisissez le noeud à déplacer tous les ensembles $1:},
'interactive_confirm' => q{Vous avez choisi de passer ensembles $1 noeud $2 ($3). Est-ce correct [o/n]? },
+ 'interactive_drop_nodes' => q{Voulez-vous laisser tomber aussi les noeuds défaillants de la configuration de slony [o/n]?},
'interactive_action' => q{Meilleur plan d'action est le plus susceptible de faire une $1. Voulez-vous continuer [o/n]?},
'interactive_surrender' => q{Uable pour déterminer le meilleur plan d'action},
'interactive_write_script' => q{Rédaction d'un script à $1 $2 noeud à $3},
# French warning/notice texts; each must carry the same meaning and placeholders as its 'en' counterpart
'wrn_node_unavailable' => q{ATTENTION: Noeud $1 indisponible},
'wrn_req_unavailable' => q{ATTENTION: Noeud Old origine ($1) est disponible, mais $2 abonnés ne sont pas disponibles},
'wrn_not_tested' => q{ATTENTION: Script pas testé avec Slony-I v$1},
+ 'wrn_failover_issues' => q{ATTENTION: Slony-I v$1 peut lutter pour basculer correctement avec plusieurs noeuds défaillants (affecte v2.0-2.1)},
+ 'note_autofail_fwd_only' => q{AVIS: Versions antérieures à la 2.2 Slony ne peuvent pas initier le basculement de seulement échoué transmettre fournisseurs},
+ 'note_fail_sub_only' => q{AVIS: Versions antérieures à la 2.2 Slony ne peuvent pas basculer abonnes seuls les noeuds, revenant à failover_offline_subscriber_only = false},
'note_multiple_try' => q{AVIS: Vous ne pouvez pas verrouiller plusieurs ensembles dans des blocs try dans la version $1 de retomber à des jeux simples},
'note_reshape_cluster' => q{AVIS: Vous devez supprimer les abonnés défaillants ou les ramener, puis réessayez à MOVE SET},
'err_generic' => q{ERREUR: $1},
'err_cluster_empty' => q{ERREUR: Groupe chargé contient pas de noeuds},
'err_cluster_offline' => q{ERREUR: Groupe chargé contient pas de noeuds accessibles},
'err_cluster_lone' => q{ERREUR: Groupe chargé ne contient que 1 noeud},
- 'err_not_origin' => q{ERREUR: Nœud $1 n'est pas à l'origine de tous les jeux},
+ 'err_not_origin' => q{ERREUR: Noeud $1 n'est pas à l'origine de tous les jeux},
'err_not_provider' => q{ERREUR: Noeud $1 n'est pas un fournisseur de tous les jeux},
'err_not_provider_sets' => q{ERREUR: Noeud $1 ne fournit pas les ensembles nécessaires: le besoin ($2), mais fournit ($3)},
'err_no_configuration' => q{ERREUR: Impossible de lire la configuration pour le noeud $1},
}
}
-# Fill in any missing values with defaults or display message and die
+# Display message and die if any of the required configuration variables are missing
if (!defined($g_dbname)) {
println(lookupMsg('err_no_database'));
die lookupMsg('usage');
printlogln($g_prefix,$g_logfile,$g_log_prefix,lookupMsg('err_cluster_empty'));
cleanExit(3, "system");
}
-elsif (substr($g_version,0,1) < 2) {
- printlogln($g_prefix,$g_logfile,$g_log_prefix,lookupMsg('wrn_not_tested', $g_version));
-}
else {
printlogln($g_prefix,$g_logfile,$g_log_prefix,lookupMsg('load_cluster_success', $g_version, $g_clname, $g_node_count, $g_dbhost, $g_dbport, $g_dbname));
printlogln($g_prefix,$g_logfile,$g_log_prefix,lookupMsg('script_settings', $g_lockset_method, $g_failover_method, uc($g_resubscribe_method)));
printlogln($g_prefix,$g_logfile,$g_log_prefix,"\t" . lookupMsg('interactive_failover_detail_3'));
printlogln($g_prefix,$g_logfile,$g_log_prefix,"\t" . lookupMsg('interactive_failover_detail_4'));
+ printlog($g_prefix,$g_logfile,$g_log_prefix,lookupMsg('interactive_drop_nodes'));
+ $g_input = <>;
+ # Accept a lone y/Y (yes) or o/O (oui); "|" is a literal inside a character class, so it must not be listed.
+ # The defined() guard avoids matching against undef when input hits end-of-file.
+ if (defined($g_input) && ($g_input =~ /^[YO]$/i)) {
+ $g_drop_failed = true;
+ }
+
printlog($g_prefix,$g_logfile,$g_log_prefix,lookupMsg('interactive_reason'));
$g_reason = <>;
printlog($g_prefix,$g_logfile,$g_log_prefix,lookupMsg('interactive_continue'));
###########################################################################################################################################
+# Display exit message, insert log file into database if requested, delete any pid files and exit with the requested code
sub cleanExit {
my $exit_code = shift;
my $type = shift;
exit($exit_code);
}
+# Exit on caught signal
sub sigExit {
    # Hand over to cleanExit() with the fixed exit code 100 and the exit
    # type 'signal'; any arguments passed to this sub are ignored.
    my ($exit_code, $exit_type) = (100, 'signal');
    cleanExit($exit_code, $exit_type);
}
+# Check we can reach each node in the cluster and that it contains the Slony schema
sub checkNodes {
my $clname = shift;
my $dbuser = shift;
return ($result_count, $critical_count);
}
+# Load information on all nodes in the Slony cluster into global @g_cluster:
+# 0) no_id = Node id of this node
+# 1) no_provs = Comma separated list of all provider node ids
+# 2) no_conninfo = Conninfo as recorded in sl_path
+# 3) origin_sets = Comma separated list of set ids originating on this node
+# 4) no_name = Node name; this is extracted from text between parentheses in sl_node.no_comment
+# 5) no_sub_tree = Text representation of subscriptions in the form n<provider node id>->(s<set id>, ..)
+# 6) no_status = Text representing the state of the node; either ACTIVE,INACTIVE or FAILED
+# 7) sub_sets = Comma separated list of all set ids this node is subscribed to
+# 8) no_sub_tree_name = As per no_sub_tree but holds textual names extracted from sl_node.no_comment
+# 9) prov_sets_active = Comma separated list of all set ids this node is actively forwarding
+# 10) prov_sets = Comma separated list of all set ids this node is subscribed to and able to forward
sub loadCluster {
my $dbconninfo = shift;
my $clname = shift;
SELECT a.no_id, b.sub_provider AS no_prov,
COALESCE(c.pa_conninfo,(SELECT pa_conninfo FROM $qw_clname.sl_path WHERE pa_server = $qw_clname.getlocalnodeid(?) LIMIT 1)) AS no_conninfo,
array_to_string(array(SELECT set_id FROM $qw_clname.sl_set WHERE set_origin = a.no_id ORDER BY set_id),',') AS origin_sets,
- string_agg(CASE WHEN b.sub_receiver = a.no_id AND b.sub_forward AND b.sub_active THEN b.sub_set::text END, ',' ORDER BY b.sub_set) AS prov_sets,
+ string_agg(CASE WHEN b.sub_receiver = a.no_id AND b.sub_forward AND b.sub_active THEN b.sub_set::text END, ',' ORDER BY b.sub_set) AS sub_sets,
coalesce(trim(regexp_replace(substring(a.no_comment from E'\\\\((.+)\\\\)'), '[^0-9A-Za-z]','_','g')), 'node' || a.no_id) AS no_name,
'n' || b.sub_provider || '->(' || string_agg(CASE WHEN b.sub_receiver = a.no_id THEN 's' || b.sub_set END,',' ORDER BY b.sub_set,',') || ')' AS sub_tree,
coalesce(trim(regexp_replace(substring(d.no_comment from E'\\\\((.+)\\\\)'), '[^0-9A-Za-z]','_','g')), 'node' || b.sub_provider, '')
|| '->(' || string_agg(CASE WHEN b.sub_receiver = a.no_id THEN coalesce(trim(regexp_replace(e.set_comment, '[^0-9A-Za-z]', '_', 'g')), 'set' || b.sub_set) END,',' ORDER BY b.sub_set) || ')' AS sub_tree_name,
CASE " . ((substr($version,0,3) >= 2.2) ? "WHEN a.no_failed THEN 'FAILED' " : "") . "WHEN a.no_active THEN 'ACTIVE' ELSE 'INACTIVE' END AS no_status,
array_to_string(array(SELECT DISTINCT sub_set::text FROM $qw_clname.sl_subscribe WHERE sub_provider = a.no_id AND sub_active ORDER BY sub_set),',') AS prov_sets_active,
- string_agg(CASE WHEN b.sub_receiver = a.no_id THEN b.sub_set::text END,',' ORDER BY b.sub_set,',') AS sub_sets
+ string_agg(CASE WHEN b.sub_receiver = a.no_id THEN b.sub_set::text END,',' ORDER BY b.sub_set,',') AS prov_sets
FROM $qw_clname.sl_node a
LEFT OUTER JOIN $qw_clname.sl_subscribe b ON a.no_id = b.sub_receiver
LEFT OUTER JOIN $qw_clname.sl_path c ON c.pa_server = a.no_id AND c.pa_client = $qw_clname.getlocalnodeid(?)
no_name,
nullif(string_agg(sub_tree, ';' ORDER BY sub_tree),'') AS no_sub_tree,
no_status,
- nullif(string_agg(prov_sets::text, ',' ORDER BY prov_sets),'') AS prov_sets,
+ nullif(string_agg(sub_sets::text, ',' ORDER BY sub_sets),'') AS sub_sets,
nullif(string_agg(sub_tree_name, ';' ORDER BY sub_tree_name),'') AS no_sub_tree_name,
nullif(string_agg(prov_sets_active::text, ',' ORDER BY prov_sets_active),'') AS prov_sets_active,
- nullif(string_agg(sub_sets::text, ',' ORDER BY sub_sets),'') AS no_subs
+ nullif(string_agg(prov_sets::text, ',' ORDER BY prov_sets),'') AS prov_sets
FROM z GROUP BY no_id, no_conninfo, no_name, no_status";
$sth = $dbh->prepare($query);
$sth->execute();
while (my @node = $sth->fetchrow) {
+ #printlogln($prefix,$logfile,$log_prefix,lookupMsg('dbg_generic', join(' - ', @node)));
push(@g_cluster, \@node);
}
die lookupMsg('err_pgsql_connect');
}
else {
- if (substr($version,0,1) < 2) {
- printlogln($prefix,$logfile,$log_prefix,lookupMsg('wrn_not_tested', $version));
- }
+ #if (substr($version,0,1) < 2) {
+ # printlogln($prefix,$logfile,$log_prefix,lookupMsg('wrn_not_tested', $version));
+ #}
if (($g_use_try_blocks) && ($g_lockset_method eq 'multiple') && (substr($version,0,3) <= 9.9)) {
# It's currently not possible to lock multiple sets at a time within a try block (v2.2.2), leave the logic in and set a high version number for now.
printlogln($prefix,$logfile,$log_prefix, lookupMsg('note_multiple_try', $version));
$g_failover_method = 'new';
$g_resubscribe_method = 'resubscribe';
}
+ else {
+ unless ($g_silence_notice) {
+ if ((substr($version,0,3) >= 2.0) && (substr($version,0,3) < 2.2)) {
+ printlogln($prefix,$logfile,$log_prefix,lookupMsg('wrn_failover_issues', $version));
+ }
+ printlogln($prefix,$logfile,$log_prefix,lookupMsg('note_autofail_fwd_only'));
+ $g_silence_notice = true;
+ }
+ if ($g_fail_subonly) {
+ printlogln($prefix,$logfile,$log_prefix,lookupMsg('note_fail_sub_only'));
+ $g_fail_subonly = false;
+ }
+ }
+
}
return (scalar(@g_cluster), $version);
}
+# Load all sets originating on a node into global @g_sets
sub loadSets {
my $dbconninfo = shift;
my $clname = shift;
return scalar(@g_sets);
}
+# Load information regarding replication lag from sl_status into @g_lags
+# If loading from a node that is not the intended origin then this information might not be that accurate/useful
sub loadLag {
my $dbconninfo = shift;
my $clname = shift;
return scalar(@g_lags);
}
+# Prompt user for nodes to and from in interactive mode and do some checking
sub chooseNode {
my $type = shift;
my $prefix = shift;
return $choice;
}
+# Write a slonik preamble section using information pulled into @g_cluster and @g_sets by loadCluster() and loadSets() functions
sub writePreamble {
my $filename = shift;
my $dbconninfo = shift;
}
elsif (!$g_fail_subonly) {
foreach my $unresponsive (@g_unresponsive) {
- if (($_->[0] == $unresponsive->[0]) && !defined($_->[9])) {
+ if (($_->[0] == $unresponsive->[0]) && !defined($_->[9]) && ($g_failover_method eq 'new')) {
$line_prefix = "# (Node $_->[0] unavailable subscriber only) ";
}
}
return $success;
}
+# Write slonik commands to move sets
sub writeMoveSet {
my $prefix = shift;
my $dbconninfo = shift;
@subsets = (split(',', $setlist)) ;
if ($g_debug) {
- printlogln($prefix,$logfile,$log_prefix,lookupMsg('dbg_resubscribe', $_->[1], $_->[0]), $other_subs->[0], $other_subs->[4], $setlist, $setlist_name, $node, $node_name);
+ printlogln($prefix,$logfile,$log_prefix,lookupMsg('dbg_resubscribe', $_->[1], $_->[0], $other_subs->[0], $other_subs->[4], $setlist, $setlist_name, $node, $node_name));
}
if ($_->[0] ~~ @subsets) {
return $filename;
}
+# Write slonik commands to failover sets
sub writeFailover {
my $prefix = shift;
my $dbconninfo = shift;
my $event_node;
my ($year, $month, $day, $hour, $min, $sec) = (localtime(time))[5,4,3,2,1,0];
my $filetime = sprintf ("%02d_%02d_%04d_%02d:%02d:%02d", $day, $month+1, $year+1900, $hour, $min, $sec);
+ my $sets = false;
+
+ my $subprov_idx;
+ my @subprov_name;
+ my ($node, $setlist);
+ my ($node_name, $setlist_name);
+ my @subsets;
+ my @subsets_name;
+ my $set_idx;
+ my @dropped;
if (defined($from) && defined($to)) {
$filename = $prefix . "/" . $clname . "-failover_from_" . $from . "_to_" . $to . "_on_" . $filetime . ".scr";
$filename = $prefix . "/" . $clname . "-autofailover_on_" . $filetime . ".scr";
}
- unless (writePreamble($filename, $dbconninfo, $clname, $dbuser, $dbpass, false, $aliases, $prefix, $logfile, $log_prefix, false)) {
+ if ($g_failover_method ne 'new') {
+ # For pre 2.2 failover with multiple nodes, we attempt to resubscribe sets and drop other failed providers;
+ # This will never work as well as 2.2+ failover behaviour (in fact failover may not work at all in 2.0/2.1 with multiple failed nodes)
+ # We also need to define the sets in the preamble for this.
+ $sets = true;
+ }
+
+ unless (writePreamble($filename, $dbconninfo, $clname, $dbuser, $dbpass, $sets, $aliases, $prefix, $logfile, $log_prefix, false)) {
printlogln($prefix,$logfile,$log_prefix,lookupMsg('err_incomplete_preamble'));
}
printlogln($prefix,$logfile,$log_prefix,lookupMsg('dbg_failover_method',$g_failover_method));
}
+ # If we are on pre 2.2 we need to drop failed subscriber nodes first regardless
+ if ($g_failover_method ne 'new') {
+ foreach (@g_failed) {
+ if (!defined($_->[3])) {
+ foreach my $backup (@g_cluster) {
+ if ($backup->[0] == $g_backups{$_->[0]}) { # this backup node candidate is in the list of suitable nodes for {failed node}
+ foreach my $subscriber (@g_cluster) {
+ if (defined($subscriber->[1]) && $subscriber->[1] == $_->[0] && $subscriber->[0] != $backup->[0]) {
+ # mess here needs cleaning up
+ @subprov_name = (split(';', $subscriber->[8]));
+ $subprov_idx = 0;
+ foreach my $subprov (split(';', $subscriber->[5])) {
+ ($node, $setlist) = (split('->', $subprov)) ;
+ ($node_name, $setlist_name) = (split('->', $subprov_name[$subprov_idx])) ;
+ $subprov_idx++;
+ $node =~ s/n//g;
+
+ if ($node == $_->[0]) {
+ if ($aliases) {
+ print SLONFILE ("ECHO 'Resubscribing all sets on receiver $subscriber->[4] provided by other failed node $_->[4] to backup node $backup->[4]';\n");
+ }
+ else {
+ print SLONFILE ("ECHO 'Resubscribing all sets on receiver $subscriber->[0] provided by other failed node $_->[0] to backup node $backup->[0]';\n");
+ }
+ $setlist =~ s/(\)|\(|s)//g;
+ @subsets = (split(',', $setlist));
+ $setlist_name =~ s/(\)|\()//g;
+ @subsets_name = (split(',', $setlist_name));
+
+ $set_idx = 0;
+ foreach my $subset (@subsets) {
+ if ($aliases) {
+ print SLONFILE ("SUBSCRIBE SET (ID = \@$subsets_name[$set_idx], PROVIDER = \@$backup->[4], RECEIVER = \@$subscriber->[4], FORWARD = YES);\n");
+ print SLONFILE ("WAIT FOR EVENT (ORIGIN = \@$backup->[4], CONFIRMED = \@$subscriber->[4], WAIT ON = \@$backup->[4]);\n");
+ }
+ else {
+ print SLONFILE ("SUBSCRIBE SET (ID = $subset, PROVIDER = $backup->[0], RECEIVER = $subscriber->[0], FORWARD = YES);\n");
+ print SLONFILE ("WAIT FOR EVENT (ORIGIN = $backup->[0], CONFIRMED = $subscriber->[0], WAIT ON = $backup->[0]);\n");
+ }
+ $set_idx++;
+ }
+ print SLONFILE ("\n");
+ }
+ }
+
+ if ($aliases) {
+ print SLONFILE ("ECHO 'Dropping other failed node $_->[4] ($_->[0])';\n");
+ print SLONFILE ("DROP NODE (ID = \@$_->[4], EVENT NODE = \@$backup->[4]);\n\n");
+ }
+ else {
+ print SLONFILE ("ECHO 'Dropping other failed node $_->[0]';\n");
+ print SLONFILE ("DROP NODE (ID = $_->[0], EVENT NODE = $backup->[0]);\n\n");
+ }
+ push(@dropped, $_->[0]);
+ }
+ else {
+ # The node is failed, but there are no downstream subscribers
+ }
+ }
+ last;
+ }
+ }
+ }
+ }
+ }
+
foreach (@g_failed) {
- foreach my $backup (@g_cluster) {
- if ($backup->[0] == $g_backups{$_->[0]}) {
- ## Here we have both details of the backup node and the failed node
- if ($aliases) {
- print SLONFILE ("ECHO 'Failing over slony cluster from $_->[4] (id $_->[0]) to $backup->[4] (id $backup->[0])';\n");
+ if (($g_failover_method eq 'new') || defined($_->[3])) {
+ foreach my $backup (@g_cluster) {
+ if ($backup->[0] == $g_backups{$_->[0]}) {
+ ## Here we have both details of the backup node and the failed node
+ if ($aliases) {
+ print SLONFILE ("ECHO 'Failing over slony cluster from $_->[4] (id $_->[0]) to $backup->[4] (id $backup->[0])';\n");
+ }
+ else {
+ print SLONFILE ("ECHO 'Failing over slony cluster from node $_->[0] to node $backup->[0]';\n");
+ }
+ last;
}
- else {
- print SLONFILE ("ECHO 'Failing over slony cluster from node $_->[0] to node $backup->[0]';\n");
- }
- last;
}
}
}
print SLONFILE ("FAILOVER (\n\t");
$written = 0;
foreach (@g_failed) {
- foreach my $backup (@g_cluster) {
-
- if ($backup->[0] == $g_backups{$_->[0]}) {
- ## Here we have both details of the backup node and the failed node
- if ($g_failover_method eq 'new') {
- if( $written != 0 ) {
- print SLONFILE (",\n\t");
+ if (($g_failover_method eq 'new') || defined($_->[3])) {
+ foreach my $backup (@g_cluster) {
+ if ($backup->[0] == $g_backups{$_->[0]}) {
+ ## Here we have both details of the backup node and the failed node
+ if ($g_failover_method eq 'new') {
+ if( $written != 0 ) {
+ print SLONFILE (",\n\t");
+ }
+ print SLONFILE ("NODE = (");
}
- print SLONFILE ("NODE = (");
- }
- else {
- if( $written != 0 ) {
- print SLONFILE ("\n);\nFAILOVER (\n\t");
+ else {
+ if( $written != 0 ) {
+ print SLONFILE ("\n);\nFAILOVER (\n\t");
+ }
}
+ if ($aliases) {
+ print SLONFILE ("ID = \@$_->[4], BACKUP NODE = \@$backup->[4]");
+ }
+ else {
+ print SLONFILE ("ID = $_->[0], BACKUP NODE = $backup->[0]");
+ }
+ if ($g_failover_method eq 'new') {
+ print SLONFILE (")");
+ }
+ last;
}
- if ($aliases) {
- print SLONFILE ("ID = \@$_->[4], BACKUP NODE = \@$backup->[4]");
- }
- else {
- print SLONFILE ("ID = $_->[0], BACKUP NODE = $backup->[0]");
- }
- if ($g_failover_method eq 'new') {
- print SLONFILE (")");
- }
- last;
}
+ $written++;
}
- $written++;
}
- print SLONFILE ("\n);\n");
+ print SLONFILE ("\n);\n\n");
if ($g_drop_failed) {
-
-
if (($g_failover_method eq 'new') && (scalar(@g_failed) > 1)) {
foreach (@g_failed) {
if ($aliases) {
- print SLONFILE ("ECHO 'Dropping node $_->[4] ($_->[0])';\n");
+ print SLONFILE ("ECHO 'Dropping failed node $_->[4] ($_->[0])';\n");
}
else {
- print SLONFILE ("ECHO 'Dropping node $_->[0]';\n");
+ print SLONFILE ("ECHO 'Dropping failed node $_->[0]';\n");
}
}
if( $written != 0 ) {
print SLONFILE (",");
}
- ## Don;t bother being pissy and trying to define array values
+ ## Don't bother trying to define array values
#if ($aliases) {
# print SLONFILE "\@$_->[4]";
#}
#}
$written++;
}
- else {
+ elsif (($g_failover_method eq 'new') || defined($_->[3]) || !($_->[0] ~~ @dropped)) {
if ($aliases) {
- print SLONFILE ("ECHO 'Dropping node $_->[4] ($_->[0])';\n");
- print SLONFILE ("DROP NODE (ID = \@$_->[4], EVENT NODE = \@$backup->[4]);\n");
+ print SLONFILE ("ECHO 'Dropping failed node $_->[4] ($_->[0])';\n");
+ print SLONFILE ("DROP NODE (ID = \@$_->[4], EVENT NODE = \@$backup->[4]);\n\n");
}
else {
- print SLONFILE ("ECHO 'Dropping node $_->[0]';\n");
- print SLONFILE ("DROP NODE (ID = $_->[0], EVENT NODE = $backup->[0]);\n");
+ print SLONFILE ("ECHO 'Dropping failed node $_->[0]';\n");
+ print SLONFILE ("DROP NODE (ID = $_->[0], EVENT NODE = $backup->[0]);\n\n");
}
}
last;
}
+# Used to return informational text from the %message hashes, pretty much entirely stolen from check_postgres (http://bucardo.org)
sub lookupMsg {
my $name = shift || '?';
my $line_call;
return $text;
}
+# Trim quotes off a string
sub qtrim {
my $string = shift;
$string =~ s/^('|")+//;
return $string;
}
+# Trim a string
sub trim($) {
my $string = shift;
$string =~ s/^\s+//;
return $string;
}
+# Print command with a linefeed
sub println {
    # Join all arguments with the record separator $/ (or fall back to $_
    # when called without arguments) and print the result followed by $/.
    my $text = @_ ? join($/, @_) : $_;
    print $text, $/;
}
+# Print to stdout and the logfile, doing some replacements along the way for logging
sub printlog {
my $prefix = shift;
my $logfile_name = shift;
}
}
+# Printlog command with a linefeed
sub printlogln {
    # Forward the first three arguments unchanged to printlog() and append
    # the record separator $/ to the fourth; any further arguments are ignored.
    my ($prefix, $logfile_name, $log_prefix, $message) = @_;
    printlog($prefix, $logfile_name, $log_prefix, $message . $/);
}
+# Insert details of any action into a database table
sub logDB {
my $dbconninfo = shift;
my $dbuser = shift;
return true;
}
+# Returns a uuid used for the failover script directory
sub getUUID {
my $date_string = shift;
my $g_ug = new Data::UUID;
return $g_uuid_str;
}
+# Write out a PID file
sub writePID {
my $prefix = shift;
my $logfile = shift;
return $success;
}
+# Remove the PID file
sub removePID {
my $prefix = shift;
my $logfile = shift;
return $success;
}
+# Check all sets from an originating node are contained in the list provided by another node
sub checkProvidesAllSets {
my ($originSets, $providerSets) = @_;
my %test_hash;
return !%test_hash; # return false if any keys are left in the hash
}
+# Check any sets from an originating node are contained in the list subscribed to by another node
sub checkSubscribesAnySets {
my ($originSets, $subscriberSets) = @_;
my $before;
return ($before != $after); # return false if no keys were removed from the hash
}
+# Read configuration details from a configuration file
sub getConfig {
my $cfgfile = shift;
my @fields;
when(/\bautofailover_config_any_node\b/i) {
$g_autofailover_config_any = checkBoolean($value);
}
+ when(/\bautofailover_perspective_sleep_time\b/i) {
+ $g_autofailover_perspective_sleep = checkInteger($value);
+ }
+ when(/\bautofailover_majority_only\b/i) {
+ $g_autofailover_majority_only = checkBoolean($value);
+ }
+ when(/\bautofailover_is_quorum\b/i) {
+ $g_autofailover_is_quorum = checkBoolean($value);
+ }
}
}
}
return $success;
}
+# Interpret a textual representation of a boolean value
sub checkBoolean {
my $text = shift;
my $value = undef;
return $value;
}
+# Check if a text value is a valid integer
sub checkInteger {
my $integer = shift;
my $value = undef;
return $value;
}
-
+# Run a slonik command and capture all output via autoflushing channel
sub runSlonik {
my $script = shift;
my $prefix = shift;
return $success;
}
+# Experimental logic to watch the cluster status and perform an automatic failover
sub autoFailover {
my $dbconninfo = shift;
my $clname = shift;
my $actions;
my $current_retry;
my $cluster_loaded;
+ my $cluster_loaded_from;
my @cluster;
my $node_count;
my $version;
@cluster = @g_cluster;
die lookupMsg('err_cluster_lone') if ($node_count == 1);
$cluster_loaded = true;
+ $cluster_loaded_from = 'conninfo specified in config';
};
if ($@) {
printlogln($prefix,$logfile,$log_prefix, lookupMsg('load_cluster_fail', 'from supplied configuration'));
@cluster = @g_cluster;
die lookupMsg('err_cluster_lone') if ($node_count == 1);
$cluster_loaded = true;
+ $cluster_loaded_from = $_->[0];
};
if ($@) {
printlogln($prefix,$logfile,$log_prefix, lookupMsg('load_cluster_fail', 'from node ' . $_->[0] . ': trying next node'));
}
if ($cluster_loaded) {
- printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_load_cluster', (!defined($cluster_time) ? "Loaded" : "Reloaded"), $version, $clname, $node_count));
+ printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_load_cluster', (!defined($cluster_time) ? "Loaded" : "Reloaded"), $version, $clname, $node_count, $cluster_loaded_from));
$cluster_time = time();
}
else {
}
}
if ($failed > 0) {
- $actions = findBackup($clname, $dbuser, $dbpass, $prefix, $logfile, $log_prefix);
- if ($actions > 0) {
- printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_proceed'));
- foreach my $failed ( keys %g_backups ) {
- printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_detail', $failed, $g_backups{$failed}));
- }
- $g_script = writeFailover($prefix, $dbconninfo, $clname, $dbuser, $dbpass, undef, undef, $g_subs_follow_origin, $g_use_comment_aliases, $logfile, $log_prefix);
- unless (runSlonik($g_script, $prefix, $logfile, $log_prefix)) {
- printlogln($prefix,$logfile,$log_prefix,lookupMsg('err_execute_fail', 'slonik script', $g_script));
- }
- $cluster_loaded = false;
- #print "SCRIPT: $g_script\n";
- #exit(0);
- }
- else {
- printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_halt', $failed));
+ if ((!$g_autofailover_majority_only || checkSplit($prefix, $logfile, $log_prefix)) && (($g_autofailover_perspective_sleep <= 0) || checkPerspective($clname, $dbuser, $dbpass, $prefix, $logfile, $log_prefix))) {
+ $actions = findBackup($clname, $dbuser, $dbpass, $prefix, $logfile, $log_prefix);
+ if ($actions > 0) {
+ printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_proceed'));
+ foreach my $failed ( keys %g_backups ) {
+ printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_detail', $failed, $g_backups{$failed}));
+ }
+ $g_script = writeFailover($prefix, $dbconninfo, $clname, $dbuser, $dbpass, undef, undef, $g_subs_follow_origin, $g_use_comment_aliases, $logfile, $log_prefix);
+ unless (runSlonik($g_script, $prefix, $logfile, $log_prefix)) {
+ printlogln($prefix,$logfile,$log_prefix,lookupMsg('err_execute_fail', 'slonik script', $g_script));
+ }
+ $cluster_loaded = false;
+
+ #print "SCRIPT: $g_script\n";
+ #exit(0);
+ }
+ else {
+ printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_halt', $failed));
+ }
}
}
usleep($g_autofailover_poll_interval * 1000);
}
}
+# Very basic split-brain guard: compare how many cluster nodes are still
+# responsive against how many are listed as unresponsive.
+#
+# Arguments: $prefix, $logfile, $log_prefix (all passed straight to printlogln).
+# Reads:     @g_cluster, @g_unresponsive, $g_autofailover_is_quorum.
+# Returns:   true when the responsive nodes outnumber the unresponsive ones, or
+#            when the counts are equal and this script is configured to act as
+#            the quorum tie-breaker; false otherwise.
+sub checkSplit {
+    my ($prefix, $logfile, $log_prefix) = @_;
+
+    my $down_count = scalar(@g_unresponsive);
+    my $up_count = scalar(@g_cluster) - $down_count;
+    my $node_total = $up_count + $down_count;
+
+    # Clear majority of nodes still responsive
+    if ($up_count > $down_count) {
+        printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_split_check', $up_count, $node_total));
+        return true;
+    }
+
+    # Even split: only counts as a majority when this script is the quorum vote
+    if (($up_count == $down_count) && $g_autofailover_is_quorum) {
+        printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_split_check', ($up_count . '+quorum'), $node_total));
+        return true;
+    }
+
+    printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_split_check_fail', $up_count));
+    return false;
+}
+
+# Check each surviving node's perspective of the failure, to try to ensure the
+# issue isn't simply that this script can't connect to the origin/provider.
+#
+# Every responsive node is asked twice, $g_autofailover_perspective_sleep
+# milliseconds apart, for the lag it reports against each unresponsive node.
+# The failure is considered confirmed only if every reported lag grew by at
+# least the time slept between the two checks.
+#
+# Arguments: $clname  - cluster name (the schema queried is "_" . $clname)
+#            $dbuser, $dbpass - credentials handed to DBI->connect
+#            $prefix, $logfile, $log_prefix - passed straight to printlogln
+# Reads:     @g_cluster, @g_unresponsive (element 0 of each entry is used as the
+#            node id, element 2 as the DBI connection string), $g_debug,
+#            $g_autofailover_perspective_sleep.
+# Returns:   true when all observations agree the nodes are lagging; false when
+#            any observation disagrees, any surviving node could not be queried,
+#            or no observations could be gathered at all.
+sub checkPerspective {
+    my ($clname, $dbuser, $dbpass, $prefix, $logfile, $log_prefix) = @_;
+
+    my $agreed = false;
+    my $observed = 0;
+    my $confirmed = 0;
+    my @unresponsive_ids = map { $_->[0] } @g_unresponsive;
+
+    printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_pspec_check', join(", ", @unresponsive_ids), scalar(@g_unresponsive), scalar(@g_cluster)));
+
+    my ($first, $bad) = getPerspectiveLag('Check1', \@unresponsive_ids, $clname, $dbuser, $dbpass, $prefix, $logfile, $log_prefix);
+
+    if ($bad == 0) {
+        printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_pspec_check_sleep', $g_autofailover_perspective_sleep));
+        usleep($g_autofailover_perspective_sleep * 1000);
+
+        my ($second, $second_bad) = getPerspectiveLag('Check2', \@unresponsive_ids, $clname, $dbuser, $dbpass, $prefix, $logfile, $log_prefix);
+        $bad += $second_bad;
+
+        if ($bad == 0) {
+            # Pair observations by (reporting node, origin, received) rather than by
+            # position, so a differing row count between passes cannot mis-pair them.
+            my %second_lag_for;
+            foreach my $row (@{$second}) {
+                $second_lag_for{join(':', @{$row}[0..2])} = $row->[3];
+            }
+
+            foreach my $row (@{$first}) {
+                my ($reporter, $origin, $received, $lag_before) = @{$row};
+                my $lag_after = $second_lag_for{join(':', $reporter, $origin, $received)};
+
+                $observed++;
+
+                # A missing or NULL lag on either pass cannot confirm anything
+                next unless (defined($lag_before) && defined($lag_after));
+
+                my $growth = $lag_after - $lag_before;
+                if ($g_debug) {
+                    printlogln($prefix,$logfile,$log_prefix,lookupMsg('dbg_generic', ("Node $origin lag between checks on node $received is " . $growth . " seconds")));
+                }
+
+                # Lag is in whole seconds, the sleep time is in milliseconds
+                if (($growth * 1000) >= $g_autofailover_perspective_sleep) {
+                    $confirmed++;
+                }
+            }
+        }
+    }
+
+    if ($bad > 0 || $observed == 0) {
+        # Either a surviving node could not be queried, or nobody reported on the
+        # failed nodes at all; with no evidence the failure cannot be confirmed.
+        printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_pspec_check_unknown'));
+    }
+    elsif ($observed == $confirmed) {
+        printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_pspec_check_true'));
+        $agreed = true;
+    }
+    else {
+        printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_pspec_check_false'));
+    }
+
+    return $agreed;
+}
+
+# Helper for checkPerspective: ask every responsive node in @g_cluster for the
+# lag it reports against each of the unresponsive node ids.
+#
+# Arguments: $label - tag written to the log for each row ('Check1' / 'Check2')
+#            $unresponsive_ids - array reference of unresponsive node ids
+#            remaining arguments as for checkPerspective
+# Returns:   a list of (array reference of rows, count of nodes that failed to
+#            answer); each row is [reporting node id, st_origin, st_received,
+#            lag in seconds].
+sub getPerspectiveLag {
+    my ($label, $unresponsive_ids, $clname, $dbuser, $dbpass, $prefix, $logfile, $log_prefix) = @_;
+
+    my %is_unresponsive = map { $_ => 1 } @{$unresponsive_ids};
+    my $placeholders = join(', ', ('?') x scalar(@{$unresponsive_ids}));
+    my @rows;
+    my $bad = 0;
+
+    foreach my $node (@g_cluster) {
+        next if ($is_unresponsive{$node->[0]});
+
+        my $dbh;
+        my $ok = eval {
+            $dbh = DBI->connect("DBI:Pg:$node->[2];", $dbuser, $dbpass, {RaiseError => 1});
+
+            # Query the schema of the cluster we were asked about, not a fixed name
+            my $qw_clname = $dbh->quote_identifier("_" . $clname);
+            my $query = "SELECT a.st_origin, a.st_received, extract(epoch from a.st_lag_time)::integer"
+                      . " FROM $qw_clname.sl_status a"
+                      . " INNER JOIN $qw_clname.sl_node b ON a.st_origin = b.no_id"
+                      . " INNER JOIN $qw_clname.sl_node c ON a.st_received = c.no_id"
+                      . " WHERE a.st_received IN ($placeholders)"
+                      . " ORDER BY a.st_origin, a.st_received;";
+
+            my $sth = $dbh->prepare($query);
+            $sth->execute(@{$unresponsive_ids});
+
+            while (my @node_lag = $sth->fetchrow_array) {
+                printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_pspec_check_data', $label, $node->[0], $node_lag[0], $node_lag[1], $node_lag[2]));
+                push(@rows, [$node->[0], @node_lag]);
+            }
+
+            $sth->finish;
+            $dbh->disconnect();
+            1;
+        };
+        unless ($ok) {
+            my $error = $@;
+            printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_pspec_check_fail', $node->[0], $error));
+            # Best effort: don't leave a connection open after a failed query
+            eval { $dbh->disconnect() } if (defined($dbh));
+            $bad++;
+        }
+    }
+
+    return (\@rows, $bad);
+}
+
+
+# Check if any nodes have failed by connecting and probing the Slony schema
sub checkFailed {
my $clname = shift;
my $dbuser = shift;
$sth->finish;
$dbh->disconnect();
+
+ if (exists($g_unresponsive_subonly{$_->[0]})) {
+ printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_recovery_subonly', $_->[0]));
+ delete $g_unresponsive_subonly{$_->[0]};
+ }
};
if ($@) {
if ($g_debug) {
}
push(@g_unresponsive, \@$_);
if ((defined($_->[3])) || ($g_autofailover_provs && defined($_->[9]))) {
- printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_unresponsive', $_->[0]));
- $prov_failed++;
+ if (defined($_->[3])) {
+ printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_unresponsive', $_->[0]));
+ }
+ else {
+ printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_unresponsive_prov', $_->[0]));
+ }
+ unless ($g_failover_method ne 'new' && !defined($_->[3])) {
+ $prov_failed++;
+ }
}
else {
- printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_unresponsive_subonly', $_->[0]));
+ if (!exists($g_unresponsive_subonly{$_->[0]})) {
+ printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_unresponsive_subonly', $_->[0]));
+ $g_unresponsive_subonly{$_->[0]} = true;
+ }
if ($g_fail_subonly) {
$subonly_failed++;
}
}
}
+# Attempt to try and find the most suitable backup node for a failed node
sub findBackup {
my $clname = shift;
my $dbuser = shift;
my $query;
my $qw_clname;
my $result_count = 0;
- my $lowest_lag;
+ my $lowest_lag_time;
+ my $latest_last_event;
my $best_node_id;
my $best_node_is_direct;
+ my $best_node_can_forward;
my @sets_from;
my @sets_to;
+ my @sets_to_prov;
my %backup_for_set_chosen;
undef %g_backups;
printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_promote_find', ($_->[9] // "none"), $_->[0]));
undef $best_node_id;
- $lowest_lag = (1<<$Config{ivsize}*8-1)-1;
+ $lowest_lag_time = (1<<$Config{ivsize}*8-1)-1;
+ $latest_last_event = 0;
+ $best_node_is_direct = false;
+ $best_node_can_forward = false;
if (defined($_->[9]) && (exists $backup_for_set_chosen{$_->[9]})) {
$best_node_id = $backup_for_set_chosen{$_->[9]};
if ($g_debug) {
printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_check_sub',$subscriber->[0]));
}
-
+
+ # Here the strings containing the sets are converted to arrays
+ # Origin / Forwarded sets
+ undef @sets_to_prov;
+ if (defined($subscriber->[10])) {
+ @sets_to_prov = split(',',$subscriber->[10]);
+ }
+ if (defined($subscriber->[3])) {
+ if (@sets_to_prov) {
+ @sets_to_prov = (@sets_to_prov, split(',',$subscriber->[3]));
+ }
+ else {
+ @sets_to_prov = split(',',$subscriber->[3]);
+ }
+ }
+ if (!defined($subscriber->[10]) && !defined($subscriber->[3])) {
+ @sets_to_prov = (0);
+ }
+
+ # Origin / Subscribed sets.
+ undef @sets_to;
+ if (defined($subscriber->[7])) {
+ @sets_to = split(',',$subscriber->[7]);
+ }
+ if (defined($subscriber->[3])) {
+ if (@sets_to) {
+ @sets_to = (@sets_to, split(',',$subscriber->[3]));
+ }
+ else {
+ @sets_to = split(',',$subscriber->[3]);
+ }
+ }
+ if (!defined($subscriber->[7]) && !defined($subscriber->[3])) {
+ @sets_to = (0);
+ }
+
+ # Sets provided by the failed node.
+ undef @sets_from;
+ if (defined($_->[9])) {
+ @sets_from = split(',',$_->[9]);
+ }
+ else {
+ @sets_from = (0);
+ @sets_to = (0);
+ }
+
$dsn = "DBI:Pg:$subscriber->[2]";
eval {
$dbh = DBI->connect($dsn, $dbuser, $dbpass, {RaiseError => 1});
$qw_clname = $dbh->quote_identifier("_" . $clname);
- $query = "SELECT extract(epoch from a.st_lag_time), (a.st_received = ?) AS direct
- FROM $qw_clname.sl_status a
- INNER JOIN $qw_clname.sl_subscribe b ON b.sub_provider = a.st_received AND b.sub_receiver = a.st_origin
- WHERE b.sub_active
- GROUP BY a.st_lag_time, a.st_received;";
+ #print "\tNODE " . $subscriber->[0] . ") SETS TO = " . join(',', @sets_to) . " SETS FROM = " . join(',', @sets_from) . " SETS TO PROV = " . join(',', @sets_to_prov) . "\n";
+
+ if (defined($subscriber->[3]) && checkProvidesAllSets(\@sets_from, \@sets_to)) {
+ $query = "SELECT 0, ev_seqno, (ev_origin = ?)
+ FROM $qw_clname.sl_event
+ WHERE ev_origin = $qw_clname.getlocalnodeid(?)
+ ORDER BY ev_seqno DESC LIMIT 1";
+ }
+ else {
+ $query = "SELECT extract(epoch from (current_timestamp-a.con_timestamp)), a.con_seqno, (a.con_origin = ?) AS direct
+ FROM $qw_clname.sl_confirm a
+ INNER JOIN $qw_clname.sl_event b on b.ev_seqno = a.con_seqno AND a.con_origin = b.ev_origin
+ INNER JOIN $qw_clname.sl_subscribe c ON c.sub_provider = a.con_origin AND c.sub_receiver = a.con_received
+ WHERE c.sub_active AND a.con_received = $qw_clname.getlocalnodeid(?)
+ ORDER BY a.con_seqno DESC LIMIT 1;";
+ }
$sth = $dbh->prepare($query);
$sth->bind_param(1, $_->[0]);
+ $sth->bind_param(2, "_" . $clname);
$sth->execute();
while (my @subinfo = $sth->fetchrow) {
-
- undef @sets_from;
- if (defined($_->[9])) {
- printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_node_detail', $subscriber->[0], ($subinfo[1] ? "a direct subscriber" : "an indirect subscriber"), $subscriber->[7], $subinfo[0]));
- @sets_from = split(',',$_->[9]);
- @sets_to = split(',',$subscriber->[7]);
- }
- elsif ($g_fail_subonly) {
- # Subscriber only node will have no active sets forwarding sets to check
- printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_node_detail', $subscriber->[0], "suitable backup for this subscriber only node" , $subscriber->[7], $subinfo[0]));
- @sets_from = (0);
- @sets_to = (0);
+ # If the failed provider node isn't an origin for any sets, we classify any direct subscribers to it as indirect
+ # because they are indirect to the origin.
+ if ($subinfo[2] && defined($_->[9]) && !defined($_->[3])) {
+ $subinfo[2] = false;
}
- if ((checkProvidesAllSets(\@sets_from, \@sets_to)) && (($subinfo[0] < $lowest_lag) || (!$best_node_is_direct && $subinfo[1]))) {
- printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_promote_best', $subscriber->[0], $subinfo[0]));
+ printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_node_detail',
+ $subscriber->[0],
+ ($subinfo[2]?"directly":"indirectly"),
+ (defined($_->[3])?"origin":(defined($_->[9])?"provider":"subscriber only")),
+ $_->[0],
+ (defined($subscriber->[10])?$subscriber->[10]:(defined($subscriber->[3])?$subscriber->[3]:"<NONE>")),
+ $subinfo[0], $subinfo[1]));
+
+ # Select this node as the backup node if:
+ # 1) The node is a subscriber to all sets on the failed node
+ # 2) In order of preference:
+ # The node is one of the direct subscribers to the failed node on the most recent event and is a forwarding provider
+ # OR
+ # The node is one of the direct subscribers to the failed node on the most recent event and is not a forwarding provider
+ # OR
+ # The node is an indirect subscriber to the failed node with the lowest lag time
+ if (!checkProvidesAllSets(\@sets_from, \@sets_to)) {
+ printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_promote_unsuitable', $subscriber->[0]));
+ }
+ elsif (($subinfo[2] && (!$best_node_is_direct || $subinfo[1] > $latest_last_event || (!$best_node_can_forward && checkProvidesAllSets(\@sets_from, \@sets_to_prov) && $subinfo[1] == $latest_last_event)))
+ || (!$best_node_is_direct && !$subinfo[2] && $subinfo[0] < $lowest_lag_time)) {
+ printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_promote_best', $subscriber->[0], $subinfo[0], $subinfo[1]));
$best_node_id = $subscriber->[0];
- $lowest_lag = $subinfo[0];
- $best_node_is_direct = $subinfo[1];
+ $lowest_lag_time = $subinfo[0];
+ $latest_last_event = $subinfo[1];
+ $best_node_is_direct = $subinfo[2];
+ $best_node_can_forward = checkProvidesAllSets(\@sets_from, \@sets_to_prov);
}
+
}
};
if ($@) {
}
}
else {
- printlog($prefix,$logfile,$log_prefix,lookupMsg('autofailover_promote_fail'));
+ printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_promote_fail'));
}
}
else {