4 # Copyright: 08/04/2012: v1.0.1 Glyn Astill <glyn@8kb.co.uk>
5 # Requires: Perl 5.10.1+, Data::UUID, File::Slurp
6 # PostgreSQL 9.0+ Slony-I 2.0+
8 # This script is a command-line utility to manage switchover and failover
9 # of replication sets in Slony-I clusters.
11 # This script is free software: you can redistribute it and/or modify
12 # it under the terms of the GNU General Public License as published by
13 # the Free Software Foundation, either version 3 of the License, or
14 # (at your option) any later version.
16 # This script is distributed in the hope that it will be useful,
17 # but WITHOUT ANY WARRANTY; without even the implied warranty of
18 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 # GNU General Public License for more details.
21 # You should have received a copy of the GNU General Public License
22 # along with this script. If not, see <http://www.gnu.org/licenses/>.
27 use Getopt::Long qw/GetOptions/;
31 use sigtrap 'handler' => \&sigExit, 'HUP', 'INT','ABRT','QUIT','TERM';
32 use Time::HiRes qw/usleep gettimeofday/;
33 use POSIX qw/strftime/;
34 use Config qw/%Config/;
36 use constant false => 0;
37 use constant true => 1;
39 my $g_script_version = '1.0.1'; # Substituted into the 'title' banner message
41 my $g_pidfile = '/var/run/slony_failover.pid'; # PID file path passed to writePID() and removePID()
42 my $g_pid_written = false; # Set true once writePID() succeeds; removePID() is only called when this is set
44 my $g_prefix = '/tmp/slony_failovers'; # Base directory, created if missing; passed to every printlog()/printlogln() call
45 my $g_separate_working = true; # When true the result of getUUID() is appended to $g_prefix to form a per-run working directory
46 my $g_log_prefix = '%t'; # Passed to every printlog()/printlogln() call; '%t' presumably expands to a timestamp - TODO confirm
47 my $g_log_to_db = false; # Presumably controls whether logDB() is called on exit - TODO confirm
54 my $g_use_try_blocks = false; # When true with $g_lockset_method 'multiple' (and version <= 9.9) the lock set method is forced to 'single'
55 my $g_lockset_method = 'multiple'; # 'multiple' or 'single'; reported in the 'script_settings' message
56 my $g_logfile = 'failover.log'; # Log file name passed alongside $g_prefix to printlog()/printlogln()
68 my $g_dbuser = 'slony'; # Default database user; overridden by the dbuser|u option
70 my $g_available_node_count; # First value returned by checkNodes()
71 my $g_critical_node_count; # Second value returned by checkNodes(): -1 new origin down, -2 old origin down, positive if subscribers are down
72 my $g_subs_follow_origin = false; # Set true when the user answers yes to 'interactive_preserve'; passed to writeFailover()/writeMoveSet()
73 my $g_use_comment_aliases = false; # When false the user is asked ('interactive_aliases') whether to enable it
74 my @g_cluster; # Array refs of node info, one per row fetched by the cluster query. In hindsight this should have been a hash; it should be fairly simple to switch.
80 my $g_failover_method = 'old'; # Reported in the 'script_settings' message
81 my $g_resubscribe_method = 'subscribe'; # Reported upper-cased in the 'script_settings' message
82 my $g_failover = false;
83 my $g_fail_subonly = false; # When true, unavailable nodes are pushed onto @g_failed even if field 9 (active provided sets) is undefined
84 my $g_drop_failed = false;
85 my $g_autofailover = false; # When true (and infoprint is not requested) a PID file is written and autoFailover() is entered
86 my $g_autofailover_poll_interval = 500; # Presumably milliseconds between polls ('autofailover_init_pol' reports ms) - TODO confirm
87 my $g_autofailover_retry = 2; # Presumably the number of retries for failed nodes - TODO confirm
88 my $g_autofailover_retry_sleep = 1000; # Presumably milliseconds slept between retries - TODO confirm
89 my $g_autofailover_provs = false; # Presumably whether failed forwarding providers are failed over - TODO confirm
90 my $g_autofailover_config_any = true; # Presumably allows the Slony configuration to be read from any node - TODO confirm
98 'usage' => q{-h <host> -p <port> -db <database> -c <cluster name> -u <username> -P <password> (Password option not recommended; use pgpass instead)},
99 'title' => q{Slony-I failover script version $1},
100 'cluster_fixed' => q{Aborting failover action: all origin nodes now responsive},
101 'cluster_failed' => q{Found $1 failed nodes, sleeping for $2ms before retry $3 of $4},
102 'load_cluster' => q{Getting a list of database nodes...},
103 'load_cluster_fail' => q{Unable to read cluster configuration $1},
104 'load_cluster_success' => q{Loaded Slony-I v$1 cluster "$2" with $3 nodes read from node at $4:$5/$6},
105 'lag_detail' => q{Current node lag information from configuration node:},
106 'script_settings' => q{Using $1 batches of lock set, $2 FAILOVER and $3},
107 'generated_script' => q{Generated script "$1"},
108 'autofailover_init' => q{Entering autofailover mode},
109 'autofailover_init_cnf' => q{Slony configuration will be read from $1 node},
110 'autofailover_init_pol' => q{Polling every $1ms},
111 'autofailover_init_ret' => q{Failed nodes will be retried $1 times with $2ms sleep},
112 'autofailover_init_set' => q{Failed forwarding providers $1 be failed over},
113 'autofailover_load_cluster' => q{$1 Slony-I v$2 cluster "$3" with $4 nodes},
114 'autofailover_proceed' => q{Proceeding with failover:},
115 'autofailover_detail' => q{Failed node: $1, Backup node: $2},
116 'autofailover_halt' => q{Unable to perform any failover for $1 failed nodes},
117 'autofailover_check_sub' => q{Checking subscriber node $1},
118 'autofailover_check_sub_fail' => q{Unable to check subscriber node $1},
119 'autofailover_promote_find' => q{Finding most up to date subscriber to all sets ($1) on unresponsive node $2},
120 'autofailover_promote_found' => q{Using previously found most up to date subscriber to all sets ($1) on unresponsive node $2},
121 'autofailover_promote_skip' => q{No failover required for unresponsive node $1 as it is neither the origin or an active forwarder of any sets},
122 'autofailover_promote_fail' => q{Could not find suitable backup node for promotion},
123 'autofailover_node_detail' => q{Node $1 is $2 and provides sets $3 at $4 lag},
124 'autofailover_promote_best' => q{Best node for promotion is node $1 seq = $2},
125 'autofailover_unresponsive' => q{Detected unresponsive provider node: $1},
126 'autofailover_unresponsive_subonly'=> q{Detected unresponsive subscriber only node: $1},
127 'interactive_head_id' => q{ID},
128 'interactive_head_name' => q{Name},
129 'interactive_head_status' => q{Status},
130 'interactive_head_providers' => q{Provider IDs},
131 'interactive_head_config' => q{Configuration},
132 'interactive_detail_1' => q{Origin for sets: },
133 'interactive_detail_2' => q{Providing sets: },
134 'interactive_detail_3' => q{Subscriptions: },
135 'interactive_choose_node' => q{Please choose the node to move all sets $1:},
136 'interactive_confirm' => q{You chose to move sets $1 node $2 ($3). Is this correct [y/n]? },
137 'interactive_action' => q{Best course of action is most likely to do a "$1". Do you wish to continue [y/n]?},
138 'interactive_surrender' => q{Unable to determine best course of action}, # Fallback printed just before exiting with code 12
139 'interactive_write_script' => q{Writing a script to $1 node $2 to $3},
140 'interactive_check_nodes' => q{Checking availability of database nodes...},
141 'interactive_continue' => q{Do you wish to continue [y/n]?},
142 'interactive_preserve' => q{Preserve subscription paths to follow the origin node (choose no if unsure) [y/n]?},
143 'interactive_aliases' => q{Generate aliases based on sl_node/set comments in parentheses (choose no if unsure) [y/n]?},
144 'interactive_summary' => q{Summary of nodes to be passed to failover:},
145 'interactive_node_info' => q{Node : $1 ($2) $3 (conninfo $4)},
146 'interactive_run_script' => q{Would you like to run this script now [y/n]?},
147 'interactive_running' => q{Running the script now. This may take some time; please be patient!},
148 'interactive_reason' => q{Please enter a brief reason for taking this action: }, # Prompt text; takes no placeholders
149 'interactive_failover_detail_1' => q{Before you go any further please consider the impact of a full failover:},
150 'interactive_failover_detail_2' => q{The node you are failing over from will cease to participate in the cluster permanently until it is rebuilt & subscribed}, # Second line of the pre-failover warning
151 'interactive_failover_detail_3' => q{If the outage is temporary (i.e. network/power/easily replaceable hardware related) consider waiting it out},
152 'interactive_failover_detail_4' => q{This type of failover is likely to be more a business decision than a technical one},
153 'info_all_nodes_available' => q{INFO: All nodes are available},
154 'info_req_nodes_available' => q{INFO: $1 of $2 nodes are available. No unavailable nodes are subscribed to the old origin},
155 'wrn_node_unavailable' => q{WARNING: Node $1 unavailable},
156 'wrn_req_unavailable' => q{WARNING: Old origin node ($1) is available, however $2 subscribers are unavailable},
157 'wrn_not_tested' => q{WARNING: Script not tested with Slony-I v$1},
158 'note_multiple_try' => q{NOTICE: Cannot lock multiple sets within try blocks in version $1 dropping back to single sets},
159 'note_reshape_cluster' => q{NOTICE: Either drop the failed subscribers or bring them back up, then retry to MOVE SET},
160 'dbg_generic' => q{DEBUG: $1},
161 'dbg_cluster' => q{DEBUG: NodeID $1/ProvIDs $2/Conninfo $3/OrigSets $4/NodeName $5/ProvTree $6/Active $7/FwdSets $8/ActSubSets $9},
162 'dbg_resubscribe' => q{DEBUG: Checking possibility to resubscribe set $1 ($2) to node $3 ($4) which pulls $5 ($6) from $7 ($8)},
163 'dbg_failover_method' => q{DEBUG: Failover method is $1},
164 'dbg_cluster_load' => q{DEBUG: Loading cluster configuration from $1},
165 'dbg_cluster_good' => q{DEBUG: Cluster state good},
166 'dbg_autofailover_check' => q{DEBUG: Checking node $1 ($2) role is $3 (conninfo: $4)},
167 'dbg_autofailover_active_check' => q{DEBUG: Initiate active check of $1 node $2},
168 'dbg_slonik_script' => q{DEBUG: Running slonik script $1},
169 'err_generic' => q{ERROR: $1},
170 'err_no_database' => q{ERROR: Please specify a database name},
171 'err_no_cluster' => q{ERROR: Please specify a slony cluster name},
172 'err_no_host' => q{ERROR: Please specify a host},
173 'err_no_config' => q{ERROR: No valid config found},
174 'err_fail_config' => q{ERROR: Failed to load configuration},
175 'err_write_fail' => q{ERROR: Could not write to $1 "$2"},
176 'err_read_fail' => q{ERROR: Could not read from $1 "$2"},
177 'err_unlink_fail' => q{ERROR: Could not delete $1 "$2"},
178 'err_mkdir_fail' => q{ERROR: Unable to create $1 directory "$2"},
179 'err_execute_fail' => q{ERROR: Could not execute $1 "$2"},
180 'err_inactive' => q{ERROR: Node $1 is not active (state = $2)},
181 'err_cluster_empty' => q{ERROR: Loaded cluster contains no nodes},
182 'err_cluster_offline' => q{ERROR: Loaded cluster contains no reachable nodes},
183 'err_cluster_lone' => q{ERROR: Loaded cluster contains only 1 node},
184 'err_not_origin' => q{ERROR: Node $1 is not the origin of any sets},
185 'err_not_provider' => q{ERROR: Node $1 is not a provider of any sets},
186 'err_not_provider_sets' => q{ERROR: Node $1 does not provide the sets required: need ($2) but provides ($3)},
187 'err_no_configuration' => q{ERROR: Could not read configuration for node $1},
188 'err_must_enter_node_id' => q{ERROR: You must enter a node id},
189 'err_not_a_node_id' => q{ERROR: I have no knowledge of a node $1},
190 'err_same_node' => q{ERROR: Cant move from and to the same node},
191 'err_node_offline' => q{ERROR: $1 node ($2) is not available},
192 'err_incomplete_preamble' => q{ERROR: Incomplete preamble},
193 'err_running_slonik' => q{ERROR: Could not run slonik: $1},
194 'err_pgsql_connect' => q{ERROR: Cannot connect to postgres server},
195 'slonik_output' => q{SLONIK: $1},
196 'exit_noaction' => q{Exiting, no action has been taken},
197 'exit' => q{Exited by $1}
200 'usage' => q{-h <host> -p <port> -db <database> -c <cluster name> -u <username> -P <password> (Option mot de passe pas recommandé; utiliser pgpass place)},
201 'title' => q{Slony-I failover (basculement) version de script $1},
202 'cluster_fixed' => q{Abandon de l'action de basculement: tous les noeuds d'origine maintenant sensible},
203 'cluster_failed' => q{Trouvé $1 échoué noeuds, couchage pour $2 ms avant réessayer $3 de $4},
204 'load_cluster' => q{Obtenir une liste de noeuds de base de donnees...},
205 'load_cluster_fail' => q{Impossible de lire la configuration du cluster $1},
206 'load_cluster_success' => q{Chargé Slony-I v$1 groupe "$2" avec $3 noeuds lire à partir du noeud à $4:$5/$6},
207 'lag_detail' => q{Current informations noeud de décalage à partir du noeud de configuration:},
208 'script_settings' => q{Utilisation de $1 lots de système de verrouillage, $2 FAILOVER et $3},
209 'generated_script' => q{Script généré "$1"},
210 'autofailover_init' => q{Entrer dans le mode de autofailover},
211 'autofailover_init_cnf' => q{Configuration Slony sera lu à partir de $1 noeud},
212 'autofailover_init_pol' => q{Vérifier toutes les $1ms},
213 'autofailover_init_ret' => q{Noeuds défaillants seront rejugés $1 fois avec $2 ms sommeil},
214 'autofailover_init_set' => q{Fournisseurs d'expédition échoué $1 être échoué sur},
215 'autofailover_load_cluster' => q{$1 Slony-I v$2 grappe "$3" avec $4 noeuds},
216 'autofailover_proceed' => q{De procéder à failover:},
217 'autofailover_detail' => q{Noeud défaillant: $1, noeud de sauvegarde: $2},
218 'autofailover_halt' => q{Noeuds Impossible d'effectuer une failover pour $1 échoué},
219 'autofailover_check_sub' => q{Vérification noeud abonné $1},
220 'autofailover_check_sub_fail' => q{Impossible de vérifier noeud abonné $1},
221 'autofailover_promote_find' => q{Trouver plus à jour abonné à tous les jeux ($1) sur le noeud ne répond pas $2},
222 'autofailover_promote_found' => q{Utilisation précédemment trouvé plus à jour abonné à tous les jeux ($1) sur le noeud ne répond pas $2},
223 'autofailover_promote_skip' => q{Pas de failover requis pour le noeud ne répond pas $1 car il n'est ni l'origine ou un transitaire active de tous les jeux},
224 'autofailover_promote_fail' => q{Impossible de trouver le noeud de sauvegarde approprié pour la promotion},
225 'autofailover_node_detail' => q{Noeud $1 est $2 et fournit des ensembles $3 à $4 retard}, # Same placeholders as the English entry: $1 node, $2 state, $3 sets, $4 lag
226 'autofailover_promote_best' => q{Meilleur noeud pour la promotion est noeud $1 suivants = $2},
227 'autofailover_unresponsive' => q{Noeud ne répond pas détecté: $1},
228 'autofailover_unresponsive_subonly'=> q{Abonné ne répond pas détecté seulement de noeud: $1},
229 'interactive_head_name' => q{Nom},
230 'interactive_head_status' => q{Statut},
231 'interactive_head_providers' => q{Fournisseur IDs},
232 'interactive_detail_1' => q{Origine pour les jeux: },
233 'interactive_detail_2' => q{Fournir des ensembles: },
234 'interactive_detail_3' => q{Abonnements: },
235 'interactive_choose_node' => q{S'il vous plaît choisissez le noeud à déplacer tous les ensembles $1:},
236 'interactive_confirm' => q{Vous avez choisi de passer ensembles $1 noeud $2 ($3). Est-ce correct [o/n]? },
237 'interactive_action' => q{Meilleur plan d'action est le plus susceptible de faire une $1. Voulez-vous continuer [o/n]?},
238 'interactive_surrender' => q{Impossible de déterminer le meilleur plan d'action}, # French counterpart of the English "Unable to determine best course of action"
239 'interactive_write_script' => q{Rédaction d'un script à $1 $2 noeud à $3},
240 'interactive_check_nodes' => q{Vérification de la disponibilité des noeuds de base de donnees...},
241 'interactive_continue' => q{Voulez-vous continuer [o/n]?},
242 'interactive_preserve' => q{Préserver les chemins de souscription à suivre le noeud d'origine (ne pas choisir en cas de doute) [o/n]?},
243 'interactive_aliases' => q{Générer des alias sur la base de sl_node / set commentaires entre parenthèses (ne pas choisir en cas de doute) [o/n]?},
244 'interactive_summary' => q{Résumé des noeuds à passer à failover:},
245 'interactive_node_info' => q{Noeud : $1 ($2) $3 (conninfo $4)},
246 'interactive_run_script' => q{Voulez-vous exécuter ce script maintenant [o/n]?},
247 'interactive_running' => q{L'exécution du script maintenant. Cela peut prendre un certain temps; s'il vous plaît être patient!},
248 'interactive_reason' => q{S'il vous plaît entrer une brève raison pour cette action: }, # Prompt text; takes no placeholders
249 'interactive_failover_detail_1' => q{Avant d'aller plus loin s'il vous plaît envisager l'impact d'un failover (basculement) complet:},
250 'interactive_failover_detail_2' => q{Le noeud vous ne parviennent pas au-dessus de cesse de participer au groupe de façon permanente jusqu'à ce qu'il soit à reconstruire et souscrit},
251 'interactive_failover_detail_3' => q{Si la panne est temporaire (c.-à -réseau / alimentation / facilement remplaçable matériel connexe) envisager d'attendre dehors},
252 'interactive_failover_detail_4' => q{Ce type de failover est susceptible d'être plus une décision d'affaires que technique},
253 'info_all_nodes_available' => q{INFO: Tous les noeuds sont disponibles},
254 'info_req_nodes_available' => q{INFO: $1 of $2 noeuds sont disponibles. Pas de noeuds indisponibles sont souscrites à l'ancienne origine},
255 'wrn_node_unavailable' => q{ATTENTION: Noeud $1 indisponible}, # $1 = node id; must say UNavailable, matching the English entry
256 'wrn_req_unavailable' => q{ATTENTION: Noeud Old origine ($1) est disponible, mais $2 abonnés ne sont pas disponibles},
257 'wrn_not_tested' => q{ATTENTION: Script pas testé avec Slony-I v$1},
258 'note_multiple_try' => q{AVIS: Vous ne pouvez pas verrouiller plusieurs ensembles dans des blocs try dans la version $1 de retomber à des jeux simples},
259 'note_reshape_cluster' => q{AVIS: Vous devez supprimer les abonnés défaillants ou les ramener, puis réessayez à MOVE SET},
260 'err_generic' => q{ERREUR: $1},
261 'err_no_database' => q{ERREUR: S'il vous plaît spécifier un base de donnees nom},
262 'err_no_cluster' => q{ERREUR: S'il vous plaît indiquez un nom de cluster slony},
263 'err_no_host' => q{ERREUR: S'il vous plaît spécifier un hôte},
264 'err_no_config' => q{ERREUR: Aucune configuration valide n'a été trouvée},
265 'err_fail_config' => q{ERREUR: Impossible de charger la configuration},
266 'err_write_fail' => q{ERREUR: Impossible d'écrire dans $1 "$2"},
267 'err_read_fail' => q{ERREUR: Impossible de lire $1 "$2"},
268 'err_unlink_fail' => q{ERREUR: Impossible de supprimer $1 "$2"},
269 'err_mkdir_fail' => q{ERREUR: Impossible de créer $1 répertoire "$2"},
270 'err_execute_fail' => q{ERREUR: Impossible d'exécuter $1 "$2"},
271 'err_inactive' => q{ERREUR: Noeud $1 n'est pas active (état = $2)},
272 'err_cluster_empty' => q{ERREUR: Groupe chargé contient pas de noeuds},
273 'err_cluster_offline' => q{ERREUR: Groupe chargé contient pas de noeuds accessibles},
274 'err_cluster_lone' => q{ERREUR: Groupe chargé ne contient que 1 noeud}, # Prefix spelled like the other French err_* entries
275 'err_not_origin' => q{ERREUR: Nœud $1 n'est pas à l'origine de tous les jeux},
276 'err_not_provider' => q{ERREUR: Noeud $1 n'est pas un fournisseur de tous les jeux},
277 'err_not_provider_sets' => q{ERREUR: Noeud $1 ne fournit pas les ensembles nécessaires: le besoin ($2), mais fournit ($3)},
278 'err_no_configuration' => q{ERREUR: Impossible de lire la configuration pour le noeud $1},
279 'err_must_enter_node_id' => q{ERREUR: Vous devez entrer un id de noeud},
280 'err_not_a_node_id' => q{ERREUR: Je n'ai pas connaissance d'un $1 de noeud},
281 'err_same_node' => q{ERREUR: Cant déplacer depuis et vers le même noeud},
282 'err_node_offline' => q{ERREUR: $1 noeud ($2) n'est pas disponible},
283 'err_incomplete_preamble' => q{ERREUR: Préambule incomplète},
284 'err_running_slonik' => q{ERREUR: Ne pouvait pas courir slonik: $1},
285 'err_pgsql_connect' => q{ERREUR: Impossible de se connecter au serveur postgres},
286 'slonik_output' => q{SLONIK: $1},
287 'exit_noaction' => q{Quitter, aucune action n'a été prise},
288 'exit' => q{Quitter par $1}
293 # Setup date variables: capture the local time once at startup; $g_date is later passed to getUUID()
294 my ($g_year, $g_month, $g_day, $g_hour, $g_min, $g_sec) = (localtime(time))[5,4,3,2,1,0]; # Slice reorders localtime's (sec,min,hour,mday,mon,year) into year..sec
295 my $g_date = sprintf ("%02d:%02d:%02d on %02d/%02d/%04d", $g_hour, $g_min, $g_sec, $g_day, $g_month+1, $g_year+1900); # "HH:MM:SS on DD/MM/YYYY"; localtime's month is 0-based and its year is offset from 1900
297 # Handle command line options. NOTE(review): the 'usage' text advertises -c for the cluster name, but the spec below only defines clname|cl next to cfgfile|f, so -c looks ambiguous - confirm
298 Getopt::Long::Configure('no_ignore_case'); # Option names are case-sensitive, so -p (port) and -P (password) stay distinct
300 die lookupMsg('usage') unless GetOptions(\%opt, 'host|H=s', 'port|p=i', 'dbname|db=s', 'clname|cl=s', 'dbuser|u=s', 'dbpass|P=s', 'cfgfile|f=s', 'infoprint|I', ) and keys %opt and ! @ARGV; # Die with the usage text unless parsing succeeds, at least one option was given and no stray arguments remain
303 if (defined($opt{cfgfile})) {
304 unless (getConfig($opt{cfgfile})) {
305 println(lookupMsg('err_no_config'));
310 if (defined($opt{dbname})) {
311 $g_dbname = $opt{dbname};
313 if (defined($opt{clname})) {
314 $g_clname = $opt{clname};
316 if (defined($opt{host})) {
317 $g_dbhost = $opt{host};
319 if (defined($opt{port})) {
320 $g_dbport = $opt{port};
322 if (defined($opt{dbuser})) {
323 $g_dbuser = $opt{dbuser};
325 if (defined($opt{dbpass})) {
326 $g_dbpass = $opt{dbpass};
330 # Fill in any missing values with defaults or display message and die
331 if (!defined($g_dbname)) {
332 println(lookupMsg('err_no_database'));
333 die lookupMsg('usage');
335 if (!defined($g_clname)) {
336 println(lookupMsg('err_no_cluster'));
337 die lookupMsg('usage');
339 if (!defined($g_dbhost)) {
340 println(lookupMsg('err_no_host'));
341 die lookupMsg('usage');
345 # Build conninfo from supplied database name/host/port
346 $g_dbconninfo = "dbname=$g_dbname;host=$g_dbhost;port=$g_dbport";
348 if (!defined($opt{infoprint})) {
349 # Check prefix directory and create if not present
350 unless(-e $g_prefix or mkdir $g_prefix) {
351 println(lookupMsg('err_mkdir_fail', 'prefix', $g_prefix));
355 if ($g_separate_working) {
356 if ($g_prefix !~ m/\/$/) {
360 # Get a uuid for working directory
361 $g_prefix .= getUUID($g_date);
363 # Create a working directory and setup log file
364 unless(-e $g_prefix or mkdir $g_prefix) {
365 println(lookupMsg('err_mkdir_fail', 'work', $g_prefix));
370 # Set postgres path if provided
371 if (defined($g_slonikpath) && ($g_slonikpath ne "")) {
372 $ENV{PATH} .= ":$g_slonikpath";
375 # Check if autofailover is enabled, if so check configuration and enter autofailover mode
376 if (($g_autofailover) && !defined($opt{infoprint})) {
378 # Write out a PID file
379 if (writePID($g_prefix, $g_logfile, $g_log_prefix, $g_pidfile)) {
380 $g_pid_written = true;
383 cleanExit(1, "system");
386 # Go into endless loop for autofailover
387 autoFailover($g_dbconninfo, $g_clname, $g_dbuser, $g_dbpass, $g_prefix, $g_logfile, $g_log_prefix);
390 # Read slony configuration and output some basic information
393 println(lookupMsg('load_cluster', $g_prefix));
394 ($g_node_count, $g_version) = loadCluster($g_dbconninfo, $g_clname, $g_dbuser, $g_dbpass, $g_prefix, $g_logfile, $g_log_prefix);
397 println(lookupMsg('load_cluster_fail', 'from supplied configuration'));
398 cleanExit(2, "system");
401 if (defined($opt{infoprint})) {
402 println(lookupMsg('load_cluster_success', $g_version, $g_clname, $g_node_count, $g_dbhost, $g_dbport, $g_dbname) . ":");
403 chooseNode("info", undef, undef, undef, 0);
407 printlog($g_prefix,$g_logfile,$g_log_prefix,"*"x68 . "\n* ");
408 printlogln($g_prefix,$g_logfile,$g_log_prefix,lookupMsg('title', $g_script_version));
409 printlogln($g_prefix,$g_logfile,$g_log_prefix,"*"x68);
412 if ($g_node_count <= 0) {
413 printlogln($g_prefix,$g_logfile,$g_log_prefix,lookupMsg('err_cluster_empty'));
414 cleanExit(3, "system");
416 elsif (substr($g_version,0,1) < 2) {
417 printlogln($g_prefix,$g_logfile,$g_log_prefix,lookupMsg('wrn_not_tested', $g_version));
420 printlogln($g_prefix,$g_logfile,$g_log_prefix,lookupMsg('load_cluster_success', $g_version, $g_clname, $g_node_count, $g_dbhost, $g_dbport, $g_dbname));
421 printlogln($g_prefix,$g_logfile,$g_log_prefix,lookupMsg('script_settings', $g_lockset_method, $g_failover_method, uc($g_resubscribe_method)));
424 # Output lag information between each node and the node the configuration was read from
425 if (loadLag($g_dbconninfo, $g_clname, $g_dbuser, $g_dbpass, $g_prefix, $g_logfile, $g_log_prefix) > 0) {
426 printlogln($g_prefix,$g_logfile,$g_log_prefix,lookupMsg('lag_detail'));
428 printlogln($g_prefix,$g_logfile,$g_log_prefix,"\t$_");
430 printlog($g_prefix,$g_logfile,$g_log_prefix,"\n");
433 # Prompt user to choose nodes to move sets from / to
434 $g_node_from = chooseNode("from", $g_prefix, $g_logfile, $g_log_prefix, 0);
435 if ($g_node_from == 0) {
436 cleanExit(4, "user");
438 elsif ($g_node_from == -1) {
439 cleanExit(5, "system");
442 $g_node_to = chooseNode("to", $g_prefix, $g_logfile, $g_log_prefix, $g_node_from);
443 if ($g_node_to == 0) {
444 cleanExit(6, "user");
446 elsif ($g_node_to == -1) {
447 cleanExit(7, "system");
449 elsif ($g_node_from == $g_node_to) {
450 printlogln($g_prefix,$g_logfile,$g_log_prefix,lookupMsg('err_same_node'));
451 cleanExit(8, "system");
454 # Check nodes are available and decide on action to take
455 printlogln($g_prefix,$g_logfile,$g_log_prefix,lookupMsg('interactive_check_nodes'));
456 ($g_available_node_count, $g_critical_node_count) = checkNodes($g_clname, $g_dbuser, $g_dbpass, $g_node_from, $g_node_to, $g_prefix, $g_logfile, $g_log_prefix);
458 if ($g_available_node_count <= 0) {
459 printlogln($g_prefix,$g_logfile,$g_log_prefix,lookupMsg('err_cluster_offline'));
460 cleanExit(9, "system");
462 elsif ($g_critical_node_count == -1) {
463 printlogln($g_prefix,$g_logfile,$g_log_prefix,lookupMsg('err_node_offline', 'Target new origin', $g_node_to));
464 cleanExit(10, "system");
466 elsif ($g_critical_node_count == -2) {
467 printlogln($g_prefix,$g_logfile,$g_log_prefix,lookupMsg('err_node_offline', 'Old origin', $g_node_from));
468 printlog($g_prefix,$g_logfile,$g_log_prefix,lookupMsg('interactive_action', 'FAILOVER'));
471 elsif ($g_critical_node_count == 0) {
472 if ($g_node_count == $g_available_node_count) {
473 printlogln($g_prefix,$g_logfile,$g_log_prefix,lookupMsg('info_all_nodes_available'));
476 printlogln($g_prefix,$g_logfile,$g_log_prefix,lookupMsg('info_req_nodes_available', $g_available_node_count, $g_node_count));
478 printlog($g_prefix,$g_logfile,$g_log_prefix,lookupMsg('interactive_action', 'MOVE SET'));
480 elsif ($g_critical_node_count > 0) {
481 printlogln($g_prefix,$g_logfile,$g_log_prefix,lookupMsg('wrn_req_unavailable', $g_node_from, $g_critical_node_count));
482 printlogln($g_prefix,$g_logfile,$g_log_prefix,lookupMsg('note_reshape_cluster'));
483 printlogln($g_prefix,$g_logfile,$g_log_prefix,lookupMsg('exit_noaction'));
484 cleanExit(11, "user");
487 printlog($g_prefix,$g_logfile,$g_log_prefix,lookupMsg('interactive_surrender'));
488 cleanExit(12, "system");
492 if ($g_input !~ /^[Y|O]$/i) {
493 printlogln($g_prefix,$g_logfile,$g_log_prefix,lookupMsg('exit_noaction'));
494 cleanExit(13, "user");
497 if (!$g_use_comment_aliases) {
498 printlog($g_prefix,$g_logfile,$g_log_prefix,lookupMsg('interactive_aliases'));
501 if ($g_input =~ /^[Y|O]$/i) {
502 $g_use_comment_aliases = true;
507 printlogln($g_prefix,$g_logfile,$g_log_prefix,lookupMsg('interactive_summary'));
509 foreach (@g_failed) {
510 printlogln($g_prefix,$g_logfile,$g_log_prefix,"\t" . lookupMsg('interactive_node_info',$_->[0],($_->[4] // "unnamed"),(defined($_->[9]) ? "providing sets $_->[9]" : "sole subscriber"), $_->[2]));
513 printlogln($g_prefix,$g_logfile,$g_log_prefix,lookupMsg('interactive_failover_detail_1'));
514 printlogln($g_prefix,$g_logfile,$g_log_prefix,"\t" . lookupMsg('interactive_failover_detail_2'));
515 printlogln($g_prefix,$g_logfile,$g_log_prefix,"\t" . lookupMsg('interactive_failover_detail_3'));
516 printlogln($g_prefix,$g_logfile,$g_log_prefix,"\t" . lookupMsg('interactive_failover_detail_4'));
518 printlog($g_prefix,$g_logfile,$g_log_prefix,lookupMsg('interactive_reason'));
520 printlog($g_prefix,$g_logfile,$g_log_prefix,lookupMsg('interactive_continue'));
523 if ($g_input !~ /^[Y|O]$/i) {
524 printlogln($g_prefix,$g_logfile,$g_log_prefix,lookupMsg('exit_noaction'));
525 cleanExit(14, "user");
528 printlogln($g_prefix,$g_logfile,$g_log_prefix,lookupMsg('interactive_write_script', 'failover from', $g_node_from, $g_node_to));
529 $g_script = writeFailover($g_prefix, $g_dbconninfo, $g_clname, $g_dbuser, $g_dbpass, $g_node_from, $g_node_to, $g_subs_follow_origin, $g_use_comment_aliases, $g_logfile, $g_log_prefix);
532 printlog($g_prefix,$g_logfile,$g_log_prefix,lookupMsg('interactive_preserve'));
535 if ($g_input =~ /^[Y|O]$/i) {
536 $g_subs_follow_origin = true;
539 printlog($g_prefix,$g_logfile,$g_log_prefix,lookupMsg('interactive_reason'));
542 printlogln($g_prefix,$g_logfile,$g_log_prefix,lookupMsg('interactive_write_script', 'move all sets provided by', $g_node_from, $g_node_to));
543 $g_script = writeMoveSet($g_prefix, $g_dbconninfo, $g_clname, $g_dbuser, $g_dbpass, $g_node_from, $g_node_to, $g_subs_follow_origin, $g_use_comment_aliases, $g_logfile, $g_log_prefix);
546 # Complete and run script if required
548 printlogln($g_prefix,$g_logfile,$g_log_prefix,lookupMsg('generated_script', $g_script));
549 printlog($g_prefix,$g_logfile,$g_log_prefix,lookupMsg('interactive_run_script', $g_script));
552 if ($g_input =~ /^[Y|O]$/i) {
553 printlogln($g_prefix,$g_logfile,$g_log_prefix,lookupMsg('interactive_running'));
554 unless (runSlonik($g_script, $g_prefix, $g_logfile, $g_log_prefix)) {
555 printlogln($g_prefix,$g_logfile,$g_log_prefix,lookupMsg('err_execute_fail', 'slonik script', $g_script));
559 printlogln($g_prefix,$g_logfile,$g_log_prefix,lookupMsg('exit_noaction'));
563 printlogln($g_prefix,$g_logfile,$g_log_prefix,lookupMsg('err_read_fail', 'slonik script', $g_script));
564 cleanExit(15, "system");
567 cleanExit(0, "script completion");
569 ###########################################################################################################################################
572 my $exit_code = shift;
575 printlogln($g_prefix,$g_logfile,$g_log_prefix,lookupMsg('exit', $type));
579 logDB("dbname=$g_logdb_name;host=$g_logdb_host;port=$g_logdb_port", $g_logdb_user, $g_logdb_pass, $exit_code, $g_reason, $g_prefix, $g_logfile, $g_log_prefix, $g_clname, $g_script);
583 if ($g_pid_written) {
584 removePID($g_prefix, $g_logfile, $g_log_prefix, $g_pidfile);
591 cleanExit(100,'signal');
602 my $log_prefix = shift;
608 my $result_count = 0;
609 my $critical_count = 0;
615 undef @g_unresponsive;
618 foreach (@g_cluster) {
619 if ($_->[0] == $from) {
620 @origsets = split(',', $_->[3]);
625 foreach (@g_cluster) {
627 printlogln($prefix,$logfile,$log_prefix,lookupMsg('dbg_cluster', $_->[0],($_->[1] // "<NONE>"),$_->[2],($_->[3] // "<NONE>"),$_->[4],($_->[5] // "<NONE>") . "(" . ($_->[8] // "<NONE>") . ")",$_->[6],($_->[7] // "<NONE>"),($_->[9] // "<NONE>") . " (" . ($_->[10] // "<NONE>") . ")"));
630 $dsn = "DBI:Pg:$_->[2];";
632 $dbh = DBI->connect($dsn, $dbuser, $dbpass, {RaiseError => 1});
633 $query = "SELECT count(*) FROM pg_namespace WHERE nspname = ?";
634 $sth = $dbh->prepare($query);
635 $sth->bind_param(1, "_" . $clname);
638 $result_count = $result_count+$sth->rows;
645 # Critical count will be -1 if the new origin is down, -2 if the old origin is down or positive if subscribers to sets on old origin are down.
646 printlogln($prefix,$logfile,$log_prefix,lookupMsg('wrn_node_unavailable', $_->[0]));
648 printlogln($prefix,$logfile,$log_prefix,lookupMsg('dbg_generic', $@));
650 if ($_->[0] == $to) {
651 $critical_count = -1;
653 elsif ($_->[0] == $from) {
654 $critical_count = -2;
657 foreach my $subprov (split(';', $_->[5])) {
658 my ($node, $setlist) = (split('->', $subprov)) ;
660 $setlist =~ s/(\)|\(|s)//g;
661 @subsets = (split(',', $setlist));
663 if (($critical_count >= 0) && (checkSubscribesAnySets(\@origsets, \@subsets))) {
668 # Only push nodes with active subscribers to sets into the failed list, unless explicitly told to include subscriber-only nodes ($g_fail_subonly)
669 if (($g_fail_subonly) || (defined($_->[9]))) {
670 push(@g_failed, \@$_);
671 $g_backups{$_->[0]} = $to;
673 push(@g_unresponsive, \@$_);
677 return ($result_count, $critical_count);
# Cluster loader fragment. The enclosing `sub` line is outside this excerpt; other code
# in this file invokes it as loadCluster($dbconninfo, $clname, $dbuser, $dbpass, $prefix,
# $logfile, $log_prefix).
#
# What the visible statements do:
#   * connect with DBI using the DSN "DBI:Pg:$dbconninfo;" (RaiseError enabled),
#   * read the Slony-I version through getModuleVersion() in the cluster schema,
#   * run one aggregate query over sl_node, sl_subscribe, sl_path and sl_set and push
#     every fetched row, as an array ref, onto the global @g_cluster,
#   * adjust $g_lockset_method, $g_failover_method and $g_resubscribe_method according
#     to the reported version.
# Returns: (number of entries in @g_cluster, version string).
# Dies with the 'err_pgsql_connect' message; the condition guarding that `die` is not
# visible here (presumably an eval/$@ check - TODO confirm).
681 my $dbconninfo = shift;
687 my $log_prefix = shift;
698 printlogln($prefix,$logfile,$log_prefix,lookupMsg('dbg_cluster_load', $dbconninfo));
701 $dsn = "DBI:Pg:$dbconninfo;";
703 $dbh = DBI->connect($dsn, $dbuser, $dbpass, {RaiseError => 1});
# The cluster schema is the cluster name prefixed with "_", quoted as an identifier.
704 $qw_clname = $dbh->quote_identifier("_" . $clname);
706 $query = "SELECT $qw_clname.getModuleVersion()";
707 $sth = $dbh->prepare($query);
709 ($version) = $sth->fetchrow;
# The CTE "z" groups by (provider, node); the outer SELECT then groups by node and
# aggregates the per-provider values with string_agg. The no_failed column is only
# referenced when the first three characters of the version compare >= 2.2.
712 $query = "WITH z AS (
713 SELECT a.no_id, b.sub_provider AS no_prov,
714 COALESCE(c.pa_conninfo,(SELECT pa_conninfo FROM $qw_clname.sl_path WHERE pa_server = $qw_clname.getlocalnodeid(?) LIMIT 1)) AS no_conninfo,
715 array_to_string(array(SELECT set_id FROM $qw_clname.sl_set WHERE set_origin = a.no_id ORDER BY set_id),',') AS origin_sets,
716 string_agg(CASE WHEN b.sub_receiver = a.no_id AND b.sub_forward AND b.sub_active THEN b.sub_set::text END, ',' ORDER BY b.sub_set) AS prov_sets,
717 coalesce(trim(regexp_replace(substring(a.no_comment from E'\\\\((.+)\\\\)'), '[^0-9A-Za-z]','_','g')), 'node' || a.no_id) AS no_name,
718 'n' || b.sub_provider || '->(' || string_agg(CASE WHEN b.sub_receiver = a.no_id THEN 's' || b.sub_set END,',' ORDER BY b.sub_set,',') || ')' AS sub_tree,
719 coalesce(trim(regexp_replace(substring(d.no_comment from E'\\\\((.+)\\\\)'), '[^0-9A-Za-z]','_','g')), 'node' || b.sub_provider, '')
720 || '->(' || string_agg(CASE WHEN b.sub_receiver = a.no_id THEN coalesce(trim(regexp_replace(e.set_comment, '[^0-9A-Za-z]', '_', 'g')), 'set' || b.sub_set) END,',' ORDER BY b.sub_set) || ')' AS sub_tree_name,
721 CASE " . ((substr($version,0,3) >= 2.2) ? "WHEN a.no_failed THEN 'FAILED' " : "") . "WHEN a.no_active THEN 'ACTIVE' ELSE 'INACTIVE' END AS no_status,
722 array_to_string(array(SELECT DISTINCT sub_set::text FROM $qw_clname.sl_subscribe WHERE sub_provider = a.no_id AND sub_active ORDER BY sub_set),',') AS prov_sets_active,
723 string_agg(CASE WHEN b.sub_receiver = a.no_id THEN b.sub_set::text END,',' ORDER BY b.sub_set,',') AS sub_sets
724 FROM $qw_clname.sl_node a
725 LEFT OUTER JOIN $qw_clname.sl_subscribe b ON a.no_id = b.sub_receiver
726 LEFT OUTER JOIN $qw_clname.sl_path c ON c.pa_server = a.no_id AND c.pa_client = $qw_clname.getlocalnodeid(?)
727 LEFT OUTER JOIN $qw_clname.sl_node d ON b.sub_provider = d.no_id
728 LEFT OUTER JOIN $qw_clname.sl_set e ON b.sub_set = e.set_id
729 GROUP BY b.sub_provider, a.no_id, a.no_comment, c.pa_conninfo, d.no_comment, a.no_active
733 nullif(string_agg(no_prov::text, ',' ORDER BY no_prov),'') AS no_provs,
735 nullif(string_agg(origin_sets::text, ',' ORDER BY origin_sets),'') AS origin_sets,
737 nullif(string_agg(sub_tree, ';' ORDER BY sub_tree),'') AS no_sub_tree,
739 nullif(string_agg(prov_sets::text, ',' ORDER BY prov_sets),'') AS prov_sets,
740 nullif(string_agg(sub_tree_name, ';' ORDER BY sub_tree_name),'') AS no_sub_tree_name,
741 nullif(string_agg(prov_sets_active::text, ',' ORDER BY prov_sets_active),'') AS prov_sets_active,
742 nullif(string_agg(sub_sets::text, ',' ORDER BY sub_sets),'') AS no_subs
743 FROM z GROUP BY no_id, no_conninfo, no_name, no_status";
744 $sth = $dbh->prepare($query);
# Both placeholders are the getlocalnodeid(?) arguments; each receives "_" . $clname.
746 $sth->bind_param(1, "_" . $clname);
747 $sth->bind_param(2, "_" . $clname);
751 while (my @node = $sth->fetchrow) {
752 push(@g_cluster, \@node);
761 printlogln($prefix,$logfile,$log_prefix,lookupMsg('dbg_generic', $@));
763 die lookupMsg('err_pgsql_connect');
# Only the first character of the version is compared here, so the 'wrn_not_tested'
# message is logged for versions whose leading digit is below 2.
766 if (substr($version,0,1) < 2) {
767 printlogln($prefix,$logfile,$log_prefix,lookupMsg('wrn_not_tested', $version));
769 if (($g_use_try_blocks) && ($g_lockset_method eq 'multiple') && (substr($version,0,3) <= 9.9)) {
770 # It's currently not possible to lock multiple sets at a time within a try block (v2.2.2), leave the logic in and set a high version number for now.
771 printlogln($prefix,$logfile,$log_prefix, lookupMsg('note_multiple_try', $version));
772 $g_lockset_method = 'single';
# NOTE(review): only the first three characters of the version are compared numerically,
# so a version string such as "2.10..." would be read as 2.1 - confirm this is acceptable.
774 if (substr($version,0,3) >= 2.2) {
775 $g_failover_method = 'new';
776 $g_resubscribe_method = 'resubscribe';
780 return (scalar(@g_cluster), $version);
# Set loader fragment. The enclosing `sub` line is outside this excerpt; other code in
# this file invokes it as loadSets($dbconninfo, $clname, $nodenumber, $dbuser, $dbpass,
# $prefix, $logfile, $log_prefix).
#
# Connects to "DBI:Pg:$dbconninfo;" and selects, from sl_set in the cluster schema, the
# sets whose set_origin equals $nodenumber, ordered by set_id. Each row holds the set id
# and the set comment with every character outside 0-9, A-Z, a-z and ',' replaced by "_"
# and then trimmed. Every row is pushed, as an array ref, onto the global @g_sets.
# Returns the number of entries in @g_sets.
# Dies with the 'err_pgsql_connect' message; the guarding condition is not visible here.
784 my $dbconninfo = shift;
786 my $nodenumber = shift;
791 my $log_prefix = shift;
801 $dsn = "DBI:Pg:$dbconninfo;";
803 $dbh = DBI->connect($dsn, $dbuser, $dbpass, {RaiseError => 1});
804 $qw_clname = $dbh->quote_identifier("_" . $clname);
805 $query = "SELECT set_id, trim(regexp_replace(set_comment,'[^0-9,A-Z,a-z]','_','g')) FROM $qw_clname.sl_set WHERE set_origin = ? ORDER BY set_id;";
807 $sth = $dbh->prepare($query);
808 $sth->bind_param(1, $nodenumber);
812 while (my @set = $sth->fetchrow) {
813 push(@g_sets, \@set);
821 printlogln($prefix,$logfile,$log_prefix,lookupMsg('dbg_generic', $@));
823 die lookupMsg('err_pgsql_connect');
826 return scalar(@g_sets);
# Replication-lag loader fragment (the `sub` line and its name are not visible in this
# excerpt).
#
# Connects to "DBI:Pg:$dbconninfo;" and builds one text value per sl_status row, joined
# to sl_node twice for the origin and the receiving node:
#   "<st_origin> (<name>)<-><st_received> (<name>) Events: <st_lag_num_events> Time: <st_lag_time>"
# where <name> is the sanitised text found in parentheses in no_comment, or
# 'node' || no_id when there is none.
# Returns the number of entries in the global @g_lags (the statement that fills @g_lags
# inside the fetch loop is not visible here).
# Dies with the 'err_pgsql_connect' message; the guarding condition is not visible here.
830 my $dbconninfo = shift;
836 my $log_prefix = shift;
846 $dsn = "DBI:Pg:$dbconninfo;";
848 $dbh = DBI->connect($dsn, $dbuser, $dbpass, {RaiseError => 1});
849 $qw_clname = $dbh->quote_identifier("_" . $clname);
850 $query = "SELECT a.st_origin || ' (' || coalesce(trim(regexp_replace(substring(b.no_comment from E'\\\\((.+)\\\\)'), '[^0-9A-Za-z]','_', 'g')), 'node' || b.no_id) || ')<->'
851 || a.st_received || ' (' || coalesce(trim(regexp_replace(substring(c.no_comment from E'\\\\((.+)\\\\)'), '[^0-9A-Za-z]','_', 'g')), 'node' || c.no_id) || ') Events: '
852 || a.st_lag_num_events || ' Time: ' || a.st_lag_time
853 FROM $qw_clname.sl_status a
854 INNER JOIN $qw_clname.sl_node b on a.st_origin = b.no_id
855 INNER JOIN $qw_clname.sl_node c on a.st_received = c.no_id";
857 $sth = $dbh->prepare($query);
860 while (my $lag = $sth->fetchrow) {
869 printlogln($prefix,$logfile,$log_prefix,lookupMsg('dbg_generic', $@));
871 die lookupMsg('err_pgsql_connect');
874 return scalar(@g_lags);
# Interactive node chooser fragment (the `sub` line and its name are not visible in this
# excerpt).
#
# Prints a table of every entry in @g_cluster (id, name, status, providers and three
# detail lines) and records each node in %options keyed by node id with the keys
# name, sets, status and provider.
# Unless $type matches /info/i the user is prompted for a node id, which is validated:
#   * it must be a key of %options and its status must be "ACTIVE",
#   * when $type matches /from/i the node must have a non-empty 'sets' value,
#   * when $type matches /to/i the node must have a non-empty 'provider' value and,
#     according to checkProvidesAllSets(), provide every set listed in column [3] of the
#     @g_cluster entry whose id equals $last_choice.
# A confirmation prompt follows; any answer not matching /^[Y|O]$/i logs 'exit_noaction'.
# The statements that read input and the return value are not visible here.
881 my $log_prefix = shift;
882 my $last_choice = shift;
891 $line = sprintf "%-4s %-14s %-10s %-24s %-s\n", lookupMsg('interactive_head_id'), lookupMsg('interactive_head_name'), lookupMsg('interactive_head_status'), lookupMsg('interactive_head_providers'), lookupMsg('interactive_head_config');
892 printlog($prefix,$logfile,$log_prefix,"$line");
# Underline each heading with "=" characters of the same length as the heading text.
893 $line = sprintf "%-4s %-14s %-10s %-24s %-s\n", "="x(length(lookupMsg('interactive_head_id'))), "="x(length(lookupMsg('interactive_head_name'))), "="x(length(lookupMsg('interactive_head_status'))), "="x(length(lookupMsg('interactive_head_providers'))), "="x(length(lookupMsg('interactive_head_config')));
894 printlog($prefix,$logfile,$log_prefix,"$line");
896 foreach (@g_cluster) {
897 $line = sprintf "%-4s %-14s %-10s %-24s %-s\n", $_->[0], $_->[4], $_->[6], ($_->[1] // "<NONE>"), (lookupMsg('interactive_detail_1') . ($_->[3] // "<NONE>"));
898 printlog($prefix,$logfile,$log_prefix,"$line");
899 $line = sprintf "%-55s %-s\n", " ", (lookupMsg('interactive_detail_2') . ($_->[7] // "<NONE>"));
900 printlog($prefix,$logfile,$log_prefix,"$line");
901 $line = sprintf "%-55s %-s\n", " ", (lookupMsg('interactive_detail_3') . ($_->[5] // "<NONE>"));
902 printlogln($prefix,$logfile,$log_prefix,"$line");
903 $options{$_->[0]} = {name => $_->[4], sets => ($_->[3] // ""), status => $_->[6], provider => $_->[7]};
905 if ($type !~ m/info/i) {
906 printlog($prefix,$logfile,$log_prefix,lookupMsg('interactive_choose_node', $type));
910 if(exists($options{$choice})) {
911 if ($options{$choice}->{status} ne "ACTIVE") {
912 printlogln($prefix,$logfile,$log_prefix,lookupMsg('err_inactive', $choice, lc($options{$choice}->{status})));
915 elsif (($type =~ m/from/i) && (length(trim($options{$choice}->{sets})) <= 0)) {
916 printlogln($prefix,$logfile,$log_prefix,lookupMsg('err_not_origin', $choice));
919 elsif ($type =~ m/to/i) {
920 if (length(trim($options{$choice}->{provider})) <= 0) {
921 printlogln($prefix,$logfile,$log_prefix,lookupMsg('err_not_provider', $choice));
925 foreach my $old_origin (@g_cluster) {
926 if ($old_origin->[0] == $last_choice) {
927 @sets_from = split(',', $old_origin->[3]);
928 @sets_to = split(',', $options{$choice}->{provider});
929 if (checkProvidesAllSets(\@sets_from, \@sets_to)) {
# NOTE(review): %options entries are created with the key 'provider' (singular); the
# 'providers' key read below is never set, so the last message argument is undefined.
933 printlogln($prefix,$logfile,$log_prefix,lookupMsg('err_not_provider_sets',$choice,$old_origin->[3],$options{$choice}->{providers}));
940 printlogln($prefix,$logfile,$log_prefix,lookupMsg('err_no_configuration', $last_choice));
946 printlog($prefix,$logfile,$log_prefix,lookupMsg('interactive_confirm',$type,$choice,$options{$choice}->{name}));
# NOTE(review): inside a character class "|" is a literal, so "|" is accepted as a
# confirmation alongside Y and O (case-insensitive).
949 if ($ok !~ /^[Y|O]$/i) {
950 printlogln($prefix,$logfile,$log_prefix,lookupMsg('exit_noaction'));
955 elsif (!length($choice)) {
956 printlogln($prefix,$logfile,$log_prefix,lookupMsg('err_must_enter_node_id'));
960 printlogln($prefix,$logfile,$log_prefix,lookupMsg('err_not_a_node_id', $choice));
# Slonik preamble writer fragment. The enclosing `sub` line is outside this excerpt;
# other code in this file invokes it as writePreamble($filename, $dbconninfo, $clname,
# $dbuser, $dbpass, <sets flag>, $aliases, $prefix, $logfile, $log_prefix,
# $comment_all_failed) and treats a false return as an incomplete preamble.
#
# Opens $filename for writing (truncating it) and emits:
#   * a header comment carrying the generation time as HH:MM:SS on DD/MM/YYYY,
#   * the CLUSTER NAME statement, either directly or through a DEFINE (the condition
#     selecting between the two forms is not visible here),
#   * for every entry in @g_cluster an ADMIN CONNINFO statement, again either with
#     DEFINEd aliases or with the plain node id,
#   * when both $aliases and $sets are true, a DEFINE for every set returned by
#     loadSets() for that node.
# Lines for a node are prefixed with a "# (Node N unavailable ...)" comment marker when
# $comment_all_failed is set and the node id is a key of %g_backups, or - when
# $g_fail_subonly is false - when the node is listed in @g_unresponsive and its column
# [9] is undefined.
# On open failure the 'err_write_fail' message is logged.
969 my $filename = shift;
970 my $dbconninfo = shift;
978 my $log_prefix = shift;
979 my $comment_all_failed = shift;
# localtime() returns a 0-based month and years since 1900, hence the +1 / +1900 below.
984 my ($year, $month, $day, $hour, $min, $sec) = (localtime(time))[5,4,3,2,1,0];
985 my $date = sprintf ("%02d:%02d:%02d on %02d/%02d/%04d", $hour, $min, $sec, $day, $month+1, $year+1900);
987 if (open(SLONFILE, ">", $filename)) {
988 print SLONFILE ("# Script autogenerated on $date\n\n");
989 print SLONFILE ("######\n# Preamble (cluster structure)\n######\n\n# Cluster name\n");
991 print SLONFILE ("DEFINE slony_cluster_name $clname;\n");
992 print SLONFILE ("CLUSTER NAME = \@slony_cluster_name;\n\n");
995 print SLONFILE ("CLUSTER NAME = $clname;\n\n");
997 foreach (@g_cluster) {
999 if (($comment_all_failed) && (exists $g_backups{$_->[0]})) {
1000 $line_prefix = "# (Node $_->[0] unavailable) ";
1002 elsif (!$g_fail_subonly) {
1003 foreach my $unresponsive (@g_unresponsive) {
1004 if (($_->[0] == $unresponsive->[0]) && !defined($_->[9])) {
1005 $line_prefix = "# (Node $_->[0] unavailable subscriber only) ";
1009 print SLONFILE ("# Preamble for node $_->[0] named $_->[4]\n");
1011 print SLONFILE ($line_prefix . "DEFINE $_->[4] $_->[0];\n");
1012 print SLONFILE ($line_prefix . "DEFINE $_->[4]_conninfo '$_->[2]';\n");
1013 print SLONFILE ($line_prefix . "NODE \@$_->[4] ADMIN CONNINFO = \@$_->[4]_conninfo;\n\n");
1016 print SLONFILE ($line_prefix . "NODE $_->[0] ADMIN CONNINFO = '$_->[2]';\n\n");
1018 if (($aliases) && ($sets)) {
1019 $set_count = loadSets($dbconninfo, $clname, $_->[0], $dbuser, $dbpass, $prefix, $logfile, $log_prefix);
1020 if ($set_count > 0) {
1021 print SLONFILE ("# Sets provided (currently) by node $_->[0]\n");
1022 foreach my $set (@g_sets) {
1023 print SLONFILE ($line_prefix . "DEFINE $set->[1] $set->[0];\n");
1025 print SLONFILE ("\n");
1032 printlogln($prefix,$logfile,$log_prefix,lookupMsg('err_write_fail', "script", $filename));
# Move-set (switchover) script writer fragment (the `sub` line and its name are not
# visible in this excerpt).
#
# Builds the script name
#   "$prefix/$clname-move_sets_from_<from>_to_<to>_on_DD_MM_YYYY_HH:MM:SS.scr",
# writes the preamble through writePreamble(), then appends the actions:
#   1. LOCK SET / MOVE SET for every set returned by loadSets() for node $from.
#      When $g_lockset_method is not "single" all sets are locked and moved together,
#      optionally wrapped in TRY { } ON ERROR { UNLOCK SET ...; EXIT 1; } ON SUCCESS { }
#      when $g_use_try_blocks is set, followed by a WAIT FOR EVENT.
#      When $g_lockset_method is "single" each set gets its own LOCK/MOVE (and optional
#      TRY block) followed by its own WAIT FOR EVENT.
#   2. When $subs is true, statements repointing the other ACTIVE nodes (neither $from
#      nor $to): SUBSCRIBE SET when $g_resubscribe_method is 'subscribe', RESUBSCRIBE
#      NODE when it is 'resubscribe'. A subscription whose provider was $from is moved
#      to $to; otherwise the existing provider is kept. Nodes present in %g_backups get
#      their lines commented out through $line_prefix.
#   3. A closing ECHO reminding the operator to update existing slonik scripts.
# Each statement exists in an alias form (\@name) and a plain numeric form; the
# conditions choosing between them are not visible here (presumably $aliases).
# On open failure the 'err_write_fail' message is logged.
1040 my $dbconninfo = shift;
1047 my $aliases = shift;
1048 my $logfile = shift;
1049 my $log_prefix = shift;
1054 my $try_prefix = "";
1055 my ($year, $month, $day, $hour, $min, $sec) = (localtime(time))[5,4,3,2,1,0];
1056 my $filetime = sprintf ("%02d_%02d_%04d_%02d:%02d:%02d", $day, $month+1, $year+1900, $hour, $min, $sec);
1057 my $filename = $prefix . "/" . $clname . "-move_sets_from_" . $from . "_to_" . $to . "_on_" . $filetime . ".scr";
1059 if ($g_use_try_blocks) {
1066 my ($node, $setlist);
1067 my ($node_name, $setlist_name);
1070 unless (writePreamble($filename, $dbconninfo, $clname, $dbuser, $dbpass, true, $aliases, $prefix, $logfile, $log_prefix, true)) {
1071 printlogln($prefix,$logfile,$log_prefix,lookupMsg('err_incomplete_preamble'));
# Resolve the display names of the old and new origin from @g_cluster (column [4]).
1074 foreach (@g_cluster) {
1075 if ($_->[0] == $from) {
1076 $from_name = $_->[4];
1078 elsif ($_->[0] == $to) {
# The preamble was written above, so the actions are appended (">>").
1083 if (open(SLONFILE, ">>", $filename)) {
1085 print SLONFILE ("######\n# Actions (changes to cluster structure)\n######\n");
1087 $set_count = loadSets($dbconninfo, $clname, $from, $dbuser, $dbpass, $prefix, $logfile, $log_prefix);
1088 if ($set_count > 0) {
1090 if ($g_lockset_method ne "single") {
1091 if ($g_use_try_blocks) {
1092 print SLONFILE ("TRY {\n");
1096 print SLONFILE ($try_prefix . "ECHO 'Locking set $_->[1] ($_->[0])';\n");
1097 print SLONFILE ($try_prefix . "LOCK SET ( ID = \@$_->[1], ORIGIN = \@$from_name);\n");
1100 print SLONFILE ($try_prefix . "ECHO 'Locking set $_->[0]';\n");
1101 print SLONFILE ($try_prefix . "LOCK SET ( ID = $_->[0], ORIGIN = $from);\n");
1105 print SLONFILE ("\n");
1108 print SLONFILE ($try_prefix . "ECHO 'Moving set $_->[1] ($_->[0])';\n");
1109 print SLONFILE ($try_prefix . "MOVE SET ( ID = \@$_->[1], OLD ORIGIN = \@$from_name, NEW ORIGIN = \@$to_name);\n");
1112 print SLONFILE ($try_prefix . "ECHO 'Moving set $_->[0]';\n");
1113 print SLONFILE ($try_prefix . "MOVE SET ( ID = $_->[0], OLD ORIGIN = $from, NEW ORIGIN = $to);\n");
1117 if ($g_use_try_blocks) {
1118 print SLONFILE ("}\nON ERROR {\n");
1121 print SLONFILE ($try_prefix . "ECHO 'Unlocking set $_->[1] ($_->[0])';\n");
1122 print SLONFILE ($try_prefix . "UNLOCK SET ( ID = \@$_->[1], ORIGIN = \@$from_name);\n");
1125 print SLONFILE ($try_prefix . "ECHO 'Unlocking set $_->[0]';\n");
1126 print SLONFILE ($try_prefix . "UNLOCK SET ( ID = $_->[0], ORIGIN = $from);\n");
1129 print SLONFILE ("\tEXIT 1;\n}\nON SUCCESS {\n");
1132 print SLONFILE ($try_prefix . "WAIT FOR EVENT (ORIGIN = \@$from_name, CONFIRMED = ALL, WAIT ON = \@$from_name, TIMEOUT = 0);\n");
1135 print SLONFILE ($try_prefix . "WAIT FOR EVENT (ORIGIN = $from, CONFIRMED = ALL, WAIT ON = $from, TIMEOUT = 0);\n");
1137 if ($g_use_try_blocks) {
1138 print SLONFILE ("}\n");
# "single" lock method: every set is locked, moved and waited for on its own.
1142 if ($g_lockset_method eq "single") {
1144 print SLONFILE ("\nECHO 'Moving set $_->[1] ($_->[0])';\n");
1145 if ($g_use_try_blocks) {
1146 print SLONFILE ("TRY {\n");
1148 print SLONFILE ($try_prefix . "LOCK SET ( ID = \@$_->[1], ORIGIN = \@$from_name);\n");
1149 print SLONFILE ($try_prefix . "MOVE SET ( ID = \@$_->[1], OLD ORIGIN = \@$from_name, NEW ORIGIN = \@$to_name);\n");
1150 if ($g_use_try_blocks) {
1151 print SLONFILE ("}\nON ERROR {\n" . $try_prefix . "UNLOCK SET ( ID = \@$_->[1], ORIGIN = \@$from_name);\n" . $try_prefix . "EXIT 1;\n}\n");
1153 print SLONFILE ("WAIT FOR EVENT (ORIGIN = \@$from_name, CONFIRMED = ALL, WAIT ON = \@$from_name, TIMEOUT = 0);\n");
1156 print SLONFILE ("\nECHO 'Moving set $_->[0]';\n");
1157 if ($g_use_try_blocks) {
1158 print SLONFILE ("TRY {\n");
1160 print SLONFILE ($try_prefix . "LOCK SET ( ID = $_->[0], ORIGIN = $from);\n");
1161 print SLONFILE ($try_prefix . "MOVE SET ( ID = $_->[0], OLD ORIGIN = $from, NEW ORIGIN = $to);\n");
1162 if ($g_use_try_blocks) {
1163 print SLONFILE ("}\nON ERROR {\n" . $try_prefix . "UNLOCK SET ( ID = $_->[0], ORIGIN = $from);\n" . $try_prefix . "EXIT 1;\n}\n");
1165 print SLONFILE ("WAIT FOR EVENT (ORIGIN = $from, CONFIRMED = ALL, WAIT ON = $from, TIMEOUT = 0);\n");
# 'subscribe' method: one SUBSCRIBE SET per set for every other active node.
1168 if (($subs) && ($g_resubscribe_method eq 'subscribe')) {
1170 foreach my $other_subs (@g_cluster) {
1171 if (($other_subs->[6] eq "ACTIVE") && ($other_subs->[0] != $from) && ($other_subs->[0] != $to)) {
1173 if (exists $g_backups{$other_subs->[0]}) {
1174 $line_prefix = "# (Node $other_subs->[0] unavailable) ";
1180 # mess here needs cleaning up
# Column [5] holds "provider->(sets)" entries separated by ";" and column [8] the
# parallel list built from names; the two lists are walked by the same index.
1181 @subprov_name = (split(';', $other_subs->[8]));
1183 foreach $subprov (split(';', $other_subs->[5])) {
1184 ($node, $setlist) = (split('->', $subprov)) ;
1185 ($node_name, $setlist_name) = (split('->', $subprov_name[$subprov_idx])) ;
# Strip the parentheses and the "s" markers so only comma-separated set ids remain.
1188 $setlist =~ s/(\)|\(|s)//g;
1189 @subsets = (split(',', $setlist)) ;
# NOTE(review): the lookupMsg() call closes after $_->[0], so the remaining values are
# passed to printlogln() as extra arguments rather than to lookupMsg().
1192 printlogln($prefix,$logfile,$log_prefix,lookupMsg('dbg_resubscribe', $_->[1], $_->[0]), $other_subs->[0], $other_subs->[4], $setlist, $setlist_name, $node, $node_name);
# NOTE(review): "~~" is the smartmatch operator, which is experimental/deprecated in
# newer Perl releases.
1195 if ($_->[0] ~~ @subsets) {
1196 if ($node == $from) {
1198 print SLONFILE ($line_prefix .
1199 "ECHO 'Issuing subscribe for set $_->[1] ($_->[0]) provider $to_name ($to) -> " .
1200 "receiver $other_subs->[4] ($other_subs->[0])';\n");
1201 print SLONFILE ($line_prefix .
1202 "SUBSCRIBE SET ( ID = \@$_->[1], PROVIDER = \@$to_name, " .
1203 "RECEIVER = \@$other_subs->[4], FORWARD = YES);\n");
1206 print SLONFILE ($line_prefix .
1207 "ECHO 'Issuing subscribe for set $_->[1] ($_->[0]) provider $to -> " .
1208 "receiver $other_subs->[0]';\n");
1209 print SLONFILE ($line_prefix . "SUBSCRIBE SET ( ID = $_->[0], PROVIDER = $to, " .
1210 "RECEIVER = $other_subs->[0], FORWARD = YES);\n");
1215 print SLONFILE ($line_prefix .
1216 "ECHO 'Issuing subscribe for set $_->[1] ($_->[0]) provider $node_name ($node) -> " .
1217 "receiver $other_subs->[4] ($other_subs->[0])';\n");
1218 print SLONFILE ($line_prefix . "SUBSCRIBE SET ( ID = \@$_->[1], PROVIDER = \@$node_name, " .
1219 "RECEIVER = \@$other_subs->[4], FORWARD = YES);\n");
1222 print SLONFILE ($line_prefix .
1223 "ECHO 'Issuing subscribe for set $_->[1] ($_->[0]) provider $node -> " .
1224 "receiver $other_subs->[0]';\n");
1225 print SLONFILE ($line_prefix . "SUBSCRIBE SET ( ID = $_->[0], PROVIDER = $node, " .
1226 "RECEIVER = $other_subs->[0], FORWARD = YES);\n");
# 'resubscribe' method: one RESUBSCRIBE NODE per provider entry for every other active node.
1236 if (($subs) && ($g_resubscribe_method eq 'resubscribe')) {
1238 foreach my $other_subs (@g_cluster) {
1239 if (($other_subs->[6] eq "ACTIVE") && ($other_subs->[0] != $from) && ($other_subs->[0] != $to)) {
1240 if (exists $g_backups{$other_subs->[0]}) {
1241 $line_prefix = "# (Node $other_subs->[0] unavailable) ";
1247 @subprov_name = (split(';', $other_subs->[8]));
1249 foreach $subprov (split(';', $other_subs->[5])) {
1250 ($node, $setlist) = (split('->', $subprov)) ;
1251 ($node_name, $setlist_name) = (split('->', $subprov_name[$subprov_idx])) ;
1255 print SLONFILE ("\n");
1256 if ($node == $from) {
1258 print SLONFILE ($line_prefix .
1259 "ECHO 'Issuing resubscribe for provider $to_name ($to) -> receiver $other_subs->[4] ($other_subs->[0])';\n");
1260 print SLONFILE ($line_prefix .
1261 "RESUBSCRIBE NODE ( ORIGIN = \@$to_name, PROVIDER = \@$to_name, RECEIVER = \@$other_subs->[4]);\n");
1264 print SLONFILE ($line_prefix .
1265 "ECHO 'Issuing resubscribe for provider $to -> receiver $other_subs->[0]';\n");
# NOTE(review): this branch emits "SUBSCRIBE NODE" while the three sibling branches emit
# "RESUBSCRIBE NODE" - looks like a typo in the generated statement; confirm.
1266 print SLONFILE ($line_prefix .
1267 "SUBSCRIBE NODE ( ORIGIN = $to, PROVIDER = $to, RECEIVER = $other_subs->[0] );\n");
1272 print SLONFILE ($line_prefix .
1273 "ECHO 'Issuing resubscribe for provider $node_name ($node) -> receiver $other_subs->[4] ($other_subs->[0])';\n");
1274 print SLONFILE ($line_prefix .
1275 "RESUBSCRIBE NODE ( ORIGIN = \@$to_name, PROVIDER = \@$node_name, RECEIVER = \@$other_subs->[4]);\n");
1278 print SLONFILE ($line_prefix .
1279 "ECHO 'Issuing resubscribe for provider $node -> receiver $other_subs->[0]';\n");
1280 print SLONFILE ($line_prefix .
1281 "RESUBSCRIBE NODE ( ORIGIN = $to, PROVIDER = $node, RECEIVER = $other_subs->[0]);\n");
1292 print SLONFILE ("\nECHO 'All sets originating from $from_name (id $from) have been moved to $to_name (id $to), ensure you modify any existing slonik scripts to reflect the new origin';\n");
1295 print SLONFILE ("\nECHO 'All sets originating from node $from have been moved to node $to, ensure you modify the any existing slonik scripts to reflect the new origin';\n");
1300 printlogln($prefix,$logfile,$log_prefix,lookupMsg('err_write_fail', "script", $filename));
# Failover script writer fragment. The enclosing `sub` line is outside this excerpt;
# other code in this file invokes it as writeFailover($prefix, $dbconninfo, $clname,
# $dbuser, $dbpass, $from, $to, <subs flag>, $aliases, $logfile, $log_prefix) and passes
# the result to runSlonik() as the script path.
#
# The script is named "$prefix/$clname-failover_from_<from>_to_<to>_on_<time>.scr" when
# both $from and $to are defined, otherwise "$prefix/$clname-autofailover_on_<time>.scr".
# After the preamble (writePreamble) the actions are appended:
#   * an ECHO line for each failed node / backup node pair, where the backup of a node
#     in @g_failed is the @g_cluster entry whose id equals $g_backups{<failed id>},
#   * the FAILOVER statement(s): with $g_failover_method 'new' a single FAILOVER holding
#     one "NODE = (ID = ..., BACKUP NODE = ...)" entry per failed node; otherwise a
#     separate "FAILOVER ( ID = ..., BACKUP NODE = ... );" per failed node,
#   * when $g_drop_failed is set, DROP NODE: one combined statement with a comma
#     separated id list for the 'new' method with more than one failed node, otherwise
#     one DROP NODE per failed node using its backup as EVENT NODE.
# On open failure the 'err_write_fail' message is logged.
1307 my $dbconninfo = shift;
1314 my $aliases = shift;
1315 my $logfile = shift;
1316 my $log_prefix = shift;
1320 my ($year, $month, $day, $hour, $min, $sec) = (localtime(time))[5,4,3,2,1,0];
1321 my $filetime = sprintf ("%02d_%02d_%04d_%02d:%02d:%02d", $day, $month+1, $year+1900, $hour, $min, $sec);
1323 if (defined($from) && defined($to)) {
1324 $filename = $prefix . "/" . $clname . "-failover_from_" . $from . "_to_" . $to . "_on_" . $filetime . ".scr";
1327 $filename = $prefix . "/" . $clname . "-autofailover_on_" . $filetime . ".scr";
1330 unless (writePreamble($filename, $dbconninfo, $clname, $dbuser, $dbpass, false, $aliases, $prefix, $logfile, $log_prefix, false)) {
1331 printlogln($prefix,$logfile,$log_prefix,lookupMsg('err_incomplete_preamble'));
1334 if (open(SLONFILE, ">>", $filename)) {
1336 print SLONFILE ("######\n# Actions (changes to cluster structure)\n######\n\n");
1338 printlogln($prefix,$logfile,$log_prefix,lookupMsg('dbg_failover_method',$g_failover_method));
1341 foreach (@g_failed) {
1342 foreach my $backup (@g_cluster) {
1343 if ($backup->[0] == $g_backups{$_->[0]}) {
1344 ## Here we have both details of the backup node and the failed node
1346 print SLONFILE ("ECHO 'Failing over slony cluster from $_->[4] (id $_->[0]) to $backup->[4] (id $backup->[0])';\n");
1349 print SLONFILE ("ECHO 'Failing over slony cluster from node $_->[0] to node $backup->[0]';\n");
1356 print SLONFILE ("FAILOVER (\n\t");
1358 foreach (@g_failed) {
1359 foreach my $backup (@g_cluster) {
1361 if ($backup->[0] == $g_backups{$_->[0]}) {
1362 ## Here we have both details of the backup node and the failed node
# $written counts the entries already emitted: the 'new' method separates entries with
# a comma, the other method closes the statement and opens a fresh FAILOVER.
1363 if ($g_failover_method eq 'new') {
1364 if( $written != 0 ) {
1365 print SLONFILE (",\n\t");
1367 print SLONFILE ("NODE = (");
1370 if( $written != 0 ) {
1371 print SLONFILE ("\n);\nFAILOVER (\n\t");
1375 print SLONFILE ("ID = \@$_->[4], BACKUP NODE = \@$backup->[4]");
1378 print SLONFILE ("ID = $_->[0], BACKUP NODE = $backup->[0]");
1380 if ($g_failover_method eq 'new') {
1381 print SLONFILE (")");
1388 print SLONFILE ("\n);\n");
1390 if ($g_drop_failed) {
1393 if (($g_failover_method eq 'new') && (scalar(@g_failed) > 1)) {
1394 foreach (@g_failed) {
1396 print SLONFILE ("ECHO 'Dropping node $_->[4] ($_->[0])';\n");
1399 print SLONFILE ("ECHO 'Dropping node $_->[0]';\n");
1403 print SLONFILE ("DROP NODE (ID = '");
1407 foreach (@g_failed) {
1408 foreach my $backup (@g_cluster) {
1409 if ($backup->[0] == $g_backups{$_->[0]}) {
# The first backup node found is remembered and used as the EVENT NODE of the
# combined DROP NODE statement.
1410 if (!defined($event_node)) {
1412 $event_node = $backup->[4];
1415 $event_node = $backup->[0];
1418 if (($g_failover_method eq 'new') && (scalar(@g_failed) > 1)) {
1419 if( $written != 0 ) {
1420 print SLONFILE (",");
1422 ## Don't bother being pissy and trying to define array values
1424 # print SLONFILE "\@$_->[4]";
1427 print SLONFILE $_->[0];
1433 print SLONFILE ("ECHO 'Dropping node $_->[4] ($_->[0])';\n");
1434 print SLONFILE ("DROP NODE (ID = \@$_->[4], EVENT NODE = \@$backup->[4]);\n");
1437 print SLONFILE ("ECHO 'Dropping node $_->[0]';\n");
1438 print SLONFILE ("DROP NODE (ID = $_->[0], EVENT NODE = $backup->[0]);\n");
1445 if (($g_failover_method eq 'new') && (scalar(@g_failed) > 1)) {
1447 print SLONFILE ("', EVENT NODE = \@$event_node);\n");
1450 print SLONFILE ("', EVENT NODE = $event_node);\n");
# NOTE(review): the other script writers in this file report this error with
# printlogln(); printlog() here leaves the message without a trailing newline.
1457 printlog($prefix,$logfile,$log_prefix,lookupMsg('err_write_fail', "script", $filename));
# Message lookup fragment (called throughout this file as lookupMsg(<name>, <values...>);
# the `sub` line is outside this excerpt).
#
# Resolves the message template for $name, preferring %message for the language in
# $g_lang and falling back to the 'en' entry. When neither exists the text becomes a
# "Failed to lookup text" diagnostic that includes the caller's line number.
# The last two statements substitute a placeholder of the form "$<x>" with the matching
# value (or '?' when the value is undefined) and leave the loop as soon as a placeholder
# is not found; the loop header that sets $x and $val is not visible here.
1464 my $name = shift || '?';
1468 if (exists $message{$g_lang}{$name}) {
1469 $text = $message{$g_lang}{$name};
1471 elsif (exists $message{'en'}{$name}) {
1472 $text = $message{'en'}{$name};
1475 $line_call = (caller)[2];
1476 $text = qq{Failed to lookup text "$name" at line $line_call};
1482 $val = '?' if ! defined $val;
1483 last unless $text =~ s/\$$x/$val/g;
# Whitespace trimming fragment (presumably the body of the trim() helper called elsewhere
# in this file; the `sub` line, argument unpacking and return are not visible here).
# Removes leading whitespace, then trailing whitespace, from $string in place.
1492 $string =~ s/^\s+//;
1493 $string =~ s/\s+$//;
# Line printer fragment (presumably the body of println(), which other code in this file
# calls with a single message).
# Prints the arguments joined with $/ - or $_ when called without arguments - followed
# by $/ (the input record separator, used here as the line ending).
1498 print ((@_ ? join($/, @_) : $_), $/);
# Log writer fragment (called elsewhere in this file as printlog($prefix, $logfile_name,
# $log_prefix, $message); the `sub` line and the first argument are outside this excerpt).
#
# When $logfile_name is defined:
#   * the name is expanded with strftime(), so it may contain date patterns; a name
#     starting with "/" is used as given, any other name is placed under "$prefix/",
#   * in $log_prefix, "%m" is replaced by "DD-MM-YYYY HH:MM:SS.mmm", "%t" by
#     "DD-MM-YYYY HH:MM:SS" and "%p" by $g_pid,
#   * "$log_prefix $message" is appended to the log file; if the file cannot be opened
#     the 'err_write_fail' message is printed with println().
1503 my $logfile_name = shift;
1504 my $log_prefix = shift;
1505 my $message = shift;
1511 if (defined($logfile_name)) {
1513 # Do we have to do this all the time? Perhaps could check parameters first
1514 if ($logfile_name =~ /^\//i) {
1515 $logfile = strftime($logfile_name, localtime);
1518 $logfile = "$prefix/" . strftime($logfile_name, localtime);
1521 if ($log_prefix =~ m/(\%[mt])/) {
1522 my ($year, $month, $day, $hour, $min, $sec) = (localtime(time))[5,4,3,2,1,0];
# gettimeofday() in list context yields (seconds, microseconds); dividing the second
# value by 1000 gives the millisecond part printed by %03d.
1523 my ($h_sec, $h_msec) = gettimeofday;
1524 $date = sprintf ("%02d-%02d-%04d %02d:%02d:%02d.%03d", $day, $month+1, $year+1900, $hour, $min, $sec, $h_msec/1000);
1525 $log_prefix =~ s/\%m/$date/g;
1527 $date = sprintf ("%02d-%02d-%04d %02d:%02d:%02d", $day, $month+1, $year+1900, $hour, $min, $sec);
1528 $log_prefix =~ s/\%t/$date/g;
1530 if ($log_prefix =~ m/(\%p)/) {
1531 $log_prefix =~ s/\%p/$g_pid/g;
1534 if (open(LOGFILE, ">>", $logfile)) {
1535 print LOGFILE $log_prefix . " " . $message;
1539 println(lookupMsg('err_write_fail', "logfile", $logfile));
# Line-terminated logging fragment (presumably the body of printlogln(), which is called
# throughout this file with the same four arguments as printlog()).
# Forwards the first three arguments unchanged and appends $/ to the fourth (the
# message); any further arguments are ignored.
1545 printlog ($_[0], $_[1], $_[2], $_[3] . $/);
# Database audit-log fragment (the `sub` line and its name are not visible in this
# excerpt).
#
# Reads the whole log file and, when $script is defined and exists on disk, the whole
# generated script with read_file(); otherwise placeholder texts are used for the script
# name and content. It then connects to "DBI:Pg:$dbconninfo;" and inserts one row into
# public.failovers with the columns reason, exit_code, results (log file content),
# script ("<script path>:\n<script content>") and cluster_name.
# Read failures are logged with 'err_read_fail'. The function dies with the
# 'err_pgsql_connect' message; the guarding condition is not visible here.
1549 my $dbconninfo = shift;
1552 my $exit_code = shift;
1555 my $logfile = shift;
1556 my $log_prefix = shift;
# NOTE(review): an empty file makes read_file() return a false value, which is reported
# here as a read failure.
1568 unless($results = (read_file($logfile))) {
1569 printlogln($prefix,$logfile,$log_prefix,lookupMsg('err_read_fail', "logfile", $logfile));
1572 if (defined($script) && (-e $script)) {
1573 unless ($script_data = (read_file($script))) {
1574 printlogln($prefix,$logfile,$log_prefix,lookupMsg('err_read_fail', "script file", $script));
1578 $script_data = "No script data was generated.";
1579 $script = "No script generated.";
1582 $dsn = "DBI:Pg:$dbconninfo;";
1584 $dbh = DBI->connect($dsn, $dbuser, $dbpass, {RaiseError => 1});
1585 $query = "INSERT INTO public.failovers (reason, exit_code, results, script, cluster_name)
1586 VALUES (?, ?, ?, ?, ?)";
1588 $sth = $dbh->prepare($query);
1590 $sth->bind_param(1, $reason);
1591 $sth->bind_param(2, $exit_code);
1592 $sth->bind_param(3, $results);
1593 $sth->bind_param(4, $script . ":\n" . $script_data);
1594 $sth->bind_param(5, $clname);
1603 printlogln($prefix,$logfile,$log_prefix,lookupMsg('dbg_generic', $@));
1605 die lookupMsg('err_pgsql_connect');
# UUID helper fragment (the `sub` line, its name and the return statement are not
# visible in this excerpt).
# Derives a name-based UUID from $date_string using "failover_script" as the namespace
# argument of Data::UUID's create_from_name(), then converts it to its string form.
# NOTE(review): "new Data::UUID" is indirect object syntax; Data::UUID->new is the
# unambiguous spelling.
1612 my $date_string = shift;
1613 my $g_ug = new Data::UUID;
1614 my $g_uuid = $g_ug->create_from_name("failover_script", $date_string);
1615 my $g_uuid_str = $g_ug->to_string($g_uuid);
# PID file writer fragment (the `sub` line and its name are not visible in this excerpt).
# Resolves the PID file path - a name starting with "/" is used as given, any other name
# is placed under "$prefix/" - and opens it for writing (truncating). The statements
# that write the PID and test for failure are not visible; on the failure path $! and
# the 'err_write_fail' message are logged.
1621 my $logfile = shift;
1622 my $log_prefix = shift;
1623 my $pidfile_name = shift;
1627 if ($pidfile_name =~ /^\//i) {
1628 $pidfile = $pidfile_name;
1631 $pidfile = "$prefix/" . $pidfile_name;
# NOTE(review): the result of open() is not checked on this line.
1634 open (PIDFILE, ">", $pidfile);
1640 printlogln($prefix,$logfile,$log_prefix, lookupMsg('dbg_generic', $!));
1642 printlogln($prefix,$logfile,$log_prefix, lookupMsg('err_write_fail', "pid file", $pidfile));
# PID file removal fragment (the `sub` line and its name are not visible in this excerpt).
# Resolves the PID file path the same way as the writer - absolute names are used as
# given, others are placed under "$prefix/". The statements that test for and unlink the
# file are not visible; the visible log calls report that the file never existed, or
# log $! and the 'err_unlink_fail' message.
1650 my $logfile = shift;
1651 my $log_prefix = shift;
1652 my $pidfile_name = shift;
1656 if ($pidfile_name =~ /^\//i) {
1657 $pidfile = $pidfile_name;
1660 $pidfile = "$prefix/" . $pidfile_name;
1667 printlogln($prefix,$logfile,$log_prefix, lookupMsg('dbg_generic', 'PID file never existed to be removed'));
1672 printlogln($prefix,$logfile,$log_prefix, lookupMsg('dbg_generic', $!));
1674 printlogln($prefix,$logfile,$log_prefix, lookupMsg('err_unlink_fail', "pid file", $pidfile));
# Check whether a candidate node provides every set owned by the old origin.
#
# Parameters:
#   $originSets   - array ref of the set ids originating on the old origin
#   $providerSets - array ref of the set ids the candidate node provides
# Returns a true value when every element of @$originSets also occurs in
# @$providerSets (trivially true for an empty origin list), false otherwise.
sub checkProvidesAllSets {
    my ($originSets, $providerSets) = @_;
    my %test_hash;                        # lexical, so the sub is self-contained under strict

    @test_hash{@$originSets} = ();        # add a hash key for each element of @$originSets
    delete @test_hash{@$providerSets};    # remove all keys for elements of @$providerSets

    return !%test_hash;                   # false if any keys are left in the hash
}
# Check whether a node subscribes to at least one of the origin's sets.
#
# Parameters:
#   $originSets     - array ref of the set ids originating on the origin node
#   $subscriberSets - array ref of the set ids the other node subscribes to
# Returns a true value when at least one element of @$subscriberSets also occurs in
# @$originSets, false otherwise (including when either list is empty).
sub checkSubscribesAnySets {
    my ($originSets, $subscriberSets) = @_;
    my %test_hash;                          # lexical, so the sub is self-contained under strict

    @test_hash{@$originSets} = ();          # add a hash key for each element of @$originSets
    my $before = scalar(keys %test_hash);
    delete @test_hash{@$subscriberSets};    # remove all keys for elements of @$subscriberSets
    my $after = scalar(keys %test_hash);

    return ($before != $after);             # false if no keys were removed from the hash
}
# Configuration file reader fragment (the `sub` line and its name are not visible in
# this excerpt).
#
# Opens $cfgfile and, for every line that is non-empty after trimming, splits it on the
# first "=" into a key and a value. The lower-cased key is matched (as a whole word,
# case-insensitively) against the known setting names and the value is stored in the
# corresponding global: most values are trimmed, boolean settings go through
# checkBoolean() and the autofailover numeric settings through checkInteger().
# When the file cannot be processed the 'err_fail_config' message is printed. $success
# starts as false; where it is set and returned is not visible here.
# NOTE(review): the two *_database_port values are stored without trim(), unlike every
# other setting, so they keep surrounding whitespace and the line ending.
# NOTE(review): given/when is an experimental feature that newer Perl releases deprecate.
1704 my $cfgfile = shift;
1706 my $success = false;
1708 if (open(CFGFILE, "<", $cfgfile)) {
1709 foreach (<CFGFILE>) {
1715 if (length(trim($_))) {
1716 @fields = split('=', $_, 2);
1717 given(lc($fields[0])) {
1719 $g_lang = trim($fields[1]);
1721 when(/\bslony_database_host\b/i) {
1722 $g_dbhost = trim($fields[1]);
1724 when(/\bslony_database_port\b/i) {
1725 $g_dbport = $fields[1];
1727 when(/\bslony_database_name\b/i) {
1728 $g_dbname = trim($fields[1]);
1730 when(/\bslony_database_user\b/i) {
1731 $g_dbuser = trim($fields[1]);
1733 when(/\bslony_database_password\b/i) {
1734 $g_dbpass = trim($fields[1]);
1736 when(/\bslony_cluster_name\b/i) {
1737 $g_clname = trim($fields[1]);
1739 when(/\benable_debugging\b/i) {
1740 $g_debug = checkBoolean(trim($fields[1]));
1742 when(/\bprefix_directory\b/i) {
1743 $g_prefix = trim($fields[1]);
1745 when(/\bseparate_working_directory\b/i) {
1746 $g_separate_working = checkBoolean(trim($fields[1]));
1748 when(/\bpid_filename\b/i) {
1749 $g_pidfile = trim($fields[1]);
1751 when(/\bfailover_offline_subscriber_only\b/i) {
1752 $g_fail_subonly = checkBoolean(trim($fields[1]));
1754 when(/\bdrop_failed_nodes\b/i) {
1755 $g_drop_failed = checkBoolean(trim($fields[1]));
1757 when(/\blog_line_prefix\b/i) {
1758 $g_log_prefix = trim($fields[1]);
1760 when(/\blog_filename\b/i) {
1761 $g_logfile = trim($fields[1]);
1763 when(/\blog_to_postgresql\b/i) {
1764 $g_log_to_db = checkBoolean(trim($fields[1]));
1766 when(/\blog_database_host\b/i) {
1767 $g_logdb_host = trim($fields[1]);
1769 when(/\blog_database_port\b/i) {
1770 $g_logdb_port = $fields[1];
1772 when(/\blog_database_name\b/i) {
1773 $g_logdb_name = trim($fields[1]);
1775 when(/\blog_database_user\b/i) {
1776 $g_logdb_user = trim($fields[1]);
1778 when(/\blog_database_password\b/i) {
1779 $g_logdb_pass = trim($fields[1]);
1781 when(/\benable_try_blocks\b/i) {
1782 $g_use_try_blocks = checkBoolean(trim($fields[1]));
1784 when(/\bpull_aliases_from_comments\b/i) {
1785 $g_use_comment_aliases = checkBoolean(trim($fields[1]));
1787 when(/\bslonik_path\b/i) {
1788 $g_slonikpath = trim($fields[1]);
1790 when(/\blockset_method\b/i) {
1791 $g_lockset_method = trim($fields[1]);
1793 when(/\benable_autofailover\b/i) {
1794 $g_autofailover = checkBoolean(trim($fields[1]));
1796 when(/\bautofailover_poll_interval\b/i) {
1797 $g_autofailover_poll_interval = checkInteger(trim($fields[1]));
1799 when(/\bautofailover_node_retry\b/i) {
1800 $g_autofailover_retry = checkInteger(trim($fields[1]));
1802 when(/\bautofailover_sleep_time\b/i) {
1803 $g_autofailover_retry_sleep = checkInteger(trim($fields[1]));
1805 when(/\bautofailover_forwarding_providers\b/i) {
1806 $g_autofailover_provs = checkBoolean(trim($fields[1]));
1808 when(/\bautofailover_config_any_node\b/i) {
1809 $g_autofailover_config_any = checkBoolean(trim($fields[1]));
1819 println(lookupMsg('err_fail_config'));
# Boolean parser fragment (presumably the body of checkBoolean(), which other code in
# this file applies to trimmed configuration values; the `sub` line and the values
# assigned in each branch are not visible here).
# The first test matches $text, case-insensitively, against the affirmative words
# y / yes / t / true / on; the second against the negative words n / no / f / false / off.
# NOTE(review): $text is interpolated into the pattern unescaped, so regex
# metacharacters in the configuration value are interpreted as a pattern.
1828 if ( grep /^$text$/i, ("y","yes","t","true","on") ) {
1831 elsif ( grep /^$text$/i, ("n","no","f","false","off") ) {
# Integer parser fragment (presumably the body of checkInteger(), which other code in
# this file applies to trimmed configuration values; the `sub` line, the fallback value
# and the return are not visible here).
# The value is accepted only when multiplying it by 1 and converting back to a string
# reproduces the input exactly; it is then stored in $value via int().
1838 my $integer = shift;
1841 if (($integer * 1) eq $integer) {
1842 $value = int($integer);
# Slonik runner fragment. The enclosing `sub` line is outside this excerpt; other code
# in this file invokes it as runSlonik($script, $prefix, $logfile, $log_prefix) and
# treats a false return as an execution failure.
#
# Logs the script name, starts "slonik $script" with stderr merged into stdout, and
# logs every output line with the 'slonik_output' message. When the pipe cannot be
# opened the 'err_running_slonik' message is logged with $!.
# NOTE(review): the command is the literal "slonik" and is passed through the shell as
# one string with $script interpolated; the $g_slonikpath setting read from the
# configuration file is not used on this line.
1851 my $logfile = shift;
1852 my $log_prefix = shift;
1856 printlogln($prefix,$logfile,$log_prefix, lookupMsg('dbg_slonik_script', $script));
1858 if (open(SLONIKSTATUS, "-|", "slonik $script 2>&1")) {
1859 while (<SLONIKSTATUS>) {
1860 printlogln($prefix,$logfile,$log_prefix,lookupMsg('slonik_output', $_));
1862 close(SLONIKSTATUS);
1866 printlogln($prefix,$logfile,$log_prefix, lookupMsg('err_running_slonik', $!));
# Autofailover loop fragment (the `sub` line, its name and the outer loop header are not
# visible in this excerpt).
#
# After logging the effective autofailover settings, each pass:
#   1. reloads the cluster configuration when it has never been loaded or the last load
#      is more than 60 seconds old. The supplied $dbconninfo is used on the first load
#      and whenever $g_autofailover_config_any is false; otherwise the conninfo of each
#      ACTIVE node in the previously loaded @cluster is tried until one load succeeds.
#      A cluster with zero nodes or a single node is treated as an error.
#   2. when the cluster is loaded, calls checkFailed() repeatedly while it reports
#      failures and $current_retry has not exceeded $g_autofailover_retry, sleeping
#      between attempts.
#   3. calls findBackup(); on the visible "proceed" path it logs each failed node with
#      its backup, generates the script with writeFailover(), runs it with runSlonik()
#      and marks the cluster as not loaded so the next pass reloads it. Otherwise the
#      'autofailover_halt' message is logged.
#   4. sleeps for the poll interval.
# Time::HiRes::usleep() takes microseconds, so the "* 1000" factors mean the configured
# sleep and poll values are treated as milliseconds.
1873 my $dbconninfo = shift;
1878 my $logfile = shift;
1879 my $log_prefix = shift;
1890 printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_init'));
1891 printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_init_cnf', ($g_autofailover_config_any ? 'any' : 'specified target')));
1892 printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_init_pol', $g_autofailover_poll_interval));
1893 printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_init_ret', $g_autofailover_retry, $g_autofailover_retry_sleep));
1894 printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_init_set', ($g_autofailover_provs ? 'will' : 'will not')));
1897 # Probe current cluster configuration every minute
1898 if (!defined($cluster_time) || (time()-$cluster_time > 60)) {
1900 $cluster_loaded = false;
1901 if (!defined($cluster_time) || !$g_autofailover_config_any) {
1903 ($node_count, $version) = loadCluster($dbconninfo, $clname, $dbuser, $dbpass, $prefix, $logfile, $log_prefix);
1904 die lookupMsg('err_cluster_empty') if ($node_count == 0);
# Keep a private copy of the freshly loaded configuration.
1905 @cluster = @g_cluster;
1906 die lookupMsg('err_cluster_lone') if ($node_count == 1);
1907 $cluster_loaded = true;
1910 printlogln($prefix,$logfile,$log_prefix, lookupMsg('load_cluster_fail', 'from supplied configuration'));
1912 printlogln($prefix,$logfile,$log_prefix,lookupMsg('dbg_generic', $@));
# Reload through any active node: column [2] of each entry is that node's conninfo.
1917 foreach (@cluster) {
1918 if ($_->[6] eq "ACTIVE") {
1919 unless ($cluster_loaded) {
1921 ($node_count, $version) = loadCluster($_->[2], $clname, $dbuser, $dbpass, $prefix, $logfile, $log_prefix);
1922 die lookupMsg('err_cluster_empty') if ($node_count == 0);
1923 @cluster = @g_cluster;
1924 die lookupMsg('err_cluster_lone') if ($node_count == 1);
1925 $cluster_loaded = true;
1928 printlogln($prefix,$logfile,$log_prefix, lookupMsg('load_cluster_fail', 'from node ' . $_->[0] . ': trying next node'));
1930 printlogln($prefix,$logfile,$log_prefix,lookupMsg('dbg_generic', $@));
1938 if ($cluster_loaded) {
1939 printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_load_cluster', (!defined($cluster_time) ? "Loaded" : "Reloaded"), $version, $clname, $node_count));
1940 $cluster_time = time();
1943 printlogln($prefix,$logfile,$log_prefix, lookupMsg('load_cluster_fail', 'from any node'));
1947 if ($cluster_loaded) {
1950 while(($current_retry <= $g_autofailover_retry) && ((!defined($failed)) || ($failed > 0))) {
1951 # Check status of cluster
1952 $failed = checkFailed($clname, $dbuser, $dbpass, $prefix, $logfile, $log_prefix);
1955 printlogln($prefix,$logfile,$log_prefix,lookupMsg('dbg_cluster_good'));
1957 if ($current_retry > 0) {
1958 printlogln($prefix,$logfile,$log_prefix,lookupMsg('cluster_fixed'));
1962 if (($failed > 0) && ($current_retry <= $g_autofailover_retry)) {
1963 printlogln($prefix,$logfile,$log_prefix,lookupMsg('cluster_failed', $failed,$g_autofailover_retry_sleep,$current_retry,$g_autofailover_retry));
1964 usleep($g_autofailover_retry_sleep * 1000);
1968 $actions = findBackup($clname, $dbuser, $dbpass, $prefix, $logfile, $log_prefix);
1970 printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_proceed'));
1971 foreach my $failed ( keys %g_backups ) {
1972 printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_detail', $failed, $g_backups{$failed}));
# $from and $to are passed as undef, which selects the "autofailover" script name.
1974 $g_script = writeFailover($prefix, $dbconninfo, $clname, $dbuser, $dbpass, undef, undef, $g_subs_follow_origin, $g_use_comment_aliases, $logfile, $log_prefix);
1975 unless (runSlonik($g_script, $prefix, $logfile, $log_prefix)) {
1976 printlogln($prefix,$logfile,$log_prefix,lookupMsg('err_execute_fail', 'slonik script', $g_script));
1978 $cluster_loaded = false;
1979 #print "SCRIPT: $g_script\n";
1983 printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_halt', $failed));
1986 usleep($g_autofailover_poll_interval * 1000);
# Node health check used by the auto-failover loop, which calls it as
# checkFailed($clname, $dbuser, $dbpass, $prefix, $logfile, $log_prefix).
# NOTE(review): the sub header and the unpacking of the first four arguments
# are assumed to sit immediately before this point - confirm.
#
# Side effects: empties and refills the global @g_unresponsive with the
# entries of @g_cluster that were found unresponsive, and writes progress
# messages through printlogln.
# Returns: the number of failed nodes (see the note above the return lines).
2000 my $logfile = shift;
2001 my $log_prefix = shift;
# $result_count accumulates $sth->rows from each probe; the two *_failed
# counters feed the return value.
2007 my $result_count = 0;
2008 my $prov_failed = 0;
2009 my $subonly_failed = 0;
# Start every check with an empty list of unresponsive nodes.
2011 undef @g_unresponsive;
# Each @g_cluster entry is an array ref; fields used here are [0] node id,
# [2] connection string, [4] node name, [6] status and [9] sets provided.
2013 foreach (@g_cluster) {
# Nodes whose status field is ACTIVE are probed.
2014 if ($_->[6] eq "ACTIVE") {
2016 printlogln($prefix,$logfile,$log_prefix,lookupMsg('dbg_autofailover_check',$_->[0], ($_->[4] // "unnamed"),(defined($_->[9]) ? "provider of sets $_->[9]" : "sole subscriber"),$_->[2]));
# A node is classed as a provider when field [3] is defined, or when
# $g_autofailover_provs is set and the node provides sets ([9]).
# NOTE(review): field [3] presumably holds the sets the node originates - confirm.
2020 if ((defined($_->[3])) || ($g_autofailover_provs && defined($_->[9]))) {
2021 printlogln($prefix,$logfile,$log_prefix,lookupMsg('dbg_autofailover_active_check', 'provider', $_->[0]));
2024 printlogln($prefix,$logfile,$log_prefix,lookupMsg('dbg_autofailover_active_check', 'subscriber only', $_->[0]));
# Probe: connect with the node's connection string and look for the cluster
# schema ("_" followed by the cluster name) in pg_namespace. RaiseError makes
# any DBI failure die instead of returning an error code.
2028 $dsn = "DBI:Pg:$_->[2];";
2030 $dbh = DBI->connect($dsn, $dbuser, $dbpass, {RaiseError => 1});
2031 $query = "SELECT count(*) FROM pg_namespace WHERE nspname = ?";
2032 $sth = $dbh->prepare($query);
2033 $sth->bind_param(1, "_" . $clname);
2036 $result_count = $result_count+$sth->rows;
# Failure handling: the error text in $@ is logged and the node is recorded
# in @g_unresponsive.
# NOTE(review): presumably only reached when the probe above died - confirm.
2043 printlogln($prefix,$logfile,$log_prefix,lookupMsg('dbg_generic', $@));
2045 push(@g_unresponsive, \@$_);
# Same provider / subscriber-only classification as above, this time to
# choose the "unresponsive" message.
2046 if ((defined($_->[3])) || ($g_autofailover_provs && defined($_->[9]))) {
2047 printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_unresponsive', $_->[0]));
2051 printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_unresponsive_subonly', $_->[0]));
# NOTE(review): a subscriber-only failure is presumably counted in
# $subonly_failed only when $g_fail_subonly is enabled - confirm.
2052 if ($g_fail_subonly) {
# Nodes that are not ACTIVE are just logged with their lower-cased status
# ("unknown" when the status field is undefined).
2060 printlogln($prefix,$logfile,$log_prefix,lookupMsg('dbg_autofailover_check',$_->[0], ($_->[4] // "unnamed"), lc($_->[6] // "unknown") . ' node', $_->[2]));
# When at least one provider failed the result is providers plus
# subscriber-only failures; otherwise $prov_failed itself is returned.
2064 if ($prov_failed > 0) {
2065 return ($prov_failed+$subonly_failed);
2068 return $prov_failed;
# Backup-node selection used by the auto-failover loop, which calls it as
# findBackup($clname, $dbuser, $dbpass, $prefix, $logfile, $log_prefix).
# NOTE(review): the sub header and the unpacking of the first four arguments
# are assumed to sit immediately before this point - confirm.
#
# For every node in @g_unresponsive that qualifies for failover, picks a
# replacement from @g_cluster and records it in the globals %g_backups
# (failed node id => backup node id) and @g_failed.
# Returns: keys(%g_backups); the caller assigns it to a scalar, so it
# receives the number of failed nodes that have a backup.
2077 my $logfile = shift;
2078 my $log_prefix = shift;
2085 my $result_count = 0;
# True when the current best candidate subscribes directly to the failed node.
2088 my $best_node_is_direct;
# Cache: set list (field [9] of a failed node) => backup already chosen for it.
2091 my %backup_for_set_chosen;
2096 foreach (@g_unresponsive) {
# A failed node qualifies when subscriber-only failover is enabled, when
# field [3] is defined, or when $g_autofailover_provs is set and the node
# provides sets ([9]).
2097 if ($g_fail_subonly || (defined($_->[3])) || ($g_autofailover_provs && defined($_->[9]))) {
2098 printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_promote_find', ($_->[9] // "none"), $_->[0]));
# Reset the search state. The expression evaluates as
# (1 << (ivsize*8 - 1)) - 1, the largest signed integer for this perl.
# NOTE(review): $best_node_is_direct is not reset here, so its value carries
# over from the previous failed node - confirm this is intended.
2100 undef $best_node_id;
2101 $lowest_lag = (1<<$Config{ivsize}*8-1)-1;
# Reuse the backup already chosen for an identical set list.
2103 if (defined($_->[9]) && (exists $backup_for_set_chosen{$_->[9]})) {
2104 $best_node_id = $backup_for_set_chosen{$_->[9]};
2105 printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_promote_found', $_->[9], $_->[0]));
# Otherwise examine every other node in the cluster as a candidate; $_ is
# still the failed node inside this loop.
2108 foreach my $subscriber (@g_cluster) {
2109 if ($subscriber->[0] != $_->[0]) {
2111 printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_check_sub',$subscriber->[0]));
2114 $dsn = "DBI:Pg:$subscriber->[2]";
# RaiseError makes a failed connect or query die.
2117 $dbh = DBI->connect($dsn, $dbuser, $dbpass, {RaiseError => 1});
2118 $qw_clname = $dbh->quote_identifier("_" . $clname);
# Ask the candidate for its lag in seconds and whether the node it received
# from equals the failed node id (bound to the placeholder); that flag is
# the "direct" column.
2120 $query = "SELECT extract(epoch from a.st_lag_time), (a.st_received = ?) AS direct
2121 FROM $qw_clname.sl_status a
2122 INNER JOIN $qw_clname.sl_subscribe b ON b.sub_provider = a.st_received AND b.sub_receiver = a.st_origin
2124 GROUP BY a.st_lag_time, a.st_received;";
2126 $sth = $dbh->prepare($query);
2127 $sth->bind_param(1, $_->[0]);
# Each row: $subinfo[0] is the lag in seconds, $subinfo[1] the direct flag.
2130 while (my @subinfo = $sth->fetchrow) {
# Failed node provides sets: compare its comma-separated set list ([9]) with
# the candidate's set list ([7]).
2133 if (defined($_->[9])) {
2134 printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_node_detail', $subscriber->[0], ($subinfo[1] ? "a direct subscriber" : "an indirect subscriber"), $subscriber->[7], $subinfo[0]));
2135 @sets_from = split(',',$_->[9]);
2136 @sets_to = split(',',$subscriber->[7]);
2138 elsif ($g_fail_subonly) {
2139 # Subscriber only node will have no active sets forwarding sets to check
2140 printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_node_detail', $subscriber->[0], "suitable backup for this subscriber only node" , $subscriber->[7], $subinfo[0]));
# A candidate becomes the new best when checkProvidesAllSets accepts the two
# set lists and it either has lower lag than the current best, or is a direct
# subscriber while the current best is not.
2145 if ((checkProvidesAllSets(\@sets_from, \@sets_to)) && (($subinfo[0] < $lowest_lag) || (!$best_node_is_direct && $subinfo[1]))) {
2146 printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_promote_best', $subscriber->[0], $subinfo[0]));
2147 $best_node_id = $subscriber->[0];
2148 $lowest_lag = $subinfo[0];
2149 $best_node_is_direct = $subinfo[1];
# Failure handling for the candidate check; the error text in $@ is logged.
# NOTE(review): presumably only reached when the check above died - confirm.
2154 printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_check_sub_fail', $subscriber->[0]));
2156 printlogln($prefix,$logfile,$log_prefix,lookupMsg('dbg_generic', $@));
# Record the outcome for this failed node.
2162 if (defined($best_node_id)) {
2163 push(@g_failed, \@$_);
2164 $g_backups{$_->[0]} = $best_node_id;
# NOTE(review): this exists test looks in %g_backups, which is keyed by node
# id on the line above, using a set list as the key. %backup_for_set_chosen
# may have been intended - confirm.
2165 if (defined($_->[9]) && !(exists $g_backups{$_->[9]})) {
2166 $backup_for_set_chosen{$_->[9]} = $best_node_id;
# NOTE(review): printlog is used here where every other message in this
# routine uses printlogln - confirm this is intended.
2170 printlog($prefix,$logfile,$log_prefix,lookupMsg('autofailover_promote_fail'));
# Message logged for failed nodes that are skipped.
# NOTE(review): presumably the else branch of the qualification test near the
# top of the loop - confirm.
2174 printlogln($prefix,$logfile,$log_prefix,lookupMsg('autofailover_promote_skip', $_->[0]));
2177 return keys(%g_backups);