]> git.8kb.co.uk Git - pgpool-ii/pgpool-ii_2.2.5/blob - recovery.c
Attempt to send a proper failure message to frontend when authentication
[pgpool-ii/pgpool-ii_2.2.5] / recovery.c
1 /* -*-pgsql-c-*- */
2 /*
3  * $Header: /cvsroot/pgpool/pgpool-II/recovery.c,v 1.12.2.1 2009/08/22 04:19:49 t-ishii Exp $
4  *
5  * pgpool: a language independent connection pool server for PostgreSQL
6  * written by Tatsuo Ishii
7  *
8  * Copyright (c) 2003-2008      PgPool Global Development Group
9  *
10  * Permission to use, copy, modify, and distribute this software and
11  * its documentation for any purpose and without fee is hereby
12  * granted, provided that the above copyright notice appear in all
13  * copies and that both that copyright notice and this permission
14  * notice appear in supporting documentation, and that the name of the
15  * author not be used in advertising or publicity pertaining to
16  * distribution of the software without specific, written prior
17  * permission. The author makes no representations about the
18  * suitability of this software for any purpose.  It is provided "as
19  * is" without express or implied warranty.
20  *
21  * recovery.c: online recovery process
22  *
23  */
24
25 #include "config.h"
26
27 #include <unistd.h>
28 #include <string.h>
29
30 #include "pool.h"
31 #include "libpq-fe.h"
32
33 #define WAIT_RETRY_COUNT (pool_config->recovery_timeout / 3)
34
35 #define FIRST_STAGE 0
36 #define SECOND_STAGE 1
37
38 static int exec_checkpoint(PGconn *conn);
39 static int exec_recovery(PGconn *conn, BackendInfo *backend, char stage);
40 static int exec_remote_start(PGconn *conn, BackendInfo *backend);
41 static PGconn *connect_backend_libpq(BackendInfo *backend);
42 static int wait_connection_closed(void);
43 static int check_postmaster_started(BackendInfo *backend);
44
45 static char recovery_command[1024];
46
47 extern volatile sig_atomic_t pcp_wakeup_request;
48
49 int start_recovery(int recovery_node)
50 {
51         BackendInfo *backend;
52         BackendInfo *recovery_backend;
53         PGconn *conn;
54
55         pool_log("starting recovering node %d", recovery_node);
56
57         if (VALID_BACKEND(recovery_node))
58         {
59                 pool_error("start_recovery: backend node %d is alive", recovery_node);
60                 return 1;
61         }
62
63         Req_info->kind = NODE_RECOVERY_REQUEST;
64
65         backend = &pool_config->backend_desc->backend_info[MASTER_NODE_ID];
66         recovery_backend = &pool_config->backend_desc->backend_info[recovery_node];
67
68         conn = connect_backend_libpq(backend);
69         if (conn == NULL)
70         {
71                 PQfinish(conn);
72                 pool_error("start_recover: could not connect master node.");
73                 return 1;
74         }
75
76         /* 1st stage */
77         if (exec_checkpoint(conn) != 0)
78         {
79                 PQfinish(conn);
80                 pool_error("start_recovery: CHECKPOINT failed");
81                 return 1;
82         }
83
84         pool_log("CHECKPOINT in the 1st stage done");
85
86         if (exec_recovery(conn, recovery_backend, FIRST_STAGE) != 0)
87         {
88                 PQfinish(conn);
89                 return 1;
90         }
91
92         pool_log("1st stage is done");
93
94         pool_log("starting 2nd stage");
95
96         /* 2nd stage */
97         *InRecovery = 1;
98         if (wait_connection_closed() != 0)
99         {
100                 PQfinish(conn);
101                 pool_error("start_recovery: timeover for waiting connection closed");
102                 return 1;
103         }
104
105         pool_log("all connections from clients have been closed");
106
107         if (exec_checkpoint(conn) != 0)
108         {
109                 PQfinish(conn);
110                 pool_error("start_recovery: CHECKPOINT failed");
111                 return 1;
112         }
113
114         pool_log("CHECKPOINT in the 2nd stage done");
115
116         if (exec_recovery(conn, recovery_backend, SECOND_STAGE) != 0)
117         {
118                 PQfinish(conn);
119                 return 1;
120         }
121         if (exec_remote_start(conn, recovery_backend) != 0)
122         {
123                 PQfinish(conn);
124                 pool_error("start_recovery: remote start failed");
125                 return 1;
126         }
127
128         if (check_postmaster_started(recovery_backend))
129         {
130                 PQfinish(conn);
131                 pool_error("start_recovery: check start failed");
132                 return 1;
133         }
134
135         pool_log("%d node restarted", recovery_node);
136
137         /*
138          * reset failover completion flag.  this is necessary since
139          * previous failover/failback will set the flag to 1.
140          */
141         pcp_wakeup_request = 0;
142
143         /* send failback request to pgpool parent */
144         send_failback_request(recovery_node);
145
146         /* wait for failback */
147         while (!pcp_wakeup_request)
148         {
149                 struct timeval t = {1, 0};
150                 /* polling SIGUSR2 signal every 1 sec */
151                 select(0, NULL, NULL, NULL, &t);
152         }
153         pcp_wakeup_request = 0;
154
155         PQfinish(conn);
156
157         pool_log("recovery done");
158
159         return 0;
160 }
161
162 /*
163  * Notice all children finishing recovery.
164  */
165 void finish_recovery(void)
166 {
167         *InRecovery = 0;
168         kill(getppid(), SIGUSR2);
169 }
170
171 /*
172  * Execute CHECKPOINT
173  */
174 static int exec_checkpoint(PGconn *conn)
175 {
176         PGresult *result;
177         int r;
178
179         pool_debug("exec_checkpoint: start checkpoint");
180         result = PQexec(conn, "CHECKPOINT");
181         r = (PQresultStatus(result) !=  PGRES_COMMAND_OK);
182         PQclear(result);
183         pool_debug("exec_checkpoint: finish checkpoint");
184         return r;
185 }
186
187 /*
188  * Call pgpool_recovery() function.
189  */
190 static int exec_recovery(PGconn *conn, BackendInfo *backend, char stage)
191 {
192         PGresult *result;
193         char *hostname;
194         char *script;
195         int r;
196
197         if (strlen(backend->backend_hostname) == 0)
198                 hostname = "localhost";
199         else
200                 hostname = backend->backend_hostname;
201
202         script = (stage == FIRST_STAGE) ?
203                 pool_config->recovery_1st_stage_command : pool_config->recovery_2nd_stage_command;
204
205         if (script == NULL || strlen(script) == 0)
206         {
207                 /* do not execute script */
208                 return 0;
209         }
210
211         snprintf(recovery_command,
212                          sizeof(recovery_command),
213                          "SELECT pgpool_recovery('%s', '%s', '%s')",
214                          script,
215                          hostname,
216                          backend->backend_data_directory);
217
218         pool_log("starting recovery command: \"%s\"", recovery_command);
219
220         pool_debug("exec_recovery: start recovery");
221         result = PQexec(conn, recovery_command);
222         r = (PQresultStatus(result) !=  PGRES_TUPLES_OK);
223         if (r != 0)
224         {
225                 pool_error("exec_recovery: %s command failed at %s",
226                                    script,
227                                    (stage == FIRST_STAGE) ? "1st stage" : "2nd stage");
228         }
229         PQclear(result);
230         pool_debug("exec_recovery: finish recovery");
231         return r;
232 }
233
234 /*
235  * Call pgpool_remote_start() function.
236  */
237 static int exec_remote_start(PGconn *conn, BackendInfo *backend)
238 {
239         PGresult *result;
240         char *hostname;
241         int r;
242
243         if (strlen(backend->backend_hostname) == 0)
244                 hostname = "localhost";
245         else
246                 hostname = backend->backend_hostname;
247
248         snprintf(recovery_command, sizeof(recovery_command),
249                          "SELECT pgpool_remote_start('%s', '%s')",
250                          hostname,
251                          backend->backend_data_directory);
252
253         pool_debug("exec_remote_start: start pgpool_remote_start");
254         result = PQexec(conn, recovery_command);
255         r = (PQresultStatus(result) !=  PGRES_TUPLES_OK);
256         if (r != 0)
257                 pool_error("exec_remote_start: pgpool_remote_start failed: %s", PQresultErrorMessage(result));
258         PQclear(result);
259         pool_debug("exec_remote_start: finish pgpool_remote_start");
260         return r;
261 }
262
263 /*
264  * Check postmaster is started.
265  */
266 static int check_postmaster_started(BackendInfo *backend)
267 {
268         int i = 0;
269         char port_str[16];
270         PGconn *conn;
271
272         snprintf(port_str, sizeof(port_str),
273                          "%d", backend->backend_port);
274         do {
275                 ConnStatusType r;
276                 conn = PQsetdbLogin(backend->backend_hostname,
277                                                         port_str,
278                                                         NULL,
279                                                         NULL,
280                                                         "template1",
281                                                         pool_config->recovery_user,
282                                                         pool_config->recovery_password);
283                 r = PQstatus(conn);
284                 PQfinish(conn);
285                 if (r == CONNECTION_OK)
286                         return 0;
287
288                 if (WAIT_RETRY_COUNT != 0)
289                         sleep(3);
290         } while (i++ < WAIT_RETRY_COUNT);
291
292         pool_error("check_postmaster_started: remote host start up did not finish in %d sec.", pool_config->recovery_timeout);
293         return 1;
294 }
295
296 static PGconn *connect_backend_libpq(BackendInfo *backend)
297 {
298         char port_str[16];
299         PGconn *conn;
300
301         snprintf(port_str, sizeof(port_str),
302                          "%d", backend->backend_port);
303         conn = PQsetdbLogin(backend->backend_hostname,
304                                                 port_str,
305                                                 NULL,
306                                                 NULL,
307                                                 "template1",
308                                                 pool_config->recovery_user,
309                                                 pool_config->recovery_password);
310
311         if (PQstatus(conn) != CONNECTION_OK)
312         {
313                 PQfinish(conn);
314                 return NULL;
315         }
316         return conn;
317 }
318
319 /*
320  * Wait all connections are closed.
321  */
322 static int wait_connection_closed(void)
323 {
324         int i = 0;
325
326         do {
327
328                 if (Req_info->conn_counter == 0)
329                         return 0;
330
331                 if (WAIT_RETRY_COUNT != 0)
332                         sleep(3);
333         } while (i++ < WAIT_RETRY_COUNT);
334
335         pool_error("wait_connection_closed: existing connections did not close in %d sec.", pool_config->recovery_timeout);
336         return 1;
337 }