git.8kb.co.uk Git - pgpool-ii/pgpool-ii_2.2.5/blob - parser/scan.l
Attempt to send a proper failure message to frontend when authentication
[pgpool-ii/pgpool-ii_2.2.5] / parser / scan.l
1 %{
2 /*-------------------------------------------------------------------------
3  *
4  * scan.l
5  *        lexical scanner for PostgreSQL
6  *
7  * NOTE NOTE NOTE:
8  *
9  * The rules in this file must be kept in sync with psql's lexer!!!
10  *
11  * The rules are designed so that the scanner never has to backtrack,
12  * in the sense that there is always a rule that can match the input
13  * consumed so far (the rule action may internally throw back some input
14  * with yyless(), however).  As explained in the flex manual, this makes
15  * for a useful speed increase --- about a third faster than a plain -CF
16  * lexer, in simple testing.  The extra complexity is mostly in the rules
17  * for handling float numbers and continued string literals.  If you change
18  * the lexical rules, verify that you haven't broken the no-backtrack
19  * property by running flex with the "-b" option and checking that the
20  * resulting "lex.backup" file says that no backing up is needed.
21  *
22  *
23  * Portions Copyright (c) 2003-2008, PgPool Global Development Group
24  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
25  * Portions Copyright (c) 1994, Regents of the University of California
26  *
27  * IDENTIFICATION
28  *        $PostgreSQL: pgsql/src/backend/parser/scan.l,v 1.141 2007/09/12 20:49:27 adunstan Exp $
29  *
30  *-------------------------------------------------------------------------
31  */
32 #include "pool_parser.h"
33
34 #include <ctype.h>
35 #include <unistd.h>
36 #include <errno.h>
37 #include <string.h>
38 #include <setjmp.h>
39
/*
 * Stand-in for the backend's ereport(): unless one is already defined,
 * both arguments are discarded and the call becomes yyerror("").
 */
40 #ifndef ereport
41 #define ereport(a,b) yyerror("")
42 #endif
/*
 * Hard-wired to 0, so the IS_HIGHBIT_SET() tests in the octal and hex
 * escape rules can never set saw_high_bit.
 */
43 #define IS_HIGHBIT_SET(c) 0
44
45 #include "gramparse.h"
46 #include "keywords.h"
47 /* Not needed now that this file is compiled as part of gram.y */
48 /* #include "parser/parse.h" */
49 #include "gram.h"
50 #include "scansup.h"
51
52 #include "pool_memory.h"
53
54
55 /* Avoid exit() on fatal scanner errors (a bit ugly -- see yy_fatal_error) */
56 /*
57 #undef fprintf
58 #define fprintf(file, fmt, msg)  ereport(ERROR, (errmsg_internal("%s", msg)))
59 */
60
61 static int              xcdepth = 0;    /* slash-star comment nesting beyond the outermost level (0 = outermost) */
62 static char    *dolqstart;      /* copy of the opening $foo$ delimiter of the dollar-quoted string being scanned */
63
64 /*
65  * GUC variables.  This is a DIRECT violation of the warning given at the
66  * head of gram.y, ie flex/bison code must not depend on any GUC variables;
67  * as such, changing their values can induce very unintuitive behavior.
68  * But we shall have to live with it as a short-term thing until the switch
69  * to SQL-standard string syntax is complete.
70  */
71 typedef enum
72 {
73         BACKSLASH_QUOTE_OFF,
74         BACKSLASH_QUOTE_ON,
75         BACKSLASH_QUOTE_SAFE_ENCODING
76 } BackslashQuoteType;
77
/*
 * NOTE(review): in the part of the file reviewed, backslash_quote is read
 * only inside an "#if 0" region of the <xe> escape rule, so it appears to
 * have no effect on scanning here -- confirm against the rest of the file.
 */
78 BackslashQuoteType backslash_quote = BACKSLASH_QUOTE_SAFE_ENCODING;
79 int                     escape_string_warning = true;
/* Chooses the state for a plain '...' literal: xq when true, xe when false. */
80 int                     standard_conforming_strings = false;
81
/* Set by the string start rules: true for a plain '...', false for E'...'. */
82 static bool             warn_on_first_escape;
/* Cleared at every string start; set by the octal/hex escape rules when IS_HIGHBIT_SET() holds. */
83 static bool     saw_high_bit = false;
84
85 /*
86  * literalbuf is used to accumulate literal values when multiple rules
87  * are needed to parse a single literal.  Call startlit to reset buffer
88  * to empty, addlit to add text.  Note that the buffer is palloc'd and
89  * starts life afresh on every parse cycle.
90  */
91 static char        *literalbuf;         /* expandable buffer */
92 static int              literallen;             /* actual current length */
93 static int              literalalloc;   /* current allocated buffer size */
94
/*
 * startlit() empties the buffer in place: it stores a NUL in literalbuf[0]
 * and zeroes literallen.  It neither allocates nor touches literalalloc, so
 * literalbuf must already point at usable storage when it is invoked.
 */
95 #define startlit()  (literalbuf[0] = '\0', literallen = 0)
/* Buffer helpers used by the rules; their definitions are not in this section. */
96 static void addlit(char *ytext, int yleng);
97 static void addlitchar(unsigned char ychar);
98 static char *litbufdup(void);
99
/* Compiled out; its only use in this section is inside the disabled backslash_quote check. */
100 #if 0
101 static int      lexer_errposition(void);
102 #endif
103 static void check_escape_warning(void);
104 static void check_string_escape_warning(unsigned char ychar);
105
106 extern char *yytext;
107
108 /*
109  * Each call to yylex must set yylloc to the location of the found token
110  * (expressed as a byte offset from the start of the input text).
111  * When we parse a token that requires multiple lexer rules to process,
112  * this should be done in the first such rule, else yylloc will point
113  * into the middle of the token.
114  */
115 #define SET_YYLLOC()  (yylloc = yytext - scanbuf)
116
117 /* Handles to the buffer that the lexer uses internally */
118 static YY_BUFFER_STATE scanbufhandle;
119 static char *scanbuf;
120
/* Used by the <xe> escape rule to translate the character that follows a backslash. */
121 static unsigned char unescape_single_char(unsigned char c);
/*
 * Error reporter called by the <<EOF>> rules and by several length checks.
 * NOTE(review): some rules keep executing after calling it, so it presumably
 * does not return (setjmp.h is included above) -- confirm at its definition.
 */
122 void yyerror(const char *s);
123
124 %}
125
/*
 * Scanner options (see the flex manual for the authoritative wording):
 *  8bit              accept all 8-bit input characters
 *  never-interactive input is never treated as coming from a terminal
 *  nodefault         no implicit "echo unmatched text" rule; every start
 *                    state must therefore have rules covering all input
 *  nounput           do not generate unput()
 *  noyywrap          end of input is final; yywrap() is not called
 *  prefix            generated symbols use "base_yy" in place of "yy"
 */
126 %option 8bit
127 %option never-interactive
128 %option nodefault
129 %option nounput
130 %option noyywrap
131 %option prefix="base_yy"
132
133 /*
134  * OK, here is a short description of lex/flex rules behavior.
135  * The longest pattern which matches an input string is always chosen.
136  * For equal-length patterns, the first occurring in the rules list is chosen.
137  * INITIAL is the starting state, to which all non-conditional rules apply.
138  * Exclusive states change parsing rules while the state is active.  When in
139  * an exclusive state, only those rules defined for that state apply.
140  *
141  * We use exclusive states for quoted strings, extended comments,
142  * and to eliminate parsing troubles for numeric strings.
143  * Exclusive states:
144  *  <xb> bit string literal
145  *  <xc> extended C-style comments
146  *  <xd> delimited identifiers (double-quoted identifiers)
147  *  <xh> hexadecimal numeric string
148  *  <xq> standard quoted strings
149  *  <xe> extended quoted strings (support backslash escape sequences)
150  *  <xdolq> $foo$ quoted strings
151  */
152
/*
 * All seven states are exclusive (%x).  Each is entered with BEGIN(state)
 * from the matching start rule in INITIAL and left again with BEGIN(INITIAL)
 * by that state's stop rule.
 */
153 %x xb
154 %x xc
155 %x xd
156 %x xh
157 %x xe
158 %x xq
159 %x xdolq
160
161 /*
162  * In order to make the world safe for Windows and Mac clients as well as
163  * Unix ones, we accept either \n or \r as a newline.  A DOS-style \r\n
164  * sequence will be seen as two successive newlines, but that doesn't cause
165  * any problems.  Comments that start with -- and extend to the next
166  * newline are treated as equivalent to a single whitespace character.
167  *
168  * NOTE a fine point: if there is no newline following --, we will absorb
169  * everything to the end of the input as a comment.  This is correct.  Older
170  * versions of Postgres failed to recognize -- as a comment if the input
171  * did not end with a newline.
172  *
173  * XXX perhaps \f (formfeed) should be treated as a newline as well?
174  *
175  * XXX if you change the set of whitespace characters, fix scanner_isspace()
176  * to agree, and see also the plpgsql lexer.
177  */
178
179 space                   [ \t\n\r\f]
180 horiz_space             [ \t\f]
181 newline                 [\n\r]
182 non_newline             [^\n\r]
183
/* A dash-dash comment runs up to, but does not include, the newline that ends it. */
184 comment                 ("--"{non_newline}*)
185
/* One match is either a run of space characters or a single dash-dash comment. */
186 whitespace              ({space}+|{comment})
187
188 /*
189  * SQL requires at least one newline in the whitespace separating
190  * string literals that are to be concatenated.  Silly, but who are we
191  * to argue?  Note that {whitespace_with_newline} should not have * after
192  * it, whereas {whitespace} should generally have a * after it...
193  */
194
/*
 * special_whitespace: like whitespace, except that a comment must be
 *   followed by its terminating newline.
 * horiz_whitespace: one horizontal space character, or a comment.
 * whitespace_with_newline: optional horizontal whitespace, exactly one
 *   newline, then any amount of special_whitespace.
 */
195 special_whitespace              ({space}+|{comment}{newline})
196 horiz_whitespace                ({horiz_space}|{comment})
197 whitespace_with_newline ({horiz_whitespace}*{newline}{special_whitespace}*)
198
199 /*
200  * To ensure that {quotecontinue} can be scanned without having to back up
201  * if the full pattern isn't matched, we include trailing whitespace in
202  * {quotestop}.  This matches all cases where {quotecontinue} fails to match,
203  * except for {quote} followed by whitespace and just one "-" (not two,
204  * which would start a {comment}).  To cover that we have {quotefail}.
205  * The actions for {quotestop} and {quotefail} must throw back characters
206  * beyond the quote proper.
207  */
/*
 * Every rule in this section that matches quotestop or quotefail calls
 * yyless(1), so only the quote character itself ends up consumed.
 */
208 quote                   '
209 quotestop               {quote}{whitespace}*
210 quotecontinue   {quote}{whitespace_with_newline}{quote}
211 quotefail               {quote}{whitespace}*"-"
212
213 /* Bit string
214  * It is tempting to scan the string for only those characters
215  * which are allowed. However, this leads to silently swallowed
216  * characters if illegal characters are included in the string.
217  * For example, if xbinside is [01] then B'ABCD' is interpreted
218  * as a zero-length string, and the ABCD' is lost!
219  * Better to pass the string forward and let the input routines
220  * validate the contents.
221  */
222 xbstart                 [bB]{quote}
223 xbinside                [^']*
224
225 /* Hexadecimal number; xhinside is the same pattern as xbinside */
226 xhstart                 [xX]{quote}
227 xhinside                [^']*
228
229 /* National character: only a start pattern exists; its rule keeps the letter and hands the quote back */
230 xnstart                 [nN]{quote}
231
232 /* Quoted string that allows backslash escapes.
 *  xeinside: a run of characters other than backslash and quote
 *  xeescape: backslash plus one character that is not an octal digit
 *  xeoctesc: backslash plus one to three octal digits
 *  xehexesc: backslash, "x", then one or two hex digits (longer than
 *            the xeescape match for the same input, so it wins)
 */
233 xestart                 [eE]{quote}
234 xeinside                [^\\']+
235 xeescape                [\\][^0-7]
236 xeoctesc                [\\][0-7]{1,3}
237 xehexesc                [\\]x[0-9A-Fa-f]{1,2}
238
239 /* Extended quote
240  * xqdouble implements embedded quote, ''''
241  */
242 xqstart                 {quote}
243 xqdouble                {quote}{quote}
244 xqinside                [^']+
245
246 /* $foo$ style quotes ("dollar quoting")
247  * The quoted string starts with $foo$ where "foo" is an optional string
248  * in the form of an identifier, except that it may not contain "$", 
249  * and extends to the first occurrence of an identical string.  
250  * There is *no* processing of the quoted text.
251  *
252  * {dolqfailed} is an error rule to avoid scanner backup when {dolqdelim}
253  * fails to match its trailing "$".
254  */
/*
 * dolq_start/dolq_cont are the identifier character sets without "$".
 * dolqinside never consumes a "$"; inside <xdolq> a "$" that starts neither
 * dolqdelim nor dolqfailed is picked up by that state's catch-all rule.
 */
255 dolq_start              [A-Za-z\200-\377_]
256 dolq_cont               [A-Za-z\200-\377_0-9]
257 dolqdelim               \$({dolq_start}{dolq_cont}*)?\$
258 dolqfailed              \${dolq_start}{dolq_cont}*
259 dolqinside              [^$]+
260
261 /* Double quote
262  * Allows embedded spaces and other special characters in identifiers.
263  */
/*
 * xdstart and xdstop are the same character; which one applies is decided
 * by the active start state (INITIAL opens, <xd> closes).  xddouble is a
 * doubled quote, which the <xd> rule stores as one literal double quote.
 */
264 dquote                  \"
265 xdstart                 {dquote}
266 xdstop                  {dquote}
267 xddouble                {dquote}{dquote}
268 xdinside                [^"]+
269
270 /* C-style comments
271  *
272  * The "extended comment" syntax closely resembles allowable operator syntax.
273  * The tricky part here is to get lex to recognize a string starting with
274  * slash-star as a comment, when interpreting it as an operator would produce
275  * a longer match --- remember lex will prefer a longer match!  Also, if we
276  * have something like plus-slash-star, lex will think this is a 3-character
277  * operator whereas we want to see it as a + operator and a comment start.
278  * The solution is two-fold:
279  * 1. append {op_chars}* to xcstart so that it matches as much text as
280  *    {operator} would. Then the tie-breaker (first matching rule of same
281  *    length) ensures xcstart wins.  We put back the extra stuff with yyless()
282  *    in case it contains a star-slash that should terminate the comment.
283  * 2. In the operator rule, check for slash-star within the operator, and
284  *    if found throw it back with yyless().  This handles the plus-slash-star
285  *    problem.
286  * Dash-dash comments have similar interactions with the operator rule.
287  */
288 xcstart                 \/\*{op_chars}*
289 xcstop                  \*+\/
/* xcinside excludes star and slash; inside <xc> those are left to the xcstart, xcstop, op_chars and star-run rules. */
290 xcinside                [^*/]+
291
292 digit                   [0-9]
/* ident_cont is ident_start plus digits and "$"; the "$" is what sets it apart from dolq_cont. */
293 ident_start             [A-Za-z\200-\377_]
294 ident_cont              [A-Za-z\200-\377_0-9\$]
295
296 identifier              {ident_start}{ident_cont}*
297
298 typecast                "::"
299
300 /*
301  * "self" is the set of chars that should be returned as single-character
302  * tokens.  "op_chars" is the set of chars that can make up "Op" tokens,
303  * which can be one or more characters long (but if a single-char token
304  * appears in the "self" set, it is not to be returned as an Op).  Note
305  * that the sets overlap, but each has some chars that are not in the other.
306  *
307  * If you change either set, adjust the character lists appearing in the
308  * rule for "operator"!
309  */
/*
 * The operator rule repeats parts of these sets as string constants in its
 * two strchr() calls: the "self" characters, and the op_chars that cannot
 * occur in a standard SQL operator.
 */
310 self                    [,()\[\].;\:\+\-\*\/\%\^\<\>\=]
311 op_chars                [\~\!\@\#\^\&\|\`\?\+\-\*\/\%\<\>\=]
312 operator                {op_chars}+
313
314 /* We no longer allow unary minus in numbers.
315  * Instead we pass it separately to the parser, where it gets
316  * coerced via doNegate() -- Leon aug 20 1999
317  *
318  * {realfail1} and {realfail2} are added to prevent the need for scanner
319  * backup when the {real} rule fails to match completely.
320  */
321
322 integer                 {digit}+
/* decimal needs digits on at least one side of the point, so a lone "." is not a number. */
323 decimal                 (({digit}*\.{digit}+)|({digit}+\.{digit}*))
/* realfail1 stops after the exponent letter, realfail2 after the exponent sign. */
324 real                    ({integer}|{decimal})[Ee][-+]?{digit}+
325 realfail1               ({integer}|{decimal})[Ee]
326 realfail2               ({integer}|{decimal})[Ee][-+]
327
/* A positional parameter: "$" immediately followed by one or more digits. */
328 param                   \${integer}
329
/* Any single character other than a newline. */
330 other                   .
331
332 /*
333  * Dollar quoted strings are totally opaque, and no escaping is done on them.
334  * Other quoted strings must allow some special characters such as single-quote
335  *  and newline.
336  * Embedded single-quotes are implemented both in the SQL standard
337  *  style of two adjacent single quotes "''" and in the Postgres/Java style
338  *  of escaped-quote "\'".
339  * Other embedded escaped characters are matched explicitly and the leading
340  *  backslash is dropped from the string.
341  * Note that xcstart must appear before operator, as explained above!
342  *  Also whitespace (comment) must appear before operator.
343  */
344
345 %%
346
347 {whitespace}    {
348                                         /* discard spaces and dash-dash comments; no token is returned */
349                                 }
350
351 {xcstart}               {
352                                         /* Set location in case of syntax error in comment */
353                                         SET_YYLLOC();
354                                         xcdepth = 0;
355                                         BEGIN(xc);
356                                         /* Put back any characters past slash-star; see above */
357                                         yyless(2);
358                                 }
359
360 <xc>{xcstart}   {
                                        /* a nested opener: one more level to close */
361                                         xcdepth++;
362                                         /* Put back any characters past slash-star; see above */
363                                         yyless(2);
364                                 }
365
366 <xc>{xcstop}    {
                                        /*
                                         * xcdepth is 0 at the outermost level, so a closer seen
                                         * then ends the whole comment; otherwise it only closes
                                         * one nested level.
                                         */
367                                         if (xcdepth <= 0)
368                                                 BEGIN(INITIAL);
369                                         else
370                                                 xcdepth--;
371                                 }
372
373 <xc>{xcinside}  {
374                                         /* comment text containing no star or slash: discard */
375                                 }
376
377 <xc>{op_chars}  {
378                                         /* one operator character that is not part of an opener or closer: discard */
379                                 }
380
381 <xc>\*+                 {
382                                         /* a run of stars that is not followed by a slash: discard */
383                                 }
384
385 <xc><<EOF>>             { yyerror("unterminated /* comment"); }
386
387 {xbstart}               {
388                                         /* Binary bit type.
389                                          * At some point we should simply pass the string
390                                          * forward to the parser and label it there.
391                                          * In the meantime, place a leading "b" on the string
392                                          * to mark it for the input routine as a binary string.
393                                          */
394                                         SET_YYLLOC();
395                                         BEGIN(xb);
396                                         startlit();
397                                         addlitchar('b');
398                                 }
399 <xb>{quotestop} |
400 <xb>{quotefail} {
401                                         yyless(1);      /* keep only the closing quote; whatever followed it is rescanned */
402                                         BEGIN(INITIAL);
403                                         yylval.str = litbufdup();       /* the "b" marker plus the accumulated body */
404                                         return BCONST;
405                                 }
406 <xh>{xhinside}  |
407 <xb>{xbinside}  {
                                        /* shared by <xh> and <xb>: append the literal's body text unvalidated */
408                                         addlit(yytext, yyleng);
409                                 }
410 <xh>{quotecontinue}     |
411 <xb>{quotecontinue}     {
412                                         /* quote, whitespace containing a newline, quote: the literal continues, so add nothing */
413                                 }
414 <xb><<EOF>>             { yyerror("unterminated bit string literal"); }
415
416 {xhstart}               {
417                                         /* Hexadecimal bit type.
418                                          * At some point we should simply pass the string
419                                          * forward to the parser and label it there.
420                                          * In the meantime, place a leading "x" on the string
421                                          * to mark it for the input routine as a hex string.
422                                          */
423                                         SET_YYLLOC();
424                                         BEGIN(xh);
425                                         startlit();
426                                         addlitchar('x');
427                                 }
428 <xh>{quotestop} |
429 <xh>{quotefail} {
430                                         yyless(1);      /* keep only the closing quote; whatever followed it is rescanned */
431                                         BEGIN(INITIAL);
432                                         yylval.str = litbufdup();       /* the "x" marker plus the accumulated body */
433                                         return XCONST;
434                                 }
435 <xh><<EOF>>             { yyerror("unterminated hexadecimal string literal"); }
436
437 {xnstart}               {
438                                         /* National character.
439                                          * We will pass this along as a normal character string,
440                                          * but preceded with an internally-generated "NCHAR".
441                                          */
442                                         const ScanKeyword *keyword;
443
444                                         SET_YYLLOC();
                                        /* the quote goes back to the input, so the next token is scanned as an ordinary quoted string */
445                                         yyless(1);                              /* eat only 'n' this time */
446                                         /* nchar had better be a keyword! */
                                        /*
                                         * NOTE(review): the lookup result is dereferenced without a
                                         * check; if ScanKeywordLookup() can return NULL (it is not
                                         * defined in this section) this would crash -- confirm.
                                         */
447                                         keyword = ScanKeywordLookup("nchar");
448                                         yylval.keyword = keyword->name;
449                                         return keyword->value;
450                                 }
451
452 {xqstart}               {
                                        /*
                                         * Plain '...' literal: scanned with backslash escapes
                                         * (state xe) unless standard_conforming_strings is set,
                                         * in which case backslashes are ordinary text (state xq).
                                         */
453                                         warn_on_first_escape = true;
454                                         saw_high_bit = false;
455                                         SET_YYLLOC();
456                                         if (standard_conforming_strings)
457                                                 BEGIN(xq);
458                                         else
459                                                 BEGIN(xe);
460                                         startlit();
461                                 }
462 {xestart}               {
                                        /* E'...' literal: always scanned with backslash escapes */
463                                         warn_on_first_escape = false;
464                                         saw_high_bit = false;
465                                         SET_YYLLOC();
466                                         BEGIN(xe);
467                                         startlit();
468                                 }
469 <xq,xe>{quotestop}      |
470 <xq,xe>{quotefail} {
471                                         yyless(1);      /* keep only the closing quote; whatever followed it is rescanned */
472                                         BEGIN(INITIAL);
473                                         /* Originally: check that the data remains valid if it might have
474                                          * been made invalid by unescaping any chars.  The verification call
475                                          * below is commented out, which leaves the "if" with an empty
                                         * statement as its body -- no check is performed here.
                                         */
476                                         if (saw_high_bit);
477 /*                                              pg_verifymbstr(literalbuf, literallen, false);*/
478                                         yylval.str = litbufdup();
479                                         return SCONST;
480                                 }
481 <xq,xe>{xqdouble} {
                                        /* a doubled quote stands for one literal quote */
482                                         addlitchar('\'');
483                                 }
484 <xq>{xqinside}  {
485                                         addlit(yytext, yyleng);
486                                 }
487 <xe>{xeinside}  {
488                                         addlit(yytext, yyleng);
489                                 }
490 <xe>{xeescape}  {
                                        /*
                                         * The backslash-quote case is recognised, but its safety
                                         * check is compiled out, so it falls through and is handled
                                         * like any other single-character escape.
                                         */
491                                         if (yytext[1] == '\'')
492                                         {
493 #if 0
494                                                 if (backslash_quote == BACKSLASH_QUOTE_OFF ||
495                                                         (backslash_quote == BACKSLASH_QUOTE_SAFE_ENCODING &&
496                                                          PG_ENCODING_IS_CLIENT_ONLY(pg_get_client_encoding())))
497                                                         ereport(ERROR,
498                                                                         (errcode(ERRCODE_NONSTANDARD_USE_OF_ESCAPE_CHARACTER),
499                                                                          errmsg("unsafe use of \\' in a string literal"),
500                                                                          errhint("Use '' to write quotes in strings. \\' is insecure in client-only encodings."),
501                                                                          lexer_errposition()));
502 #endif
503                                         }
                                        /* yytext[1] is the character right after the backslash */
504                                         check_string_escape_warning(yytext[1]);
505                                         addlitchar(unescape_single_char(yytext[1]));
506                                 }
507 <xe>{xeoctesc}  {
                                        /* yytext+1 skips the backslash; a value above 255 (e.g. \777) is narrowed to unsigned char */
508                                         unsigned char c = strtoul(yytext+1, NULL, 8);
509
510                                         check_escape_warning();
511                                         addlitchar(c);
                                        /* IS_HIGHBIT_SET() is defined as 0 in this file, so this branch is never taken */
512                                         if (IS_HIGHBIT_SET(c))
513                                                 saw_high_bit = true;
514                                 }
515 <xe>{xehexesc}  {
                                        /* yytext+2 skips the backslash and the "x" */
516                                         unsigned char c = strtoul(yytext+2, NULL, 16);
517
518                                         check_escape_warning();
519                                         addlitchar(c);
                                        /* IS_HIGHBIT_SET() is defined as 0 in this file, so this branch is never taken */
520                                         if (IS_HIGHBIT_SET(c))
521                                                 saw_high_bit = true;
522                                 }
523 <xq,xe>{quotecontinue} {
524                                         /* quote, whitespace containing a newline, quote: the literal continues, so add nothing */
525                                 }
526 <xe>.                   {
527                                         /* This is only needed for \ just before EOF */
528                                         addlitchar(yytext[0]);
529                                 }
530 <xq,xe><<EOF>>          { yyerror("unterminated quoted string"); }
531
532 {dolqdelim}             {
533                                         SET_YYLLOC();
534                                         dolqstart = pstrdup(yytext);    /* remember the full opening delimiter, both "$" included */
535                                         BEGIN(xdolq);
536                                         startlit();
537                                 }
538 {dolqfailed}    {
                                        /* NOTE(review): unlike the {self} rule, yylloc is not updated before this return */
539                                         /* throw back all but the initial "$" */
540                                         yyless(1);
541                                         /* and treat it as {other} */
542                                         return yytext[0];
543                                 }
544 <xdolq>{dolqdelim} {
                                        /* the string ends only at a delimiter identical to the one that opened it */
545                                         if (strcmp(yytext, dolqstart) == 0)
546                                         {
547                                                 pfree(dolqstart);
548                                                 BEGIN(INITIAL);
549                                                 yylval.str = litbufdup();
550                                                 return SCONST;
551                                         }
552                                         else
553                                         {
554                                                 /*
555                                                  * When we fail to match $...$ to dolqstart, transfer
556                                                  * the $... part to the output, but put back the final
557                                                  * $ for rescanning.  Consider $delim$...$junk$delim$
558                                                  */
559                                                 addlit(yytext, yyleng-1);
560                                                 yyless(yyleng-1);
561                                         }
562                                 }
563 <xdolq>{dolqinside} {
564                                         addlit(yytext, yyleng);
565                                 }
566 <xdolq>{dolqfailed} {
                                        /* "$" plus identifier characters with no closing "$": ordinary text */
567                                         addlit(yytext, yyleng);
568                                 }
569 <xdolq>.                {
570                                         /* This is only needed for $ inside the quoted text */
571                                         addlitchar(yytext[0]);
572                                 }
573 <xdolq><<EOF>>  { yyerror("unterminated dollar-quoted string"); }
574
575 {xdstart}               {
576                                         SET_YYLLOC();
577                                         BEGIN(xd);
578                                         startlit();
579                                 }
580 <xd>{xdstop}    {
581                                         char               *ident;
582
583                                         BEGIN(INITIAL);
                                        /*
                                         * NOTE(review): the code below still runs after this
                                         * yyerror() call unless yyerror() never returns -- confirm.
                                         */
584                                         if (literallen == 0)
585                                                 yyerror("zero-length delimited identifier");
586                                         ident = litbufdup();
                                        /* over-long names are passed to truncate_identifier() rather than rejected */
587                                         if (literallen >= NAMEDATALEN)
588                                                 truncate_identifier(ident, literallen, true);
589                                         yylval.str = ident;
590                                         return IDENT;
591                                 }
592 <xd>{xddouble}  {
                                        /* a doubled double quote stands for one literal double quote */
593                                         addlitchar('"');
594                                 }
595 <xd>{xdinside}  {
596                                         addlit(yytext, yyleng);
597                                 }
598 <xd><<EOF>>             { yyerror("unterminated quoted identifier"); }
599
600 {typecast}              {
601                                         SET_YYLLOC();
602                                         return TYPECAST;
603                                 }
604
605 {self}                  {
                                        /* a single-character token: the token code is the character itself */
606                                         SET_YYLLOC();
607                                         return yytext[0];
608                                 }
609
610 {operator}              {
611                                         /*
612                                          * Check for embedded slash-star or dash-dash; those
613                                          * are comment starts, so operator must stop there.
614                                          * Note that slash-star or dash-dash at the first
615                                          * character will match a prior rule, not this one.
616                                          */
617                                         int             nchars = yyleng;
618                                         char   *slashstar = strstr(yytext, "/*");
619                                         char   *dashdash = strstr(yytext, "--");
620
621                                         if (slashstar && dashdash)
622                                         {
623                                                 /* if both appear, take the first one */
624                                                 if (slashstar > dashdash)
625                                                         slashstar = dashdash;
626                                         }
627                                         else if (!slashstar)
628                                                 slashstar = dashdash;
                                        /* slashstar now marks the earliest comment start, if any; the operator ends just before it */
629                                         if (slashstar)
630                                                 nchars = slashstar - yytext;
631
632                                         /*
633                                          * For SQL compatibility, '+' and '-' cannot be the
634                                          * last char of a multi-char operator unless the operator
635                                          * contains chars that are not in SQL operators.
636                                          * The idea is to lex '=-' as two operators, but not
637                                          * to forbid operator names like '?-' that could not be
638                                          * sequences of SQL operators.
639                                          */
640                                         while (nchars > 1 &&
641                                                    (yytext[nchars-1] == '+' ||
642                                                         yytext[nchars-1] == '-'))
643                                         {
644                                                 int             ic;
645
                                                /* scan the characters before the trailing sign for one that is not a standard SQL operator char */
646                                                 for (ic = nchars-2; ic >= 0; ic--)
647                                                 {
648                                                         if (strchr("~!@#^&|`?%", yytext[ic]))
649                                                                 break;
650                                                 }
651                                                 if (ic >= 0)
652                                                         break; /* found a char that makes it OK */
653                                                 nchars--; /* else remove the +/-, and check again */
654                                         }
655
656                                         SET_YYLLOC();
657
658                                         if (nchars < yyleng)
659                                         {
660                                                 /* Strip the unwanted chars from the token */
661                                                 yyless(nchars);
662                                                 /*
663                                                  * If what we have left is only one char, and it's
664                                                  * one of the characters matching "self", then
665                                                  * return it as a character token the same way
666                                                  * that the "self" rule would have.
667                                                  */
668                                                 if (nchars == 1 &&
669                                                         strchr(",()[].;:+-*/%^<>=", yytext[0]))
670                                                         return yytext[0];
671                                         }
672
673                                         /*
674                                          * Complain if operator is too long.  Unlike the case
675                                          * for identifiers, we make this an error not a notice-
676                                          * and-truncate, because the odds are we are looking at
677                                          * a syntactic mistake anyway.
678                                          */
679                                         if (nchars >= NAMEDATALEN)
680                                                 yyerror("operator too long");
681
682                                         /* Convert "!=" operator to "<>" for compatibility */
                                        /* yytext holds only the kept characters here, since yyless() re-terminates it */
683                                         if (strcmp(yytext, "!=") == 0)
684                                                 yylval.str = pstrdup("<>");
685                                         else
686                                                 yylval.str = pstrdup(yytext);
687                                         return Op;
688                                 }
689
{param}                 {
                                        long            paramno;

                                        SET_YYLLOC();

                                        /*
                                         * Convert the digits following the '$' with strtol()
                                         * rather than atol(): atol() has no way to report a
                                         * number that does not fit in a long, and a value that
                                         * fits in a long but not in an int would be narrowed
                                         * silently when stored in yylval.ival.  Treat either
                                         * case as a lexical error, the same way the operator
                                         * rule reports an over-long operator.
                                         */
                                        errno = 0;
                                        paramno = strtol(yytext + 1, NULL, 10);
                                        if (errno == ERANGE ||
                                                paramno != (long) ((int) paramno))
                                                yyerror("parameter number too large");

                                        yylval.ival = paramno;
                                        return PARAM;
                                }
695
{integer}               {
                                        char       *tail;
                                        long            parsed;
                                        int                     fits_int4;

                                        SET_YYLLOC();

                                        errno = 0;
                                        parsed = strtol(yytext, &tail, 10);

                                        /* usable only if all text was consumed without overflow */
                                        fits_int4 = (errno != ERANGE && *tail == '\0');
#ifdef HAVE_LONG_INT_64
                                        /* long is wider than 32 bits: also reject int4 overflow */
                                        if (parsed != (long) ((int32) parsed))
                                                fits_int4 = 0;
#endif

                                        if (!fits_int4)
                                        {
                                                /* too large for an integer constant; pass it on as a float */
                                                yylval.str = pstrdup(yytext);
                                                return FCONST;
                                        }

                                        yylval.ival = parsed;
                                        return ICONST;
                                }
{decimal}               |
{real}                  {
                                        /* complete float literal: return its text unchanged */
                                        SET_YYLLOC();
                                        yylval.str = pstrdup(yytext);
                                        return FCONST;
                                }
{realfail1}             {
                                        /*
                                         * The match ends in a dangling [Ee].  Push that one
                                         * character back and return the rest as if it had
                                         * matched {decimal}.  The remaining text might really
                                         * be an {integer}, but input like this is almost
                                         * certainly headed for a syntax error, so no attempt
                                         * is made to tell the two apart.
                                         */
                                        yyless(yyleng - 1);
                                        SET_YYLLOC();
                                        yylval.str = pstrdup(yytext);
                                        return FCONST;
                                }
{realfail2}             {
                                        /*
                                         * Same as above, except that two characters (the
                                         * [Ee] and the sign after it) are pushed back.
                                         */
                                        yyless(yyleng - 2);
                                        SET_YYLLOC();
                                        yylval.str = pstrdup(yytext);
                                        return FCONST;
                                }
746
747
{identifier}    {
                                        const ScanKeyword *kw;

                                        SET_YYLLOC();

                                        kw = ScanKeywordLookup(yytext);
                                        if (kw == NULL)
                                        {
                                                /*
                                                 * Not a keyword: return the lower-cased (and, if
                                                 * needed, truncated) copy as an ordinary identifier.
                                                 */
                                                yylval.str = downcase_truncate_identifier(yytext, yyleng, true);
                                                return IDENT;
                                        }

                                        /* Keyword: return its token code along with its name */
                                        yylval.keyword = kw->name;
                                        return kw->value;
                                }
770
{other}                 {
                                        /* any other single character is returned as its own token */
                                        SET_YYLLOC();
                                        return yytext[0];
                                }

<<EOF>>                 {
                                        /* record the location, then end the scan */
                                        SET_YYLLOC();
                                        yyterminate();
                                }
780
781 %%
782
/*
 * lexer_errposition
 *              Report the cursor position of a lexical error, if possible.
 *
 * Meant to be called from inside an ereport() argument list; the value it
 * returns is only a dummy (in fact always 0).
 *
 * Only valid for messages raised by the lexer itself, because it relies on
 * scanbuf still being valid.
 *
 * The function is compiled out (#if 0) in this file.
 */
#if 0
static int
lexer_errposition(void)
{
        /* byte offset -> 1-based character number, handed to ereport */
        return errposition(pg_mbstrlen_with_len(scanbuf, yylloc) + 1);
}
#endif
805
806 /*
807  * yyerror
808  *              Report a lexer or grammar error.
809  *
810  * The message's cursor position identifies the most recently lexed token.
811  * This is OK for syntax error messages from the Bison parser, because Bison
812  * parsers report error as soon as the first unparsable token is reached.
813  * Beware of using yyerror for other purposes, as the cursor position might
814  * be misleading!
815  */
816 void
817 yyerror(const char *message)
818 {
819         longjmp(jmpbuffer, 1);
820 }
821
822
823 /*
824  * Called before any actual parsing is done
825  */
826 void
827 scanner_init(const char *str)
828 {
829         int     slen = strlen(str);
830
831         /*
832          * Might be left over after ereport()
833          */
834         if (YY_CURRENT_BUFFER)
835                 yy_delete_buffer(YY_CURRENT_BUFFER);
836
837         /*
838          * Make a scan buffer with special termination needed by flex.
839          */
840         scanbuf = palloc(slen + 2);
841         memcpy(scanbuf, str, slen);
842         scanbuf[slen] = scanbuf[slen + 1] = YY_END_OF_BUFFER_CHAR;
843         scanbufhandle = yy_scan_buffer(scanbuf, slen + 2);
844
845         /* initialize literal buffer to a reasonable but expansible size */
846         literalalloc = 1024;
847         literalbuf = (char *) palloc(literalalloc);
848         startlit();
849
850         BEGIN(INITIAL);
851 }
852
853
854 /*
855  * Called after parsing is done to clean up after scanner_init()
856  */
857 void
858 scanner_finish(void)
859 {
860         yy_delete_buffer(scanbufhandle);
861         pfree(scanbuf);
862 }
863
864
865 static void
866 addlit(char *ytext, int yleng)
867 {
868         /* enlarge buffer if needed */
869         if ((literallen+yleng) >= literalalloc)
870         {
871                 do {
872                         literalalloc *= 2;
873                 } while ((literallen+yleng) >= literalalloc);
874                 literalbuf = (char *) repalloc(literalbuf, literalalloc);
875         }
876         /* append new data, add trailing null */
877         memcpy(literalbuf+literallen, ytext, yleng);
878         literallen += yleng;
879         literalbuf[literallen] = '\0';
880 }
881
882
883 static void
884 addlitchar(unsigned char ychar)
885 {
886         /* enlarge buffer if needed */
887         if ((literallen+1) >= literalalloc)
888         {
889                 literalalloc *= 2;
890                 literalbuf = (char *) repalloc(literalbuf, literalalloc);
891         }
892         /* append new data, add trailing null */
893         literalbuf[literallen] = ychar;
894         literallen += 1;
895         literalbuf[literallen] = '\0';
896 }
897
898
899 /*
900  * One might be tempted to write pstrdup(literalbuf) instead of this,
901  * but for long literals this is much faster because the length is
902  * already known.
903  */
904 static char *
905 litbufdup(void)
906 {
907         char *new;
908
909         new = palloc(literallen + 1);
910         memcpy(new, literalbuf, literallen+1);
911         return new;
912 }
913
914
915 static unsigned char
916 unescape_single_char(unsigned char c)
917 {
918         /* Normally we wouldn't expect to see \n where n has its high bit set
919          * but we set the flag to check the string if we do get it, so
920          * that this doesn't become a way of getting around the coding validity
921          * checks.
922          */
923         if (IS_HIGHBIT_SET(c))
924                 saw_high_bit = true;
925
926         switch (c)
927         {
928                 case 'b':
929                         return '\b';
930                 case 'f':
931                         return '\f';
932                 case 'n':
933                         return '\n';
934                 case 'r':
935                         return '\r';
936                 case 't':
937                         return '\t';
938                 default:
939                         return c;
940         }
941 }
942
/*
 * check_string_escape_warning
 *              Hook for warning about nonstandard backslash escapes in a string
 *              literal.
 *
 * The whole body is compiled out (#if 0), so the function currently does
 * nothing.  The disabled code would warn once per string about \' and \\,
 * and hand every other character to check_escape_warning().
 */
static void
check_string_escape_warning(unsigned char ychar)
{
        (void) ychar;                   /* unused while the body is disabled */

#if 0
        switch (ychar)
        {
                case '\'':
                        if (warn_on_first_escape && escape_string_warning)
                                ereport(WARNING,
                                                (errcode(ERRCODE_NONSTANDARD_USE_OF_ESCAPE_CHARACTER),
                                                 errmsg("nonstandard use of \\' in a string literal"),
                                                 errhint("Use '' to write quotes in strings, or use the escape string syntax (E'...')."),
                                                 lexer_errposition()));
                        warn_on_first_escape = false;   /* warn only once per string */
                        break;

                case '\\':
                        if (warn_on_first_escape && escape_string_warning)
                                ereport(WARNING,
                                                (errcode(ERRCODE_NONSTANDARD_USE_OF_ESCAPE_CHARACTER),
                                                 errmsg("nonstandard use of \\\\ in a string literal"),
                                                 errhint("Use the escape string syntax for backslashes, e.g., E'\\\\'."),
                                                 lexer_errposition()));
                        warn_on_first_escape = false;   /* warn only once per string */
                        break;

                default:
                        check_escape_warning();
                        break;
        }
#endif
}
971
/*
 * check_escape_warning
 *              Hook for warning about a nonstandard escape in a string literal.
 *
 * The whole body is compiled out (#if 0), so the function currently does
 * nothing.  The disabled code would issue the warning at most once per
 * string.
 */
static void
check_escape_warning(void)
{
#if 0
        if (warn_on_first_escape && escape_string_warning)
        {
                ereport(WARNING,
                                (errcode(ERRCODE_NONSTANDARD_USE_OF_ESCAPE_CHARACTER),
                                 errmsg("nonstandard use of escape in a string literal"),
                                 errhint("Use the escape string syntax for escapes, e.g., E'\\r\\n'."),
                                 lexer_errposition()));
        }
        warn_on_first_escape = false;   /* warn only once per string */
#endif
}
985
986 /*
987  * downcase_truncate_identifier() --- do appropriate downcasing and
988  * truncation of an unquoted identifier.  Optionally warn of truncation.
989  *
990  * Returns a palloc'd string containing the adjusted identifier.
991  *
992  * Note: in some usages the passed string is not null-terminated.
993  *
994  * Note: the API of this function is designed to allow for downcasing
995  * transformations that increase the string length, but we don't yet
996  * support that.  If you want to implement it, you'll need to fix
997  * SplitIdentifierString() in utils/adt/varlena.c.
998  */
999 char *
1000 downcase_truncate_identifier(const char *ident, int len, int warn)
1001 {
1002         char       *result;
1003         int                     i;
1004
1005         result = palloc(len + 1);
1006
1007         /*
1008          * SQL99 specifies Unicode-aware case normalization, which we don't yet
1009          * have the infrastructure for.  Instead we use tolower() to provide a
1010          * locale-aware translation.  However, there are some locales where this
1011          * is not right either (eg, Turkish may do strange things with 'i' and
1012          * 'I').  Our current compromise is to use tolower() for characters with
1013          * the high bit set, and use an ASCII-only downcasing for 7-bit
1014          * characters.
1015          */
1016         for (i = 0; i < len; i++)
1017         {
1018                 unsigned char ch = (unsigned char) ident[i];
1019
1020                 if (ch >= 'A' && ch <= 'Z')
1021                         ch += 'a' - 'A';
1022                 else if (ch >= 0x80 && isupper(ch))
1023                         ch = tolower(ch);
1024                 result[i] = (char) ch;
1025         }
1026         result[i] = '\0';
1027
1028         if (i >= NAMEDATALEN)
1029                 truncate_identifier(result, i, warn);
1030
1031         return result;
1032 }
1033
1034 /*
1035  * truncate_identifier() --- truncate an identifier to NAMEDATALEN-1 bytes.
1036  *
1037  * The given string is modified in-place, if necessary.  A warning is
1038  * issued if requested.
1039  *
1040  * We require the caller to pass in the string length since this saves a
1041  * strlen() call in some common usages.
1042  */
1043 void
1044 truncate_identifier(char *ident, int len, int warn)
1045 {
1046         if (len >= NAMEDATALEN)
1047         {
1048                 len = strlen(ident); /*pg_mbcliplen(ident, len, NAMEDATALEN - 1);*/
1049 #if 0
1050                 if (warn)
1051                         ereport(NOTICE,
1052                                         (errcode(ERRCODE_NAME_TOO_LONG),
1053                                          errmsg("identifier \"%s\" will be truncated to \"%.*s\"",
1054                                                         ident, len, ident)));
1055 #endif
1056                 ident[len] = '\0';
1057         }
1058 }