git.8kb.co.uk Git - pgpool-ii/pgpool-ii_2.2.5/blob - parser/scan.l

   1 %{
   2 /*-------------------------------------------------------------------------
   3  *
   4  * scan.l
   5  *        lexical scanner for PostgreSQL
   6  *
   7  * NOTE NOTE NOTE:
   8  *
   9  * The rules in this file must be kept in sync with psql's lexer!!!
  10  *
  11  * The rules are designed so that the scanner never has to backtrack,
  12  * in the sense that there is always a rule that can match the input
  13  * consumed so far (the rule action may internally throw back some input
  14  * with yyless(), however).  As explained in the flex manual, this makes
  15  * for a useful speed increase --- about a third faster than a plain -CF
  16  * lexer, in simple testing.  The extra complexity is mostly in the rules
  17  * for handling float numbers and continued string literals.  If you change
  18  * the lexical rules, verify that you haven't broken the no-backtrack
  19  * property by running flex with the "-b" option and checking that the
  20  * resulting "lex.backup" file says that no backing up is needed.
  21  *
  22  *
  23  * Portions Copyright (c) 2003-2008, PgPool Global Development Group
  24  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
  25  * Portions Copyright (c) 1994, Regents of the University of California
  26  *
  27  * IDENTIFICATION
  28  *        $PostgreSQL: pgsql/src/backend/parser/scan.l,v 1.141 2007/09/12 20:49:27 adunstan Exp $
  29  *
  30  *-------------------------------------------------------------------------
  31  */
  32 #include "pool_parser.h"
  33
  34 #include <ctype.h>
  35 #include <unistd.h>
  36 #include <errno.h>
  37 #include <string.h>
  38 #include <setjmp.h>
  39
  40 #ifndef ereport
     /*
      * Fallback used when the headers above do not provide ereport():
      * every call collapses to yyerror("") and both arguments are discarded.
      */
  41 #define ereport(a,b) yyerror("")
  42 #endif
     /*
      * Defined as the constant 0 here, so the octal and hex escape rules
      * never set saw_high_bit.
      */
  43 #define IS_HIGHBIT_SET(c) 0
  44
  45 #include "gramparse.h"
  46 #include "keywords.h"
  47 /* Not needed now that this file is compiled as part of gram.y */
  48 /* #include "parser/parse.h" */
  49 #include "gram.h"
  50 #include "scansup.h"
  51
  52 #include "pool_memory.h"
  53
  54
  55 /* Avoid exit() on fatal scanner errors (a bit ugly -- see yy_fatal_error) */
  56 /*
  57 #undef fprintf
  58 #define fprintf(file, fmt, msg)  ereport(ERROR, (errmsg_internal("%s", msg)))
  59 */
  60
  61 static int              xcdepth = 0;    /* depth of nesting in slash-star comments */
  62 static char    *dolqstart;      /* current $foo$ quote start string */
  63
  64 /*
  65  * GUC variables.  This is a DIRECT violation of the warning given at the
  66  * head of gram.y, ie flex/bison code must not depend on any GUC variables;
  67  * as such, changing their values can induce very unintuitive behavior.
  68  * But we shall have to live with it as a short-term thing until the switch
  69  * to SQL-standard string syntax is complete.
  70  */
  71 typedef enum
     /*
      * Values for backslash_quote.  In the rules visible here the setting is
      * consulted only inside an #if 0 section of the <xe>{xeescape} action.
      */
  72 {
  73         BACKSLASH_QUOTE_OFF,
  74         BACKSLASH_QUOTE_ON,
  75         BACKSLASH_QUOTE_SAFE_ENCODING
  76 } BackslashQuoteType;
  77
  78 BackslashQuoteType backslash_quote = BACKSLASH_QUOTE_SAFE_ENCODING;
  79 int                     escape_string_warning = true;
     /* Nonzero: plain '...' literals are scanned in <xq>; zero: in <xe> with escapes */
  80 int                     standard_conforming_strings = false;
  81
     /* Set when a plain '...' literal starts, cleared when an E'...' literal starts */
  82 static bool             warn_on_first_escape;
     /* Cleared at the start of every quoted literal; set only by the octal/hex escape rules */
  83 static bool     saw_high_bit = false;
  84
  85 /*
  86  * literalbuf is used to accumulate literal values when multiple rules
  87  * are needed to parse a single literal.  Call startlit to reset buffer
  88  * to empty, addlit to add text.  Note that the buffer is palloc'd and
  89  * starts life afresh on every parse cycle.
  90  */
  91 static char        *literalbuf;         /* expandable buffer */
  92 static int              literallen;             /* actual current length */
  93 static int              literalalloc;   /* current allocated buffer size */
  94
     /* Reset the accumulator to an empty string; writes literalbuf[0], so the buffer must already exist */
  95 #define startlit()  (literalbuf[0] = '\0', literallen = 0)
     /* Accumulator helpers; their definitions lie outside the part of the file shown here */
  96 static void addlit(char *ytext, int yleng);
  97 static void addlitchar(unsigned char ychar);
  98 static char *litbufdup(void);
  99
 100 #if 0
 101 static int      lexer_errposition(void);
 102 #endif
 103 static void check_escape_warning(void);
 104 static void check_string_escape_warning(unsigned char ychar);
 105
 106 extern char *yytext;
 107
 108 /*
 109  * Each call to yylex must set yylloc to the location of the found token
 110  * (expressed as a byte offset from the start of the input text).
 111  * When we parse a token that requires multiple lexer rules to process,
 112  * this should be done in the first such rule, else yylloc will point
 113  * into the middle of the token.
 114  */
 115 #define SET_YYLLOC()  (yylloc = yytext - scanbuf)
 116
 117 /* Handles to the buffer that the lexer uses internally */
 118 static YY_BUFFER_STATE scanbufhandle;
 119 static char *scanbuf;
 120
 121 static unsigned char unescape_single_char(unsigned char c);
 122 void yyerror(const char *s);
 123
 124 %}
 125
     /*
      * Scanner generation options:
      *   8bit               accept all 8-bit input characters
      *   never-interactive  input is never treated as an interactive terminal
      *   nodefault          no default (echo) rule; the rules must cover all input
      *   nounput            do not generate unput()
      *   noyywrap           end of input is final; yywrap() is not called
      *   prefix             generated symbols are named base_yy... instead of yy...
      */
 126 %option 8bit
 127 %option never-interactive
 128 %option nodefault
 129 %option nounput
 130 %option noyywrap
 131 %option prefix="base_yy"
 132
 133 /*
 134  * OK, here is a short description of lex/flex rules behavior.
 135  * The longest pattern which matches an input string is always chosen.
 136  * For equal-length patterns, the first occurring in the rules list is chosen.
 137  * INITIAL is the starting state, to which all non-conditional rules apply.
 138  * Exclusive states change parsing rules while the state is active.  When in
 139  * an exclusive state, only those rules defined for that state apply.
 140  *
 141  * We use exclusive states for quoted strings, extended comments,
 142  * and to eliminate parsing troubles for numeric strings.
 143  * Exclusive states:
 144  *  <xb> bit string literal
 145  *  <xc> extended C-style comments
 146  *  <xd> delimited identifiers (double-quoted identifiers)
 147  *  <xh> hexadecimal numeric string
 148  *  <xq> standard quoted strings
 149  *  <xe> extended quoted strings (support backslash escape sequences)
 150  *  <xdolq> $foo$ quoted strings
 151  */
 152
 153 %x xb
 154 %x xc
 155 %x xd
 156 %x xh
 157 %x xe
 158 %x xq
 159 %x xdolq
 160
 161 /*
 162  * In order to make the world safe for Windows and Mac clients as well as
 163  * Unix ones, we accept either \n or \r as a newline.  A DOS-style \r\n
 164  * sequence will be seen as two successive newlines, but that doesn't cause
 165  * any problems.  Comments that start with -- and extend to the next
 166  * newline are treated as equivalent to a single whitespace character.
 167  *
 168  * NOTE a fine point: if there is no newline following --, we will absorb
 169  * everything to the end of the input as a comment.  This is correct.  Older
 170  * versions of Postgres failed to recognize -- as a comment if the input
 171  * did not end with a newline.
 172  *
 173  * XXX perhaps \f (formfeed) should be treated as a newline as well?
 174  *
 175  * XXX if you change the set of whitespace characters, fix scanner_isspace()
 176  * to agree, and see also the plpgsql lexer.
 177  */
 178
 179 space                   [ \t\n\r\f]
 180 horiz_space             [ \t\f]
 181 newline                 [\n\r]
 182 non_newline             [^\n\r]
 183
 184 comment                 ("--"{non_newline}*)
 185
 186 whitespace              ({space}+|{comment})
 187
 188 /*
 189  * SQL requires at least one newline in the whitespace separating
 190  * string literals that are to be concatenated.  Silly, but who are we
 191  * to argue?  Note that {whitespace_with_newline} should not have * after
 192  * it, whereas {whitespace} should generally have a * after it...
 193  */
 194
 195 special_whitespace              ({space}+|{comment}{newline})
 196 horiz_whitespace                ({horiz_space}|{comment})
 197 whitespace_with_newline ({horiz_whitespace}*{newline}{special_whitespace}*)
 198
 199 /*
 200  * To ensure that {quotecontinue} can be scanned without having to back up
 201  * if the full pattern isn't matched, we include trailing whitespace in
 202  * {quotestop}.  This matches all cases where {quotecontinue} fails to match,
 203  * except for {quote} followed by whitespace and just one "-" (not two,
 204  * which would start a {comment}).  To cover that we have {quotefail}.
 205  * The actions for {quotestop} and {quotefail} must throw back characters
 206  * beyond the quote proper.
 207  */
 208 quote                   '
 209 quotestop               {quote}{whitespace}*
 210 quotecontinue   {quote}{whitespace_with_newline}{quote}
 211 quotefail               {quote}{whitespace}*"-"
 212
 213 /* Bit string
 214  * It is tempting to scan the string for only those characters
 215  * which are allowed. However, this leads to silently swallowed
 216  * characters if illegal characters are included in the string.
 217  * For example, if xbinside is [01] then B'ABCD' is interpreted
 218  * as a zero-length string, and the ABCD' is lost!
 219  * Better to pass the string forward and let the input routines
 220  * validate the contents.
 221  */
 222 xbstart                 [bB]{quote}
 223 xbinside                [^']*
 224
 225 /* Hexadecimal number */
 226 xhstart                 [xX]{quote}
 227 xhinside                [^']*
 228
 229 /* National character */
 230 xnstart                 [nN]{quote}
 231
 232 /* Quoted string that allows backslash escapes */
 233 xestart                 [eE]{quote}
 234 xeinside                [^\\']+
 235 xeescape                [\\][^0-7]
 236 xeoctesc                [\\][0-7]{1,3}
 237 xehexesc                [\\]x[0-9A-Fa-f]{1,2}
 238
 239 /* Extended quote
 240  * xqdouble implements embedded quote, ''''
 241  */
 242 xqstart                 {quote}
 243 xqdouble                {quote}{quote}
 244 xqinside                [^']+
 245
 246 /* $foo$ style quotes ("dollar quoting")
 247  * The quoted string starts with $foo$ where "foo" is an optional string
 248  * in the form of an identifier, except that it may not contain "$",
 249  * and extends to the first occurrence of an identical string.
 250  * There is *no* processing of the quoted text.
 251  *
 252  * {dolqfailed} is an error rule to avoid scanner backup when {dolqdelim}
 253  * fails to match its trailing "$".
 254  */
 255 dolq_start              [A-Za-z\200-\377_]
 256 dolq_cont               [A-Za-z\200-\377_0-9]
 257 dolqdelim               \$({dolq_start}{dolq_cont}*)?\$
 258 dolqfailed              \${dolq_start}{dolq_cont}*
 259 dolqinside              [^$]+
 260
 261 /* Double quote
 262  * Allows embedded spaces and other special characters in identifiers.
 263  */
 264 dquote                  \"
 265 xdstart                 {dquote}
 266 xdstop                  {dquote}
 267 xddouble                {dquote}{dquote}
 268 xdinside                [^"]+
 269
 270 /* C-style comments
 271  *
 272  * The "extended comment" syntax closely resembles allowable operator syntax.
 273  * The tricky part here is to get lex to recognize a string starting with
 274  * slash-star as a comment, when interpreting it as an operator would produce
 275  * a longer match --- remember lex will prefer a longer match!  Also, if we
 276  * have something like plus-slash-star, lex will think this is a 3-character
 277  * operator whereas we want to see it as a + operator and a comment start.
 278  * The solution is two-fold:
 279  * 1. append {op_chars}* to xcstart so that it matches as much text as
 280  *    {operator} would. Then the tie-breaker (first matching rule of same
 281  *    length) ensures xcstart wins.  We put back the extra stuff with yyless()
 282  *    in case it contains a star-slash that should terminate the comment.
 283  * 2. In the operator rule, check for slash-star within the operator, and
 284  *    if found throw it back with yyless().  This handles the plus-slash-star
 285  *    problem.
 286  * Dash-dash comments have similar interactions with the operator rule.
 287  */
 288 xcstart                 \/\*{op_chars}*
 289 xcstop                  \*+\/
 290 xcinside                [^*/]+
 291
 292 digit                   [0-9]
     /*
      * An identifier starts with an ASCII letter, an underscore, or any
      * high-bit byte (octal 200-377); the following characters may also
      * be digits or "$".
      */
 293 ident_start             [A-Za-z\200-\377_]
 294 ident_cont              [A-Za-z\200-\377_0-9\$]
 295
 296 identifier              {ident_start}{ident_cont}*
 297
 298 typecast                "::"
 299
 300 /*
 301  * "self" is the set of chars that should be returned as single-character
 302  * tokens.  "op_chars" is the set of chars that can make up "Op" tokens,
 303  * which can be one or more characters long (but if a single-char token
 304  * appears in the "self" set, it is not to be returned as an Op).  Note
 305  * that the sets overlap, but each has some chars that are not in the other.
 306  *
 307  * If you change either set, adjust the character lists appearing in the
 308  * rule for "operator"!
 309  */
 310 self                    [,()\[\].;\:\+\-\*\/\%\^\<\>\=]
 311 op_chars                [\~\!\@\#\^\&\|\`\?\+\-\*\/\%\<\>\=]
 312 operator                {op_chars}+
 313
 314 /* We no longer allow unary minus in numbers.
 315  * Instead we pass it separately to the parser, where it gets
 316  * coerced via doNegate() -- Leon aug 20 1999
 317  *
 318  * {realfail1} and {realfail2} are added to prevent the need for scanner
 319  * backup when the {real} rule fails to match completely.
 320  */
 321
 322 integer                 {digit}+
 323 decimal                 (({digit}*\.{digit}+)|({digit}+\.{digit}*))
 324 real                    ({integer}|{decimal})[Ee][-+]?{digit}+
 325 realfail1               ({integer}|{decimal})[Ee]
 326 realfail2               ({integer}|{decimal})[Ee][-+]
 327
     /* Positional parameter reference: "$" followed by one or more digits, e.g. $1 */
 328 param                   \${integer}
 329
     /* Any single character except newline (the pattern "." never matches a newline) */
 330 other                   .
 331
 332 /*
 333  * Dollar quoted strings are totally opaque, and no escaping is done on them.
 334  * Other quoted strings must allow some special characters such as single-quote
 335  *  and newline.
 336  * Embedded single-quotes are implemented both in the SQL standard
 337  *  style of two adjacent single quotes "''" and in the Postgres/Java style
 338  *  of escaped-quote "\'".
 339  * Other embedded escaped characters are matched explicitly and the leading
 340  *  backslash is dropped from the string.
 341  * Note that xcstart must appear before operator, as explained above!
 342  *  Also whitespace (comment) must appear before operator.
 343  */
 344
 345 %%
 346
 347 {whitespace}    {
 348                                         /* whitespace and dash-dash comments produce no token */
 349                                 }
 350
 351 {xcstart}               {
                                             /*
                                              * Start of a slash-star comment seen from the INITIAL
                                              * state: enter <xc> at nesting depth zero.
                                              */
 352                                         /* Set location in case of syntax error in comment */
 353                                         SET_YYLLOC();
 354                                         xcdepth = 0;
 355                                         BEGIN(xc);
 356                                         /* Put back any characters past slash-star; see above */
 357                                         yyless(2);
 358                                 }
 359
 360 <xc>{xcstart}   {
                                             /* A slash-star inside a comment opens one more nesting level */
 361                                         xcdepth++;
 362                                         /* Put back any characters past slash-star; see above */
 363                                         yyless(2);
 364                                 }
 365
 366 <xc>{xcstop}    {
                                             /*
                                              * Star-slash closes one level: at depth zero the comment
                                              * is finished and normal scanning resumes, otherwise
                                              * only the nesting depth is reduced.
                                              */
 367                                         if (xcdepth <= 0)
 368                                                 BEGIN(INITIAL);
 369                                         else
 370                                                 xcdepth--;
 371                                 }
 372
 373 <xc>{xcinside}  {
 374                                         /* comment text containing neither star nor slash: discard */
 375                                 }
 376
 377 <xc>{op_chars}  {
 378                                         /* a single operator character, e.g. a slash that opens no nested comment: discard */
 379                                 }
 380
 381 <xc>\*+                 {
 382                                         /* a run of stars not followed by a slash: discard */
 383                                 }
 384
 385 <xc><<EOF>>             { yyerror("unterminated /* comment"); }
 386
 387 {xbstart}               {
 388                                         /* Binary bit type.
 389                                          * At some point we should simply pass the string
 390                                          * forward to the parser and label it there.
 391                                          * In the meantime, place a leading "b" on the string
 392                                          * to mark it for the input routine as a binary string.
 393                                          */
                                             /* Record the token start, enter <xb>, and seed the buffer with the marker */
 394                                         SET_YYLLOC();
 395                                         BEGIN(xb);
 396                                         startlit();
 397                                         addlitchar('b');
 398                                 }
 399 <xb>{quotestop} |
 400 <xb>{quotefail} {
                                             /*
                                              * Closing quote.  Both patterns may have consumed
                                              * trailing whitespace (and a "-"); keep only the quote
                                              * character itself and rescan the rest.
                                              */
 401                                         yyless(1);
 402                                         BEGIN(INITIAL);
                                             /* Hand the accumulated text, including its leading marker, to the parser */
 403                                         yylval.str = litbufdup();
 404                                         return BCONST;
 405                                 }
 406 <xh>{xhinside}  |
 407 <xb>{xbinside}  {
                                             /* Accumulate the body unvalidated; see the "Bit string" note above */
 408                                         addlit(yytext, yyleng);
 409                                 }
 410 <xh>{quotecontinue}     |
 411 <xb>{quotecontinue}     {
 412                                         /* quote, whitespace containing a newline, quote: the literal simply continues */
 413                                 }
 414 <xb><<EOF>>             { yyerror("unterminated bit string literal"); }
 415
 416 {xhstart}               {
 417                                         /* Hexadecimal bit type.
 418                                          * At some point we should simply pass the string
 419                                          * forward to the parser and label it there.
 420                                          * In the meantime, place a leading "x" on the string
 421                                          * to mark it for the input routine as a hex string.
 422                                          */
                                             /* Record the token start, enter <xh>, and seed the buffer with the marker */
 423                                         SET_YYLLOC();
 424                                         BEGIN(xh);
 425                                         startlit();
 426                                         addlitchar('x');
 427                                 }
 428 <xh>{quotestop} |
 429 <xh>{quotefail} {
                                             /*
                                              * Closing quote.  Both patterns may have consumed
                                              * trailing whitespace (and a "-"); keep only the quote
                                              * character itself and rescan the rest.
                                              */
 430                                         yyless(1);
 431                                         BEGIN(INITIAL);
                                             /* Hand the accumulated text, including its leading marker, to the parser */
 432                                         yylval.str = litbufdup();
 433                                         return XCONST;
 434                                 }
 435 <xh><<EOF>>             { yyerror("unterminated hexadecimal string literal"); }
 436
 437 {xnstart}               {
 438                                         /* National character.
 439                                          * We will pass this along as a normal character string,
 440                                          * but preceded with an internally-generated "NCHAR".
 441                                          */
 442                                         const ScanKeyword *keyword;
 443
 444                                         SET_YYLLOC();
 445                                         yyless(1);                              /* eat only 'n' this time */
                                             /*
                                              * The quote thrown back above is rescanned next and
                                              * starts an ordinary string literal.
                                              */
 447                                         keyword = ScanKeywordLookup("nchar");
                                             if (keyword == NULL)
                                             {
                                                     /*
                                                      * nchar should be a keyword, but do not dereference
                                                      * a failed lookup: fall back to returning the "n"
                                                      * as a plain identifier.
                                                      */
                                                     yylval.str = pstrdup("n");
                                                     return IDENT;
                                             }
 448                                         yylval.keyword = keyword->name;
 449                                         return keyword->value;
 450                                 }
 451
 452 {xqstart}               {
                                             /*
                                              * Plain quoted string.  Backslash escapes are processed
                                              * (state xe) unless standard_conforming_strings is set,
                                              * in which case the text is scanned literally (state xq).
                                              */
 453                                         warn_on_first_escape = true;
 454                                         saw_high_bit = false;
 455                                         SET_YYLLOC();
 456                                         if (standard_conforming_strings)
 457                                                 BEGIN(xq);
 458                                         else
 459                                                 BEGIN(xe);
 460                                         startlit();
 461                                 }
 462 {xestart}               {
                                             /*
                                              * Explicit E'...' string: always scanned in state xe,
                                              * with warn_on_first_escape cleared.
                                              */
 463                                         warn_on_first_escape = false;
 464                                         saw_high_bit = false;
 465                                         SET_YYLLOC();
 466                                         BEGIN(xe);
 467                                         startlit();
 468                                 }
 469 <xq,xe>{quotestop}      |
 470 <xq,xe>{quotefail} {
                                             /*
                                              * Closing quote of a quoted string.  Both patterns may
                                              * have consumed trailing whitespace (and a "-"); keep
                                              * only the quote character and rescan the rest.
                                              */
 471                                         yyless(1);
 472                                         BEGIN(INITIAL);
                                             /*
                                              * The multibyte re-verification that belongs here
                                              * (pg_verifymbstr(literalbuf, literallen, false) when
                                              * saw_high_bit is set, since unescaping may have made
                                              * the data invalid) is disabled in this scanner, so
                                              * intentionally no check is made.
                                              */
 478                                         yylval.str = litbufdup();
 479                                         return SCONST;
 480                                 }
 481 <xq,xe>{xqdouble} {
                                             /* Two adjacent quotes stand for one literal quote character */
 482                                         addlitchar('\'');
 483                                 }
 484 <xq>{xqinside}  {
                                             /* A run of characters other than a quote is copied verbatim */
 485                                         addlit(yytext, yyleng);
 486                                 }
 487 <xe>{xeinside}  {
                                             /* A run of characters that are neither quote nor backslash is copied verbatim */
 488                                         addlit(yytext, yyleng);
 489                                 }
 490 <xe>{xeescape}  {
                                             /*
                                              * Backslash followed by one character that is not an
                                              * octal digit.  The backslash-quote safety check is
                                              * compiled out with #if 0, so the test below currently
                                              * has no effect.
                                              */
 491                                         if (yytext[1] == '\'')
 492                                         {
 493 #if 0
 494                                                 if (backslash_quote == BACKSLASH_QUOTE_OFF ||
 495                                                         (backslash_quote == BACKSLASH_QUOTE_SAFE_ENCODING &&
 496                                                          PG_ENCODING_IS_CLIENT_ONLY(pg_get_client_encoding())))
 497                                                         ereport(ERROR,
 498                                                                         (errcode(ERRCODE_NONSTANDARD_USE_OF_ESCAPE_CHARACTER),
 499                                                                          errmsg("unsafe use of \\' in a string literal"),
 500                                                                          errhint("Use '' to write quotes in strings. \\' is insecure in client-only encodings."),
 501                                                                          lexer_errposition()));
 502 #endif
 503                                         }
                                             /* Append whatever unescape_single_char() returns for the escaped character */
 504                                         check_string_escape_warning(yytext[1]);
 505                                         addlitchar(unescape_single_char(yytext[1]));
 506                                 }
 507 <xe>{xeoctesc}  {
                                             /*
                                              * Backslash plus one to three octal digits: skip the
                                              * backslash and convert.  NOTE: three-digit values above
                                              * 0377 do not fit in unsigned char and are reduced
                                              * modulo 256 by the assignment.
                                              */
 508                                         unsigned char c = strtoul(yytext+1, NULL, 8);
 509
 510                                         check_escape_warning();
 511                                         addlitchar(c);
                                             /* IS_HIGHBIT_SET is defined as 0 in this file, so this branch is never taken */
 512                                         if (IS_HIGHBIT_SET(c))
 513                                                 saw_high_bit = true;
 514                                 }
 515 <xe>{xehexesc}  {
                                             /*
                                              * Backslash, "x", then one or two hex digits: skip the
                                              * two prefix characters and convert.
                                              */
 516                                         unsigned char c = strtoul(yytext+2, NULL, 16);
 517
 518                                         check_escape_warning();
 519                                         addlitchar(c);
                                             /* IS_HIGHBIT_SET is defined as 0 in this file, so this branch is never taken */
 520                                         if (IS_HIGHBIT_SET(c))
 521                                                 saw_high_bit = true;
 522                                 }
 523 <xq,xe>{quotecontinue} {
 524                                         /* quote, whitespace containing a newline, quote: the literal simply continues */
 525                                 }
 526 <xe>.                   {
 527                                         /* This is only needed for \ just before EOF */
 528                                         addlitchar(yytext[0]);
 529                                 }
 530 <xq,xe><<EOF>>          { yyerror("unterminated quoted string"); }
 531
 532 {dolqdelim}             {
                                             /*
                                              * Opening $tag$ (or $$) delimiter: keep a private copy
                                              * so that candidate closing delimiters can be compared
                                              * against it, then start collecting the body.
                                              */
 533                                         SET_YYLLOC();
 534                                         dolqstart = pstrdup(yytext);
 535                                         BEGIN(xdolq);
 536                                         startlit();
 537                                 }
 538 {dolqfailed}    {
                                             /* A "$" followed by identifier text but no closing "$": not a dollar quote */
 539                                         /* throw back all but the initial "$" */
 540                                         yyless(1);
 541                                         /* and treat it as {other} */
 542                                         return yytext[0];
 543                                 }
 544 <xdolq>{dolqdelim} {
                                             /*
                                              * A $...$ sequence inside a dollar-quoted string ends
                                              * the string only when it is identical to the opening
                                              * delimiter saved in dolqstart.
                                              */
 545                                         if (strcmp(yytext, dolqstart) == 0)
 546                                         {
 547                                                 pfree(dolqstart);
                                                     /* Do not leave a dangling pointer in the file-level static */
                                                     dolqstart = NULL;
 548                                                 BEGIN(INITIAL);
 549                                                 yylval.str = litbufdup();
 550                                                 return SCONST;
 551                                         }
 552                                         else
 553                                         {
 554                                                 /*
 555                                                  * When we fail to match $...$ to dolqstart, transfer
 556                                                  * the $... part to the output, but put back the final
 557                                                  * $ for rescanning.  Consider $delim$...$junk$delim$
 558                                                  */
 559                                                 addlit(yytext, yyleng-1);
 560                                                 yyless(yyleng-1);
 561                                         }
 562                                 }
 563 <xdolq>{dolqinside} {
                                             /* A run of characters containing no "$" is copied verbatim */
 564                                         addlit(yytext, yyleng);
 565                                 }
 566 <xdolq>{dolqfailed} {
                                             /* "$" plus identifier text with no closing "$" is ordinary body text */
 567                                         addlit(yytext, yyleng);
 568                                 }
 569 <xdolq>.                {
 570                                         /* This is only needed for $ inside the quoted text */
 571                                         addlitchar(yytext[0]);
 572                                 }
 573 <xdolq><<EOF>>  { yyerror("unterminated dollar-quoted string"); }
 574
 575 {xdstart}               {
                                             /* Opening double quote of a delimited identifier: start collecting its text */
 576                                         SET_YYLLOC();
 577                                         BEGIN(xd);
 578                                         startlit();
 579                                 }
 580 <xd>{xdstop}    {
 581                                         char               *ident;
 582
 583                                         BEGIN(INITIAL);
                                             /*
                                              * An empty delimited identifier is reported as an error.
                                              * NOTE(review): the code below still runs if yyerror()
                                              * returns -- confirm that it does not return.
                                              */
 584                                         if (literallen == 0)
 585                                                 yyerror("zero-length delimited identifier");
 586                                         ident = litbufdup();
                                             /* Identifiers of NAMEDATALEN bytes or more are passed to truncate_identifier() */
 587                                         if (literallen >= NAMEDATALEN)
 588                                                 truncate_identifier(ident, literallen, true);
 589                                         yylval.str = ident;
 590                                         return IDENT;
 591                                 }
 592 <xd>{xddouble}  {
                                             /* Two adjacent double quotes stand for one literal double quote */
 593                                         addlitchar('"');
 594                                 }
 595 <xd>{xdinside}  {
                                             /* A run of characters other than a double quote is copied verbatim */
 596                                         addlit(yytext, yyleng);
 597                                 }
 598 <xd><<EOF>>             { yyerror("unterminated quoted identifier"); }
 599
 600 {typecast}              {
                                             /* "::" is a longer match than the single ":" accepted by the self rule, so it wins */
 601                                         SET_YYLLOC();
 602                                         return TYPECAST;
 603                                 }
 604
 605 {self}                  {
                                             /* Single-character token: the token code is the character itself */
 606                                         SET_YYLLOC();
 607                                         return yytext[0];
 608                                 }
 609
 610 {operator}              {
 611                                         /*
 612                                          * Check for embedded slash-star or dash-dash; those
 613                                          * are comment starts, so operator must stop there.
 614                                          * Note that slash-star or dash-dash at the first
 615                                          * character will match a prior rule, not this one.
 616                                          */
 617                                         int             nchars = yyleng;
 618                                         char   *slashstar = strstr(yytext, "/*");
 619                                         char   *dashdash = strstr(yytext, "--");
 620
 621                                         if (slashstar && dashdash)
 622                                         {
 623                                                 /* if both appear, take the first one */
 624                                                 if (slashstar > dashdash)
 625                                                         slashstar = dashdash;
 626                                         }
 627                                         else if (!slashstar)
 628                                                 slashstar = dashdash;
 629                                         if (slashstar)
 630                                                 nchars = slashstar - yytext;
 631
 632                                         /*
 633                                          * For SQL compatibility, '+' and '-' cannot be the
 634                                          * last char of a multi-char operator unless the operator
 635                                          * contains chars that are not in SQL operators.
 636                                          * The idea is to lex '=-' as two operators, but not
 637                                          * to forbid operator names like '?-' that could not be
 638                                          * sequences of SQL operators.
 639                                          */
 640                                         while (nchars > 1 &&
 641                                                    (yytext[nchars-1] == '+' ||
 642                                                         yytext[nchars-1] == '-'))
 643                                         {
 644                                                 int             ic;
 645
 646                                                 for (ic = nchars-2; ic >= 0; ic--)
 647                                                 {
 648                                                         if (strchr("~!@#^&|`?%", yytext[ic]))
 649                                                                 break;
 650                                                 }
 651                                                 if (ic >= 0)
 652                                                         break; /* found a char that makes it OK */
 653                                                 nchars--; /* else remove the +/-, and check again */
 654                                         }
 655
 656                                         SET_YYLLOC();
 657
 658                                         if (nchars < yyleng)
 659                                         {
 660                                                 /* Strip the unwanted chars from the token */
 661                                                 yyless(nchars);
 662                                                 /*
 663                                                  * If what we have left is only one char, and it's
 664                                                  * one of the characters matching "self", then
 665                                                  * return it as a character token the same way
 666                                                  * that the "self" rule would have.
 667                                                  */
 668                                                 if (nchars == 1 &&
 669                                                         strchr(",()[].;:+-*/%^<>=", yytext[0]))
 670                                                         return yytext[0];
 671                                         }
 672
 673                                         /*
 674                                          * Complain if operator is too long.  Unlike the case
 675                                          * for identifiers, we make this an error not a notice-
 676                                          * and-truncate, because the odds are we are looking at
 677                                          * a syntactic mistake anyway.
 678                                          */
 679                                         if (nchars >= NAMEDATALEN)
 680                                                 yyerror("operator too long");
 681
 682                                         /* Convert "!=" operator to "<>" for compatibility */
 683                                         if (strcmp(yytext, "!=") == 0)
 684                                                 yylval.str = pstrdup("<>");
 685                                         else
 686                                                 yylval.str = pstrdup(yytext);
 687                                         return Op;
 688                                 }
 689
 690 {param}                 {
 691                                         SET_YYLLOC();
 692                                         yylval.ival = atol(yytext + 1);
 693                                         return PARAM;
 694                                 }
 695
 696 {integer}               {
 697                                         long val;
 698                                         char* endptr;
 699
 700                                         SET_YYLLOC();
 701                                         errno = 0;
 702                                         val = strtol(yytext, &endptr, 10);
 703                                         if (*endptr != '\0' || errno == ERANGE
 704 #ifdef HAVE_LONG_INT_64
 705                                                 /* if long > 32 bits, check for overflow of int4 */
 706                                                 || val != (long) ((int32) val)
 707 #endif
 708                                                 )
 709                                         {
 710                                                 /* integer too large, treat it as a float */
 711                                                 yylval.str = pstrdup(yytext);
 712                                                 return FCONST;
 713                                         }
 714                                         yylval.ival = val;
 715                                         return ICONST;
 716                                 }
 717 {decimal}               {
 718                                         SET_YYLLOC();
 719                                         yylval.str = pstrdup(yytext);
 720                                         return FCONST;
 721                                 }
 722 {real}                  {
 723                                         SET_YYLLOC();
 724                                         yylval.str = pstrdup(yytext);
 725                                         return FCONST;
 726                                 }
 727 {realfail1}             {
 728                                         /*
 729                                          * throw back the [Ee], and treat as {decimal}.  Note
 730                                          * that it is possible the input is actually {integer},
 731                                          * but since this case will almost certainly lead to a
 732                                          * syntax error anyway, we don't bother to distinguish.
 733                                          */
 734                                         yyless(yyleng-1);
 735                                         SET_YYLLOC();
 736                                         yylval.str = pstrdup(yytext);
 737                                         return FCONST;
 738                                 }
 739 {realfail2}             {
 740                                         /* throw back the [Ee][+-], and proceed as above */
 741                                         yyless(yyleng-2);
 742                                         SET_YYLLOC();
 743                                         yylval.str = pstrdup(yytext);
 744                                         return FCONST;
 745                                 }
 746
 747
 748 {identifier}    {
 749                                         const ScanKeyword *keyword;
 750                                         char               *ident;
 751
 752                                         SET_YYLLOC();
 753
 754                                         /* Is it a keyword? */
 755                                         keyword = ScanKeywordLookup(yytext);
 756                                         if (keyword != NULL)
 757                                         {
 758                                                 yylval.keyword = keyword->name;
 759                                                 return keyword->value;
 760                                         }
 761
 762                                         /*
 763                                          * No.  Convert the identifier to lower case, and truncate
 764                                          * if necessary.
 765                                          */
 766                                         ident = downcase_truncate_identifier(yytext, yyleng, true);
 767                                         yylval.str = ident;
 768                                         return IDENT;
 769                                 }
 770
 771 {other}                 {
 772                                         SET_YYLLOC();
 773                                         return yytext[0];
 774                                 }
 775
 776 <<EOF>>                 {
 777                                         SET_YYLLOC();
 778                                         yyterminate();
 779                                 }
 780
 781 %%
 782
 783 /*
 784  * lexer_errposition
 785  *              Report a lexical-analysis-time cursor position, if possible.
 786  *
 787  * This is expected to be used within an ereport() call.  The return value
 788  * is a dummy (always 0, in fact).
 789  *
 790  * Note that this can only be used for messages from the lexer itself,
 791  * since it depends on scanbuf to still be valid.
 792  */
 793 #if 0
 794 static int
 795 lexer_errposition(void)
 796 {
 797         int             pos;
 798
 799         /* Convert byte offset to character number */
 800         pos = pg_mbstrlen_with_len(scanbuf, yylloc) + 1;
 801         /* And pass it to the ereport mechanism */
 802         return errposition(pos);
 803 }
 804 #endif
 805
 806 /*
 807  * yyerror
 808  *              Report a lexer or grammar error.
 809  *
 810  * The message's cursor position identifies the most recently lexed token.
 811  * This is OK for syntax error messages from the Bison parser, because Bison
 812  * parsers report error as soon as the first unparsable token is reached.
 813  * Beware of using yyerror for other purposes, as the cursor position might
 814  * be misleading!
 815  */
 816 void
 817 yyerror(const char *message)
 818 {
 819         longjmp(jmpbuffer, 1);
 820 }
 821
 822
 823 /*
 824  * Called before any actual parsing is done
 825  */
 826 void
 827 scanner_init(const char *str)
 828 {
 829         int     slen = strlen(str);
 830
 831         /*
 832          * Might be left over after ereport()
 833          */
 834         if (YY_CURRENT_BUFFER)
 835                 yy_delete_buffer(YY_CURRENT_BUFFER);
 836
 837         /*
 838          * Make a scan buffer with special termination needed by flex.
 839          */
 840         scanbuf = palloc(slen + 2);
 841         memcpy(scanbuf, str, slen);
 842         scanbuf[slen] = scanbuf[slen + 1] = YY_END_OF_BUFFER_CHAR;
 843         scanbufhandle = yy_scan_buffer(scanbuf, slen + 2);
 844
 845         /* initialize literal buffer to a reasonable but expansible size */
 846         literalalloc = 1024;
 847         literalbuf = (char *) palloc(literalalloc);
 848         startlit();
 849
 850         BEGIN(INITIAL);
 851 }
 852
 853
 854 /*
 855  * Called after parsing is done to clean up after scanner_init()
 856  */
 857 void
 858 scanner_finish(void)
 859 {
 860         yy_delete_buffer(scanbufhandle);
 861         pfree(scanbuf);
 862 }
 863
 864
 865 static void
 866 addlit(char *ytext, int yleng)
 867 {
 868         /* enlarge buffer if needed */
 869         if ((literallen+yleng) >= literalalloc)
 870         {
 871                 do {
 872                         literalalloc *= 2;
 873                 } while ((literallen+yleng) >= literalalloc);
 874                 literalbuf = (char *) repalloc(literalbuf, literalalloc);
 875         }
 876         /* append new data, add trailing null */
 877         memcpy(literalbuf+literallen, ytext, yleng);
 878         literallen += yleng;
 879         literalbuf[literallen] = '\0';
 880 }
 881
 882
 883 static void
 884 addlitchar(unsigned char ychar)
 885 {
 886         /* enlarge buffer if needed */
 887         if ((literallen+1) >= literalalloc)
 888         {
 889                 literalalloc *= 2;
 890                 literalbuf = (char *) repalloc(literalbuf, literalalloc);
 891         }
 892         /* append new data, add trailing null */
 893         literalbuf[literallen] = ychar;
 894         literallen += 1;
 895         literalbuf[literallen] = '\0';
 896 }
 897
 898
 899 /*
 900  * One might be tempted to write pstrdup(literalbuf) instead of this,
 901  * but for long literals this is much faster because the length is
 902  * already known.
 903  */
 904 static char *
 905 litbufdup(void)
 906 {
 907         char *new;
 908
 909         new = palloc(literallen + 1);
 910         memcpy(new, literalbuf, literallen+1);
 911         return new;
 912 }
 913
 914
 915 static unsigned char
 916 unescape_single_char(unsigned char c)
 917 {
 918         /* Normally we wouldn't expect to see \n where n has its high bit set
 919          * but we set the flag to check the string if we do get it, so
 920          * that this doesn't become a way of getting around the coding validity
 921          * checks.
 922          */
 923         if (IS_HIGHBIT_SET(c))
 924                 saw_high_bit = true;
 925
 926         switch (c)
 927         {
 928                 case 'b':
 929                         return '\b';
 930                 case 'f':
 931                         return '\f';
 932                 case 'n':
 933                         return '\n';
 934                 case 'r':
 935                         return '\r';
 936                 case 't':
 937                         return '\t';
 938                 default:
 939                         return c;
 940         }
 941 }
 942
 943 static void
 944 check_string_escape_warning(unsigned char ychar)
 945 {
 946 #if 0
 947         if (ychar == '\'')
 948         {
 949                 if (warn_on_first_escape && escape_string_warning)
 950                         ereport(WARNING,
 951                                         (errcode(ERRCODE_NONSTANDARD_USE_OF_ESCAPE_CHARACTER),
 952                                          errmsg("nonstandard use of \\' in a string literal"),
 953                                          errhint("Use '' to write quotes in strings, or use the escape string syntax (E'...')."),
 954                                          lexer_errposition()));
 955                 warn_on_first_escape = false;   /* warn only once per string */
 956         }
 957         else if (ychar == '\\')
 958         {
 959                 if (warn_on_first_escape && escape_string_warning)
 960                         ereport(WARNING,
 961                                         (errcode(ERRCODE_NONSTANDARD_USE_OF_ESCAPE_CHARACTER),
 962                                          errmsg("nonstandard use of \\\\ in a string literal"),
 963                                          errhint("Use the escape string syntax for backslashes, e.g., E'\\\\'."),
 964                                          lexer_errposition()));
 965                 warn_on_first_escape = false;   /* warn only once per string */
 966         }
 967         else
 968                 check_escape_warning();
 969 #endif
 970 }
 971
 972 static void
 973 check_escape_warning(void)
 974 {
 975 #if 0
 976         if (warn_on_first_escape && escape_string_warning)
 977                 ereport(WARNING,
 978                                 (errcode(ERRCODE_NONSTANDARD_USE_OF_ESCAPE_CHARACTER),
 979                                  errmsg("nonstandard use of escape in a string literal"),
 980                                  errhint("Use the escape string syntax for escapes, e.g., E'\\r\\n'."),
 981                                  lexer_errposition()));
 982         warn_on_first_escape = false;   /* warn only once per string */
 983 #endif
 984 }
 985
 986 /*
 987  * downcase_truncate_identifier() --- do appropriate downcasing and
 988  * truncation of an unquoted identifier.  Optionally warn of truncation.
 989  *
 990  * Returns a palloc'd string containing the adjusted identifier.
 991  *
 992  * Note: in some usages the passed string is not null-terminated.
 993  *
 994  * Note: the API of this function is designed to allow for downcasing
 995  * transformations that increase the string length, but we don't yet
 996  * support that.  If you want to implement it, you'll need to fix
 997  * SplitIdentifierString() in utils/adt/varlena.c.
 998  */
 999 char *
1000 downcase_truncate_identifier(const char *ident, int len, int warn)
1001 {
1002         char       *result;
1003         int                     i;
1004
1005         result = palloc(len + 1);
1006
1007         /*
1008          * SQL99 specifies Unicode-aware case normalization, which we don't yet
1009          * have the infrastructure for.  Instead we use tolower() to provide a
1010          * locale-aware translation.  However, there are some locales where this
1011          * is not right either (eg, Turkish may do strange things with 'i' and
1012          * 'I').  Our current compromise is to use tolower() for characters with
1013          * the high bit set, and use an ASCII-only downcasing for 7-bit
1014          * characters.
1015          */
1016         for (i = 0; i < len; i++)
1017         {
1018                 unsigned char ch = (unsigned char) ident[i];
1019
1020                 if (ch >= 'A' && ch <= 'Z')
1021                         ch += 'a' - 'A';
1022                 else if (ch >= 0x80 && isupper(ch))
1023                         ch = tolower(ch);
1024                 result[i] = (char) ch;
1025         }
1026         result[i] = '\0';
1027
1028         if (i >= NAMEDATALEN)
1029                 truncate_identifier(result, i, warn);
1030
1031         return result;
1032 }
1033
1034 /*
1035  * truncate_identifier() --- truncate an identifier to NAMEDATALEN-1 bytes.
1036  *
1037  * The given string is modified in-place, if necessary.  A warning is
1038  * issued if requested.
1039  *
1040  * We require the caller to pass in the string length since this saves a
1041  * strlen() call in some common usages.
1042  */
1043 void
1044 truncate_identifier(char *ident, int len, int warn)
1045 {
1046         if (len >= NAMEDATALEN)
1047         {
1048                 len = strlen(ident); /*pg_mbcliplen(ident, len, NAMEDATALEN - 1);*/
1049 #if 0
1050                 if (warn)
1051                         ereport(NOTICE,
1052                                         (errcode(ERRCODE_NAME_TOO_LONG),
1053                                          errmsg("identifier \"%s\" will be truncated to \"%.*s\"",
1054                                                         ident, len, ident)));
1055 #endif
1056                 ident[len] = '\0';
1057         }
1058 }