sqlglot.parser
from __future__ import annotations

import logging
import re
import typing as t
import itertools
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")
    TCeilFloor = t.TypeVar("TCeilFloor", exp.Ceil, exp.Floor)

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]

# Used to detect alphabetical characters and +/- in timestamp literals
TIME_ZONE_RE: t.Pattern[str] = re.compile(r":.*?[a-zA-Z\+\-]")


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_bitwise()
        if reverse_args:
            this, expression = expression, this
        return self._parse_escape(self.expression(expr_type, this=this, expression=expression))

    return _parse_binary_range


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex:
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg)


def build_lower(args: t.List) -> exp.Lower | exp.Hex:
    # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg)


def build_upper(args: t.List) -> exp.Upper | exp.Hex:
    # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder
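
# Illustrative sketch (a doctest-style aside, not part of the module): these
# builders turn positional argument lists into AST nodes; the parser invokes
# them through the FUNCTIONS registry defined below. For example, MAP('a', 1)
# reaches build_var_map as an alternating key/value list:
#
#     >>> from sqlglot import exp
#     >>> from sqlglot.parser import build_var_map
#     >>> node = build_var_map([exp.Literal.string("a"), exp.Literal.number(1)])
#     >>> isinstance(node, exp.VarMap)
#     True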

def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)


def build_pad(args: t.List, is_left: bool = True):
    return exp.Pad(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        fill_pattern=seq_get(args, 2),
        is_left=is_left,
    )


def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    array_exp = exp_class(expressions=args)

    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return array_exp


def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    if len(args) == 2:
        source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
        return exp.ConvertTimezone(
            source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
        )

    return exp.ConvertTimezone.from_arg_list(args)


def build_trim(args: t.List, is_left: bool = True):
    return exp.Trim(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        position="LEADING" if is_left else "TRAILING",
    )


def build_coalesce(
    args: t.List, is_nvl: t.Optional[bool] = None, is_null: t.Optional[bool] = None
) -> exp.Coalesce:
    return exp.Coalesce(this=seq_get(args, 0), expressions=args[1:], is_nvl=is_nvl, is_null=is_null)


def build_locate_strposition(args: t.List):
    return exp.StrPosition(
        this=seq_get(args, 1),
        substr=seq_get(args, 0),
        position=seq_get(args, 2),
    )


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass
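
# Illustrative sketch: the metaclass precomputes tries so that multi-word
# SHOW/SET commands can be matched token by token. The keys below are made up
# to demonstrate the underlying trie helpers:
#
#     >>> from sqlglot.trie import TrieResult, in_trie, new_trie
#     >>> trie = new_trie(key.split(" ") for key in ("SHOW TABLES", "SHOW COLUMNS"))
#     >>> in_trie(trie, ["SHOW"])[0] == TrieResult.PREFIX
#     True
#     >>> in_trie(trie, ["SHOW", "TABLES"])[0] == TrieResult.EXISTS
#     True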

class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce),
        "ARRAY": lambda args, dialect: exp.Array(expressions=args),
        "ARRAYAGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "CHAR": lambda args: exp.Chr(expressions=args),
        "CHR": lambda args: exp.Chr(expressions=args),
        "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True),
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONVERT_TIMEZONE": build_convert_timezone,
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray(
            start=seq_get(args, 0),
            end=seq_get(args, 1),
            step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "HEX": build_hex,
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "LOWER": build_lower,
        "LPAD": lambda args: build_pad(args),
        "LEFTPAD": lambda args: build_pad(args),
        "LTRIM": lambda args: build_trim(args),
        "MOD": build_mod,
        "RIGHTPAD": lambda args: build_pad(args, is_left=False),
        "RPAD": lambda args: build_pad(args, is_left=False),
        "RTRIM": lambda args: build_trim(args, is_left=False),
        "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0))
        if len(args) != 2
        else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)),
        "STRPOS": exp.StrPosition.from_arg_list,
        "CHARINDEX": lambda args: build_locate_strposition(args),
        "INSTR": exp.StrPosition.from_arg_list,
        "LOCATE": lambda args: build_locate_strposition(args),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TO_HEX": build_hex,
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))),
        "UPPER": build_upper,
        "VAR_MAP": build_var_map,
    }
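
    # Illustrative sketch: each FUNCTIONS entry maps an uppercase name to a
    # callable that receives the parsed argument list (two-argument callables
    # also receive the active dialect):
    #
    #     >>> from sqlglot import exp
    #     >>> from sqlglot.parser import Parser
    #     >>> node = Parser.FUNCTIONS["CHAR"]([exp.Literal.number(65)])
    #     >>> isinstance(node, exp.Chr)
    #     True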

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
        TokenType.UNION,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LIST,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        TokenType.RANGE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.DYNAMIC,
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.UDOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.BLOB,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME2,
        TokenType.DATETIME64,
        TokenType.SMALLDATETIME,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.DECIMAL32,
        TokenType.DECIMAL64,
        TokenType.DECIMAL128,
        TokenType.DECIMAL256,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOGRAPHYPOINT,
        TokenType.GEOMETRY,
        TokenType.POINT,
        TokenType.RING,
        TokenType.LINESTRING,
        TokenType.MULTILINESTRING,
        TokenType.POLYGON,
        TokenType.MULTIPOLYGON,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.VECTOR,
        TokenType.VOID,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NOTHING,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        TokenType.DYNAMIC,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
        TokenType.DOUBLE: TokenType.UDOUBLE,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.DICTIONARY,
        TokenType.FILE_FORMAT,
        TokenType.MODEL,
        TokenType.NAMESPACE,
        TokenType.SCHEMA,
        TokenType.SEMANTIC_VIEW,
        TokenType.SEQUENCE,
        TokenType.SINK,
        TokenType.SOURCE,
        TokenType.STAGE,
        TokenType.STORAGE_INTEGRATION,
        TokenType.STREAMLIT,
        TokenType.TABLE,
        TokenType.TAG,
        TokenType.VIEW,
        TokenType.WAREHOUSE,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    ALTERABLES = {
        TokenType.INDEX,
        TokenType.TABLE,
        TokenType.VIEW,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.ALL,
        TokenType.ATTACH,
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.CUBE,
        TokenType.CURRENT_SCHEMA,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DETACH,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.EXPORT,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.GET,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LIMIT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.PUT,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.RENAME,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNNEST,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }
    ID_VAR_TOKENS.remove(TokenType.UNION)

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS

    COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS

    ARRAY_CONSTRUCTORS = {
        "ARRAY": exp.Array,
        "LIST": exp.List,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_SCHEMA,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GET,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.AND: exp.And,
    }

    ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.COLON_EQ: exp.PropertyEQ,
    }

    DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPNTZ,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.ANTI,
        TokenType.CROSS,
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.SEMI,
        TokenType.STRAIGHT_JOIN,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_assignment(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_assignment(),
        ),
    }
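
    # Illustrative sketch: LAMBDAS wires the arrow tokens to exp.Lambda (and
    # exp.Kwarg) when they appear inside a function call's parentheses:
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> sqlglot.parse_one("SELECT FILTER(arr, x -> x > 0)").find(exp.Lambda) is not None
    #     True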

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DOTCOLON: lambda self, this, to: self.expression(
            exp.JSONCast,
            this=this,
            to=to,
        ),
        TokenType.DCOLON: lambda self, this, to: self.build_cast(
            strict=self.STRICT_CAST, this=this, to=to
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    CAST_COLUMN_OPERATORS = {
        TokenType.DOTCOLON,
        TokenType.DCOLON,
    }

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_assignment(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Hint: lambda self: self._parse_hint_body(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Select: lambda self: self._parse_select(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Tuple: lambda self: self._parse_value(values=False),
        exp.Whens: lambda self: self._parse_when_matched(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }
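
    # Illustrative sketch: EXPRESSION_PARSERS backs parse_into, which the
    # public API exposes via the `into` argument:
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> cond = sqlglot.parse_one("x > 1 AND y < 2", into=exp.Condition)
    #     >>> isinstance(cond, exp.And)
    #     True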

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.ANALYZE: lambda self: self._parse_analyze(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.GRANT: lambda self: self._parse_grant(),
        TokenType.REVOKE: lambda self: self._parse_revoke(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self._parse_use(),
        TokenType.SEMICOLON: lambda self: exp.Semicolon(),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(
            exp.HexString,
            this=token.text,
            is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None,
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self._parse_star_ops(),
    }
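
    # Illustrative sketch: primary tokens map straight to leaf nodes.
    #
    #     >>> import sqlglot
    #     >>> exprs = sqlglot.parse_one("SELECT 'a', 1.5, TRUE, NULL").expressions
    #     >>> [type(e).__name__ for e in exprs]
    #     ['Literal', 'Literal', 'Boolean', 'Null']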

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match_set(self.COLON_PLACEHOLDER_TOKENS)
            else None
        ),
    }

    RANGE_PARSERS = {
        TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll),
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }

    PIPE_SYNTAX_TRANSFORM_PARSERS = {
        "AGGREGATE": lambda self, query: self._parse_pipe_syntax_aggregate(query),
        "AS": lambda self, query: self._build_pipe_cte(
            query, [exp.Star()], self._parse_table_alias()
        ),
        "EXTEND": lambda self, query: self._parse_pipe_syntax_extend(query),
        "LIMIT": lambda self, query: self._parse_pipe_syntax_limit(query),
        "ORDER BY": lambda self, query: query.order_by(
            self._parse_order(), append=False, copy=False
        ),
        "PIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query),
        "SELECT": lambda self, query: self._parse_pipe_syntax_select(query),
        "TABLESAMPLE": lambda self, query: self._parse_pipe_syntax_tablesample(query),
        "UNPIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query),
        "WHERE": lambda self, query: query.where(self._parse_where(), copy=False),
    }
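
    # Illustrative sketch: RANGE_PARSERS handles postfix predicates such as
    # BETWEEN, IN and LIKE; the binary_range_parser helper above also consumes
    # a trailing ESCAPE clause:
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> node = sqlglot.parse_one("a LIKE 'x!%' ESCAPE '!'", into=exp.Condition)
    #     >>> isinstance(node, exp.Escape) and isinstance(node.this, exp.Like)
    #     True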
"DISTRIBUTED": lambda self: self._parse_distributed_property(), 988 "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty), 989 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 990 "DISTKEY": lambda self: self._parse_distkey(), 991 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 992 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 993 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 994 "ENVIRONMENT": lambda self: self.expression( 995 exp.EnviromentProperty, expressions=self._parse_wrapped_csv(self._parse_assignment) 996 ), 997 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 998 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 999 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 1000 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1001 "FREESPACE": lambda self: self._parse_freespace(), 1002 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 1003 "HEAP": lambda self: self.expression(exp.HeapProperty), 1004 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 1005 "IMMUTABLE": lambda self: self.expression( 1006 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 1007 ), 1008 "INHERITS": lambda self: self.expression( 1009 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 1010 ), 1011 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 1012 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 1013 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 1014 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 1015 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 1016 "LIKE": lambda self: self._parse_create_like(), 1017 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 1018 "LOCK": lambda self: self._parse_locking(), 1019 "LOCKING": lambda self: self._parse_locking(), 1020 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 1021 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 1022 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 1023 "MODIFIES": lambda self: self._parse_modifies_property(), 1024 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 1025 "NO": lambda self: self._parse_no_property(), 1026 "ON": lambda self: self._parse_on_property(), 1027 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 1028 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 1029 "PARTITION": lambda self: self._parse_partitioned_of(), 1030 "PARTITION BY": lambda self: self._parse_partitioned_by(), 1031 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 1032 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 1033 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 1034 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 1035 "READS": lambda self: self._parse_reads_property(), 1036 "REMOTE": lambda self: self._parse_remote_with_connection(), 1037 "RETURNS": lambda self: self._parse_returns(), 1038 "STRICT": lambda self: self.expression(exp.StrictProperty), 1039 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 1040 "ROW": lambda self: self._parse_row(), 1041 "ROW_FORMAT": lambda self: 
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SECURE": lambda self: self.expression(exp.SecureProperty),
        "SECURITY": lambda self: self._parse_security(),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self._parse_settings_property(),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }
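
    # Illustrative sketch: PROPERTY_PARSERS keys on the keyword that introduces
    # a CREATE/ALTER property:
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> ddl = sqlglot.parse_one("CREATE TABLE t (x INT) ENGINE=InnoDB", read="mysql")
    #     >>> ddl.find(exp.EngineProperty) is not None
    #     True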

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_assignment),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint,
            this=self._parse_identifier() or self._parse_column(),
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WATERMARK": lambda self: self.expression(
            exp.WatermarkColumnConstraint,
            this=self._match(TokenType.FOR) and self._parse_column(),
            expression=self._match(TokenType.ALIAS) and self._parse_disjunction(),
        ),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
        "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(),
        "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(),
    }
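
    # Illustrative sketch: CONSTRAINT_PARSERS is consulted while parsing column
    # definitions and table-level constraints:
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> ddl = sqlglot.parse_one("CREATE TABLE t (x INT PRIMARY KEY, y TEXT NOT NULL)")
    #     >>> ddl.find(exp.PrimaryKeyColumnConstraint) is not None
    #     True
    #     >>> ddl.find(exp.NotNullColumnConstraint) is not None
    #     True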

    def _parse_partitioned_by_bucket_or_truncate(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.L_PAREN, advance=False):
            # Partitioning by bucket or truncate follows the syntax:
            # PARTITION BY (BUCKET(..) | TRUNCATE(..))
            # If we don't have parentheses after each keyword, we should instead parse this as an identifier
            self._retreat(self._index - 1)
            return None

        klass = (
            exp.PartitionedByBucket
            if self._prev.text.upper() == "BUCKET"
            else exp.PartitionByTruncate
        )

        args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column())
        this, expression = seq_get(args, 0), seq_get(args, 1)

        if isinstance(this, exp.Literal):
            # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order
            # - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)`
            # - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)`
            # Both variants are canonicalized to the latter, i.e. `bucket(<col name>, <num buckets>)`
            #
            # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning
            # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties
            this, expression = expression, this

        return self.expression(klass, this=this, expression=expression)

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "AS": lambda self: self._parse_select(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
        "SWAP": lambda self: self.expression(
            exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True)
        ),
    }

    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
        "WATERMARK",
        "BUCKET",
        "TRUNCATE",
    }

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "CONNECT_BY_ROOT": lambda self: self.expression(
            exp.ConnectByRoot, this=self._parse_column()
        ),
        "IF": lambda self: self._parse_if(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)
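
    # Illustrative sketch: FUNCTION_PARSERS (below) covers functions whose
    # argument lists need bespoke grammar rather than a plain CSV of expressions:
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> isinstance(sqlglot.parse_one("SELECT CAST(x AS INT)").expressions[0], exp.Cast)
    #     True
    #     >>> isinstance(sqlglot.parse_one("SELECT EXTRACT(YEAR FROM d)").expressions[0], exp.Extract)
    #     True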
"FLOOR": lambda self: self._parse_ceil_floor(exp.Floor), 1239 "GAP_FILL": lambda self: self._parse_gap_fill(), 1240 "JSON_OBJECT": lambda self: self._parse_json_object(), 1241 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1242 "JSON_TABLE": lambda self: self._parse_json_table(), 1243 "MATCH": lambda self: self._parse_match_against(), 1244 "NORMALIZE": lambda self: self._parse_normalize(), 1245 "OPENJSON": lambda self: self._parse_open_json(), 1246 "OVERLAY": lambda self: self._parse_overlay(), 1247 "POSITION": lambda self: self._parse_position(), 1248 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1249 "STRING_AGG": lambda self: self._parse_string_agg(), 1250 "SUBSTRING": lambda self: self._parse_substring(), 1251 "TRIM": lambda self: self._parse_trim(), 1252 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1253 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1254 "XMLELEMENT": lambda self: self.expression( 1255 exp.XMLElement, 1256 this=self._match_text_seq("NAME") and self._parse_id_var(), 1257 expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression), 1258 ), 1259 "XMLTABLE": lambda self: self._parse_xml_table(), 1260 } 1261 1262 QUERY_MODIFIER_PARSERS = { 1263 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1264 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1265 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1266 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1267 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1268 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1269 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1270 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1271 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1272 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1273 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1274 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1275 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1276 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1277 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1278 TokenType.CLUSTER_BY: lambda self: ( 1279 "cluster", 1280 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1281 ), 1282 TokenType.DISTRIBUTE_BY: lambda self: ( 1283 "distribute", 1284 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1285 ), 1286 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1287 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1288 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1289 } 1290 QUERY_MODIFIER_TOKENS = set(QUERY_MODIFIER_PARSERS) 1291 1292 SET_PARSERS = { 1293 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1294 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1295 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1296 "TRANSACTION": lambda self: self._parse_set_transaction(), 1297 } 1298 1299 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1300 1301 TYPE_LITERAL_PARSERS = { 1302 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1303 } 1304 1305 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, 

    TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(
        ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple()
    )

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = {
        "TYPE": ("EVOLUTION",),
        **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()),
    }

    PROCEDURE_OPTIONS: OPTIONS_TYPE = {}

    EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple())

    KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = {
        "NOT": ("ENFORCED",),
        "MATCH": (
            "FULL",
            "PARTIAL",
            "SIMPLE",
        ),
        "INITIALLY": ("DEFERRED", "IMMEDIATE"),
        "USING": (
            "BTREE",
            "HASH",
        ),
        **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()),
    }

    WINDOW_EXCLUDE_OPTIONS: OPTIONS_TYPE = {
        "NO": ("OTHERS",),
        "CURRENT": ("ROW",),
        **dict.fromkeys(("GROUP", "TIES"), tuple()),
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"}
    HISTORICAL_DATA_KIND = {"OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.INDEX,
        TokenType.KEY,
        TokenType.PRIMARY_KEY,
        TokenType.UNIQUE,
    }

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

    IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"}

    ODBC_DATETIME_LITERALS = {
        "d": exp.Date,
        "t": exp.Time,
        "ts": exp.Timestamp,
    }

    ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"}

    PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN}

    # The style options for the DESCRIBE statement
    DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"}

    # The style options for the ANALYZE statement
    ANALYZE_STYLES = {
        "BUFFER_USAGE_LIMIT",
        "FULL",
        "LOCAL",
        "NO_WRITE_TO_BINLOG",
        "SAMPLE",
        "SKIP_LOCKED",
        "VERBOSE",
    }

    ANALYZE_EXPRESSION_PARSERS = {
        "ALL": lambda self: self._parse_analyze_columns(),
        "COMPUTE": lambda self: self._parse_analyze_statistics(),
        "DELETE": lambda self: self._parse_analyze_delete(),
        "DROP": lambda self: self._parse_analyze_histogram(),
        "ESTIMATE": lambda self: self._parse_analyze_statistics(),
        "LIST": lambda self: self._parse_analyze_list(),
        "PREDICATE": lambda self: self._parse_analyze_columns(),
        "UPDATE": lambda self: self._parse_analyze_histogram(),
        "VALIDATE": lambda self: self._parse_analyze_validate(),
    }

    PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"}

    AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET)

    OPERATION_MODIFIERS: t.Set[str] = set()

    RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"}

    MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows)

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True
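
    # Illustrative sketch: dialects tune these flags by shipping their own
    # Parser subclass (MyDialect below is made up for illustration):
    #
    #     >>> import sqlglot
    #     >>> from sqlglot.dialects.dialect import Dialect
    #     >>> from sqlglot.parser import Parser as BaseParser
    #     >>> class MyDialect(Dialect):
    #     ...     class Parser(BaseParser):
    #     ...         STRING_ALIASES = True  # allow SELECT COUNT(*) 'count'
    #     >>> sqlglot.parse_one("SELECT COUNT(*) 'count'", read=MyDialect).expressions[0].alias
    #     'count'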

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    # Whether the `name AS expr` schema/column constraint requires parentheses around `expr`
    WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True

    # Whether the 'AS' keyword is optional in the CTE definition syntax
    OPTIONAL_ALIAS_TOKEN_CTE = True

    # Whether renaming a column with an ALTER statement requires the presence of the COLUMN keyword
    ALTER_RENAME_REQUIRES_COLUMN = True

    # Whether all join types have the same precedence, i.e., they "naturally" produce a left-deep tree.
    # In standard SQL, joins that use the JOIN keyword take higher precedence than comma-joins. That is
    # to say, JOIN operators happen before comma operators. This is not the case in some dialects, such
    # as BigQuery, where all joins have the same precedence.
    JOINS_HAVE_EQUAL_PRECEDENCE = False

    # Whether TIMESTAMP <literal> can produce a zone-aware timestamp
    ZONE_AWARE_TIMESTAMP_CONSTRUCTOR = False

    # Whether map literals support arbitrary expressions as keys.
    # When True, allows complex keys like arrays or literals: {[1, 2]: 3}, {1: 2} (e.g. DuckDB).
    # When False, keys are typically restricted to identifiers.
    MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS = False

    # Whether JSON_EXTRACT requires a JSON expression as the first argument, e.g. this
    # is true for Snowflake but not for BigQuery, which can also process strings
    JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_pipe_cte_counter",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None
        self._pipe_cte_counter = 0

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
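
    # Illustrative sketch: parse() consumes tokens, so the typical flow pairs
    # it with the dialect's tokenizer (sqlglot.parse/parse_one wrap exactly this):
    #
    #     >>> from sqlglot.dialects.dialect import Dialect
    #     >>> dialect = Dialect.get_or_raise("duckdb")
    #     >>> sql = "SELECT 1; SELECT 2"
    #     >>> len(dialect.parser().parse(dialect.tokenize(sql), sql))
    #     2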
1612 """ 1613 return self._parse( 1614 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1615 ) 1616 1617 def parse_into( 1618 self, 1619 expression_types: exp.IntoType, 1620 raw_tokens: t.List[Token], 1621 sql: t.Optional[str] = None, 1622 ) -> t.List[t.Optional[exp.Expression]]: 1623 """ 1624 Parses a list of tokens into a given Expression type. If a collection of Expression 1625 types is given instead, this method will try to parse the token list into each one 1626 of them, stopping at the first for which the parsing succeeds. 1627 1628 Args: 1629 expression_types: The expression type(s) to try and parse the token list into. 1630 raw_tokens: The list of tokens. 1631 sql: The original SQL string, used to produce helpful debug messages. 1632 1633 Returns: 1634 The target Expression. 1635 """ 1636 errors = [] 1637 for expression_type in ensure_list(expression_types): 1638 parser = self.EXPRESSION_PARSERS.get(expression_type) 1639 if not parser: 1640 raise TypeError(f"No parser registered for {expression_type}") 1641 1642 try: 1643 return self._parse(parser, raw_tokens, sql) 1644 except ParseError as e: 1645 e.errors[0]["into_expression"] = expression_type 1646 errors.append(e) 1647 1648 raise ParseError( 1649 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1650 errors=merge_errors(errors), 1651 ) from errors[-1] 1652 1653 def _parse( 1654 self, 1655 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1656 raw_tokens: t.List[Token], 1657 sql: t.Optional[str] = None, 1658 ) -> t.List[t.Optional[exp.Expression]]: 1659 self.reset() 1660 self.sql = sql or "" 1661 1662 total = len(raw_tokens) 1663 chunks: t.List[t.List[Token]] = [[]] 1664 1665 for i, token in enumerate(raw_tokens): 1666 if token.token_type == TokenType.SEMICOLON: 1667 if token.comments: 1668 chunks.append([token]) 1669 1670 if i < total - 1: 1671 chunks.append([]) 1672 else: 1673 chunks[-1].append(token) 1674 1675 expressions = [] 1676 1677 for tokens in chunks: 1678 self._index = -1 1679 self._tokens = tokens 1680 self._advance() 1681 1682 expressions.append(parse_method(self)) 1683 1684 if self._index < len(self._tokens): 1685 self.raise_error("Invalid expression / Unexpected token") 1686 1687 self.check_errors() 1688 1689 return expressions 1690 1691 def check_errors(self) -> None: 1692 """Logs or raises any found errors, depending on the chosen error level setting.""" 1693 if self.error_level == ErrorLevel.WARN: 1694 for error in self.errors: 1695 logger.error(str(error)) 1696 elif self.error_level == ErrorLevel.RAISE and self.errors: 1697 raise ParseError( 1698 concat_messages(self.errors, self.max_errors), 1699 errors=merge_errors(self.errors), 1700 ) 1701 1702 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1703 """ 1704 Appends an error in the list of recorded errors or raises it, depending on the chosen 1705 error level setting. 1706 """ 1707 token = token or self._curr or self._prev or Token.string("") 1708 start = token.start 1709 end = token.end + 1 1710 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1711 highlight = self.sql[start:end] 1712 end_context = self.sql[end : end + self.error_message_context] 1713 1714 error = ParseError.new( 1715 f"{message}. 
Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        self._warn_unsupported()
        return self.expression(
            exp.Command,
            comments=self._prev_comments,
            this=self._prev.text.upper(),
            expression=self._parse_string(),
        )

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
        This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
        solve this by setting & resetting the parser state accordingly.
        """
        index = self._index
        error_level = self.error_level

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            comments = self._prev_comments
            stmt = self.STATEMENT_PARSERS[self._prev.token_type](self)
            stmt.add_comments(comments, prepend=True)
            return stmt

        if self._match_set(self.dialect.tokenizer_class.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return
self._parse_query_modifiers(expression) 1925 1926 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1927 start = self._prev 1928 temporary = self._match(TokenType.TEMPORARY) 1929 materialized = self._match_text_seq("MATERIALIZED") 1930 1931 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1932 if not kind: 1933 return self._parse_as_command(start) 1934 1935 concurrently = self._match_text_seq("CONCURRENTLY") 1936 if_exists = exists or self._parse_exists() 1937 1938 if kind == "COLUMN": 1939 this = self._parse_column() 1940 else: 1941 this = self._parse_table_parts( 1942 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1943 ) 1944 1945 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1946 1947 if self._match(TokenType.L_PAREN, advance=False): 1948 expressions = self._parse_wrapped_csv(self._parse_types) 1949 else: 1950 expressions = None 1951 1952 return self.expression( 1953 exp.Drop, 1954 exists=if_exists, 1955 this=this, 1956 expressions=expressions, 1957 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1958 temporary=temporary, 1959 materialized=materialized, 1960 cascade=self._match_text_seq("CASCADE"), 1961 constraints=self._match_text_seq("CONSTRAINTS"), 1962 purge=self._match_text_seq("PURGE"), 1963 cluster=cluster, 1964 concurrently=concurrently, 1965 ) 1966 1967 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1968 return ( 1969 self._match_text_seq("IF") 1970 and (not not_ or self._match(TokenType.NOT)) 1971 and self._match(TokenType.EXISTS) 1972 ) 1973 1974 def _parse_create(self) -> exp.Create | exp.Command: 1975 # Note: this can't be None because we've matched a statement parser 1976 start = self._prev 1977 1978 replace = ( 1979 start.token_type == TokenType.REPLACE 1980 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1981 or self._match_pair(TokenType.OR, TokenType.ALTER) 1982 ) 1983 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1984 1985 unique = self._match(TokenType.UNIQUE) 1986 1987 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1988 clustered = True 1989 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1990 "COLUMNSTORE" 1991 ): 1992 clustered = False 1993 else: 1994 clustered = None 1995 1996 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1997 self._advance() 1998 1999 properties = None 2000 create_token = self._match_set(self.CREATABLES) and self._prev 2001 2002 if not create_token: 2003 # exp.Properties.Location.POST_CREATE 2004 properties = self._parse_properties() 2005 create_token = self._match_set(self.CREATABLES) and self._prev 2006 2007 if not properties or not create_token: 2008 return self._parse_as_command(start) 2009 2010 concurrently = self._match_text_seq("CONCURRENTLY") 2011 exists = self._parse_exists(not_=True) 2012 this = None 2013 expression: t.Optional[exp.Expression] = None 2014 indexes = None 2015 no_schema_binding = None 2016 begin = None 2017 end = None 2018 clone = None 2019 2020 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 2021 nonlocal properties 2022 if properties and temp_props: 2023 properties.expressions.extend(temp_props.expressions) 2024 elif temp_props: 2025 properties = temp_props 2026 2027 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 2028 this = self._parse_user_defined_function(kind=create_token.token_type) 2029 2030 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type 
signature) 2031 extend_props(self._parse_properties()) 2032 2033 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 2034 extend_props(self._parse_properties()) 2035 2036 if not expression: 2037 if self._match(TokenType.COMMAND): 2038 expression = self._parse_as_command(self._prev) 2039 else: 2040 begin = self._match(TokenType.BEGIN) 2041 return_ = self._match_text_seq("RETURN") 2042 2043 if self._match(TokenType.STRING, advance=False): 2044 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 2045 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 2046 expression = self._parse_string() 2047 extend_props(self._parse_properties()) 2048 else: 2049 expression = self._parse_user_defined_function_expression() 2050 2051 end = self._match_text_seq("END") 2052 2053 if return_: 2054 expression = self.expression(exp.Return, this=expression) 2055 elif create_token.token_type == TokenType.INDEX: 2056 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 2057 if not self._match(TokenType.ON): 2058 index = self._parse_id_var() 2059 anonymous = False 2060 else: 2061 index = None 2062 anonymous = True 2063 2064 this = self._parse_index(index=index, anonymous=anonymous) 2065 elif create_token.token_type in self.DB_CREATABLES: 2066 table_parts = self._parse_table_parts( 2067 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 2068 ) 2069 2070 # exp.Properties.Location.POST_NAME 2071 self._match(TokenType.COMMA) 2072 extend_props(self._parse_properties(before=True)) 2073 2074 this = self._parse_schema(this=table_parts) 2075 2076 # exp.Properties.Location.POST_SCHEMA and POST_WITH 2077 extend_props(self._parse_properties()) 2078 2079 has_alias = self._match(TokenType.ALIAS) 2080 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 2081 # exp.Properties.Location.POST_ALIAS 2082 extend_props(self._parse_properties()) 2083 2084 if create_token.token_type == TokenType.SEQUENCE: 2085 expression = self._parse_types() 2086 props = self._parse_properties() 2087 if props: 2088 sequence_props = exp.SequenceProperties() 2089 options = [] 2090 for prop in props: 2091 if isinstance(prop, exp.SequenceProperties): 2092 for arg, value in prop.args.items(): 2093 if arg == "options": 2094 options.extend(value) 2095 else: 2096 sequence_props.set(arg, value) 2097 prop.pop() 2098 2099 if options: 2100 sequence_props.set("options", options) 2101 2102 props.append("expressions", sequence_props) 2103 extend_props(props) 2104 else: 2105 expression = self._parse_ddl_select() 2106 2107 # Some dialects also support using a table as an alias instead of a SELECT. 2108 # Here we fallback to this as an alternative. 
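            # e.g. a Teradata-style "CREATE TABLE t2 AS t1 WITH DATA", where the
            # token after AS is a table reference rather than a SELECT (an
            # illustrative case; the exact set of dialects accepting this varies).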
2109 if not expression and has_alias: 2110 expression = self._try_parse(self._parse_table_parts) 2111 2112 if create_token.token_type == TokenType.TABLE: 2113 # exp.Properties.Location.POST_EXPRESSION 2114 extend_props(self._parse_properties()) 2115 2116 indexes = [] 2117 while True: 2118 index = self._parse_index() 2119 2120 # exp.Properties.Location.POST_INDEX 2121 extend_props(self._parse_properties()) 2122 if not index: 2123 break 2124 else: 2125 self._match(TokenType.COMMA) 2126 indexes.append(index) 2127 elif create_token.token_type == TokenType.VIEW: 2128 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 2129 no_schema_binding = True 2130 elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE): 2131 extend_props(self._parse_properties()) 2132 2133 shallow = self._match_text_seq("SHALLOW") 2134 2135 if self._match_texts(self.CLONE_KEYWORDS): 2136 copy = self._prev.text.lower() == "copy" 2137 clone = self.expression( 2138 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 2139 ) 2140 2141 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 2142 return self._parse_as_command(start) 2143 2144 create_kind_text = create_token.text.upper() 2145 return self.expression( 2146 exp.Create, 2147 this=this, 2148 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 2149 replace=replace, 2150 refresh=refresh, 2151 unique=unique, 2152 expression=expression, 2153 exists=exists, 2154 properties=properties, 2155 indexes=indexes, 2156 no_schema_binding=no_schema_binding, 2157 begin=begin, 2158 end=end, 2159 clone=clone, 2160 concurrently=concurrently, 2161 clustered=clustered, 2162 ) 2163 2164 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 2165 seq = exp.SequenceProperties() 2166 2167 options = [] 2168 index = self._index 2169 2170 while self._curr: 2171 self._match(TokenType.COMMA) 2172 if self._match_text_seq("INCREMENT"): 2173 self._match_text_seq("BY") 2174 self._match_text_seq("=") 2175 seq.set("increment", self._parse_term()) 2176 elif self._match_text_seq("MINVALUE"): 2177 seq.set("minvalue", self._parse_term()) 2178 elif self._match_text_seq("MAXVALUE"): 2179 seq.set("maxvalue", self._parse_term()) 2180 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 2181 self._match_text_seq("=") 2182 seq.set("start", self._parse_term()) 2183 elif self._match_text_seq("CACHE"): 2184 # T-SQL allows empty CACHE which is initialized dynamically 2185 seq.set("cache", self._parse_number() or True) 2186 elif self._match_text_seq("OWNED", "BY"): 2187 # "OWNED BY NONE" is the default 2188 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 2189 else: 2190 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 2191 if opt: 2192 options.append(opt) 2193 else: 2194 break 2195 2196 seq.set("options", options if options else None) 2197 return None if self._index == index else seq 2198 2199 def _parse_property_before(self) -> t.Optional[exp.Expression]: 2200 # only used for teradata currently 2201 self._match(TokenType.COMMA) 2202 2203 kwargs = { 2204 "no": self._match_text_seq("NO"), 2205 "dual": self._match_text_seq("DUAL"), 2206 "before": self._match_text_seq("BEFORE"), 2207 "default": self._match_text_seq("DEFAULT"), 2208 "local": (self._match_text_seq("LOCAL") and "LOCAL") 2209 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 2210 "after": self._match_text_seq("AFTER"), 2211 "minimum": 
self._match_texts(("MIN", "MINIMUM")), 2212 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2213 } 2214 2215 if self._match_texts(self.PROPERTY_PARSERS): 2216 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2217 try: 2218 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2219 except TypeError: 2220 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2221 2222 return None 2223 2224 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2225 return self._parse_wrapped_csv(self._parse_property) 2226 2227 def _parse_property(self) -> t.Optional[exp.Expression]: 2228 if self._match_texts(self.PROPERTY_PARSERS): 2229 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2230 2231 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2232 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2233 2234 if self._match_text_seq("COMPOUND", "SORTKEY"): 2235 return self._parse_sortkey(compound=True) 2236 2237 if self._match_text_seq("SQL", "SECURITY"): 2238 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2239 2240 index = self._index 2241 2242 seq_props = self._parse_sequence_properties() 2243 if seq_props: 2244 return seq_props 2245 2246 self._retreat(index) 2247 key = self._parse_column() 2248 2249 if not self._match(TokenType.EQ): 2250 self._retreat(index) 2251 return None 2252 2253 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2254 if isinstance(key, exp.Column): 2255 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2256 2257 value = self._parse_bitwise() or self._parse_var(any_token=True) 2258 2259 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2260 if isinstance(value, exp.Column): 2261 value = exp.var(value.name) 2262 2263 return self.expression(exp.Property, this=key, value=value) 2264 2265 def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]: 2266 if self._match_text_seq("BY"): 2267 return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string()) 2268 2269 self._match(TokenType.ALIAS) 2270 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2271 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2272 2273 return self.expression( 2274 exp.FileFormatProperty, 2275 this=( 2276 self.expression( 2277 exp.InputOutputFormat, 2278 input_format=input_format, 2279 output_format=output_format, 2280 ) 2281 if input_format or output_format 2282 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2283 ), 2284 hive_format=True, 2285 ) 2286 2287 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2288 field = self._parse_field() 2289 if isinstance(field, exp.Identifier) and not field.quoted: 2290 field = exp.var(field) 2291 2292 return field 2293 2294 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2295 self._match(TokenType.EQ) 2296 self._match(TokenType.ALIAS) 2297 2298 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2299 2300 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2301 properties = [] 2302 while True: 2303 if before: 2304 prop = self._parse_property_before() 2305 else: 2306 prop = self._parse_property() 2307 if not prop: 2308 break 2309 for p in ensure_list(prop): 2310 properties.append(p) 
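        # At this point `properties` holds every parsed property node; e.g. a
        # generic "key = value" option comes back (roughly) as
        # exp.Property(this=exp.var("key"), value=<parsed value expression>).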
2311 2312 if properties: 2313 return self.expression(exp.Properties, expressions=properties) 2314 2315 return None 2316 2317 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2318 return self.expression( 2319 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2320 ) 2321 2322 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2323 if self._match_texts(("NONE", "DEFINER", "INVOKER")): 2324 security_specifier = self._prev.text.upper() 2325 return self.expression(exp.SecurityProperty, this=security_specifier) 2326 return None 2327 2328 def _parse_settings_property(self) -> exp.SettingsProperty: 2329 return self.expression( 2330 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2331 ) 2332 2333 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2334 if self._index >= 2: 2335 pre_volatile_token = self._tokens[self._index - 2] 2336 else: 2337 pre_volatile_token = None 2338 2339 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2340 return exp.VolatileProperty() 2341 2342 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2343 2344 def _parse_retention_period(self) -> exp.Var: 2345 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2346 number = self._parse_number() 2347 number_str = f"{number} " if number else "" 2348 unit = self._parse_var(any_token=True) 2349 return exp.var(f"{number_str}{unit}") 2350 2351 def _parse_system_versioning_property( 2352 self, with_: bool = False 2353 ) -> exp.WithSystemVersioningProperty: 2354 self._match(TokenType.EQ) 2355 prop = self.expression( 2356 exp.WithSystemVersioningProperty, 2357 **{ # type: ignore 2358 "on": True, 2359 "with": with_, 2360 }, 2361 ) 2362 2363 if self._match_text_seq("OFF"): 2364 prop.set("on", False) 2365 return prop 2366 2367 self._match(TokenType.ON) 2368 if self._match(TokenType.L_PAREN): 2369 while self._curr and not self._match(TokenType.R_PAREN): 2370 if self._match_text_seq("HISTORY_TABLE", "="): 2371 prop.set("this", self._parse_table_parts()) 2372 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2373 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2374 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2375 prop.set("retention_period", self._parse_retention_period()) 2376 2377 self._match(TokenType.COMMA) 2378 2379 return prop 2380 2381 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2382 self._match(TokenType.EQ) 2383 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2384 prop = self.expression(exp.DataDeletionProperty, on=on) 2385 2386 if self._match(TokenType.L_PAREN): 2387 while self._curr and not self._match(TokenType.R_PAREN): 2388 if self._match_text_seq("FILTER_COLUMN", "="): 2389 prop.set("filter_column", self._parse_column()) 2390 elif self._match_text_seq("RETENTION_PERIOD", "="): 2391 prop.set("retention_period", self._parse_retention_period()) 2392 2393 self._match(TokenType.COMMA) 2394 2395 return prop 2396 2397 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2398 kind = "HASH" 2399 expressions: t.Optional[t.List[exp.Expression]] = None 2400 if self._match_text_seq("BY", "HASH"): 2401 expressions = self._parse_wrapped_csv(self._parse_id_var) 2402 elif self._match_text_seq("BY", "RANDOM"): 2403 kind = "RANDOM" 2404 2405 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2406 
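        # e.g. Doris/StarRocks: DISTRIBUTED BY HASH(k1) BUCKETS 8,
        # DISTRIBUTED BY HASH(k1) BUCKETS AUTO, or DISTRIBUTED BY RANDOM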
buckets: t.Optional[exp.Expression] = None 2407 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2408 buckets = self._parse_number() 2409 2410 return self.expression( 2411 exp.DistributedByProperty, 2412 expressions=expressions, 2413 kind=kind, 2414 buckets=buckets, 2415 order=self._parse_order(), 2416 ) 2417 2418 def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E: 2419 self._match_text_seq("KEY") 2420 expressions = self._parse_wrapped_id_vars() 2421 return self.expression(expr_type, expressions=expressions) 2422 2423 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2424 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2425 prop = self._parse_system_versioning_property(with_=True) 2426 self._match_r_paren() 2427 return prop 2428 2429 if self._match(TokenType.L_PAREN, advance=False): 2430 return self._parse_wrapped_properties() 2431 2432 if self._match_text_seq("JOURNAL"): 2433 return self._parse_withjournaltable() 2434 2435 if self._match_texts(self.VIEW_ATTRIBUTES): 2436 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2437 2438 if self._match_text_seq("DATA"): 2439 return self._parse_withdata(no=False) 2440 elif self._match_text_seq("NO", "DATA"): 2441 return self._parse_withdata(no=True) 2442 2443 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2444 return self._parse_serde_properties(with_=True) 2445 2446 if self._match(TokenType.SCHEMA): 2447 return self.expression( 2448 exp.WithSchemaBindingProperty, 2449 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2450 ) 2451 2452 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2453 return self.expression( 2454 exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option) 2455 ) 2456 2457 if not self._next: 2458 return None 2459 2460 return self._parse_withisolatedloading() 2461 2462 def _parse_procedure_option(self) -> exp.Expression | None: 2463 if self._match_text_seq("EXECUTE", "AS"): 2464 return self.expression( 2465 exp.ExecuteAsProperty, 2466 this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False) 2467 or self._parse_string(), 2468 ) 2469 2470 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2471 2472 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2473 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2474 self._match(TokenType.EQ) 2475 2476 user = self._parse_id_var() 2477 self._match(TokenType.PARAMETER) 2478 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2479 2480 if not user or not host: 2481 return None 2482 2483 return exp.DefinerProperty(this=f"{user}@{host}") 2484 2485 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2486 self._match(TokenType.TABLE) 2487 self._match(TokenType.EQ) 2488 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2489 2490 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2491 return self.expression(exp.LogProperty, no=no) 2492 2493 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2494 return self.expression(exp.JournalProperty, **kwargs) 2495 2496 def _parse_checksum(self) -> exp.ChecksumProperty: 2497 self._match(TokenType.EQ) 2498 2499 on = None 2500 if self._match(TokenType.ON): 2501 on = True 2502 elif self._match_text_seq("OFF"): 2503 on = False 2504 2505 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2506 2507 def 
_parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2508 return self.expression( 2509 exp.Cluster, 2510 expressions=( 2511 self._parse_wrapped_csv(self._parse_ordered) 2512 if wrapped 2513 else self._parse_csv(self._parse_ordered) 2514 ), 2515 ) 2516 2517 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2518 self._match_text_seq("BY") 2519 2520 self._match_l_paren() 2521 expressions = self._parse_csv(self._parse_column) 2522 self._match_r_paren() 2523 2524 if self._match_text_seq("SORTED", "BY"): 2525 self._match_l_paren() 2526 sorted_by = self._parse_csv(self._parse_ordered) 2527 self._match_r_paren() 2528 else: 2529 sorted_by = None 2530 2531 self._match(TokenType.INTO) 2532 buckets = self._parse_number() 2533 self._match_text_seq("BUCKETS") 2534 2535 return self.expression( 2536 exp.ClusteredByProperty, 2537 expressions=expressions, 2538 sorted_by=sorted_by, 2539 buckets=buckets, 2540 ) 2541 2542 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2543 if not self._match_text_seq("GRANTS"): 2544 self._retreat(self._index - 1) 2545 return None 2546 2547 return self.expression(exp.CopyGrantsProperty) 2548 2549 def _parse_freespace(self) -> exp.FreespaceProperty: 2550 self._match(TokenType.EQ) 2551 return self.expression( 2552 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2553 ) 2554 2555 def _parse_mergeblockratio( 2556 self, no: bool = False, default: bool = False 2557 ) -> exp.MergeBlockRatioProperty: 2558 if self._match(TokenType.EQ): 2559 return self.expression( 2560 exp.MergeBlockRatioProperty, 2561 this=self._parse_number(), 2562 percent=self._match(TokenType.PERCENT), 2563 ) 2564 2565 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2566 2567 def _parse_datablocksize( 2568 self, 2569 default: t.Optional[bool] = None, 2570 minimum: t.Optional[bool] = None, 2571 maximum: t.Optional[bool] = None, 2572 ) -> exp.DataBlocksizeProperty: 2573 self._match(TokenType.EQ) 2574 size = self._parse_number() 2575 2576 units = None 2577 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2578 units = self._prev.text 2579 2580 return self.expression( 2581 exp.DataBlocksizeProperty, 2582 size=size, 2583 units=units, 2584 default=default, 2585 minimum=minimum, 2586 maximum=maximum, 2587 ) 2588 2589 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2590 self._match(TokenType.EQ) 2591 always = self._match_text_seq("ALWAYS") 2592 manual = self._match_text_seq("MANUAL") 2593 never = self._match_text_seq("NEVER") 2594 default = self._match_text_seq("DEFAULT") 2595 2596 autotemp = None 2597 if self._match_text_seq("AUTOTEMP"): 2598 autotemp = self._parse_schema() 2599 2600 return self.expression( 2601 exp.BlockCompressionProperty, 2602 always=always, 2603 manual=manual, 2604 never=never, 2605 default=default, 2606 autotemp=autotemp, 2607 ) 2608 2609 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2610 index = self._index 2611 no = self._match_text_seq("NO") 2612 concurrent = self._match_text_seq("CONCURRENT") 2613 2614 if not self._match_text_seq("ISOLATED", "LOADING"): 2615 self._retreat(index) 2616 return None 2617 2618 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2619 return self.expression( 2620 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2621 ) 2622 2623 def _parse_locking(self) -> exp.LockingProperty: 2624 if self._match(TokenType.TABLE): 2625 kind = "TABLE" 2626 elif 
self._match(TokenType.VIEW): 2627 kind = "VIEW" 2628 elif self._match(TokenType.ROW): 2629 kind = "ROW" 2630 elif self._match_text_seq("DATABASE"): 2631 kind = "DATABASE" 2632 else: 2633 kind = None 2634 2635 if kind in ("DATABASE", "TABLE", "VIEW"): 2636 this = self._parse_table_parts() 2637 else: 2638 this = None 2639 2640 if self._match(TokenType.FOR): 2641 for_or_in = "FOR" 2642 elif self._match(TokenType.IN): 2643 for_or_in = "IN" 2644 else: 2645 for_or_in = None 2646 2647 if self._match_text_seq("ACCESS"): 2648 lock_type = "ACCESS" 2649 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2650 lock_type = "EXCLUSIVE" 2651 elif self._match_text_seq("SHARE"): 2652 lock_type = "SHARE" 2653 elif self._match_text_seq("READ"): 2654 lock_type = "READ" 2655 elif self._match_text_seq("WRITE"): 2656 lock_type = "WRITE" 2657 elif self._match_text_seq("CHECKSUM"): 2658 lock_type = "CHECKSUM" 2659 else: 2660 lock_type = None 2661 2662 override = self._match_text_seq("OVERRIDE") 2663 2664 return self.expression( 2665 exp.LockingProperty, 2666 this=this, 2667 kind=kind, 2668 for_or_in=for_or_in, 2669 lock_type=lock_type, 2670 override=override, 2671 ) 2672 2673 def _parse_partition_by(self) -> t.List[exp.Expression]: 2674 if self._match(TokenType.PARTITION_BY): 2675 return self._parse_csv(self._parse_assignment) 2676 return [] 2677 2678 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2679 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2680 if self._match_text_seq("MINVALUE"): 2681 return exp.var("MINVALUE") 2682 if self._match_text_seq("MAXVALUE"): 2683 return exp.var("MAXVALUE") 2684 return self._parse_bitwise() 2685 2686 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2687 expression = None 2688 from_expressions = None 2689 to_expressions = None 2690 2691 if self._match(TokenType.IN): 2692 this = self._parse_wrapped_csv(self._parse_bitwise) 2693 elif self._match(TokenType.FROM): 2694 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2695 self._match_text_seq("TO") 2696 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2697 elif self._match_text_seq("WITH", "(", "MODULUS"): 2698 this = self._parse_number() 2699 self._match_text_seq(",", "REMAINDER") 2700 expression = self._parse_number() 2701 self._match_r_paren() 2702 else: 2703 self.raise_error("Failed to parse partition bound spec.") 2704 2705 return self.expression( 2706 exp.PartitionBoundSpec, 2707 this=this, 2708 expression=expression, 2709 from_expressions=from_expressions, 2710 to_expressions=to_expressions, 2711 ) 2712 2713 # https://www.postgresql.org/docs/current/sql-createtable.html 2714 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2715 if not self._match_text_seq("OF"): 2716 self._retreat(self._index - 1) 2717 return None 2718 2719 this = self._parse_table(schema=True) 2720 2721 if self._match(TokenType.DEFAULT): 2722 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2723 elif self._match_text_seq("FOR", "VALUES"): 2724 expression = self._parse_partition_bound_spec() 2725 else: 2726 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2727 2728 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2729 2730 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2731 self._match(TokenType.EQ) 2732 return self.expression( 2733 exp.PartitionedByProperty, 2734 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2735 ) 2736 2737 def 
_parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2738 if self._match_text_seq("AND", "STATISTICS"): 2739 statistics = True 2740 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2741 statistics = False 2742 else: 2743 statistics = None 2744 2745 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2746 2747 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2748 if self._match_text_seq("SQL"): 2749 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2750 return None 2751 2752 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2753 if self._match_text_seq("SQL", "DATA"): 2754 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2755 return None 2756 2757 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2758 if self._match_text_seq("PRIMARY", "INDEX"): 2759 return exp.NoPrimaryIndexProperty() 2760 if self._match_text_seq("SQL"): 2761 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2762 return None 2763 2764 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2765 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2766 return exp.OnCommitProperty() 2767 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2768 return exp.OnCommitProperty(delete=True) 2769 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2770 2771 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2772 if self._match_text_seq("SQL", "DATA"): 2773 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2774 return None 2775 2776 def _parse_distkey(self) -> exp.DistKeyProperty: 2777 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2778 2779 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2780 table = self._parse_table(schema=True) 2781 2782 options = [] 2783 while self._match_texts(("INCLUDING", "EXCLUDING")): 2784 this = self._prev.text.upper() 2785 2786 id_var = self._parse_id_var() 2787 if not id_var: 2788 return None 2789 2790 options.append( 2791 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2792 ) 2793 2794 return self.expression(exp.LikeProperty, this=table, expressions=options) 2795 2796 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2797 return self.expression( 2798 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2799 ) 2800 2801 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2802 self._match(TokenType.EQ) 2803 return self.expression( 2804 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2805 ) 2806 2807 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2808 self._match_text_seq("WITH", "CONNECTION") 2809 return self.expression( 2810 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2811 ) 2812 2813 def _parse_returns(self) -> exp.ReturnsProperty: 2814 value: t.Optional[exp.Expression] 2815 null = None 2816 is_table = self._match(TokenType.TABLE) 2817 2818 if is_table: 2819 if self._match(TokenType.LT): 2820 value = self.expression( 2821 exp.Schema, 2822 this="TABLE", 2823 expressions=self._parse_csv(self._parse_struct_types), 2824 ) 2825 if not self._match(TokenType.GT): 2826 self.raise_error("Expecting >") 2827 else: 2828 value = self._parse_schema(exp.var("TABLE")) 2829 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 
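            # e.g. the standard / T-SQL function option "RETURNS NULL ON NULL INPUT"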
2830 null = True 2831 value = None 2832 else: 2833 value = self._parse_types() 2834 2835 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2836 2837 def _parse_describe(self) -> exp.Describe: 2838 kind = self._match_set(self.CREATABLES) and self._prev.text 2839 style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper() 2840 if self._match(TokenType.DOT): 2841 style = None 2842 self._retreat(self._index - 2) 2843 2844 format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None 2845 2846 if self._match_set(self.STATEMENT_PARSERS, advance=False): 2847 this = self._parse_statement() 2848 else: 2849 this = self._parse_table(schema=True) 2850 2851 properties = self._parse_properties() 2852 expressions = properties.expressions if properties else None 2853 partition = self._parse_partition() 2854 return self.expression( 2855 exp.Describe, 2856 this=this, 2857 style=style, 2858 kind=kind, 2859 expressions=expressions, 2860 partition=partition, 2861 format=format, 2862 ) 2863 2864 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2865 kind = self._prev.text.upper() 2866 expressions = [] 2867 2868 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2869 if self._match(TokenType.WHEN): 2870 expression = self._parse_disjunction() 2871 self._match(TokenType.THEN) 2872 else: 2873 expression = None 2874 2875 else_ = self._match(TokenType.ELSE) 2876 2877 if not self._match(TokenType.INTO): 2878 return None 2879 2880 return self.expression( 2881 exp.ConditionalInsert, 2882 this=self.expression( 2883 exp.Insert, 2884 this=self._parse_table(schema=True), 2885 expression=self._parse_derived_table_values(), 2886 ), 2887 expression=expression, 2888 else_=else_, 2889 ) 2890 2891 expression = parse_conditional_insert() 2892 while expression is not None: 2893 expressions.append(expression) 2894 expression = parse_conditional_insert() 2895 2896 return self.expression( 2897 exp.MultitableInserts, 2898 kind=kind, 2899 comments=comments, 2900 expressions=expressions, 2901 source=self._parse_table(), 2902 ) 2903 2904 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2905 comments = [] 2906 hint = self._parse_hint() 2907 overwrite = self._match(TokenType.OVERWRITE) 2908 ignore = self._match(TokenType.IGNORE) 2909 local = self._match_text_seq("LOCAL") 2910 alternative = None 2911 is_function = None 2912 2913 if self._match_text_seq("DIRECTORY"): 2914 this: t.Optional[exp.Expression] = self.expression( 2915 exp.Directory, 2916 this=self._parse_var_or_string(), 2917 local=local, 2918 row_format=self._parse_row_format(match_row=True), 2919 ) 2920 else: 2921 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2922 comments += ensure_list(self._prev_comments) 2923 return self._parse_multitable_inserts(comments) 2924 2925 if self._match(TokenType.OR): 2926 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2927 2928 self._match(TokenType.INTO) 2929 comments += ensure_list(self._prev_comments) 2930 self._match(TokenType.TABLE) 2931 is_function = self._match(TokenType.FUNCTION) 2932 2933 this = ( 2934 self._parse_table(schema=True, parse_partition=True) 2935 if not is_function 2936 else self._parse_function() 2937 ) 2938 if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False): 2939 this.set("alias", self._parse_table_alias()) 2940 2941 returning = self._parse_returning() 2942 2943 return self.expression( 2944 
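            # The optional clauses below are matched in sequence, covering forms such as
            # DuckDB's "INSERT INTO t BY NAME SELECT ..." and MySQL's
            # "INSERT INTO t VALUES (1) ON DUPLICATE KEY UPDATE ..."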
exp.Insert, 2945 comments=comments, 2946 hint=hint, 2947 is_function=is_function, 2948 this=this, 2949 stored=self._match_text_seq("STORED") and self._parse_stored(), 2950 by_name=self._match_text_seq("BY", "NAME"), 2951 exists=self._parse_exists(), 2952 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2953 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2954 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2955 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2956 conflict=self._parse_on_conflict(), 2957 returning=returning or self._parse_returning(), 2958 overwrite=overwrite, 2959 alternative=alternative, 2960 ignore=ignore, 2961 source=self._match(TokenType.TABLE) and self._parse_table(), 2962 ) 2963 2964 def _parse_kill(self) -> exp.Kill: 2965 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2966 2967 return self.expression( 2968 exp.Kill, 2969 this=self._parse_primary(), 2970 kind=kind, 2971 ) 2972 2973 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2974 conflict = self._match_text_seq("ON", "CONFLICT") 2975 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2976 2977 if not conflict and not duplicate: 2978 return None 2979 2980 conflict_keys = None 2981 constraint = None 2982 2983 if conflict: 2984 if self._match_text_seq("ON", "CONSTRAINT"): 2985 constraint = self._parse_id_var() 2986 elif self._match(TokenType.L_PAREN): 2987 conflict_keys = self._parse_csv(self._parse_id_var) 2988 self._match_r_paren() 2989 2990 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2991 if self._prev.token_type == TokenType.UPDATE: 2992 self._match(TokenType.SET) 2993 expressions = self._parse_csv(self._parse_equality) 2994 else: 2995 expressions = None 2996 2997 return self.expression( 2998 exp.OnConflict, 2999 duplicate=duplicate, 3000 expressions=expressions, 3001 action=action, 3002 conflict_keys=conflict_keys, 3003 constraint=constraint, 3004 where=self._parse_where(), 3005 ) 3006 3007 def _parse_returning(self) -> t.Optional[exp.Returning]: 3008 if not self._match(TokenType.RETURNING): 3009 return None 3010 return self.expression( 3011 exp.Returning, 3012 expressions=self._parse_csv(self._parse_expression), 3013 into=self._match(TokenType.INTO) and self._parse_table_part(), 3014 ) 3015 3016 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 3017 if not self._match(TokenType.FORMAT): 3018 return None 3019 return self._parse_row_format() 3020 3021 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 3022 index = self._index 3023 with_ = with_ or self._match_text_seq("WITH") 3024 3025 if not self._match(TokenType.SERDE_PROPERTIES): 3026 self._retreat(index) 3027 return None 3028 return self.expression( 3029 exp.SerdeProperties, 3030 **{ # type: ignore 3031 "expressions": self._parse_wrapped_properties(), 3032 "with": with_, 3033 }, 3034 ) 3035 3036 def _parse_row_format( 3037 self, match_row: bool = False 3038 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 3039 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 3040 return None 3041 3042 if self._match_text_seq("SERDE"): 3043 this = self._parse_string() 3044 3045 serde_properties = self._parse_serde_properties() 3046 3047 return self.expression( 3048 exp.RowFormatSerdeProperty, this=this, 
serde_properties=serde_properties 3049 ) 3050 3051 self._match_text_seq("DELIMITED") 3052 3053 kwargs = {} 3054 3055 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 3056 kwargs["fields"] = self._parse_string() 3057 if self._match_text_seq("ESCAPED", "BY"): 3058 kwargs["escaped"] = self._parse_string() 3059 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 3060 kwargs["collection_items"] = self._parse_string() 3061 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 3062 kwargs["map_keys"] = self._parse_string() 3063 if self._match_text_seq("LINES", "TERMINATED", "BY"): 3064 kwargs["lines"] = self._parse_string() 3065 if self._match_text_seq("NULL", "DEFINED", "AS"): 3066 kwargs["null"] = self._parse_string() 3067 3068 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 3069 3070 def _parse_load(self) -> exp.LoadData | exp.Command: 3071 if self._match_text_seq("DATA"): 3072 local = self._match_text_seq("LOCAL") 3073 self._match_text_seq("INPATH") 3074 inpath = self._parse_string() 3075 overwrite = self._match(TokenType.OVERWRITE) 3076 self._match_pair(TokenType.INTO, TokenType.TABLE) 3077 3078 return self.expression( 3079 exp.LoadData, 3080 this=self._parse_table(schema=True), 3081 local=local, 3082 overwrite=overwrite, 3083 inpath=inpath, 3084 partition=self._parse_partition(), 3085 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 3086 serde=self._match_text_seq("SERDE") and self._parse_string(), 3087 ) 3088 return self._parse_as_command(self._prev) 3089 3090 def _parse_delete(self) -> exp.Delete: 3091 # This handles MySQL's "Multiple-Table Syntax" 3092 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 3093 tables = None 3094 if not self._match(TokenType.FROM, advance=False): 3095 tables = self._parse_csv(self._parse_table) or None 3096 3097 returning = self._parse_returning() 3098 3099 return self.expression( 3100 exp.Delete, 3101 tables=tables, 3102 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 3103 using=self._match(TokenType.USING) and self._parse_table(joins=True), 3104 cluster=self._match(TokenType.ON) and self._parse_on_property(), 3105 where=self._parse_where(), 3106 returning=returning or self._parse_returning(), 3107 limit=self._parse_limit(), 3108 ) 3109 3110 def _parse_update(self) -> exp.Update: 3111 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 3112 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 3113 returning = self._parse_returning() 3114 return self.expression( 3115 exp.Update, 3116 **{ # type: ignore 3117 "this": this, 3118 "expressions": expressions, 3119 "from": self._parse_from(joins=True), 3120 "where": self._parse_where(), 3121 "returning": returning or self._parse_returning(), 3122 "order": self._parse_order(), 3123 "limit": self._parse_limit(), 3124 }, 3125 ) 3126 3127 def _parse_use(self) -> exp.Use: 3128 return self.expression( 3129 exp.Use, 3130 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 3131 this=self._parse_table(schema=False), 3132 ) 3133 3134 def _parse_uncache(self) -> exp.Uncache: 3135 if not self._match(TokenType.TABLE): 3136 self.raise_error("Expecting TABLE after UNCACHE") 3137 3138 return self.expression( 3139 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 3140 ) 3141 3142 def _parse_cache(self) -> exp.Cache: 3143 lazy = self._match_text_seq("LAZY") 3144 self._match(TokenType.TABLE) 3145 table = 
self._parse_table(schema=True) 3146 3147 options = [] 3148 if self._match_text_seq("OPTIONS"): 3149 self._match_l_paren() 3150 k = self._parse_string() 3151 self._match(TokenType.EQ) 3152 v = self._parse_string() 3153 options = [k, v] 3154 self._match_r_paren() 3155 3156 self._match(TokenType.ALIAS) 3157 return self.expression( 3158 exp.Cache, 3159 this=table, 3160 lazy=lazy, 3161 options=options, 3162 expression=self._parse_select(nested=True), 3163 ) 3164 3165 def _parse_partition(self) -> t.Optional[exp.Partition]: 3166 if not self._match_texts(self.PARTITION_KEYWORDS): 3167 return None 3168 3169 return self.expression( 3170 exp.Partition, 3171 subpartition=self._prev.text.upper() == "SUBPARTITION", 3172 expressions=self._parse_wrapped_csv(self._parse_assignment), 3173 ) 3174 3175 def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]: 3176 def _parse_value_expression() -> t.Optional[exp.Expression]: 3177 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT): 3178 return exp.var(self._prev.text.upper()) 3179 return self._parse_expression() 3180 3181 if self._match(TokenType.L_PAREN): 3182 expressions = self._parse_csv(_parse_value_expression) 3183 self._match_r_paren() 3184 return self.expression(exp.Tuple, expressions=expressions) 3185 3186 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 3187 expression = self._parse_expression() 3188 if expression: 3189 return self.expression(exp.Tuple, expressions=[expression]) 3190 return None 3191 3192 def _parse_projections(self) -> t.List[exp.Expression]: 3193 return self._parse_expressions() 3194 3195 def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]: 3196 if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)): 3197 this: t.Optional[exp.Expression] = self._parse_simplified_pivot( 3198 is_unpivot=self._prev.token_type == TokenType.UNPIVOT 3199 ) 3200 elif self._match(TokenType.FROM): 3201 from_ = self._parse_from(skip_from_token=True, consume_pipe=True) 3202 # Support parentheses for duckdb FROM-first syntax 3203 select = self._parse_select() 3204 if select: 3205 select.set("from", from_) 3206 this = select 3207 else: 3208 this = exp.select("*").from_(t.cast(exp.From, from_)) 3209 else: 3210 this = ( 3211 self._parse_table(consume_pipe=True) 3212 if table 3213 else self._parse_select(nested=True, parse_set_operation=False) 3214 ) 3215 3216 # Transform exp.Values into a exp.Table to pass through parse_query_modifiers 3217 # in case a modifier (e.g. 
join) is following 3218 if table and isinstance(this, exp.Values) and this.alias: 3219 alias = this.args["alias"].pop() 3220 this = exp.Table(this=this, alias=alias) 3221 3222 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3223 3224 return this 3225 3226 def _parse_select( 3227 self, 3228 nested: bool = False, 3229 table: bool = False, 3230 parse_subquery_alias: bool = True, 3231 parse_set_operation: bool = True, 3232 consume_pipe: bool = True, 3233 ) -> t.Optional[exp.Expression]: 3234 query = self._parse_select_query( 3235 nested=nested, 3236 table=table, 3237 parse_subquery_alias=parse_subquery_alias, 3238 parse_set_operation=parse_set_operation, 3239 ) 3240 3241 if ( 3242 consume_pipe 3243 and self._match(TokenType.PIPE_GT, advance=False) 3244 and isinstance(query, exp.Query) 3245 ): 3246 query = self._parse_pipe_syntax_query(query) 3247 query = query.subquery(copy=False) if query and table else query 3248 3249 return query 3250 3251 def _parse_select_query( 3252 self, 3253 nested: bool = False, 3254 table: bool = False, 3255 parse_subquery_alias: bool = True, 3256 parse_set_operation: bool = True, 3257 ) -> t.Optional[exp.Expression]: 3258 cte = self._parse_with() 3259 3260 if cte: 3261 this = self._parse_statement() 3262 3263 if not this: 3264 self.raise_error("Failed to parse any statement following CTE") 3265 return cte 3266 3267 if "with" in this.arg_types: 3268 this.set("with", cte) 3269 else: 3270 self.raise_error(f"{this.key} does not support CTE") 3271 this = cte 3272 3273 return this 3274 3275 # duckdb supports leading with FROM x 3276 from_ = ( 3277 self._parse_from(consume_pipe=True) 3278 if self._match(TokenType.FROM, advance=False) 3279 else None 3280 ) 3281 3282 if self._match(TokenType.SELECT): 3283 comments = self._prev_comments 3284 3285 hint = self._parse_hint() 3286 3287 if self._next and not self._next.token_type == TokenType.DOT: 3288 all_ = self._match(TokenType.ALL) 3289 distinct = self._match_set(self.DISTINCT_TOKENS) 3290 else: 3291 all_, distinct = None, None 3292 3293 kind = ( 3294 self._match(TokenType.ALIAS) 3295 and self._match_texts(("STRUCT", "VALUE")) 3296 and self._prev.text.upper() 3297 ) 3298 3299 if distinct: 3300 distinct = self.expression( 3301 exp.Distinct, 3302 on=self._parse_value(values=False) if self._match(TokenType.ON) else None, 3303 ) 3304 3305 if all_ and distinct: 3306 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 3307 3308 operation_modifiers = [] 3309 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 3310 operation_modifiers.append(exp.var(self._prev.text.upper())) 3311 3312 limit = self._parse_limit(top=True) 3313 projections = self._parse_projections() 3314 3315 this = self.expression( 3316 exp.Select, 3317 kind=kind, 3318 hint=hint, 3319 distinct=distinct, 3320 expressions=projections, 3321 limit=limit, 3322 operation_modifiers=operation_modifiers or None, 3323 ) 3324 this.comments = comments 3325 3326 into = self._parse_into() 3327 if into: 3328 this.set("into", into) 3329 3330 if not from_: 3331 from_ = self._parse_from() 3332 3333 if from_: 3334 this.set("from", from_) 3335 3336 this = self._parse_query_modifiers(this) 3337 elif (table or nested) and self._match(TokenType.L_PAREN): 3338 this = self._parse_wrapped_select(table=table) 3339 3340 # We return early here so that the UNION isn't attached to the subquery by the 3341 # following call to _parse_set_operations, but instead becomes the parent node 3342 self._match_r_paren() 3343 return self._parse_subquery(this, 
parse_alias=parse_subquery_alias) 3344 elif self._match(TokenType.VALUES, advance=False): 3345 this = self._parse_derived_table_values() 3346 elif from_: 3347 this = exp.select("*").from_(from_.this, copy=False) 3348 elif self._match(TokenType.SUMMARIZE): 3349 table = self._match(TokenType.TABLE) 3350 this = self._parse_select() or self._parse_string() or self._parse_table() 3351 return self.expression(exp.Summarize, this=this, table=table) 3352 elif self._match(TokenType.DESCRIBE): 3353 this = self._parse_describe() 3354 elif self._match_text_seq("STREAM"): 3355 this = self._parse_function() 3356 if this: 3357 this = self.expression(exp.Stream, this=this) 3358 else: 3359 self._retreat(self._index - 1) 3360 else: 3361 this = None 3362 3363 return self._parse_set_operations(this) if parse_set_operation else this 3364 3365 def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]: 3366 self._match_text_seq("SEARCH") 3367 3368 kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper() 3369 3370 if not kind: 3371 return None 3372 3373 self._match_text_seq("FIRST", "BY") 3374 3375 return self.expression( 3376 exp.RecursiveWithSearch, 3377 kind=kind, 3378 this=self._parse_id_var(), 3379 expression=self._match_text_seq("SET") and self._parse_id_var(), 3380 using=self._match_text_seq("USING") and self._parse_id_var(), 3381 ) 3382 3383 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3384 if not skip_with_token and not self._match(TokenType.WITH): 3385 return None 3386 3387 comments = self._prev_comments 3388 recursive = self._match(TokenType.RECURSIVE) 3389 3390 last_comments = None 3391 expressions = [] 3392 while True: 3393 cte = self._parse_cte() 3394 if isinstance(cte, exp.CTE): 3395 expressions.append(cte) 3396 if last_comments: 3397 cte.add_comments(last_comments) 3398 3399 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3400 break 3401 else: 3402 self._match(TokenType.WITH) 3403 3404 last_comments = self._prev_comments 3405 3406 return self.expression( 3407 exp.With, 3408 comments=comments, 3409 expressions=expressions, 3410 recursive=recursive, 3411 search=self._parse_recursive_with_search(), 3412 ) 3413 3414 def _parse_cte(self) -> t.Optional[exp.CTE]: 3415 index = self._index 3416 3417 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3418 if not alias or not alias.this: 3419 self.raise_error("Expected CTE to have alias") 3420 3421 if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE: 3422 self._retreat(index) 3423 return None 3424 3425 comments = self._prev_comments 3426 3427 if self._match_text_seq("NOT", "MATERIALIZED"): 3428 materialized = False 3429 elif self._match_text_seq("MATERIALIZED"): 3430 materialized = True 3431 else: 3432 materialized = None 3433 3434 cte = self.expression( 3435 exp.CTE, 3436 this=self._parse_wrapped(self._parse_statement), 3437 alias=alias, 3438 materialized=materialized, 3439 comments=comments, 3440 ) 3441 3442 values = cte.this 3443 if isinstance(values, exp.Values): 3444 if values.alias: 3445 cte.set("this", exp.select("*").from_(values)) 3446 else: 3447 cte.set("this", exp.select("*").from_(exp.alias_(values, "_values", table=True))) 3448 3449 return cte 3450 3451 def _parse_table_alias( 3452 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3453 ) -> t.Optional[exp.TableAlias]: 3454 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 3455 # so this section tries to parse the clause 
version and if it fails, it treats the token 3456 # as an identifier (alias) 3457 if self._can_parse_limit_or_offset(): 3458 return None 3459 3460 any_token = self._match(TokenType.ALIAS) 3461 alias = ( 3462 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3463 or self._parse_string_as_identifier() 3464 ) 3465 3466 index = self._index 3467 if self._match(TokenType.L_PAREN): 3468 columns = self._parse_csv(self._parse_function_parameter) 3469 self._match_r_paren() if columns else self._retreat(index) 3470 else: 3471 columns = None 3472 3473 if not alias and not columns: 3474 return None 3475 3476 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3477 3478 # We bubble up comments from the Identifier to the TableAlias 3479 if isinstance(alias, exp.Identifier): 3480 table_alias.add_comments(alias.pop_comments()) 3481 3482 return table_alias 3483 3484 def _parse_subquery( 3485 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3486 ) -> t.Optional[exp.Subquery]: 3487 if not this: 3488 return None 3489 3490 return self.expression( 3491 exp.Subquery, 3492 this=this, 3493 pivots=self._parse_pivots(), 3494 alias=self._parse_table_alias() if parse_alias else None, 3495 sample=self._parse_table_sample(), 3496 ) 3497 3498 def _implicit_unnests_to_explicit(self, this: E) -> E: 3499 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3500 3501 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3502 for i, join in enumerate(this.args.get("joins") or []): 3503 table = join.this 3504 normalized_table = table.copy() 3505 normalized_table.meta["maybe_column"] = True 3506 normalized_table = _norm(normalized_table, dialect=self.dialect) 3507 3508 if isinstance(table, exp.Table) and not join.args.get("on"): 3509 if normalized_table.parts[0].name in refs: 3510 table_as_column = table.to_column() 3511 unnest = exp.Unnest(expressions=[table_as_column]) 3512 3513 # Table.to_column creates a parent Alias node that we want to convert to 3514 # a TableAlias and attach to the Unnest, so it matches the parser's output 3515 if isinstance(table.args.get("alias"), exp.TableAlias): 3516 table_as_column.replace(table_as_column.this) 3517 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3518 3519 table.replace(unnest) 3520 3521 refs.add(normalized_table.alias_or_name) 3522 3523 return this 3524 3525 def _parse_query_modifiers( 3526 self, this: t.Optional[exp.Expression] 3527 ) -> t.Optional[exp.Expression]: 3528 if isinstance(this, self.MODIFIABLES): 3529 for join in self._parse_joins(): 3530 this.append("joins", join) 3531 for lateral in iter(self._parse_lateral, None): 3532 this.append("laterals", lateral) 3533 3534 while True: 3535 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3536 modifier_token = self._curr 3537 parser = self.QUERY_MODIFIER_PARSERS[modifier_token.token_type] 3538 key, expression = parser(self) 3539 3540 if expression: 3541 if this.args.get(key): 3542 self.raise_error( 3543 f"Found multiple '{modifier_token.text.upper()}' clauses", 3544 token=modifier_token, 3545 ) 3546 3547 this.set(key, expression) 3548 if key == "limit": 3549 offset = expression.args.pop("offset", None) 3550 3551 if offset: 3552 offset = exp.Offset(expression=offset) 3553 this.set("offset", offset) 3554 3555 limit_by_expressions = expression.expressions 3556 expression.set("expressions", None) 3557 offset.set("expressions", limit_by_expressions) 3558 continue 
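                # No query-modifier parser matched at the current token (or its parser
                # produced nothing), so stop scanning for modifiers.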
3559 break 3560 3561 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3562 this = self._implicit_unnests_to_explicit(this) 3563 3564 return this 3565 3566 def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]: 3567 start = self._curr 3568 while self._curr: 3569 self._advance() 3570 3571 end = self._tokens[self._index - 1] 3572 return exp.Hint(expressions=[self._find_sql(start, end)]) 3573 3574 def _parse_hint_function_call(self) -> t.Optional[exp.Expression]: 3575 return self._parse_function_call() 3576 3577 def _parse_hint_body(self) -> t.Optional[exp.Hint]: 3578 start_index = self._index 3579 should_fallback_to_string = False 3580 3581 hints = [] 3582 try: 3583 for hint in iter( 3584 lambda: self._parse_csv( 3585 lambda: self._parse_hint_function_call() or self._parse_var(upper=True), 3586 ), 3587 [], 3588 ): 3589 hints.extend(hint) 3590 except ParseError: 3591 should_fallback_to_string = True 3592 3593 if should_fallback_to_string or self._curr: 3594 self._retreat(start_index) 3595 return self._parse_hint_fallback_to_string() 3596 3597 return self.expression(exp.Hint, expressions=hints) 3598 3599 def _parse_hint(self) -> t.Optional[exp.Hint]: 3600 if self._match(TokenType.HINT) and self._prev_comments: 3601 return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect) 3602 3603 return None 3604 3605 def _parse_into(self) -> t.Optional[exp.Into]: 3606 if not self._match(TokenType.INTO): 3607 return None 3608 3609 temp = self._match(TokenType.TEMPORARY) 3610 unlogged = self._match_text_seq("UNLOGGED") 3611 self._match(TokenType.TABLE) 3612 3613 return self.expression( 3614 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3615 ) 3616 3617 def _parse_from( 3618 self, 3619 joins: bool = False, 3620 skip_from_token: bool = False, 3621 consume_pipe: bool = False, 3622 ) -> t.Optional[exp.From]: 3623 if not skip_from_token and not self._match(TokenType.FROM): 3624 return None 3625 3626 return self.expression( 3627 exp.From, 3628 comments=self._prev_comments, 3629 this=self._parse_table(joins=joins, consume_pipe=consume_pipe), 3630 ) 3631 3632 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3633 return self.expression( 3634 exp.MatchRecognizeMeasure, 3635 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3636 this=self._parse_expression(), 3637 ) 3638 3639 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3640 if not self._match(TokenType.MATCH_RECOGNIZE): 3641 return None 3642 3643 self._match_l_paren() 3644 3645 partition = self._parse_partition_by() 3646 order = self._parse_order() 3647 3648 measures = ( 3649 self._parse_csv(self._parse_match_recognize_measure) 3650 if self._match_text_seq("MEASURES") 3651 else None 3652 ) 3653 3654 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3655 rows = exp.var("ONE ROW PER MATCH") 3656 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3657 text = "ALL ROWS PER MATCH" 3658 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3659 text += " SHOW EMPTY MATCHES" 3660 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3661 text += " OMIT EMPTY MATCHES" 3662 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3663 text += " WITH UNMATCHED ROWS" 3664 rows = exp.var(text) 3665 else: 3666 rows = None 3667 3668 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3669 text = "AFTER MATCH SKIP" 3670 if self._match_text_seq("PAST", "LAST", "ROW"): 3671 text += " PAST LAST ROW" 3672 elif 
self._match_text_seq("TO", "NEXT", "ROW"): 3673 text += " TO NEXT ROW" 3674 elif self._match_text_seq("TO", "FIRST"): 3675 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3676 elif self._match_text_seq("TO", "LAST"): 3677 text += f" TO LAST {self._advance_any().text}" # type: ignore 3678 after = exp.var(text) 3679 else: 3680 after = None 3681 3682 if self._match_text_seq("PATTERN"): 3683 self._match_l_paren() 3684 3685 if not self._curr: 3686 self.raise_error("Expecting )", self._curr) 3687 3688 paren = 1 3689 start = self._curr 3690 3691 while self._curr and paren > 0: 3692 if self._curr.token_type == TokenType.L_PAREN: 3693 paren += 1 3694 if self._curr.token_type == TokenType.R_PAREN: 3695 paren -= 1 3696 3697 end = self._prev 3698 self._advance() 3699 3700 if paren > 0: 3701 self.raise_error("Expecting )", self._curr) 3702 3703 pattern = exp.var(self._find_sql(start, end)) 3704 else: 3705 pattern = None 3706 3707 define = ( 3708 self._parse_csv(self._parse_name_as_expression) 3709 if self._match_text_seq("DEFINE") 3710 else None 3711 ) 3712 3713 self._match_r_paren() 3714 3715 return self.expression( 3716 exp.MatchRecognize, 3717 partition_by=partition, 3718 order=order, 3719 measures=measures, 3720 rows=rows, 3721 after=after, 3722 pattern=pattern, 3723 define=define, 3724 alias=self._parse_table_alias(), 3725 ) 3726 3727 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3728 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3729 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3730 cross_apply = False 3731 3732 if cross_apply is not None: 3733 this = self._parse_select(table=True) 3734 view = None 3735 outer = None 3736 elif self._match(TokenType.LATERAL): 3737 this = self._parse_select(table=True) 3738 view = self._match(TokenType.VIEW) 3739 outer = self._match(TokenType.OUTER) 3740 else: 3741 return None 3742 3743 if not this: 3744 this = ( 3745 self._parse_unnest() 3746 or self._parse_function() 3747 or self._parse_id_var(any_token=False) 3748 ) 3749 3750 while self._match(TokenType.DOT): 3751 this = exp.Dot( 3752 this=this, 3753 expression=self._parse_function() or self._parse_id_var(any_token=False), 3754 ) 3755 3756 ordinality: t.Optional[bool] = None 3757 3758 if view: 3759 table = self._parse_id_var(any_token=False) 3760 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3761 table_alias: t.Optional[exp.TableAlias] = self.expression( 3762 exp.TableAlias, this=table, columns=columns 3763 ) 3764 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3765 # We move the alias from the lateral's child node to the lateral itself 3766 table_alias = this.args["alias"].pop() 3767 else: 3768 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3769 table_alias = self._parse_table_alias() 3770 3771 return self.expression( 3772 exp.Lateral, 3773 this=this, 3774 view=view, 3775 outer=outer, 3776 alias=table_alias, 3777 cross_apply=cross_apply, 3778 ordinality=ordinality, 3779 ) 3780 3781 def _parse_join_parts( 3782 self, 3783 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3784 return ( 3785 self._match_set(self.JOIN_METHODS) and self._prev, 3786 self._match_set(self.JOIN_SIDES) and self._prev, 3787 self._match_set(self.JOIN_KINDS) and self._prev, 3788 ) 3789 3790 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3791 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3792 this = self._parse_column() 3793 if 
isinstance(this, exp.Column): 3794 return this.this 3795 return this 3796 3797 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3798 3799 def _parse_join( 3800 self, skip_join_token: bool = False, parse_bracket: bool = False 3801 ) -> t.Optional[exp.Join]: 3802 if self._match(TokenType.COMMA): 3803 table = self._try_parse(self._parse_table) 3804 cross_join = self.expression(exp.Join, this=table) if table else None 3805 3806 if cross_join and self.JOINS_HAVE_EQUAL_PRECEDENCE: 3807 cross_join.set("kind", "CROSS") 3808 3809 return cross_join 3810 3811 index = self._index 3812 method, side, kind = self._parse_join_parts() 3813 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3814 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3815 join_comments = self._prev_comments 3816 3817 if not skip_join_token and not join: 3818 self._retreat(index) 3819 kind = None 3820 method = None 3821 side = None 3822 3823 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3824 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3825 3826 if not skip_join_token and not join and not outer_apply and not cross_apply: 3827 return None 3828 3829 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3830 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 3831 kwargs["expressions"] = self._parse_csv( 3832 lambda: self._parse_table(parse_bracket=parse_bracket) 3833 ) 3834 3835 if method: 3836 kwargs["method"] = method.text 3837 if side: 3838 kwargs["side"] = side.text 3839 if kind: 3840 kwargs["kind"] = kind.text 3841 if hint: 3842 kwargs["hint"] = hint 3843 3844 if self._match(TokenType.MATCH_CONDITION): 3845 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3846 3847 if self._match(TokenType.ON): 3848 kwargs["on"] = self._parse_assignment() 3849 elif self._match(TokenType.USING): 3850 kwargs["using"] = self._parse_using_identifiers() 3851 elif ( 3852 not method 3853 and not (outer_apply or cross_apply) 3854 and not isinstance(kwargs["this"], exp.Unnest) 3855 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 3856 ): 3857 index = self._index 3858 joins: t.Optional[list] = list(self._parse_joins()) 3859 3860 if joins and self._match(TokenType.ON): 3861 kwargs["on"] = self._parse_assignment() 3862 elif joins and self._match(TokenType.USING): 3863 kwargs["using"] = self._parse_using_identifiers() 3864 else: 3865 joins = None 3866 self._retreat(index) 3867 3868 kwargs["this"].set("joins", joins if joins else None) 3869 3870 kwargs["pivots"] = self._parse_pivots() 3871 3872 comments = [c for token in (method, side, kind) if token for c in token.comments] 3873 comments = (join_comments or []) + comments 3874 return self.expression(exp.Join, comments=comments, **kwargs) 3875 3876 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3877 this = self._parse_assignment() 3878 3879 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3880 return this 3881 3882 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3883 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3884 3885 return this 3886 3887 def _parse_index_params(self) -> exp.IndexParameters: 3888 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3889 3890 if self._match(TokenType.L_PAREN, advance=False): 3891 columns = 
self._parse_wrapped_csv(self._parse_with_operator) 3892 else: 3893 columns = None 3894 3895 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3896 partition_by = self._parse_partition_by() 3897 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3898 tablespace = ( 3899 self._parse_var(any_token=True) 3900 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3901 else None 3902 ) 3903 where = self._parse_where() 3904 3905 on = self._parse_field() if self._match(TokenType.ON) else None 3906 3907 return self.expression( 3908 exp.IndexParameters, 3909 using=using, 3910 columns=columns, 3911 include=include, 3912 partition_by=partition_by, 3913 where=where, 3914 with_storage=with_storage, 3915 tablespace=tablespace, 3916 on=on, 3917 ) 3918 3919 def _parse_index( 3920 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3921 ) -> t.Optional[exp.Index]: 3922 if index or anonymous: 3923 unique = None 3924 primary = None 3925 amp = None 3926 3927 self._match(TokenType.ON) 3928 self._match(TokenType.TABLE) # hive 3929 table = self._parse_table_parts(schema=True) 3930 else: 3931 unique = self._match(TokenType.UNIQUE) 3932 primary = self._match_text_seq("PRIMARY") 3933 amp = self._match_text_seq("AMP") 3934 3935 if not self._match(TokenType.INDEX): 3936 return None 3937 3938 index = self._parse_id_var() 3939 table = None 3940 3941 params = self._parse_index_params() 3942 3943 return self.expression( 3944 exp.Index, 3945 this=index, 3946 table=table, 3947 unique=unique, 3948 primary=primary, 3949 amp=amp, 3950 params=params, 3951 ) 3952 3953 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3954 hints: t.List[exp.Expression] = [] 3955 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3956 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3957 hints.append( 3958 self.expression( 3959 exp.WithTableHint, 3960 expressions=self._parse_csv( 3961 lambda: self._parse_function() or self._parse_var(any_token=True) 3962 ), 3963 ) 3964 ) 3965 self._match_r_paren() 3966 else: 3967 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3968 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3969 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3970 3971 self._match_set((TokenType.INDEX, TokenType.KEY)) 3972 if self._match(TokenType.FOR): 3973 hint.set("target", self._advance_any() and self._prev.text.upper()) 3974 3975 hint.set("expressions", self._parse_wrapped_id_vars()) 3976 hints.append(hint) 3977 3978 return hints or None 3979 3980 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3981 return ( 3982 (not schema and self._parse_function(optional_parens=False)) 3983 or self._parse_id_var(any_token=False) 3984 or self._parse_string_as_identifier() 3985 or self._parse_placeholder() 3986 ) 3987 3988 def _parse_table_parts( 3989 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3990 ) -> exp.Table: 3991 catalog = None 3992 db = None 3993 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3994 3995 while self._match(TokenType.DOT): 3996 if catalog: 3997 # This allows nesting the table in arbitrarily many dot expressions if needed 3998 table = self.expression( 3999 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 4000 ) 4001 else: 4002 catalog = db 4003 db = table 4004 # "" used for tsql FROM a..b case 4005 table = 
self._parse_table_part(schema=schema) or "" 4006 4007 if ( 4008 wildcard 4009 and self._is_connected() 4010 and (isinstance(table, exp.Identifier) or not table) 4011 and self._match(TokenType.STAR) 4012 ): 4013 if isinstance(table, exp.Identifier): 4014 table.args["this"] += "*" 4015 else: 4016 table = exp.Identifier(this="*") 4017 4018 # We bubble up comments from the Identifier to the Table 4019 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 4020 4021 if is_db_reference: 4022 catalog = db 4023 db = table 4024 table = None 4025 4026 if not table and not is_db_reference: 4027 self.raise_error(f"Expected table name but got {self._curr}") 4028 if not db and is_db_reference: 4029 self.raise_error(f"Expected database name but got {self._curr}") 4030 4031 table = self.expression( 4032 exp.Table, 4033 comments=comments, 4034 this=table, 4035 db=db, 4036 catalog=catalog, 4037 ) 4038 4039 changes = self._parse_changes() 4040 if changes: 4041 table.set("changes", changes) 4042 4043 at_before = self._parse_historical_data() 4044 if at_before: 4045 table.set("when", at_before) 4046 4047 pivots = self._parse_pivots() 4048 if pivots: 4049 table.set("pivots", pivots) 4050 4051 return table 4052 4053 def _parse_table( 4054 self, 4055 schema: bool = False, 4056 joins: bool = False, 4057 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 4058 parse_bracket: bool = False, 4059 is_db_reference: bool = False, 4060 parse_partition: bool = False, 4061 consume_pipe: bool = False, 4062 ) -> t.Optional[exp.Expression]: 4063 lateral = self._parse_lateral() 4064 if lateral: 4065 return lateral 4066 4067 unnest = self._parse_unnest() 4068 if unnest: 4069 return unnest 4070 4071 values = self._parse_derived_table_values() 4072 if values: 4073 return values 4074 4075 subquery = self._parse_select(table=True, consume_pipe=consume_pipe) 4076 if subquery: 4077 if not subquery.args.get("pivots"): 4078 subquery.set("pivots", self._parse_pivots()) 4079 return subquery 4080 4081 bracket = parse_bracket and self._parse_bracket(None) 4082 bracket = self.expression(exp.Table, this=bracket) if bracket else None 4083 4084 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 4085 self._parse_table 4086 ) 4087 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 4088 4089 only = self._match(TokenType.ONLY) 4090 4091 this = t.cast( 4092 exp.Expression, 4093 bracket 4094 or rows_from 4095 or self._parse_bracket( 4096 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 4097 ), 4098 ) 4099 4100 if only: 4101 this.set("only", only) 4102 4103 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 4104 self._match_text_seq("*") 4105 4106 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 4107 if parse_partition and self._match(TokenType.PARTITION, advance=False): 4108 this.set("partition", self._parse_partition()) 4109 4110 if schema: 4111 return self._parse_schema(this=this) 4112 4113 version = self._parse_version() 4114 4115 if version: 4116 this.set("version", version) 4117 4118 if self.dialect.ALIAS_POST_TABLESAMPLE: 4119 this.set("sample", self._parse_table_sample()) 4120 4121 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 4122 if alias: 4123 this.set("alias", alias) 4124 4125 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 4126 return self.expression( 4127 exp.AtIndex, this=this.to_column(copy=False), 
expression=self._parse_id_var() 4128 ) 4129 4130 this.set("hints", self._parse_table_hints()) 4131 4132 if not this.args.get("pivots"): 4133 this.set("pivots", self._parse_pivots()) 4134 4135 if not self.dialect.ALIAS_POST_TABLESAMPLE: 4136 this.set("sample", self._parse_table_sample()) 4137 4138 if joins: 4139 for join in self._parse_joins(): 4140 this.append("joins", join) 4141 4142 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 4143 this.set("ordinality", True) 4144 this.set("alias", self._parse_table_alias()) 4145 4146 return this 4147 4148 def _parse_version(self) -> t.Optional[exp.Version]: 4149 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 4150 this = "TIMESTAMP" 4151 elif self._match(TokenType.VERSION_SNAPSHOT): 4152 this = "VERSION" 4153 else: 4154 return None 4155 4156 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 4157 kind = self._prev.text.upper() 4158 start = self._parse_bitwise() 4159 self._match_texts(("TO", "AND")) 4160 end = self._parse_bitwise() 4161 expression: t.Optional[exp.Expression] = self.expression( 4162 exp.Tuple, expressions=[start, end] 4163 ) 4164 elif self._match_text_seq("CONTAINED", "IN"): 4165 kind = "CONTAINED IN" 4166 expression = self.expression( 4167 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 4168 ) 4169 elif self._match(TokenType.ALL): 4170 kind = "ALL" 4171 expression = None 4172 else: 4173 self._match_text_seq("AS", "OF") 4174 kind = "AS OF" 4175 expression = self._parse_type() 4176 4177 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 4178 4179 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 4180 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 4181 index = self._index 4182 historical_data = None 4183 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 4184 this = self._prev.text.upper() 4185 kind = ( 4186 self._match(TokenType.L_PAREN) 4187 and self._match_texts(self.HISTORICAL_DATA_KIND) 4188 and self._prev.text.upper() 4189 ) 4190 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 4191 4192 if expression: 4193 self._match_r_paren() 4194 historical_data = self.expression( 4195 exp.HistoricalData, this=this, kind=kind, expression=expression 4196 ) 4197 else: 4198 self._retreat(index) 4199 4200 return historical_data 4201 4202 def _parse_changes(self) -> t.Optional[exp.Changes]: 4203 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 4204 return None 4205 4206 information = self._parse_var(any_token=True) 4207 self._match_r_paren() 4208 4209 return self.expression( 4210 exp.Changes, 4211 information=information, 4212 at_before=self._parse_historical_data(), 4213 end=self._parse_historical_data(), 4214 ) 4215 4216 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 4217 if not self._match(TokenType.UNNEST): 4218 return None 4219 4220 expressions = self._parse_wrapped_csv(self._parse_equality) 4221 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 4222 4223 alias = self._parse_table_alias() if with_alias else None 4224 4225 if alias: 4226 if self.dialect.UNNEST_COLUMN_ONLY: 4227 if alias.args.get("columns"): 4228 self.raise_error("Unexpected extra column alias in unnest.") 4229 4230 alias.set("columns", [alias.this]) 4231 alias.set("this", None) 4232 4233 columns = alias.args.get("columns") or [] 4234 if offset and len(expressions) < len(columns): 4235 offset = columns.pop() 4236 4237 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 4238 
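            # e.g. BigQuery's `UNNEST(x) AS el WITH OFFSET AS pos`; when no alias is
            # given, the offset column falls back to the identifier "offset" below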
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var(
                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
            ) or exp.to_identifier("offset")

        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not (
            # ClickHouse's `FORMAT Values` is equivalent to `VALUES`
            self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES")
        ):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        size = None
        seed = None

        method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
        matched_l_paren = self._match(TokenType.L_PAREN)

        if self.TABLESAMPLE_CSV:
            num = None
            expressions = self._parse_csv(self._parse_primary)
        else:
            expressions = None
            num = (
                self._parse_factor()
                if self._match(TokenType.NUMBER, advance=False)
                else self._parse_primary() or self._parse_placeholder()
            )

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
            size = num
        else:
            percent = num

        if matched_l_paren:
            self._match_r_paren()

        if self._match(TokenType.L_PAREN):
            method = self._parse_var(upper=True)
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        if not method and self.DEFAULT_SAMPLING_METHOD:
            method = exp.var(self.DEFAULT_SAMPLING_METHOD)

        return self.expression(
            exp.TableSample,
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            size=size,
            seed=seed,
        )

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Iterator[exp.Join]:
        return iter(self._parse_join, None)

    def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]:
        if not self._match(TokenType.INTO):
            return None

        return self.expression(
            exp.UnpivotColumns,
            this=self._match_text_seq("NAME") and self._parse_column(),
            expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column),
        )

    # https://duckdb.org/docs/sql/statements/pivot
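    # A minimal usage sketch (illustrative only; `sqlglot.parse_one` is the library's
    # public entry point, not part of this class):
    #
    #   import sqlglot
    #   from sqlglot import exp
    #
    #   ast = sqlglot.parse_one("PIVOT cities ON year USING sum(population)", read="duckdb")
    #   assert ast.find(exp.Pivot) is not None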
def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot: 4346 def _parse_on() -> t.Optional[exp.Expression]: 4347 this = self._parse_bitwise() 4348 4349 if self._match(TokenType.IN): 4350 # PIVOT ... ON col IN (row_val1, row_val2) 4351 return self._parse_in(this) 4352 if self._match(TokenType.ALIAS, advance=False): 4353 # UNPIVOT ... ON (col1, col2, col3) AS row_val 4354 return self._parse_alias(this) 4355 4356 return this 4357 4358 this = self._parse_table() 4359 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 4360 into = self._parse_unpivot_columns() 4361 using = self._match(TokenType.USING) and self._parse_csv( 4362 lambda: self._parse_alias(self._parse_function()) 4363 ) 4364 group = self._parse_group() 4365 4366 return self.expression( 4367 exp.Pivot, 4368 this=this, 4369 expressions=expressions, 4370 using=using, 4371 group=group, 4372 unpivot=is_unpivot, 4373 into=into, 4374 ) 4375 4376 def _parse_pivot_in(self) -> exp.In: 4377 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 4378 this = self._parse_select_or_expression() 4379 4380 self._match(TokenType.ALIAS) 4381 alias = self._parse_bitwise() 4382 if alias: 4383 if isinstance(alias, exp.Column) and not alias.db: 4384 alias = alias.this 4385 return self.expression(exp.PivotAlias, this=this, alias=alias) 4386 4387 return this 4388 4389 value = self._parse_column() 4390 4391 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 4392 self.raise_error("Expecting IN (") 4393 4394 if self._match(TokenType.ANY): 4395 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 4396 else: 4397 exprs = self._parse_csv(_parse_aliased_expression) 4398 4399 self._match_r_paren() 4400 return self.expression(exp.In, this=value, expressions=exprs) 4401 4402 def _parse_pivot_aggregation(self) -> t.Optional[exp.Expression]: 4403 func = self._parse_function() 4404 if not func: 4405 self.raise_error("Expecting an aggregation function in PIVOT") 4406 4407 return self._parse_alias(func) 4408 4409 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 4410 index = self._index 4411 include_nulls = None 4412 4413 if self._match(TokenType.PIVOT): 4414 unpivot = False 4415 elif self._match(TokenType.UNPIVOT): 4416 unpivot = True 4417 4418 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 4419 if self._match_text_seq("INCLUDE", "NULLS"): 4420 include_nulls = True 4421 elif self._match_text_seq("EXCLUDE", "NULLS"): 4422 include_nulls = False 4423 else: 4424 return None 4425 4426 expressions = [] 4427 4428 if not self._match(TokenType.L_PAREN): 4429 self._retreat(index) 4430 return None 4431 4432 if unpivot: 4433 expressions = self._parse_csv(self._parse_column) 4434 else: 4435 expressions = self._parse_csv(self._parse_pivot_aggregation) 4436 4437 if not expressions: 4438 self.raise_error("Failed to parse PIVOT's aggregation list") 4439 4440 if not self._match(TokenType.FOR): 4441 self.raise_error("Expecting FOR") 4442 4443 fields = [] 4444 while True: 4445 field = self._try_parse(self._parse_pivot_in) 4446 if not field: 4447 break 4448 fields.append(field) 4449 4450 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 4451 self._parse_bitwise 4452 ) 4453 4454 group = self._parse_group() 4455 4456 self._match_r_paren() 4457 4458 pivot = self.expression( 4459 exp.Pivot, 4460 expressions=expressions, 4461 fields=fields, 4462 unpivot=unpivot, 4463 include_nulls=include_nulls, 4464 
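            # `default_on_null` captures e.g. Snowflake's `DEFAULT ON NULL (<value>)` clause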
            default_on_null=default_on_null,
            group=group,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            all_fields = []
            for pivot_field in pivot.fields:
                pivot_field_expressions = pivot_field.expressions

                # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case.
                if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny):
                    continue

                all_fields.append(
                    [
                        fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                        for fld in pivot_field_expressions
                    ]
                )

            if all_fields:
                if names:
                    all_fields.append(names)

                # Generate all possible combinations of the pivot columns,
                # e.g. PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US'))
                # generates the product between [[2000, 2010], ['NL', 'US'], ['total']]
                for fld_parts_tuple in itertools.product(*all_fields):
                    fld_parts = list(fld_parts_tuple)

                    if names and self.PREFIXED_PIVOT_COLUMNS:
                        # Move the "name" to the front of the list
                        fld_parts.insert(0, fld_parts.pop(-1))

                    columns.append(exp.to_identifier("_".join(fld_parts)))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        return [agg.alias for agg in aggregations if agg.alias]

    def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]:
        if not skip_where_token and not self._match(TokenType.PREWHERE):
            return None

        return self.expression(
            exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None
        comments = self._prev_comments

        elements: t.Dict[str, t.Any] = defaultdict(list)

        if self._match(TokenType.ALL):
            elements["all"] = True
        elif self._match(TokenType.DISTINCT):
            elements["all"] = False

        if self._match_set(self.QUERY_MODIFIER_TOKENS, advance=False):
            return self.expression(exp.Group, comments=comments, **elements)  # type: ignore

        while True:
            index = self._index

            elements["expressions"].extend(
                self._parse_csv(
                    lambda: None
                    if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False)
                    else self._parse_assignment()
                )
            )

            before_with_index = self._index
            with_prefix = self._match(TokenType.WITH)

            if self._match(TokenType.ROLLUP):
                elements["rollup"].append(
                    self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix)
                )
            elif self._match(TokenType.CUBE):
                elements["cube"].append(
                    self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix)
                )
            elif self._match(TokenType.GROUPING_SETS):
                elements["grouping_sets"].append(
self.expression( 4569 exp.GroupingSets, 4570 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4571 ) 4572 ) 4573 elif self._match_text_seq("TOTALS"): 4574 elements["totals"] = True # type: ignore 4575 4576 if before_with_index <= self._index <= before_with_index + 1: 4577 self._retreat(before_with_index) 4578 break 4579 4580 if index == self._index: 4581 break 4582 4583 return self.expression(exp.Group, comments=comments, **elements) # type: ignore 4584 4585 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4586 return self.expression( 4587 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4588 ) 4589 4590 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4591 if self._match(TokenType.L_PAREN): 4592 grouping_set = self._parse_csv(self._parse_column) 4593 self._match_r_paren() 4594 return self.expression(exp.Tuple, expressions=grouping_set) 4595 4596 return self._parse_column() 4597 4598 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4599 if not skip_having_token and not self._match(TokenType.HAVING): 4600 return None 4601 return self.expression( 4602 exp.Having, comments=self._prev_comments, this=self._parse_assignment() 4603 ) 4604 4605 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4606 if not self._match(TokenType.QUALIFY): 4607 return None 4608 return self.expression(exp.Qualify, this=self._parse_assignment()) 4609 4610 def _parse_connect_with_prior(self) -> t.Optional[exp.Expression]: 4611 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4612 exp.Prior, this=self._parse_bitwise() 4613 ) 4614 connect = self._parse_assignment() 4615 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4616 return connect 4617 4618 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4619 if skip_start_token: 4620 start = None 4621 elif self._match(TokenType.START_WITH): 4622 start = self._parse_assignment() 4623 else: 4624 return None 4625 4626 self._match(TokenType.CONNECT_BY) 4627 nocycle = self._match_text_seq("NOCYCLE") 4628 connect = self._parse_connect_with_prior() 4629 4630 if not start and self._match(TokenType.START_WITH): 4631 start = self._parse_assignment() 4632 4633 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4634 4635 def _parse_name_as_expression(self) -> t.Optional[exp.Expression]: 4636 this = self._parse_id_var(any_token=True) 4637 if self._match(TokenType.ALIAS): 4638 this = self.expression(exp.Alias, alias=this, this=self._parse_assignment()) 4639 return this 4640 4641 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4642 if self._match_text_seq("INTERPOLATE"): 4643 return self._parse_wrapped_csv(self._parse_name_as_expression) 4644 return None 4645 4646 def _parse_order( 4647 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4648 ) -> t.Optional[exp.Expression]: 4649 siblings = None 4650 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4651 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4652 return this 4653 4654 siblings = True 4655 4656 return self.expression( 4657 exp.Order, 4658 comments=self._prev_comments, 4659 this=this, 4660 expressions=self._parse_csv(self._parse_ordered), 4661 siblings=siblings, 4662 ) 4663 4664 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4665 if not self._match(token): 4666 return None 4667 return self.expression(exp_class, 
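            # shared by e.g. Hive's SORT BY / DISTRIBUTE BY / CLUSTER BY clauses, each
            # mapping to its own exp_class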
expressions=self._parse_csv(self._parse_ordered)) 4668 4669 def _parse_ordered( 4670 self, parse_method: t.Optional[t.Callable] = None 4671 ) -> t.Optional[exp.Ordered]: 4672 this = parse_method() if parse_method else self._parse_assignment() 4673 if not this: 4674 return None 4675 4676 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4677 this = exp.var("ALL") 4678 4679 asc = self._match(TokenType.ASC) 4680 desc = self._match(TokenType.DESC) or (asc and False) 4681 4682 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4683 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4684 4685 nulls_first = is_nulls_first or False 4686 explicitly_null_ordered = is_nulls_first or is_nulls_last 4687 4688 if ( 4689 not explicitly_null_ordered 4690 and ( 4691 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4692 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4693 ) 4694 and self.dialect.NULL_ORDERING != "nulls_are_last" 4695 ): 4696 nulls_first = True 4697 4698 if self._match_text_seq("WITH", "FILL"): 4699 with_fill = self.expression( 4700 exp.WithFill, 4701 **{ # type: ignore 4702 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4703 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4704 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4705 "interpolate": self._parse_interpolate(), 4706 }, 4707 ) 4708 else: 4709 with_fill = None 4710 4711 return self.expression( 4712 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4713 ) 4714 4715 def _parse_limit_options(self) -> exp.LimitOptions: 4716 percent = self._match(TokenType.PERCENT) 4717 rows = self._match_set((TokenType.ROW, TokenType.ROWS)) 4718 self._match_text_seq("ONLY") 4719 with_ties = self._match_text_seq("WITH", "TIES") 4720 return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties) 4721 4722 def _parse_limit( 4723 self, 4724 this: t.Optional[exp.Expression] = None, 4725 top: bool = False, 4726 skip_limit_token: bool = False, 4727 ) -> t.Optional[exp.Expression]: 4728 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4729 comments = self._prev_comments 4730 if top: 4731 limit_paren = self._match(TokenType.L_PAREN) 4732 expression = self._parse_term() if limit_paren else self._parse_number() 4733 4734 if limit_paren: 4735 self._match_r_paren() 4736 4737 limit_options = self._parse_limit_options() 4738 else: 4739 limit_options = None 4740 expression = self._parse_term() 4741 4742 if self._match(TokenType.COMMA): 4743 offset = expression 4744 expression = self._parse_term() 4745 else: 4746 offset = None 4747 4748 limit_exp = self.expression( 4749 exp.Limit, 4750 this=this, 4751 expression=expression, 4752 offset=offset, 4753 comments=comments, 4754 limit_options=limit_options, 4755 expressions=self._parse_limit_by(), 4756 ) 4757 4758 return limit_exp 4759 4760 if self._match(TokenType.FETCH): 4761 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4762 direction = self._prev.text.upper() if direction else "FIRST" 4763 4764 count = self._parse_field(tokens=self.FETCH_TOKENS) 4765 4766 return self.expression( 4767 exp.Fetch, 4768 direction=direction, 4769 count=count, 4770 limit_options=self._parse_limit_options(), 4771 ) 4772 4773 return this 4774 4775 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4776 if not self._match(TokenType.OFFSET): 4777 return this 4778 4779 count = self._parse_term() 4780 
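        # consume the optional ROW / ROWS noise word, as in ANSI `OFFSET 10 ROWS`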
self._match_set((TokenType.ROW, TokenType.ROWS)) 4781 4782 return self.expression( 4783 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4784 ) 4785 4786 def _can_parse_limit_or_offset(self) -> bool: 4787 if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False): 4788 return False 4789 4790 index = self._index 4791 result = bool( 4792 self._try_parse(self._parse_limit, retreat=True) 4793 or self._try_parse(self._parse_offset, retreat=True) 4794 ) 4795 self._retreat(index) 4796 return result 4797 4798 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4799 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4800 4801 def _parse_locks(self) -> t.List[exp.Lock]: 4802 locks = [] 4803 while True: 4804 update, key = None, None 4805 if self._match_text_seq("FOR", "UPDATE"): 4806 update = True 4807 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4808 "LOCK", "IN", "SHARE", "MODE" 4809 ): 4810 update = False 4811 elif self._match_text_seq("FOR", "KEY", "SHARE"): 4812 update, key = False, True 4813 elif self._match_text_seq("FOR", "NO", "KEY", "UPDATE"): 4814 update, key = True, True 4815 else: 4816 break 4817 4818 expressions = None 4819 if self._match_text_seq("OF"): 4820 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4821 4822 wait: t.Optional[bool | exp.Expression] = None 4823 if self._match_text_seq("NOWAIT"): 4824 wait = True 4825 elif self._match_text_seq("WAIT"): 4826 wait = self._parse_primary() 4827 elif self._match_text_seq("SKIP", "LOCKED"): 4828 wait = False 4829 4830 locks.append( 4831 self.expression( 4832 exp.Lock, update=update, expressions=expressions, wait=wait, key=key 4833 ) 4834 ) 4835 4836 return locks 4837 4838 def parse_set_operation( 4839 self, this: t.Optional[exp.Expression], consume_pipe: bool = False 4840 ) -> t.Optional[exp.Expression]: 4841 start = self._index 4842 _, side_token, kind_token = self._parse_join_parts() 4843 4844 side = side_token.text if side_token else None 4845 kind = kind_token.text if kind_token else None 4846 4847 if not self._match_set(self.SET_OPERATIONS): 4848 self._retreat(start) 4849 return None 4850 4851 token_type = self._prev.token_type 4852 4853 if token_type == TokenType.UNION: 4854 operation: t.Type[exp.SetOperation] = exp.Union 4855 elif token_type == TokenType.EXCEPT: 4856 operation = exp.Except 4857 else: 4858 operation = exp.Intersect 4859 4860 comments = self._prev.comments 4861 4862 if self._match(TokenType.DISTINCT): 4863 distinct: t.Optional[bool] = True 4864 elif self._match(TokenType.ALL): 4865 distinct = False 4866 else: 4867 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4868 if distinct is None: 4869 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4870 4871 by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq( 4872 "STRICT", "CORRESPONDING" 4873 ) 4874 if self._match_text_seq("CORRESPONDING"): 4875 by_name = True 4876 if not side and not kind: 4877 kind = "INNER" 4878 4879 on_column_list = None 4880 if by_name and self._match_texts(("ON", "BY")): 4881 on_column_list = self._parse_wrapped_csv(self._parse_column) 4882 4883 expression = self._parse_select( 4884 nested=True, parse_set_operation=False, consume_pipe=consume_pipe 4885 ) 4886 4887 return self.expression( 4888 operation, 4889 comments=comments, 4890 this=this, 4891 distinct=distinct, 4892 by_name=by_name, 4893 expression=expression, 4894 side=side, 4895 kind=kind, 4896 on=on_column_list, 4897 ) 4898 
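    # Illustrative note: set-operation operands are left-folded below, so for example
    #
    #   SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3
    #
    # parses as Union(this=Union(this=SELECT 1, expression=SELECT 2), expression=SELECT 3)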
    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        while this:
            setop = self.parse_set_operation(this)
            if not setop:
                break
            this = setop

        if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP:
            expression = this.expression

            if expression:
                for arg in self.SET_OP_MODIFIERS:
                    expr = expression.args.get(arg)
                    if expr:
                        this.set(arg, expr.pop())

        return this

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_alias(self._parse_assignment())

    def _parse_assignment(self) -> t.Optional[exp.Expression]:
        this = self._parse_disjunction()
        if not this and self._next and self._next.token_type in self.ASSIGNMENT:
            # This allows us to parse <non-identifier token> := <expr>
            this = exp.column(
                t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text)
            )

        while self._match_set(self.ASSIGNMENT):
            if isinstance(this, exp.Column) and len(this.parts) == 1:
                this = this.this

            this = self.expression(
                self.ASSIGNMENT[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=self._parse_assignment(),
            )

        return this

    def _parse_disjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION)

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        this = this or self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
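        # e.g. `x NOTNULL` is parsed into NOT (x IS NULL) just below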
4967 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4968 if self._match(TokenType.NOTNULL): 4969 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4970 this = self.expression(exp.Not, this=this) 4971 4972 if negate: 4973 this = self._negate_range(this) 4974 4975 if self._match(TokenType.IS): 4976 this = self._parse_is(this) 4977 4978 return this 4979 4980 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4981 if not this: 4982 return this 4983 4984 return self.expression(exp.Not, this=this) 4985 4986 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4987 index = self._index - 1 4988 negate = self._match(TokenType.NOT) 4989 4990 if self._match_text_seq("DISTINCT", "FROM"): 4991 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4992 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4993 4994 if self._match(TokenType.JSON): 4995 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 4996 4997 if self._match_text_seq("WITH"): 4998 _with = True 4999 elif self._match_text_seq("WITHOUT"): 5000 _with = False 5001 else: 5002 _with = None 5003 5004 unique = self._match(TokenType.UNIQUE) 5005 self._match_text_seq("KEYS") 5006 expression: t.Optional[exp.Expression] = self.expression( 5007 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 5008 ) 5009 else: 5010 expression = self._parse_primary() or self._parse_null() 5011 if not expression: 5012 self._retreat(index) 5013 return None 5014 5015 this = self.expression(exp.Is, this=this, expression=expression) 5016 return self.expression(exp.Not, this=this) if negate else this 5017 5018 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 5019 unnest = self._parse_unnest(with_alias=False) 5020 if unnest: 5021 this = self.expression(exp.In, this=this, unnest=unnest) 5022 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 5023 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 5024 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 5025 5026 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 5027 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 5028 else: 5029 this = self.expression(exp.In, this=this, expressions=expressions) 5030 5031 if matched_l_paren: 5032 self._match_r_paren(this) 5033 elif not self._match(TokenType.R_BRACKET, expression=this): 5034 self.raise_error("Expecting ]") 5035 else: 5036 this = self.expression(exp.In, this=this, field=self._parse_column()) 5037 5038 return this 5039 5040 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 5041 symmetric = None 5042 if self._match_text_seq("SYMMETRIC"): 5043 symmetric = True 5044 elif self._match_text_seq("ASYMMETRIC"): 5045 symmetric = False 5046 5047 low = self._parse_bitwise() 5048 self._match(TokenType.AND) 5049 high = self._parse_bitwise() 5050 5051 return self.expression( 5052 exp.Between, 5053 this=this, 5054 low=low, 5055 high=high, 5056 symmetric=symmetric, 5057 ) 5058 5059 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5060 if not self._match(TokenType.ESCAPE): 5061 return this 5062 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 5063 5064 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 5065 index = self._index 5066 5067 if not 
self._match(TokenType.INTERVAL) and match_interval: 5068 return None 5069 5070 if self._match(TokenType.STRING, advance=False): 5071 this = self._parse_primary() 5072 else: 5073 this = self._parse_term() 5074 5075 if not this or ( 5076 isinstance(this, exp.Column) 5077 and not this.table 5078 and not this.this.quoted 5079 and this.name.upper() == "IS" 5080 ): 5081 self._retreat(index) 5082 return None 5083 5084 unit = self._parse_function() or ( 5085 not self._match(TokenType.ALIAS, advance=False) 5086 and self._parse_var(any_token=True, upper=True) 5087 ) 5088 5089 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 5090 # each INTERVAL expression into this canonical form so it's easy to transpile 5091 if this and this.is_number: 5092 this = exp.Literal.string(this.to_py()) 5093 elif this and this.is_string: 5094 parts = exp.INTERVAL_STRING_RE.findall(this.name) 5095 if parts and unit: 5096 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 5097 unit = None 5098 self._retreat(self._index - 1) 5099 5100 if len(parts) == 1: 5101 this = exp.Literal.string(parts[0][0]) 5102 unit = self.expression(exp.Var, this=parts[0][1].upper()) 5103 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 5104 unit = self.expression( 5105 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 5106 ) 5107 5108 interval = self.expression(exp.Interval, this=this, unit=unit) 5109 5110 index = self._index 5111 self._match(TokenType.PLUS) 5112 5113 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 5114 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 5115 return self.expression( 5116 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 5117 ) 5118 5119 self._retreat(index) 5120 return interval 5121 5122 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 5123 this = self._parse_term() 5124 5125 while True: 5126 if self._match_set(self.BITWISE): 5127 this = self.expression( 5128 self.BITWISE[self._prev.token_type], 5129 this=this, 5130 expression=self._parse_term(), 5131 ) 5132 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 5133 this = self.expression( 5134 exp.DPipe, 5135 this=this, 5136 expression=self._parse_term(), 5137 safe=not self.dialect.STRICT_STRING_CONCAT, 5138 ) 5139 elif self._match(TokenType.DQMARK): 5140 this = self.expression( 5141 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 5142 ) 5143 elif self._match_pair(TokenType.LT, TokenType.LT): 5144 this = self.expression( 5145 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 5146 ) 5147 elif self._match_pair(TokenType.GT, TokenType.GT): 5148 this = self.expression( 5149 exp.BitwiseRightShift, this=this, expression=self._parse_term() 5150 ) 5151 else: 5152 break 5153 5154 return this 5155 5156 def _parse_term(self) -> t.Optional[exp.Expression]: 5157 this = self._parse_factor() 5158 5159 while self._match_set(self.TERM): 5160 klass = self.TERM[self._prev.token_type] 5161 comments = self._prev_comments 5162 expression = self._parse_factor() 5163 5164 this = self.expression(klass, this=this, comments=comments, expression=expression) 5165 5166 if isinstance(this, exp.Collate): 5167 expr = this.expression 5168 5169 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 5170 # fallback to Identifier / Var 5171 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 5172 ident = expr.this 5173 if 
isinstance(ident, exp.Identifier): 5174 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 5175 5176 return this 5177 5178 def _parse_factor(self) -> t.Optional[exp.Expression]: 5179 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 5180 this = parse_method() 5181 5182 while self._match_set(self.FACTOR): 5183 klass = self.FACTOR[self._prev.token_type] 5184 comments = self._prev_comments 5185 expression = parse_method() 5186 5187 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 5188 self._retreat(self._index - 1) 5189 return this 5190 5191 this = self.expression(klass, this=this, comments=comments, expression=expression) 5192 5193 if isinstance(this, exp.Div): 5194 this.args["typed"] = self.dialect.TYPED_DIVISION 5195 this.args["safe"] = self.dialect.SAFE_DIVISION 5196 5197 return this 5198 5199 def _parse_exponent(self) -> t.Optional[exp.Expression]: 5200 return self._parse_tokens(self._parse_unary, self.EXPONENT) 5201 5202 def _parse_unary(self) -> t.Optional[exp.Expression]: 5203 if self._match_set(self.UNARY_PARSERS): 5204 return self.UNARY_PARSERS[self._prev.token_type](self) 5205 return self._parse_at_time_zone(self._parse_type()) 5206 5207 def _parse_type( 5208 self, parse_interval: bool = True, fallback_to_identifier: bool = False 5209 ) -> t.Optional[exp.Expression]: 5210 interval = parse_interval and self._parse_interval() 5211 if interval: 5212 return interval 5213 5214 index = self._index 5215 data_type = self._parse_types(check_func=True, allow_identifiers=False) 5216 5217 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 5218 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 5219 if isinstance(data_type, exp.Cast): 5220 # This constructor can contain ops directly after it, for instance struct unnesting: 5221 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).* 5222 return self._parse_column_ops(data_type) 5223 5224 if data_type: 5225 index2 = self._index 5226 this = self._parse_primary() 5227 5228 if isinstance(this, exp.Literal): 5229 literal = this.name 5230 this = self._parse_column_ops(this) 5231 5232 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 5233 if parser: 5234 return parser(self, this, data_type) 5235 5236 if ( 5237 self.ZONE_AWARE_TIMESTAMP_CONSTRUCTOR 5238 and data_type.is_type(exp.DataType.Type.TIMESTAMP) 5239 and TIME_ZONE_RE.search(literal) 5240 ): 5241 data_type = exp.DataType.build("TIMESTAMPTZ") 5242 5243 return self.expression(exp.Cast, this=this, to=data_type) 5244 5245 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 5246 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 5247 # 5248 # If the index difference here is greater than 1, that means the parser itself must have 5249 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 5250 # 5251 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 5252 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 5253 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 5254 # DECIMAL(38, 0) in order to facilitate the data type's transpilation. 5255 # 5256 # In these cases, we don't really want to return the converted type, but instead retreat 5257 # and try to parse a Column or Identifier in the section below.
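# [Annotation, not part of the sqlglot source] A minimal sketch of the token
# arithmetic described above, where `index` was saved before the type was
# parsed and `index2` right after it:
#
#   DECIMAL ( 38 , 0 )  ->  several tokens consumed, so index2 - index > 1:
#                           the precision/scale came from the input itself and
#                           the parsed DataType is kept (first branch below)
#   DECIMAL             ->  one token consumed, so index2 - index == 1: the
#                           expressions were injected by a TYPE_CONVERTERS
#                           callable, so we retreat and re-parse the token as
#                           a Column/Identifier instead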
5258 if data_type.expressions and index2 - index > 1: 5259 self._retreat(index2) 5260 return self._parse_column_ops(data_type) 5261 5262 self._retreat(index) 5263 5264 if fallback_to_identifier: 5265 return self._parse_id_var() 5266 5267 this = self._parse_column() 5268 return this and self._parse_column_ops(this) 5269 5270 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 5271 this = self._parse_type() 5272 if not this: 5273 return None 5274 5275 if isinstance(this, exp.Column) and not this.table: 5276 this = exp.var(this.name.upper()) 5277 5278 return self.expression( 5279 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 5280 ) 5281 5282 def _parse_user_defined_type(self, identifier: exp.Identifier) -> t.Optional[exp.Expression]: 5283 type_name = identifier.name 5284 5285 while self._match(TokenType.DOT): 5286 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 5287 5288 return exp.DataType.build(type_name, dialect=self.dialect, udt=True) 5289 5290 def _parse_types( 5291 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 5292 ) -> t.Optional[exp.Expression]: 5293 index = self._index 5294 5295 this: t.Optional[exp.Expression] = None 5296 prefix = self._match_text_seq("SYSUDTLIB", ".") 5297 5298 if not self._match_set(self.TYPE_TOKENS): 5299 identifier = allow_identifiers and self._parse_id_var( 5300 any_token=False, tokens=(TokenType.VAR,) 5301 ) 5302 if isinstance(identifier, exp.Identifier): 5303 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 5304 5305 if len(tokens) != 1: 5306 self.raise_error("Unexpected identifier", self._prev) 5307 5308 if tokens[0].token_type in self.TYPE_TOKENS: 5309 self._prev = tokens[0] 5310 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 5311 this = self._parse_user_defined_type(identifier) 5312 else: 5313 self._retreat(self._index - 1) 5314 return None 5315 else: 5316 return None 5317 5318 type_token = self._prev.token_type 5319 5320 if type_token == TokenType.PSEUDO_TYPE: 5321 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 5322 5323 if type_token == TokenType.OBJECT_IDENTIFIER: 5324 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 5325 5326 # https://materialize.com/docs/sql/types/map/ 5327 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 5328 key_type = self._parse_types( 5329 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5330 ) 5331 if not self._match(TokenType.FARROW): 5332 self._retreat(index) 5333 return None 5334 5335 value_type = self._parse_types( 5336 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5337 ) 5338 if not self._match(TokenType.R_BRACKET): 5339 self._retreat(index) 5340 return None 5341 5342 return exp.DataType( 5343 this=exp.DataType.Type.MAP, 5344 expressions=[key_type, value_type], 5345 nested=True, 5346 prefix=prefix, 5347 ) 5348 5349 nested = type_token in self.NESTED_TYPE_TOKENS 5350 is_struct = type_token in self.STRUCT_TYPE_TOKENS 5351 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 5352 expressions = None 5353 maybe_func = False 5354 5355 if self._match(TokenType.L_PAREN): 5356 if is_struct: 5357 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5358 elif nested: 5359 expressions = self._parse_csv( 5360 lambda: self._parse_types( 5361 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5362 ) 5363 ) 5364 if type_token == TokenType.NULLABLE 
and len(expressions) == 1: 5365 this = expressions[0] 5366 this.set("nullable", True) 5367 self._match_r_paren() 5368 return this 5369 elif type_token in self.ENUM_TYPE_TOKENS: 5370 expressions = self._parse_csv(self._parse_equality) 5371 elif is_aggregate: 5372 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 5373 any_token=False, tokens=(TokenType.VAR, TokenType.ANY) 5374 ) 5375 if not func_or_ident: 5376 return None 5377 expressions = [func_or_ident] 5378 if self._match(TokenType.COMMA): 5379 expressions.extend( 5380 self._parse_csv( 5381 lambda: self._parse_types( 5382 check_func=check_func, 5383 schema=schema, 5384 allow_identifiers=allow_identifiers, 5385 ) 5386 ) 5387 ) 5388 else: 5389 expressions = self._parse_csv(self._parse_type_size) 5390 5391 # https://docs.snowflake.com/en/sql-reference/data-types-vector 5392 if type_token == TokenType.VECTOR and len(expressions) == 2: 5393 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 5394 5395 if not expressions or not self._match(TokenType.R_PAREN): 5396 self._retreat(index) 5397 return None 5398 5399 maybe_func = True 5400 5401 values: t.Optional[t.List[exp.Expression]] = None 5402 5403 if nested and self._match(TokenType.LT): 5404 if is_struct: 5405 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5406 else: 5407 expressions = self._parse_csv( 5408 lambda: self._parse_types( 5409 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5410 ) 5411 ) 5412 5413 if not self._match(TokenType.GT): 5414 self.raise_error("Expecting >") 5415 5416 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 5417 values = self._parse_csv(self._parse_assignment) 5418 if not values and is_struct: 5419 values = None 5420 self._retreat(self._index - 1) 5421 else: 5422 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 5423 5424 if type_token in self.TIMESTAMPS: 5425 if self._match_text_seq("WITH", "TIME", "ZONE"): 5426 maybe_func = False 5427 tz_type = ( 5428 exp.DataType.Type.TIMETZ 5429 if type_token in self.TIMES 5430 else exp.DataType.Type.TIMESTAMPTZ 5431 ) 5432 this = exp.DataType(this=tz_type, expressions=expressions) 5433 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 5434 maybe_func = False 5435 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 5436 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 5437 maybe_func = False 5438 elif type_token == TokenType.INTERVAL: 5439 unit = self._parse_var(upper=True) 5440 if unit: 5441 if self._match_text_seq("TO"): 5442 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 5443 5444 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 5445 else: 5446 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 5447 elif type_token == TokenType.VOID: 5448 this = exp.DataType(this=exp.DataType.Type.NULL) 5449 5450 if maybe_func and check_func: 5451 index2 = self._index 5452 peek = self._parse_string() 5453 5454 if not peek: 5455 self._retreat(index) 5456 return None 5457 5458 self._retreat(index2) 5459 5460 if not this: 5461 if self._match_text_seq("UNSIGNED"): 5462 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 5463 if not unsigned_type_token: 5464 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 5465 5466 type_token = unsigned_type_token or type_token 5467 5468 this = exp.DataType( 5469 this=exp.DataType.Type[type_token.value], 5470 
expressions=expressions, 5471 nested=nested, 5472 prefix=prefix, 5473 ) 5474 5475 # Empty arrays/structs are allowed 5476 if values is not None: 5477 cls = exp.Struct if is_struct else exp.Array 5478 this = exp.cast(cls(expressions=values), this, copy=False) 5479 5480 elif expressions: 5481 this.set("expressions", expressions) 5482 5483 # https://materialize.com/docs/sql/types/list/#type-name 5484 while self._match(TokenType.LIST): 5485 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 5486 5487 index = self._index 5488 5489 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 5490 matched_array = self._match(TokenType.ARRAY) 5491 5492 while self._curr: 5493 datatype_token = self._prev.token_type 5494 matched_l_bracket = self._match(TokenType.L_BRACKET) 5495 5496 if (not matched_l_bracket and not matched_array) or ( 5497 datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET) 5498 ): 5499 # Postgres allows casting empty arrays such as ARRAY[]::INT[], 5500 # not to be confused with the fixed size array parsing 5501 break 5502 5503 matched_array = False 5504 values = self._parse_csv(self._parse_assignment) or None 5505 if ( 5506 values 5507 and not schema 5508 and ( 5509 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 5510 ) 5511 ): 5512 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 5513 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 5514 self._retreat(index) 5515 break 5516 5517 this = exp.DataType( 5518 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 5519 ) 5520 self._match(TokenType.R_BRACKET) 5521 5522 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 5523 converter = self.TYPE_CONVERTERS.get(this.this) 5524 if converter: 5525 this = converter(t.cast(exp.DataType, this)) 5526 5527 return this 5528 5529 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 5530 index = self._index 5531 5532 if ( 5533 self._curr 5534 and self._next 5535 and self._curr.token_type in self.TYPE_TOKENS 5536 and self._next.token_type in self.TYPE_TOKENS 5537 ): 5538 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 5539 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 5540 this = self._parse_id_var() 5541 else: 5542 this = ( 5543 self._parse_type(parse_interval=False, fallback_to_identifier=True) 5544 or self._parse_id_var() 5545 ) 5546 5547 self._match(TokenType.COLON) 5548 5549 if ( 5550 type_required 5551 and not isinstance(this, exp.DataType) 5552 and not self._match_set(self.TYPE_TOKENS, advance=False) 5553 ): 5554 self._retreat(index) 5555 return self._parse_types() 5556 5557 return self._parse_column_def(this) 5558 5559 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5560 if not self._match_text_seq("AT", "TIME", "ZONE"): 5561 return this 5562 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5563 5564 def _parse_column(self) -> t.Optional[exp.Expression]: 5565 this = self._parse_column_reference() 5566 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5567 5568 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5569 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5570 5571 return column 5572 5573 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5574 this = self._parse_field() 5575 if ( 5576 not this 5577 and self._match(TokenType.VALUES, advance=False) 5578 and self.VALUES_FOLLOWED_BY_PAREN 5579 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5580 ): 5581 this = self._parse_id_var() 5582 5583 if isinstance(this, exp.Identifier): 5584 # We bubble up comments from the Identifier to the Column 5585 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5586 5587 return this 5588 5589 def _parse_colon_as_variant_extract( 5590 self, this: t.Optional[exp.Expression] 5591 ) -> t.Optional[exp.Expression]: 5592 casts = [] 5593 json_path = [] 5594 escape = None 5595 5596 while self._match(TokenType.COLON): 5597 start_index = self._index 5598 5599 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5600 path = self._parse_column_ops( 5601 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5602 ) 5603 5604 # The cast :: operator has a lower precedence than the extraction operator :, so 5605 # we rearrange the AST appropriately to avoid casting the JSON path 5606 while isinstance(path, exp.Cast): 5607 casts.append(path.to) 5608 path = path.this 5609 5610 if casts: 5611 dcolon_offset = next( 5612 i 5613 for i, t in enumerate(self._tokens[start_index:]) 5614 if t.token_type == TokenType.DCOLON 5615 ) 5616 end_token = self._tokens[start_index + dcolon_offset - 1] 5617 else: 5618 end_token = self._prev 5619 5620 if path: 5621 # Escape single quotes from Snowflake's colon extraction (e.g. 
col:"a'b") as 5622 # it'll roundtrip to a string literal in GET_PATH 5623 if isinstance(path, exp.Identifier) and path.quoted: 5624 escape = True 5625 5626 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5627 5628 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5629 # Databricks transforms it back to the colon/dot notation 5630 if json_path: 5631 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5632 5633 if json_path_expr: 5634 json_path_expr.set("escape", escape) 5635 5636 this = self.expression( 5637 exp.JSONExtract, 5638 this=this, 5639 expression=json_path_expr, 5640 variant_extract=True, 5641 requires_json=self.JSON_EXTRACT_REQUIRES_JSON_EXPRESSION, 5642 ) 5643 5644 while casts: 5645 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5646 5647 return this 5648 5649 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5650 return self._parse_types() 5651 5652 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5653 this = self._parse_bracket(this) 5654 5655 while self._match_set(self.COLUMN_OPERATORS): 5656 op_token = self._prev.token_type 5657 op = self.COLUMN_OPERATORS.get(op_token) 5658 5659 if op_token in self.CAST_COLUMN_OPERATORS: 5660 field = self._parse_dcolon() 5661 if not field: 5662 self.raise_error("Expected type") 5663 elif op and self._curr: 5664 field = self._parse_column_reference() or self._parse_bracket() 5665 if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False): 5666 field = self._parse_column_ops(field) 5667 else: 5668 field = self._parse_field(any_token=True, anonymous_func=True) 5669 5670 # Function calls can be qualified, e.g., x.y.FOO() 5671 # This converts the final AST to a series of Dots leading to the function call 5672 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5673 if isinstance(field, (exp.Func, exp.Window)) and this: 5674 this = this.transform( 5675 lambda n: n.to_dot(include_dots=False) if isinstance(n, exp.Column) else n 5676 ) 5677 5678 if op: 5679 this = op(self, this, field) 5680 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5681 this = self.expression( 5682 exp.Column, 5683 comments=this.comments, 5684 this=field, 5685 table=this.this, 5686 db=this.args.get("table"), 5687 catalog=this.args.get("db"), 5688 ) 5689 elif isinstance(field, exp.Window): 5690 # Move the exp.Dot's to the window's function 5691 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5692 field.set("this", window_func) 5693 this = field 5694 else: 5695 this = self.expression(exp.Dot, this=this, expression=field) 5696 5697 if field and field.comments: 5698 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5699 5700 this = self._parse_bracket(this) 5701 5702 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5703 5704 def _parse_paren(self) -> t.Optional[exp.Expression]: 5705 if not self._match(TokenType.L_PAREN): 5706 return None 5707 5708 comments = self._prev_comments 5709 query = self._parse_select() 5710 5711 if query: 5712 expressions = [query] 5713 else: 5714 expressions = self._parse_expressions() 5715 5716 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5717 5718 if not this and self._match(TokenType.R_PAREN, advance=False): 5719 this = self.expression(exp.Tuple) 5720 elif isinstance(this, 
exp.UNWRAPPED_QUERIES): 5721 this = self._parse_subquery(this=this, parse_alias=False) 5722 elif isinstance(this, exp.Subquery): 5723 this = self._parse_subquery(this=self._parse_set_operations(this), parse_alias=False) 5724 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5725 this = self.expression(exp.Tuple, expressions=expressions) 5726 else: 5727 this = self.expression(exp.Paren, this=this) 5728 5729 if this: 5730 this.add_comments(comments) 5731 5732 self._match_r_paren(expression=this) 5733 return this 5734 5735 def _parse_primary(self) -> t.Optional[exp.Expression]: 5736 if self._match_set(self.PRIMARY_PARSERS): 5737 token_type = self._prev.token_type 5738 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5739 5740 if token_type == TokenType.STRING: 5741 expressions = [primary] 5742 while self._match(TokenType.STRING): 5743 expressions.append(exp.Literal.string(self._prev.text)) 5744 5745 if len(expressions) > 1: 5746 return self.expression(exp.Concat, expressions=expressions) 5747 5748 return primary 5749 5750 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5751 return exp.Literal.number(f"0.{self._prev.text}") 5752 5753 return self._parse_paren() 5754 5755 def _parse_field( 5756 self, 5757 any_token: bool = False, 5758 tokens: t.Optional[t.Collection[TokenType]] = None, 5759 anonymous_func: bool = False, 5760 ) -> t.Optional[exp.Expression]: 5761 if anonymous_func: 5762 field = ( 5763 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5764 or self._parse_primary() 5765 ) 5766 else: 5767 field = self._parse_primary() or self._parse_function( 5768 anonymous=anonymous_func, any_token=any_token 5769 ) 5770 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5771 5772 def _parse_function( 5773 self, 5774 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5775 anonymous: bool = False, 5776 optional_parens: bool = True, 5777 any_token: bool = False, 5778 ) -> t.Optional[exp.Expression]: 5779 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5780 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5781 fn_syntax = False 5782 if ( 5783 self._match(TokenType.L_BRACE, advance=False) 5784 and self._next 5785 and self._next.text.upper() == "FN" 5786 ): 5787 self._advance(2) 5788 fn_syntax = True 5789 5790 func = self._parse_function_call( 5791 functions=functions, 5792 anonymous=anonymous, 5793 optional_parens=optional_parens, 5794 any_token=any_token, 5795 ) 5796 5797 if fn_syntax: 5798 self._match(TokenType.R_BRACE) 5799 5800 return func 5801 5802 def _parse_function_call( 5803 self, 5804 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5805 anonymous: bool = False, 5806 optional_parens: bool = True, 5807 any_token: bool = False, 5808 ) -> t.Optional[exp.Expression]: 5809 if not self._curr: 5810 return None 5811 5812 comments = self._curr.comments 5813 prev = self._prev 5814 token = self._curr 5815 token_type = self._curr.token_type 5816 this = self._curr.text 5817 upper = this.upper() 5818 5819 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5820 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5821 self._advance() 5822 return self._parse_window(parser(self)) 5823 5824 if not self._next or self._next.token_type != TokenType.L_PAREN: 5825 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5826 self._advance() 5827 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5828 5829 return None 5830 5831 if 
any_token: 5832 if token_type in self.RESERVED_TOKENS: 5833 return None 5834 elif token_type not in self.FUNC_TOKENS: 5835 return None 5836 5837 self._advance(2) 5838 5839 parser = self.FUNCTION_PARSERS.get(upper) 5840 if parser and not anonymous: 5841 this = parser(self) 5842 else: 5843 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5844 5845 if subquery_predicate: 5846 expr = None 5847 if self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5848 expr = self._parse_select() 5849 self._match_r_paren() 5850 elif prev and prev.token_type in (TokenType.LIKE, TokenType.ILIKE): 5851 # Backtrack one token since we've consumed the L_PAREN here. Instead, we'd like 5852 # to parse "LIKE [ANY | ALL] (...)" as a whole into an exp.Tuple or exp.Paren 5853 self._advance(-1) 5854 expr = self._parse_bitwise() 5855 5856 if expr: 5857 return self.expression(subquery_predicate, comments=comments, this=expr) 5858 5859 if functions is None: 5860 functions = self.FUNCTIONS 5861 5862 function = functions.get(upper) 5863 known_function = function and not anonymous 5864 5865 alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5866 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5867 5868 post_func_comments = self._curr and self._curr.comments 5869 if known_function and post_func_comments: 5870 # If the user-inputted comment "/* sqlglot.anonymous */" is following the function 5871 # call we'll construct it as exp.Anonymous, even if it's "known" 5872 if any( 5873 comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS) 5874 for comment in post_func_comments 5875 ): 5876 known_function = False 5877 5878 if alias and known_function: 5879 args = self._kv_to_prop_eq(args) 5880 5881 if known_function: 5882 func_builder = t.cast(t.Callable, function) 5883 5884 if "dialect" in func_builder.__code__.co_varnames: 5885 func = func_builder(args, dialect=self.dialect) 5886 else: 5887 func = func_builder(args) 5888 5889 func = self.validate_expression(func, args) 5890 if self.dialect.PRESERVE_ORIGINAL_NAMES: 5891 func.meta["name"] = this 5892 5893 this = func 5894 else: 5895 if token_type == TokenType.IDENTIFIER: 5896 this = exp.Identifier(this=this, quoted=True).update_positions(token) 5897 5898 this = self.expression(exp.Anonymous, this=this, expressions=args) 5899 this = this.update_positions(token) 5900 5901 if isinstance(this, exp.Expression): 5902 this.add_comments(comments) 5903 5904 self._match_r_paren(this) 5905 return self._parse_window(this) 5906 5907 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5908 return expression 5909 5910 def _kv_to_prop_eq( 5911 self, expressions: t.List[exp.Expression], parse_map: bool = False 5912 ) -> t.List[exp.Expression]: 5913 transformed = [] 5914 5915 for index, e in enumerate(expressions): 5916 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5917 if isinstance(e, exp.Alias): 5918 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5919 5920 if not isinstance(e, exp.PropertyEQ): 5921 e = self.expression( 5922 exp.PropertyEQ, 5923 this=e.this if parse_map else exp.to_identifier(e.this.name), 5924 expression=e.expression, 5925 ) 5926 5927 if isinstance(e.this, exp.Column): 5928 e.this.replace(e.this.this) 5929 else: 5930 e = self._to_prop_eq(e, index) 5931 5932 transformed.append(e) 5933 5934 return transformed 5935 5936 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 5937 return self._parse_statement() 5938 5939 def 
_parse_function_parameter(self) -> t.Optional[exp.Expression]: 5940 return self._parse_column_def(this=self._parse_id_var(), computed_column=False) 5941 5942 def _parse_user_defined_function( 5943 self, kind: t.Optional[TokenType] = None 5944 ) -> t.Optional[exp.Expression]: 5945 this = self._parse_table_parts(schema=True) 5946 5947 if not self._match(TokenType.L_PAREN): 5948 return this 5949 5950 expressions = self._parse_csv(self._parse_function_parameter) 5951 self._match_r_paren() 5952 return self.expression( 5953 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5954 ) 5955 5956 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5957 literal = self._parse_primary() 5958 if literal: 5959 return self.expression(exp.Introducer, this=token.text, expression=literal) 5960 5961 return self._identifier_expression(token) 5962 5963 def _parse_session_parameter(self) -> exp.SessionParameter: 5964 kind = None 5965 this = self._parse_id_var() or self._parse_primary() 5966 5967 if this and self._match(TokenType.DOT): 5968 kind = this.name 5969 this = self._parse_var() or self._parse_primary() 5970 5971 return self.expression(exp.SessionParameter, this=this, kind=kind) 5972 5973 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5974 return self._parse_id_var() 5975 5976 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5977 index = self._index 5978 5979 if self._match(TokenType.L_PAREN): 5980 expressions = t.cast( 5981 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5982 ) 5983 5984 if not self._match(TokenType.R_PAREN): 5985 self._retreat(index) 5986 else: 5987 expressions = [self._parse_lambda_arg()] 5988 5989 if self._match_set(self.LAMBDAS): 5990 return self.LAMBDAS[self._prev.token_type](self, expressions) 5991 5992 self._retreat(index) 5993 5994 this: t.Optional[exp.Expression] 5995 5996 if self._match(TokenType.DISTINCT): 5997 this = self.expression( 5998 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5999 ) 6000 else: 6001 this = self._parse_select_or_expression(alias=alias) 6002 6003 return self._parse_limit( 6004 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 6005 ) 6006 6007 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6008 index = self._index 6009 if not self._match(TokenType.L_PAREN): 6010 return this 6011 6012 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 6013 # expr can be of both types 6014 if self._match_set(self.SELECT_START_TOKENS): 6015 self._retreat(index) 6016 return this 6017 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 6018 self._match_r_paren() 6019 return self.expression(exp.Schema, this=this, expressions=args) 6020 6021 def _parse_field_def(self) -> t.Optional[exp.Expression]: 6022 return self._parse_column_def(self._parse_field(any_token=True)) 6023 6024 def _parse_column_def( 6025 self, this: t.Optional[exp.Expression], computed_column: bool = True 6026 ) -> t.Optional[exp.Expression]: 6027 # column defs are not really columns, they're identifiers 6028 if isinstance(this, exp.Column): 6029 this = this.this 6030 6031 if not computed_column: 6032 self._match(TokenType.ALIAS) 6033 6034 kind = self._parse_types(schema=True) 6035 6036 if self._match_text_seq("FOR", "ORDINALITY"): 6037 return self.expression(exp.ColumnDef, this=this, ordinality=True) 6038 6039 constraints: t.List[exp.Expression] = [] 6040 6041 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 6042 ("ALIAS", "MATERIALIZED") 6043 ): 6044 persisted = self._prev.text.upper() == "MATERIALIZED" 6045 constraint_kind = exp.ComputedColumnConstraint( 6046 this=self._parse_assignment(), 6047 persisted=persisted or self._match_text_seq("PERSISTED"), 6048 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 6049 ) 6050 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 6051 elif ( 6052 kind 6053 and self._match(TokenType.ALIAS, advance=False) 6054 and ( 6055 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 6056 or (self._next and self._next.token_type == TokenType.L_PAREN) 6057 ) 6058 ): 6059 self._advance() 6060 constraints.append( 6061 self.expression( 6062 exp.ColumnConstraint, 6063 kind=exp.ComputedColumnConstraint( 6064 this=self._parse_disjunction(), 6065 persisted=self._match_texts(("STORED", "VIRTUAL")) 6066 and self._prev.text.upper() == "STORED", 6067 ), 6068 ) 6069 ) 6070 6071 while True: 6072 constraint = self._parse_column_constraint() 6073 if not constraint: 6074 break 6075 constraints.append(constraint) 6076 6077 if not kind and not constraints: 6078 return this 6079 6080 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 6081 6082 def _parse_auto_increment( 6083 self, 6084 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 6085 start = None 6086 increment = None 6087 order = None 6088 6089 if self._match(TokenType.L_PAREN, advance=False): 6090 args = self._parse_wrapped_csv(self._parse_bitwise) 6091 start = seq_get(args, 0) 6092 increment = seq_get(args, 1) 6093 elif self._match_text_seq("START"): 6094 start = self._parse_bitwise() 6095 self._match_text_seq("INCREMENT") 6096 increment = self._parse_bitwise() 6097 if self._match_text_seq("ORDER"): 6098 order = True 6099 elif self._match_text_seq("NOORDER"): 6100 order = False 6101 6102 if start and increment: 6103 return exp.GeneratedAsIdentityColumnConstraint( 6104 start=start, increment=increment, this=False, order=order 6105 ) 6106 6107 return exp.AutoIncrementColumnConstraint() 6108 6109 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 6110 if not self._match_text_seq("REFRESH"): 6111 self._retreat(self._index - 1) 6112 return None 6113 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 6114 6115 def _parse_compress(self) -> exp.CompressColumnConstraint: 6116 if 
self._match(TokenType.L_PAREN, advance=False): 6117 return self.expression( 6118 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 6119 ) 6120 6121 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 6122 6123 def _parse_generated_as_identity( 6124 self, 6125 ) -> ( 6126 exp.GeneratedAsIdentityColumnConstraint 6127 | exp.ComputedColumnConstraint 6128 | exp.GeneratedAsRowColumnConstraint 6129 ): 6130 if self._match_text_seq("BY", "DEFAULT"): 6131 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 6132 this = self.expression( 6133 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 6134 ) 6135 else: 6136 self._match_text_seq("ALWAYS") 6137 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 6138 6139 self._match(TokenType.ALIAS) 6140 6141 if self._match_text_seq("ROW"): 6142 start = self._match_text_seq("START") 6143 if not start: 6144 self._match(TokenType.END) 6145 hidden = self._match_text_seq("HIDDEN") 6146 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 6147 6148 identity = self._match_text_seq("IDENTITY") 6149 6150 if self._match(TokenType.L_PAREN): 6151 if self._match(TokenType.START_WITH): 6152 this.set("start", self._parse_bitwise()) 6153 if self._match_text_seq("INCREMENT", "BY"): 6154 this.set("increment", self._parse_bitwise()) 6155 if self._match_text_seq("MINVALUE"): 6156 this.set("minvalue", self._parse_bitwise()) 6157 if self._match_text_seq("MAXVALUE"): 6158 this.set("maxvalue", self._parse_bitwise()) 6159 6160 if self._match_text_seq("CYCLE"): 6161 this.set("cycle", True) 6162 elif self._match_text_seq("NO", "CYCLE"): 6163 this.set("cycle", False) 6164 6165 if not identity: 6166 this.set("expression", self._parse_range()) 6167 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 6168 args = self._parse_csv(self._parse_bitwise) 6169 this.set("start", seq_get(args, 0)) 6170 this.set("increment", seq_get(args, 1)) 6171 6172 self._match_r_paren() 6173 6174 return this 6175 6176 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 6177 self._match_text_seq("LENGTH") 6178 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 6179 6180 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 6181 if self._match_text_seq("NULL"): 6182 return self.expression(exp.NotNullColumnConstraint) 6183 if self._match_text_seq("CASESPECIFIC"): 6184 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 6185 if self._match_text_seq("FOR", "REPLICATION"): 6186 return self.expression(exp.NotForReplicationColumnConstraint) 6187 6188 # Unconsume the `NOT` token 6189 self._retreat(self._index - 1) 6190 return None 6191 6192 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 6193 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 6194 6195 procedure_option_follows = ( 6196 self._match(TokenType.WITH, advance=False) 6197 and self._next 6198 and self._next.text.upper() in self.PROCEDURE_OPTIONS 6199 ) 6200 6201 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 6202 return self.expression( 6203 exp.ColumnConstraint, 6204 this=this, 6205 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 6206 ) 6207 6208 return this 6209 6210 def _parse_constraint(self) -> t.Optional[exp.Expression]: 6211 if not self._match(TokenType.CONSTRAINT): 6212 return 
self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 6213 6214 return self.expression( 6215 exp.Constraint, 6216 this=self._parse_id_var(), 6217 expressions=self._parse_unnamed_constraints(), 6218 ) 6219 6220 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 6221 constraints = [] 6222 while True: 6223 constraint = self._parse_unnamed_constraint() or self._parse_function() 6224 if not constraint: 6225 break 6226 constraints.append(constraint) 6227 6228 return constraints 6229 6230 def _parse_unnamed_constraint( 6231 self, constraints: t.Optional[t.Collection[str]] = None 6232 ) -> t.Optional[exp.Expression]: 6233 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 6234 constraints or self.CONSTRAINT_PARSERS 6235 ): 6236 return None 6237 6238 constraint = self._prev.text.upper() 6239 if constraint not in self.CONSTRAINT_PARSERS: 6240 self.raise_error(f"No parser found for schema constraint {constraint}.") 6241 6242 return self.CONSTRAINT_PARSERS[constraint](self) 6243 6244 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 6245 return self._parse_id_var(any_token=False) 6246 6247 def _parse_unique(self) -> exp.UniqueColumnConstraint: 6248 self._match_texts(("KEY", "INDEX")) 6249 return self.expression( 6250 exp.UniqueColumnConstraint, 6251 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 6252 this=self._parse_schema(self._parse_unique_key()), 6253 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 6254 on_conflict=self._parse_on_conflict(), 6255 options=self._parse_key_constraint_options(), 6256 ) 6257 6258 def _parse_key_constraint_options(self) -> t.List[str]: 6259 options = [] 6260 while True: 6261 if not self._curr: 6262 break 6263 6264 if self._match(TokenType.ON): 6265 action = None 6266 on = self._advance_any() and self._prev.text 6267 6268 if self._match_text_seq("NO", "ACTION"): 6269 action = "NO ACTION" 6270 elif self._match_text_seq("CASCADE"): 6271 action = "CASCADE" 6272 elif self._match_text_seq("RESTRICT"): 6273 action = "RESTRICT" 6274 elif self._match_pair(TokenType.SET, TokenType.NULL): 6275 action = "SET NULL" 6276 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 6277 action = "SET DEFAULT" 6278 else: 6279 self.raise_error("Invalid key constraint") 6280 6281 options.append(f"ON {on} {action}") 6282 else: 6283 var = self._parse_var_from_options( 6284 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 6285 ) 6286 if not var: 6287 break 6288 options.append(var.name) 6289 6290 return options 6291 6292 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 6293 if match and not self._match(TokenType.REFERENCES): 6294 return None 6295 6296 expressions = None 6297 this = self._parse_table(schema=True) 6298 options = self._parse_key_constraint_options() 6299 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 6300 6301 def _parse_foreign_key(self) -> exp.ForeignKey: 6302 expressions = ( 6303 self._parse_wrapped_id_vars() 6304 if not self._match(TokenType.REFERENCES, advance=False) 6305 else None 6306 ) 6307 reference = self._parse_references() 6308 on_options = {} 6309 6310 while self._match(TokenType.ON): 6311 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 6312 self.raise_error("Expected DELETE or UPDATE") 6313 6314 kind = self._prev.text.lower() 6315 6316 if self._match_text_seq("NO", "ACTION"): 6317 action = "NO ACTION" 6318 elif self._match(TokenType.SET): 6319 
self._match_set((TokenType.NULL, TokenType.DEFAULT)) 6320 action = "SET " + self._prev.text.upper() 6321 else: 6322 self._advance() 6323 action = self._prev.text.upper() 6324 6325 on_options[kind] = action 6326 6327 return self.expression( 6328 exp.ForeignKey, 6329 expressions=expressions, 6330 reference=reference, 6331 options=self._parse_key_constraint_options(), 6332 **on_options, # type: ignore 6333 ) 6334 6335 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 6336 return self._parse_ordered() or self._parse_field() 6337 6338 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 6339 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 6340 self._retreat(self._index - 1) 6341 return None 6342 6343 id_vars = self._parse_wrapped_id_vars() 6344 return self.expression( 6345 exp.PeriodForSystemTimeConstraint, 6346 this=seq_get(id_vars, 0), 6347 expression=seq_get(id_vars, 1), 6348 ) 6349 6350 def _parse_primary_key( 6351 self, wrapped_optional: bool = False, in_props: bool = False 6352 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 6353 desc = ( 6354 self._match_set((TokenType.ASC, TokenType.DESC)) 6355 and self._prev.token_type == TokenType.DESC 6356 ) 6357 6358 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 6359 return self.expression( 6360 exp.PrimaryKeyColumnConstraint, 6361 desc=desc, 6362 options=self._parse_key_constraint_options(), 6363 ) 6364 6365 expressions = self._parse_wrapped_csv( 6366 self._parse_primary_key_part, optional=wrapped_optional 6367 ) 6368 6369 return self.expression( 6370 exp.PrimaryKey, 6371 expressions=expressions, 6372 include=self._parse_index_params(), 6373 options=self._parse_key_constraint_options(), 6374 ) 6375 6376 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 6377 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 6378 6379 def _parse_odbc_datetime_literal(self) -> exp.Expression: 6380 """ 6381 Parses a datetime column in ODBC format. We parse the column into the corresponding 6382 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the 6383 same as we did for `DATE('yyyy-mm-dd')`. 
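Illustrative examples (added annotation, not in the original docstring): by the
same mechanism, `{t'hh:mm:ss'}` is parsed as a `Time` column and
`{ts'yyyy-mm-dd hh:mm:ss'}` as a `Timestamp` column, following the
ODBC_DATETIME_LITERALS mapping used below.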
6384 6385 Reference: 6386 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 6387 """ 6388 self._match(TokenType.VAR) 6389 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 6390 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 6391 if not self._match(TokenType.R_BRACE): 6392 self.raise_error("Expected }") 6393 return expression 6394 6395 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6396 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 6397 return this 6398 6399 if self.MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS: 6400 map_token = seq_get(self._tokens, self._index - 2) 6401 parse_map = map_token is not None and map_token.text.upper() == "MAP" 6402 else: 6403 parse_map = False 6404 6405 bracket_kind = self._prev.token_type 6406 if ( 6407 bracket_kind == TokenType.L_BRACE 6408 and self._curr 6409 and self._curr.token_type == TokenType.VAR 6410 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 6411 ): 6412 return self._parse_odbc_datetime_literal() 6413 6414 expressions = self._parse_csv( 6415 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 6416 ) 6417 6418 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 6419 self.raise_error("Expected ]") 6420 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 6421 self.raise_error("Expected }") 6422 6423 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 6424 if bracket_kind == TokenType.L_BRACE: 6425 this = self.expression( 6426 exp.Struct, 6427 expressions=self._kv_to_prop_eq(expressions=expressions, parse_map=parse_map), 6428 ) 6429 elif not this: 6430 this = build_array_constructor( 6431 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 6432 ) 6433 else: 6434 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 6435 if constructor_type: 6436 return build_array_constructor( 6437 constructor_type, 6438 args=expressions, 6439 bracket_kind=bracket_kind, 6440 dialect=self.dialect, 6441 ) 6442 6443 expressions = apply_index_offset( 6444 this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect 6445 ) 6446 this = self.expression( 6447 exp.Bracket, 6448 this=this, 6449 expressions=expressions, 6450 comments=this.pop_comments(), 6451 ) 6452 6453 self._add_comments(this) 6454 return self._parse_bracket(this) 6455 6456 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6457 if self._match(TokenType.COLON): 6458 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 6459 return this 6460 6461 def _parse_case(self) -> t.Optional[exp.Expression]: 6462 ifs = [] 6463 default = None 6464 6465 comments = self._prev_comments 6466 expression = self._parse_assignment() 6467 6468 while self._match(TokenType.WHEN): 6469 this = self._parse_assignment() 6470 self._match(TokenType.THEN) 6471 then = self._parse_assignment() 6472 ifs.append(self.expression(exp.If, this=this, true=then)) 6473 6474 if self._match(TokenType.ELSE): 6475 default = self._parse_assignment() 6476 6477 if not self._match(TokenType.END): 6478 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 6479 default = exp.column("interval") 6480 else: 6481 self.raise_error("Expected END after CASE", self._prev) 6482 6483 return self.expression( 6484 exp.Case, comments=comments, this=expression, ifs=ifs, 
default=default 6485 ) 6486 6487 def _parse_if(self) -> t.Optional[exp.Expression]: 6488 if self._match(TokenType.L_PAREN): 6489 args = self._parse_csv( 6490 lambda: self._parse_alias(self._parse_assignment(), explicit=True) 6491 ) 6492 this = self.validate_expression(exp.If.from_arg_list(args), args) 6493 self._match_r_paren() 6494 else: 6495 index = self._index - 1 6496 6497 if self.NO_PAREN_IF_COMMANDS and index == 0: 6498 return self._parse_as_command(self._prev) 6499 6500 condition = self._parse_assignment() 6501 6502 if not condition: 6503 self._retreat(index) 6504 return None 6505 6506 self._match(TokenType.THEN) 6507 true = self._parse_assignment() 6508 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 6509 self._match(TokenType.END) 6510 this = self.expression(exp.If, this=condition, true=true, false=false) 6511 6512 return this 6513 6514 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 6515 if not self._match_text_seq("VALUE", "FOR"): 6516 self._retreat(self._index - 1) 6517 return None 6518 6519 return self.expression( 6520 exp.NextValueFor, 6521 this=self._parse_column(), 6522 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 6523 ) 6524 6525 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 6526 this = self._parse_function() or self._parse_var_or_string(upper=True) 6527 6528 if self._match(TokenType.FROM): 6529 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6530 6531 if not self._match(TokenType.COMMA): 6532 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 6533 6534 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6535 6536 def _parse_gap_fill(self) -> exp.GapFill: 6537 self._match(TokenType.TABLE) 6538 this = self._parse_table() 6539 6540 self._match(TokenType.COMMA) 6541 args = [this, *self._parse_csv(self._parse_lambda)] 6542 6543 gap_fill = exp.GapFill.from_arg_list(args) 6544 return self.validate_expression(gap_fill, args) 6545 6546 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 6547 this = self._parse_assignment() 6548 6549 if not self._match(TokenType.ALIAS): 6550 if self._match(TokenType.COMMA): 6551 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 6552 6553 self.raise_error("Expected AS after CAST") 6554 6555 fmt = None 6556 to = self._parse_types() 6557 6558 default = self._match(TokenType.DEFAULT) 6559 if default: 6560 default = self._parse_bitwise() 6561 self._match_text_seq("ON", "CONVERSION", "ERROR") 6562 6563 if self._match_set((TokenType.FORMAT, TokenType.COMMA)): 6564 fmt_string = self._parse_string() 6565 fmt = self._parse_at_time_zone(fmt_string) 6566 6567 if not to: 6568 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 6569 if to.this in exp.DataType.TEMPORAL_TYPES: 6570 this = self.expression( 6571 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 6572 this=this, 6573 format=exp.Literal.string( 6574 format_time( 6575 fmt_string.this if fmt_string else "", 6576 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 6577 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 6578 ) 6579 ), 6580 safe=safe, 6581 ) 6582 6583 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 6584 this.set("zone", fmt.args["zone"]) 6585 return this 6586 elif not to: 6587 self.raise_error("Expected TYPE after CAST") 6588 elif isinstance(to, exp.Identifier): 6589 to = exp.DataType.build(to.name, 
dialect=self.dialect, udt=True) 6590 elif to.this == exp.DataType.Type.CHAR: 6591 if self._match(TokenType.CHARACTER_SET): 6592 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 6593 6594 return self.build_cast( 6595 strict=strict, 6596 this=this, 6597 to=to, 6598 format=fmt, 6599 safe=safe, 6600 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 6601 default=default, 6602 ) 6603 6604 def _parse_string_agg(self) -> exp.GroupConcat: 6605 if self._match(TokenType.DISTINCT): 6606 args: t.List[t.Optional[exp.Expression]] = [ 6607 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 6608 ] 6609 if self._match(TokenType.COMMA): 6610 args.extend(self._parse_csv(self._parse_assignment)) 6611 else: 6612 args = self._parse_csv(self._parse_assignment) # type: ignore 6613 6614 if self._match_text_seq("ON", "OVERFLOW"): 6615 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 6616 if self._match_text_seq("ERROR"): 6617 on_overflow: t.Optional[exp.Expression] = exp.var("ERROR") 6618 else: 6619 self._match_text_seq("TRUNCATE") 6620 on_overflow = self.expression( 6621 exp.OverflowTruncateBehavior, 6622 this=self._parse_string(), 6623 with_count=( 6624 self._match_text_seq("WITH", "COUNT") 6625 or not self._match_text_seq("WITHOUT", "COUNT") 6626 ), 6627 ) 6628 else: 6629 on_overflow = None 6630 6631 index = self._index 6632 if not self._match(TokenType.R_PAREN) and args: 6633 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 6634 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 6635 # The order is parsed through `this` as a canonicalization for WITHIN GROUPs 6636 args[0] = self._parse_limit(this=self._parse_order(this=args[0])) 6637 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 6638 6639 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 6640 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 6641 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
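#
# [Annotation, not part of the sqlglot source] A hedged sketch of the
# transpilation path this enables; exact output may vary across sqlglot
# versions:
#
#   import sqlglot
#   sqlglot.transpile(
#       "SELECT LISTAGG(x, ',') WITHIN GROUP (ORDER BY y) FROM t",
#       read="trino",
#       write="mysql",
#   )
#   # expected, roughly: ["SELECT GROUP_CONCAT(x ORDER BY y SEPARATOR ',') FROM t"]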
6642 if not self._match_text_seq("WITHIN", "GROUP"): 6643 self._retreat(index) 6644 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 6645 6646 # The corresponding match_r_paren will be called in parse_function (caller) 6647 self._match_l_paren() 6648 6649 return self.expression( 6650 exp.GroupConcat, 6651 this=self._parse_order(this=seq_get(args, 0)), 6652 separator=seq_get(args, 1), 6653 on_overflow=on_overflow, 6654 ) 6655 6656 def _parse_convert( 6657 self, strict: bool, safe: t.Optional[bool] = None 6658 ) -> t.Optional[exp.Expression]: 6659 this = self._parse_bitwise() 6660 6661 if self._match(TokenType.USING): 6662 to: t.Optional[exp.Expression] = self.expression( 6663 exp.CharacterSet, this=self._parse_var() 6664 ) 6665 elif self._match(TokenType.COMMA): 6666 to = self._parse_types() 6667 else: 6668 to = None 6669 6670 return self.build_cast(strict=strict, this=this, to=to, safe=safe) 6671 6672 def _parse_xml_table(self) -> exp.XMLTable: 6673 namespaces = None 6674 passing = None 6675 columns = None 6676 6677 if self._match_text_seq("XMLNAMESPACES", "("): 6678 namespaces = self._parse_xml_namespace() 6679 self._match_text_seq(")", ",") 6680 6681 this = self._parse_string() 6682 6683 if self._match_text_seq("PASSING"): 6684 # The BY VALUE keywords are optional and are provided for semantic clarity 6685 self._match_text_seq("BY", "VALUE") 6686 passing = self._parse_csv(self._parse_column) 6687 6688 by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF") 6689 6690 if self._match_text_seq("COLUMNS"): 6691 columns = self._parse_csv(self._parse_field_def) 6692 6693 return self.expression( 6694 exp.XMLTable, 6695 this=this, 6696 namespaces=namespaces, 6697 passing=passing, 6698 columns=columns, 6699 by_ref=by_ref, 6700 ) 6701 6702 def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]: 6703 namespaces = [] 6704 6705 while True: 6706 if self._match(TokenType.DEFAULT): 6707 uri = self._parse_string() 6708 else: 6709 uri = self._parse_alias(self._parse_string()) 6710 namespaces.append(self.expression(exp.XMLNamespace, this=uri)) 6711 if not self._match(TokenType.COMMA): 6712 break 6713 6714 return namespaces 6715 6716 def _parse_decode(self) -> t.Optional[exp.Decode | exp.DecodeCase]: 6717 args = self._parse_csv(self._parse_assignment) 6718 6719 if len(args) < 3: 6720 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6721 6722 return self.expression(exp.DecodeCase, expressions=args) 6723 6724 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6725 self._match_text_seq("KEY") 6726 key = self._parse_column() 6727 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6728 self._match_text_seq("VALUE") 6729 value = self._parse_bitwise() 6730 6731 if not key and not value: 6732 return None 6733 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6734 6735 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6736 if not this or not self._match_text_seq("FORMAT", "JSON"): 6737 return this 6738 6739 return self.expression(exp.FormatJson, this=this) 6740 6741 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6742 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. 
JSON_EXISTS) 6743 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6744 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6745 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6746 else: 6747 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6748 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6749 6750 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6751 6752 if not empty and not error and not null: 6753 return None 6754 6755 return self.expression( 6756 exp.OnCondition, 6757 empty=empty, 6758 error=error, 6759 null=null, 6760 ) 6761 6762 def _parse_on_handling( 6763 self, on: str, *values: str 6764 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6765 # Parses the "X ON Y" or "DEFAULT <expr> ON Y" syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6766 for value in values: 6767 if self._match_text_seq(value, "ON", on): 6768 return f"{value} ON {on}" 6769 6770 index = self._index 6771 if self._match(TokenType.DEFAULT): 6772 default_value = self._parse_bitwise() 6773 if self._match_text_seq("ON", on): 6774 return default_value 6775 6776 self._retreat(index) 6777 6778 return None 6779 6780 @t.overload 6781 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6782 6783 @t.overload 6784 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 6785 6786 def _parse_json_object(self, agg=False): 6787 star = self._parse_star() 6788 expressions = ( 6789 [star] 6790 if star 6791 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6792 ) 6793 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6794 6795 unique_keys = None 6796 if self._match_text_seq("WITH", "UNIQUE"): 6797 unique_keys = True 6798 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6799 unique_keys = False 6800 6801 self._match_text_seq("KEYS") 6802 6803 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6804 self._parse_type() 6805 ) 6806 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6807 6808 return self.expression( 6809 exp.JSONObjectAgg if agg else exp.JSONObject, 6810 expressions=expressions, 6811 null_handling=null_handling, 6812 unique_keys=unique_keys, 6813 return_type=return_type, 6814 encoding=encoding, 6815 ) 6816 6817 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6818 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6819 if not self._match_text_seq("NESTED"): 6820 this = self._parse_id_var() 6821 kind = self._parse_types(allow_identifiers=False) 6822 nested = None 6823 else: 6824 this = None 6825 kind = None 6826 nested = True 6827 6828 path = self._match_text_seq("PATH") and self._parse_string() 6829 nested_schema = nested and self._parse_json_schema() 6830 6831 return self.expression( 6832 exp.JSONColumnDef, 6833 this=this, 6834 kind=kind, 6835 path=path, 6836 nested_schema=nested_schema, 6837 ) 6838 6839 def _parse_json_schema(self) -> exp.JSONSchema: 6840 self._match_text_seq("COLUMNS") 6841 return self.expression( 6842 exp.JSONSchema, 6843 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6844 ) 6845 6846 def _parse_json_table(self) -> exp.JSONTable: 6847 this = self._parse_format_json(self._parse_bitwise()) 6848 path = self._match(TokenType.COMMA) and self._parse_string() 6849 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6850 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6851 schema =
    @t.overload
    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

    @t.overload
    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

    def _parse_json_object(self, agg=False):
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObjectAgg if agg else exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    def _parse_json_table(self) -> exp.JSONTable:
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            haystack = seq_get(args, 1)
            needle = seq_get(args, 0)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
        if self._match(TokenType.FOR):
            if len(args) == 1:
                args.append(exp.Literal.number(1))
            args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

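    # [Added example, illustrative only] _parse_substring: SUBSTRING(x FROM 2 FOR 3) is
    # normalized to the same argument list as SUBSTRING(x, 2, 3), and SUBSTRING(x FOR 3)
    # becomes SUBSTRING(x, 1, 3), since a missing start position defaults to 1.
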
    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None
        expression = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

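    # [Added example, illustrative only] _parse_trim: in TRIM(BOTH 'x' FROM col) the pattern
    # is parsed first and then swapped with the operand because of the FROM keyword, yielding
    # exp.Trim(this=col, expression='x', position='BOTH').
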
    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            max = self._prev.text.upper() != "MIN"
            return self.expression(
                exp.HavingMax, this=this, expression=self._parse_column(), max=max
            )

        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        func = this
        comments = func.comments if isinstance(func, exp.Expression) else None

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement it and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        #   SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The code below handles
        #   SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if isinstance(this, exp.AggFunc):
            ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls)

            if ignore_respect and ignore_respect is not this:
                ignore_respect.replace(ignore_respect.this)
                this = self.expression(ignore_respect.__class__, this=this)

        this = self._parse_respect_or_ignore_nulls(this)

        # BigQuery: SELECT ... FROM ... WINDOW x AS (PARTITION BY ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if comments and isinstance(func, exp.Expression):
            func.pop_comments()

        if not self._match(TokenType.L_PAREN):
            return self.expression(
                exp.Window,
                comments=comments,
                this=this,
                alias=self._parse_id_var(False),
                over=over,
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()
            exclude = (
                self._parse_var_from_options(self.WINDOW_EXCLUDE_OPTIONS)
                if self._match_text_seq("EXCLUDE")
                else None
            )

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
                exclude=exclude,
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            comments=comments,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

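    # [Added example, illustrative only] _parse_window_spec: for ROWS BETWEEN UNBOUNDED
    # PRECEDING AND CURRENT ROW, the frame start parses to {"value": "UNBOUNDED", "side":
    # "PRECEDING"} and the frame end to {"value": "CURRENT ROW", "side": None}.
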
    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses)
        # so this section tries to parse the clause version and if it fails, it treats the token
        # as an identifier (alias)
        if self._can_parse_limit_or_offset():
            return this

        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments or []

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or (
            self.STRING_ALIASES and self._parse_string_as_identifier()
        )

        if alias:
            comments.extend(alias.pop_comments())
            this = self.expression(exp.Alias, comments=comments, this=this, alias=alias)
            column = this.this

            # Moves the comment next to the alias in `expr /* comment */ AS alias`
            if not this.comments and column and column.comments:
                this.comments = column.pop_comments()

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_identifier()
        if not expression and (
            (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS)
        ):
            quoted = self._prev.token_type == TokenType.STRING
            expression = self._identifier_expression(quoted=quoted)

        return expression

    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.STRING_PARSERS):
            return self.STRING_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        output = exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)
        if output:
            output.update_positions(self._prev)
        return output

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NUMERIC_PARSERS):
            return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self._identifier_expression(quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        upper: bool = False,
    ) -> t.Optional[exp.Expression]:
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(
                exp.Var, this=self._prev.text.upper() if upper else self._prev.text
            )
        return self._parse_placeholder()

    def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]:
        if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS):
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_string() or self._parse_var(any_token=True, upper=upper)

    def _parse_primary_or_var(self) -> t.Optional[exp.Expression]:
        return self._parse_primary() or self._parse_var(any_token=True)

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

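    # [Added note, not in the original source] the primitive parsers above all fall through to
    # _parse_placeholder (below), which is what lets bind parameters such as ? or :name appear
    # anywhere a string, number, boolean or star literal is expected.
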
    def _parse_parameter(self) -> exp.Parameter:
        this = self._parse_identifier() or self._parse_primary_or_var()
        return self.expression(exp.Parameter, this=this)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]:
        if not self._match_texts(keywords):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        expression = self._parse_expression()
        return [expression] if expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_select() or self._parse_set_operations(
            self._parse_alias(self._parse_assignment(), explicit=True)
            if alias
            else self._parse_assignment()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_refresh(self) -> exp.Refresh:
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

    def _parse_column_def_with_exists(self):
        start = self._index
        self._match(TokenType.COLUMN)

        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if not isinstance(expression, exp.ColumnDef):
            self._retreat(start)
            return None

        expression.set("exists", exists_column)

        return expression

    def _parse_add_column(self) -> t.Optional[exp.ColumnDef]:
        if self._prev.text.upper() != "ADD":
            return None

        expression = self._parse_column_def_with_exists()
        if not expression:
            return None

        # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
        if self._match_texts(("FIRST", "AFTER")):
            position = self._prev.text
            column_position = self.expression(
                exp.ColumnPosition, this=self._parse_column(), position=position
            )
            expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        def _parse_add_alteration() -> t.Optional[exp.Expression]:
            self._match_text_seq("ADD")
            if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
                return self.expression(
                    exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint)
                )

            column_def = self._parse_add_column()
            if isinstance(column_def, exp.ColumnDef):
                return column_def

            exists = self._parse_exists(not_=True)
            if self._match_pair(TokenType.PARTITION, TokenType.L_PAREN, advance=False):
                return self.expression(
                    exp.AddPartition,
                    exists=exists,
                    this=self._parse_field(any_token=True),
                    location=self._match_text_seq("LOCATION", advance=False)
                    and self._parse_property(),
                )

            return None

        if not self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False) and (
            not self.dialect.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
            or self._match_text_seq("COLUMNS")
        ):
            schema = self._parse_schema()

            return (
                ensure_list(schema)
                if schema
                else self._parse_csv(self._parse_column_def_with_exists)
            )

        return self._parse_csv(_parse_add_alteration)

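    # [Added example, illustrative only] _parse_alter_table_add: ALTER TABLE t ADD COLUMN
    # IF NOT EXISTS c INT AFTER b yields an exp.ColumnDef with "exists" set and a nested
    # exp.ColumnPosition(position='AFTER'), per the Databricks/MySQL syntax linked above.
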
    def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.ALTER_ALTER_PARSERS):
            return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self)

        # Many dialects support the ALTER [COLUMN] syntax, so if there is no
        # keyword after ALTER we default to parsing this statement
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment())
        if self._match(TokenType.COMMENT):
            return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())
        if self._match_text_seq("DROP", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                drop=True,
                allow_null=True,
            )
        if self._match_text_seq("SET", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                allow_null=False,
            )

        if self._match_text_seq("SET", "VISIBLE"):
            return self.expression(exp.AlterColumn, this=column, visible="VISIBLE")
        if self._match_text_seq("SET", "INVISIBLE"):
            return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE")

        self._match_text_seq("SET", "DATA")
        self._match_text_seq("TYPE")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_assignment(),
        )

    def _parse_alter_diststyle(self) -> exp.AlterDistStyle:
        if self._match_texts(("ALL", "EVEN", "AUTO")):
            return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper()))

        self._match_text_seq("KEY", "DISTKEY")
        return self.expression(exp.AlterDistStyle, this=self._parse_column())

    def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey:
        if compound:
            self._match_text_seq("SORTKEY")

        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound
            )

        self._match_texts(("AUTO", "NONE"))
        return self.expression(
            exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]:
        if self._match(TokenType.COLUMN) or not self.ALTER_RENAME_REQUIRES_COLUMN:
            exists = self._parse_exists()
            old_column = self._parse_column()
            to = self._match_text_seq("TO")
            new_column = self._parse_column()

            if old_column is None or to is None or new_column is None:
                return None

            return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists)

        self._match_text_seq("TO")
        return self.expression(exp.AlterRename, this=self._parse_table(schema=True))

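    # [Added example, illustrative only] _parse_alter_table_rename: ALTER TABLE t RENAME
    # COLUMN a TO b maps to exp.RenameColumn, while ALTER TABLE t RENAME TO u falls through
    # to exp.AlterRename carrying the new table name.
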
    def _parse_alter_table_set(self) -> exp.AlterSet:
        alter_set = self.expression(exp.AlterSet)

        if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq(
            "TABLE", "PROPERTIES"
        ):
            alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment))
        elif self._match_text_seq("FILESTREAM_ON", advance=False):
            alter_set.set("expressions", [self._parse_assignment()])
        elif self._match_texts(("LOGGED", "UNLOGGED")):
            alter_set.set("option", exp.var(self._prev.text.upper()))
        elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")):
            alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}"))
        elif self._match_text_seq("LOCATION"):
            alter_set.set("location", self._parse_field())
        elif self._match_text_seq("ACCESS", "METHOD"):
            alter_set.set("access_method", self._parse_field())
        elif self._match_text_seq("TABLESPACE"):
            alter_set.set("tablespace", self._parse_field())
        elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"):
            alter_set.set("file_format", [self._parse_field()])
        elif self._match_text_seq("STAGE_FILE_FORMAT"):
            alter_set.set("file_format", self._parse_wrapped_options())
        elif self._match_text_seq("STAGE_COPY_OPTIONS"):
            alter_set.set("copy_options", self._parse_wrapped_options())
        elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"):
            alter_set.set("tag", self._parse_csv(self._parse_assignment))
        else:
            if self._match_text_seq("SERDE"):
                alter_set.set("serde", self._parse_field())

            properties = self._parse_wrapped(self._parse_properties, optional=True)
            alter_set.set("expressions", [properties])

        return alter_set

    def _parse_alter(self) -> exp.Alter | exp.Command:
        start = self._prev

        alter_token = self._match_set(self.ALTERABLES) and self._prev
        if not alter_token:
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)
        check = self._match_text_seq("WITH", "CHECK")
        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))
            not_valid = self._match_text_seq("NOT", "VALID")
            options = self._parse_csv(self._parse_property)

            if not self._curr and actions:
                return self.expression(
                    exp.Alter,
                    this=this,
                    kind=alter_token.text.upper(),
                    exists=exists,
                    actions=actions,
                    only=only,
                    options=options,
                    cluster=cluster,
                    not_valid=not_valid,
                    check=check,
                )

        return self._parse_as_command(start)

    def _parse_analyze(self) -> exp.Analyze | exp.Command:
        start = self._prev
        # https://duckdb.org/docs/sql/statements/analyze
        if not self._curr:
            return self.expression(exp.Analyze)

        options = []
        while self._match_texts(self.ANALYZE_STYLES):
            if self._prev.text.upper() == "BUFFER_USAGE_LIMIT":
                options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}")
            else:
                options.append(self._prev.text.upper())

        this: t.Optional[exp.Expression] = None
        inner_expression: t.Optional[exp.Expression] = None

        kind = self._curr and self._curr.text.upper()

        if self._match(TokenType.TABLE) or self._match(TokenType.INDEX):
            this = self._parse_table_parts()
        elif self._match_text_seq("TABLES"):
            if self._match_set((TokenType.FROM, TokenType.IN)):
                kind = f"{kind} {self._prev.text.upper()}"
                this = self._parse_table(schema=True, is_db_reference=True)
        elif self._match_text_seq("DATABASE"):
            this = self._parse_table(schema=True, is_db_reference=True)
        elif self._match_text_seq("CLUSTER"):
            this = self._parse_table()
        # Try matching inner expression keywords before falling back to parsing a table
        elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS):
            kind = None
            inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self)
        else:
            # Empty kind https://prestodb.io/docs/current/sql/analyze.html
            kind = None
            this = self._parse_table_parts()

        partition = self._try_parse(self._parse_partition)
        if not partition and self._match_texts(self.PARTITION_KEYWORDS):
            return self._parse_as_command(start)

        # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/
        if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq(
            "WITH", "ASYNC", "MODE"
        ):
            mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE"
        else:
            mode = None

        if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS):
            inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self)

        properties = self._parse_properties()
        return self.expression(
            exp.Analyze,
            kind=kind,
            this=this,
            mode=mode,
            partition=partition,
            properties=properties,
            expression=inner_expression,
            options=options,
        )

    # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html
    def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics:
        this = None
        kind = self._prev.text.upper()
        option = self._prev.text.upper() if self._match_text_seq("DELTA") else None
        expressions = []

        if not self._match_text_seq("STATISTICS"):
            self.raise_error("Expecting token STATISTICS")

        if self._match_text_seq("NOSCAN"):
            this = "NOSCAN"
        elif self._match(TokenType.FOR):
            if self._match_text_seq("ALL", "COLUMNS"):
                this = "FOR ALL COLUMNS"
            if self._match_texts(("COLUMNS",)):
                this = "FOR COLUMNS"
                expressions = self._parse_csv(self._parse_column_reference)
        elif self._match_text_seq("SAMPLE"):
            sample = self._parse_number()
            expressions = [
                self.expression(
                    exp.AnalyzeSample,
                    sample=sample,
                    kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None,
                )
            ]

        return self.expression(
            exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions
        )

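    # [Added example, illustrative only, assuming COMPUTE is registered in
    # ANALYZE_EXPRESSION_PARSERS] the Spark statement ANALYZE TABLE t COMPUTE STATISTICS FOR
    # COLUMNS a, b would reach _parse_analyze_statistics and produce exp.AnalyzeStatistics
    # with kind="COMPUTE", this="FOR COLUMNS" and the column references in "expressions".
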
    # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html
    def _parse_analyze_validate(self) -> exp.AnalyzeValidate:
        kind = None
        this = None
        expression: t.Optional[exp.Expression] = None
        if self._match_text_seq("REF", "UPDATE"):
            kind = "REF"
            this = "UPDATE"
            if self._match_text_seq("SET", "DANGLING", "TO", "NULL"):
                this = "UPDATE SET DANGLING TO NULL"
        elif self._match_text_seq("STRUCTURE"):
            kind = "STRUCTURE"
            if self._match_text_seq("CASCADE", "FAST"):
                this = "CASCADE FAST"
            elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts(
                ("ONLINE", "OFFLINE")
            ):
                this = f"CASCADE COMPLETE {self._prev.text.upper()}"
                expression = self._parse_into()

        return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression)

    def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]:
        this = self._prev.text.upper()
        if self._match_text_seq("COLUMNS"):
            return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}")
        return None

    def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]:
        kind = self._prev.text.upper() if self._match_text_seq("SYSTEM") else None
        if self._match_text_seq("STATISTICS"):
            return self.expression(exp.AnalyzeDelete, kind=kind)
        return None

    def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]:
        if self._match_text_seq("CHAINED", "ROWS"):
            return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into())
        return None

    # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html
    def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram:
        this = self._prev.text.upper()
        expression: t.Optional[exp.Expression] = None
        expressions = []
        update_options = None

        if self._match_text_seq("HISTOGRAM", "ON"):
            expressions = self._parse_csv(self._parse_column_reference)
            with_expressions = []
            while self._match(TokenType.WITH):
                # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/
                if self._match_texts(("SYNC", "ASYNC")):
                    if self._match_text_seq("MODE", advance=False):
                        with_expressions.append(f"{self._prev.text.upper()} MODE")
                        self._advance()
                else:
                    buckets = self._parse_number()
                    if self._match_text_seq("BUCKETS"):
                        with_expressions.append(f"{buckets} BUCKETS")
            if with_expressions:
                expression = self.expression(exp.AnalyzeWith, expressions=with_expressions)

            if self._match_texts(("MANUAL", "AUTO")) and self._match(
                TokenType.UPDATE, advance=False
            ):
                update_options = self._prev.text.upper()
                self._advance()
            elif self._match_text_seq("USING", "DATA"):
                expression = self.expression(exp.UsingData, this=self._parse_string())

        return self.expression(
            exp.AnalyzeHistogram,
            this=this,
            expressions=expressions,
            expression=expression,
            update_options=update_options,
        )

    def _parse_merge(self) -> exp.Merge:
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_assignment()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            whens=self._parse_when_matched(),
            returning=self._parse_returning(),
        )

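    # [Added example, illustrative only] _parse_merge: MERGE INTO t USING s ON t.id = s.id
    # WHEN MATCHED THEN UPDATE SET t.v = s.v WHEN NOT MATCHED THEN INSERT VALUES (s.id, s.v)
    # builds an exp.Merge whose "whens" are assembled by _parse_when_matched below.
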
    def _parse_when_matched(self) -> exp.Whens:
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_assignment() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                this = self._parse_star()
                if this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=exp.var("ROW")
                        if self._match_text_seq("ROW")
                        else self._parse_value(values=False),
                        expression=self._match_text_seq("VALUES") and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = self._parse_var_from_options(self.CONFLICT_ACTIONS)

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return self.expression(exp.Whens, expressions=whens)

    def _parse_show(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_column()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        if isinstance(right, (exp.Column, exp.Identifier)):
            right = exp.var(right.name)

        this = self.expression(exp.EQ, this=left, expression=right)
        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

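    # [Added note, not in the original source] _parse_set: a plain SET x = 1 becomes a SetItem
    # wrapping exp.EQ, and SET SESSION TRANSACTION ... is routed to _parse_set_transaction;
    # if any tokens remain unconsumed, the statement is conservatively re-parsed as a generic
    # exp.Command.
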
    def _parse_var_from_options(
        self, options: OPTIONS_TYPE, raise_unmatched: bool = True
    ) -> t.Optional[exp.Var]:
        start = self._curr
        if not start:
            return None

        option = start.text.upper()
        continuations = options.get(option)

        index = self._index
        self._advance()
        for keywords in continuations or []:
            if isinstance(keywords, str):
                keywords = (keywords,)

            if self._match_text_seq(*keywords):
                option = f"{option} {' '.join(keywords)}"
                break
        else:
            if continuations or continuations is None:
                if raise_unmatched:
                    self.raise_error(f"Unknown option {option}")

                self._retreat(index)
                return None

        return exp.var(option)

    def _parse_as_command(self, start: Token) -> exp.Command:
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        self._warn_unsupported()
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()
                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Comprehension]:
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_assignment() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _parse_heredoc(self) -> t.Optional[exp.Heredoc]:
        if self._match(TokenType.HEREDOC_STRING):
            return self.expression(exp.Heredoc, this=self._prev.text)

        if not self._match_text_seq("$"):
            return None

        tags = ["$"]
        tag_text = None

        if self._is_connected():
            self._advance()
            tags.append(self._prev.text.upper())
        else:
            self.raise_error("No closing $ found")

        if tags[-1] != "$":
            if self._is_connected() and self._match_text_seq("$"):
                tag_text = tags[-1]
                tags.append("$")
            else:
                self.raise_error("No closing $ found")

        heredoc_start = self._curr

        while self._curr:
            if self._match_text_seq(*tags, advance=False):
                this = self._find_sql(heredoc_start, self._prev)
                self._advance(len(tags))
                return self.expression(exp.Heredoc, this=this, tag=tag_text)

            self._advance()

        self.raise_error(f"No closing {''.join(tags)} found")
        return None

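    # [Added example, illustrative only] _parse_heredoc: Postgres-style dollar quoting such as
    # $tag$ SELECT 1 $tag$ is recognized here token by token when the tokenizer did not already
    # emit a single HEREDOC_STRING, producing an exp.Heredoc whose tag is the uppercased
    # delimiter text.
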
    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        if (
            self._curr
            and self._curr.token_type != TokenType.STRING
            and self._curr.text.upper() in texts
        ):
            if advance:
                self._advance()
            return True
        return None

    def _match_text_seq(self, *texts, advance=True):
        index = self._index
        for text in texts:
            if (
                self._curr
                and self._curr.token_type != TokenType.STRING
                and self._curr.text.upper() == text
            ):
                self._advance()
            else:
                self._retreat(index)
                return None

        if not advance:
            self._retreat(index)

        return True

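    # [Added note, not in the original source] _match_text_seq is all-or-nothing: e.g.
    # self._match_text_seq("NOT", "NULL") either consumes both keywords or retreats and
    # consumes none, which is what makes probing multi-word syntax safe; advance=False turns
    # it into pure lookahead.
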
    def _replace_lambda(
        self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if not node:
            return node

        lambda_types = {e.name: e.args.get("to") or False for e in expressions}

        for column in node.find_all(exp.Column):
            typ = lambda_types.get(column.parts[0].name)
            if typ is not None:
                dot_or_id = column.to_dot() if column.table else column.this

                if typ:
                    dot_or_id = self.expression(
                        exp.Cast,
                        this=dot_or_id,
                        to=typ,
                    )

                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node

    def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression:
        start = self._prev

        # Not to be confused with TRUNCATE(number, decimals) function call
        if self._match(TokenType.L_PAREN):
            self._retreat(self._index - 2)
            return self._parse_function()

        # Clickhouse supports TRUNCATE DATABASE as well
        is_database = self._match(TokenType.DATABASE)

        self._match(TokenType.TABLE)

        exists = self._parse_exists(not_=False)

        expressions = self._parse_csv(
            lambda: self._parse_table(schema=True, is_db_reference=is_database)
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match_text_seq("RESTART", "IDENTITY"):
            identity = "RESTART"
        elif self._match_text_seq("CONTINUE", "IDENTITY"):
            identity = "CONTINUE"
        else:
            identity = None

        if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"):
            option = self._prev.text
        else:
            option = None

        partition = self._parse_partition()

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.TruncateTable,
            expressions=expressions,
            is_database=is_database,
            exists=exists,
            cluster=cluster,
            identity=identity,
            option=option,
            partition=partition,
        )

    def _parse_with_operator(self) -> t.Optional[exp.Expression]:
        this = self._parse_ordered(self._parse_opclass)

        if not self._match(TokenType.WITH):
            return this

        op = self._parse_var(any_token=True)

        return self.expression(exp.WithOperator, this=this, op=op)

    def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]:
        self._match(TokenType.EQ)
        self._match(TokenType.L_PAREN)

        opts: t.List[t.Optional[exp.Expression]] = []
        option: exp.Expression | None
        while self._curr and not self._match(TokenType.R_PAREN):
            if self._match_text_seq("FORMAT_NAME", "="):
                # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL
                option = self._parse_format_name()
            else:
                option = self._parse_property()

            if option is None:
                self.raise_error("Unable to parse option")
                break

            opts.append(option)

        return opts

    def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]:
        sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None

        options = []
        while self._curr and not self._match(TokenType.R_PAREN, advance=False):
            option = self._parse_var(any_token=True)
            prev = self._prev.text.upper()

            # Different dialects might separate options and values by white space, "=" and "AS"
            self._match(TokenType.EQ)
            self._match(TokenType.ALIAS)

            param = self.expression(exp.CopyParameter, this=option)

            if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match(
                TokenType.L_PAREN, advance=False
            ):
                # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options
                param.set("expressions", self._parse_wrapped_options())
            elif prev == "FILE_FORMAT":
                # T-SQL's external file format case
                param.set("expression", self._parse_field())
            else:
                param.set("expression", self._parse_unquoted_field())

            options.append(param)
            self._match(sep)

        return options

expr.set("region", self._parse_field()) 8313 8314 return expr 8315 8316 def _parse_file_location(self) -> t.Optional[exp.Expression]: 8317 return self._parse_field() 8318 8319 def _parse_copy(self) -> exp.Copy | exp.Command: 8320 start = self._prev 8321 8322 self._match(TokenType.INTO) 8323 8324 this = ( 8325 self._parse_select(nested=True, parse_subquery_alias=False) 8326 if self._match(TokenType.L_PAREN, advance=False) 8327 else self._parse_table(schema=True) 8328 ) 8329 8330 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 8331 8332 files = self._parse_csv(self._parse_file_location) 8333 credentials = self._parse_credentials() 8334 8335 self._match_text_seq("WITH") 8336 8337 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 8338 8339 # Fallback case 8340 if self._curr: 8341 return self._parse_as_command(start) 8342 8343 return self.expression( 8344 exp.Copy, 8345 this=this, 8346 kind=kind, 8347 credentials=credentials, 8348 files=files, 8349 params=params, 8350 ) 8351 8352 def _parse_normalize(self) -> exp.Normalize: 8353 return self.expression( 8354 exp.Normalize, 8355 this=self._parse_bitwise(), 8356 form=self._match(TokenType.COMMA) and self._parse_var(), 8357 ) 8358 8359 def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor: 8360 args = self._parse_csv(lambda: self._parse_lambda()) 8361 8362 this = seq_get(args, 0) 8363 decimals = seq_get(args, 1) 8364 8365 return expr_type( 8366 this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var() 8367 ) 8368 8369 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 8370 star_token = self._prev 8371 8372 if self._match_text_seq("COLUMNS", "(", advance=False): 8373 this = self._parse_function() 8374 if isinstance(this, exp.Columns): 8375 this.set("unpack", True) 8376 return this 8377 8378 return self.expression( 8379 exp.Star, 8380 **{ # type: ignore 8381 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 8382 "replace": self._parse_star_op("REPLACE"), 8383 "rename": self._parse_star_op("RENAME"), 8384 }, 8385 ).update_positions(star_token) 8386 8387 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 8388 privilege_parts = [] 8389 8390 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 8391 # (end of privilege list) or L_PAREN (start of column list) are met 8392 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 8393 privilege_parts.append(self._curr.text.upper()) 8394 self._advance() 8395 8396 this = exp.var(" ".join(privilege_parts)) 8397 expressions = ( 8398 self._parse_wrapped_csv(self._parse_column) 8399 if self._match(TokenType.L_PAREN, advance=False) 8400 else None 8401 ) 8402 8403 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 8404 8405 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 8406 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 8407 principal = self._parse_id_var() 8408 8409 if not principal: 8410 return None 8411 8412 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 8413 8414 def _parse_grant_revoke_common( 8415 self, 8416 ) -> t.Tuple[t.Optional[t.List], t.Optional[str], t.Optional[exp.Expression]]: 8417 privileges = self._parse_csv(self._parse_grant_privilege) 8418 8419 self._match(TokenType.ON) 8420 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 8421 8422 # Attempt to parse the securable e.g. 
    def _parse_grant_revoke_common(
        self,
    ) -> t.Tuple[t.Optional[t.List], t.Optional[str], t.Optional[exp.Expression]]:
        privileges = self._parse_csv(self._parse_grant_privilege)

        self._match(TokenType.ON)
        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()

        # Attempt to parse the securable e.g. MySQL allows names
        # such as "foo.*", "*.*" which are not easily parseable yet
        securable = self._try_parse(self._parse_table_parts)

        return privileges, kind, securable

    def _parse_grant(self) -> exp.Grant | exp.Command:
        start = self._prev

        privileges, kind, securable = self._parse_grant_revoke_common()

        if not securable or not self._match_text_seq("TO"):
            return self._parse_as_command(start)

        principals = self._parse_csv(self._parse_grant_principal)

        grant_option = self._match_text_seq("WITH", "GRANT", "OPTION")

        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Grant,
            privileges=privileges,
            kind=kind,
            securable=securable,
            principals=principals,
            grant_option=grant_option,
        )

    def _parse_revoke(self) -> exp.Revoke | exp.Command:
        start = self._prev

        grant_option = self._match_text_seq("GRANT", "OPTION", "FOR")

        privileges, kind, securable = self._parse_grant_revoke_common()

        if not securable or not self._match_text_seq("FROM"):
            return self._parse_as_command(start)

        principals = self._parse_csv(self._parse_grant_principal)

        cascade = None
        if self._match_texts(("CASCADE", "RESTRICT")):
            cascade = self._prev.text.upper()

        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Revoke,
            privileges=privileges,
            kind=kind,
            securable=securable,
            principals=principals,
            grant_option=grant_option,
            cascade=cascade,
        )

    def _parse_overlay(self) -> exp.Overlay:
        return self.expression(
            exp.Overlay,
            **{  # type: ignore
                "this": self._parse_bitwise(),
                "expression": self._match_text_seq("PLACING") and self._parse_bitwise(),
                "from": self._match_text_seq("FROM") and self._parse_bitwise(),
                "for": self._match_text_seq("FOR") and self._parse_bitwise(),
            },
        )

    def _parse_format_name(self) -> exp.Property:
        # Note: Although not specified in the docs, Snowflake does accept a string/identifier
        # for FILE_FORMAT = <format_name>
        return self.expression(
            exp.Property,
            this=exp.var("FORMAT_NAME"),
            value=self._parse_string() or self._parse_table_parts(),
        )

    def _parse_max_min_by(self, expr_type: t.Type[exp.AggFunc]) -> exp.AggFunc:
        args: t.List[exp.Expression] = []

        if self._match(TokenType.DISTINCT):
            args.append(self.expression(exp.Distinct, expressions=[self._parse_assignment()]))
            self._match(TokenType.COMMA)

        args.extend(self._parse_csv(self._parse_assignment))

        return self.expression(
            expr_type, this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2)
        )

    def _identifier_expression(
        self, token: t.Optional[Token] = None, **kwargs: t.Any
    ) -> exp.Identifier:
        token = token or self._prev
        expression = self.expression(exp.Identifier, this=token.text, **kwargs)
        expression.update_positions(token)
        return expression

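    # [Added note, not in the original source] the methods below implement BigQuery-style pipe
    # syntax, in which each |> stage (SELECT, AGGREGATE, set operators, JOIN, etc.) is folded
    # into a CTE named __tmp<N> so that a chain of stages lowers to ordinary nested SELECTs.
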
query.args.get("with") 8536 ctes = with_.pop() if with_ else None 8537 8538 new_select = exp.select(*expressions, copy=False).from_(new_cte, copy=False) 8539 if ctes: 8540 new_select.set("with", ctes) 8541 8542 return new_select.with_(new_cte, as_=query, copy=False) 8543 8544 def _parse_pipe_syntax_select(self, query: exp.Select) -> exp.Select: 8545 select = self._parse_select(consume_pipe=False) 8546 if not select: 8547 return query 8548 8549 return self._build_pipe_cte( 8550 query=query.select(*select.expressions, append=False), expressions=[exp.Star()] 8551 ) 8552 8553 def _parse_pipe_syntax_limit(self, query: exp.Select) -> exp.Select: 8554 limit = self._parse_limit() 8555 offset = self._parse_offset() 8556 if limit: 8557 curr_limit = query.args.get("limit", limit) 8558 if curr_limit.expression.to_py() >= limit.expression.to_py(): 8559 query.limit(limit, copy=False) 8560 if offset: 8561 curr_offset = query.args.get("offset") 8562 curr_offset = curr_offset.expression.to_py() if curr_offset else 0 8563 query.offset(exp.Literal.number(curr_offset + offset.expression.to_py()), copy=False) 8564 8565 return query 8566 8567 def _parse_pipe_syntax_aggregate_fields(self) -> t.Optional[exp.Expression]: 8568 this = self._parse_assignment() 8569 if self._match_text_seq("GROUP", "AND", advance=False): 8570 return this 8571 8572 this = self._parse_alias(this) 8573 8574 if self._match_set((TokenType.ASC, TokenType.DESC), advance=False): 8575 return self._parse_ordered(lambda: this) 8576 8577 return this 8578 8579 def _parse_pipe_syntax_aggregate_group_order_by( 8580 self, query: exp.Select, group_by_exists: bool = True 8581 ) -> exp.Select: 8582 expr = self._parse_csv(self._parse_pipe_syntax_aggregate_fields) 8583 aggregates_or_groups, orders = [], [] 8584 for element in expr: 8585 if isinstance(element, exp.Ordered): 8586 this = element.this 8587 if isinstance(this, exp.Alias): 8588 element.set("this", this.args["alias"]) 8589 orders.append(element) 8590 else: 8591 this = element 8592 aggregates_or_groups.append(this) 8593 8594 if group_by_exists: 8595 query.select(*aggregates_or_groups, copy=False).group_by( 8596 *[projection.args.get("alias", projection) for projection in aggregates_or_groups], 8597 copy=False, 8598 ) 8599 else: 8600 query.select(*aggregates_or_groups, append=False, copy=False) 8601 8602 if orders: 8603 return query.order_by(*orders, append=False, copy=False) 8604 8605 return query 8606 8607 def _parse_pipe_syntax_aggregate(self, query: exp.Select) -> exp.Select: 8608 self._match_text_seq("AGGREGATE") 8609 query = self._parse_pipe_syntax_aggregate_group_order_by(query, group_by_exists=False) 8610 8611 if self._match(TokenType.GROUP_BY) or ( 8612 self._match_text_seq("GROUP", "AND") and self._match(TokenType.ORDER_BY) 8613 ): 8614 query = self._parse_pipe_syntax_aggregate_group_order_by(query) 8615 8616 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8617 8618 def _parse_pipe_syntax_set_operator(self, query: exp.Query) -> t.Optional[exp.Query]: 8619 first_setop = self.parse_set_operation(this=query) 8620 if not first_setop: 8621 return None 8622 8623 def _parse_and_unwrap_query() -> t.Optional[exp.Select]: 8624 expr = self._parse_paren() 8625 return expr.assert_is(exp.Subquery).unnest() if expr else None 8626 8627 first_setop.this.pop() 8628 8629 setops = [ 8630 first_setop.expression.pop().assert_is(exp.Subquery).unnest(), 8631 *self._parse_csv(_parse_and_unwrap_query), 8632 ] 8633 8634 query = self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8635 
with_ = query.args.get("with") 8636 ctes = with_.pop() if with_ else None 8637 8638 if isinstance(first_setop, exp.Union): 8639 query = query.union(*setops, copy=False, **first_setop.args) 8640 elif isinstance(first_setop, exp.Except): 8641 query = query.except_(*setops, copy=False, **first_setop.args) 8642 else: 8643 query = query.intersect(*setops, copy=False, **first_setop.args) 8644 8645 query.set("with", ctes) 8646 8647 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8648 8649 def _parse_pipe_syntax_join(self, query: exp.Query) -> t.Optional[exp.Query]: 8650 join = self._parse_join() 8651 if not join: 8652 return None 8653 8654 if isinstance(query, exp.Select): 8655 return query.join(join, copy=False) 8656 8657 return query 8658 8659 def _parse_pipe_syntax_pivot(self, query: exp.Select) -> exp.Select: 8660 pivots = self._parse_pivots() 8661 if not pivots: 8662 return query 8663 8664 from_ = query.args.get("from") 8665 if from_: 8666 from_.this.set("pivots", pivots) 8667 8668 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8669 8670 def _parse_pipe_syntax_extend(self, query: exp.Select) -> exp.Select: 8671 self._match_text_seq("EXTEND") 8672 query.select(*[exp.Star(), *self._parse_expressions()], append=False, copy=False) 8673 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8674 8675 def _parse_pipe_syntax_tablesample(self, query: exp.Select) -> exp.Select: 8676 sample = self._parse_table_sample() 8677 8678 with_ = query.args.get("with") 8679 if with_: 8680 with_.expressions[-1].this.set("sample", sample) 8681 else: 8682 query.set("sample", sample) 8683 8684 return query 8685 8686 def _parse_pipe_syntax_query(self, query: exp.Query) -> t.Optional[exp.Query]: 8687 if isinstance(query, exp.Subquery): 8688 query = exp.select("*").from_(query, copy=False) 8689 8690 if not query.args.get("from"): 8691 query = exp.select("*").from_(query.subquery(copy=False), copy=False) 8692 8693 while self._match(TokenType.PIPE_GT): 8694 start = self._curr 8695 parser = self.PIPE_SYNTAX_TRANSFORM_PARSERS.get(self._curr.text.upper()) 8696 if not parser: 8697 # The set operators (UNION, etc) and the JOIN operator have a few common starting 8698 # keywords, making it tricky to disambiguate them without lookahead. The approach 8699 # here is to try and parse a set operation and if that fails, then try to parse a 8700 # join operator. If that fails as well, then the operator is not supported. 
8701 parsed_query = self._parse_pipe_syntax_set_operator(query)
8702 parsed_query = parsed_query or self._parse_pipe_syntax_join(query)
8703 if not parsed_query:
8704 self._retreat(start)
8705 self.raise_error(f"Unsupported pipe syntax operator: '{start.text.upper()}'.")
8706 break
8707 query = parsed_query
8708 else:
8709 query = parser(self, query)
8710
8711 return query
8712
8713 def _parse_declareitem(self) -> t.Optional[exp.DeclareItem]:
8714 vars = self._parse_csv(self._parse_id_var)
8715 if not vars:
8716 return None
8717
8718 return self.expression(
8719 exp.DeclareItem,
8720 this=vars,
8721 kind=self._parse_types(),
8722 default=self._match(TokenType.DEFAULT) and self._parse_bitwise(),
8723 )
8724
8725 def _parse_declare(self) -> exp.Declare | exp.Command:
8726 start = self._prev
8727 expressions = self._try_parse(lambda: self._parse_csv(self._parse_declareitem))
8728
8729 if not expressions or self._curr:
8730 return self._parse_as_command(start)
8731
8732 return self.expression(exp.Declare, expressions=expressions)
8733
8734 def build_cast(self, strict: bool, **kwargs) -> exp.Cast:
8735 exp_class = exp.Cast if strict else exp.TryCast
8736
8737 if exp_class == exp.TryCast:
8738 kwargs["requires_string"] = self.dialect.TRY_CAST_REQUIRES_STRING
8739
8740 return self.expression(exp_class, **kwargs)
8741
8742 def _parse_json_value(self) -> exp.JSONValue:
8743 this = self._parse_bitwise()
8744 self._match(TokenType.COMMA)
8745 path = self._parse_bitwise()
8746
8747 returning = self._match(TokenType.RETURNING) and self._parse_type()
8748
8749 return self.expression(
8750 exp.JSONValue,
8751 this=this,
8752 path=self.dialect.to_json_path(path),
8753 returning=returning,
8754 on_condition=self._parse_on_condition(),
8755 )
8756
8757 def _parse_group_concat(self) -> t.Optional[exp.Expression]:
8758 def concat_exprs(
8759 node: t.Optional[exp.Expression], exprs: t.List[exp.Expression]
8760 ) -> exp.Expression:
8761 if isinstance(node, exp.Distinct) and len(node.expressions) > 1:
8762 concat_exprs = [
8763 self.expression(exp.Concat, expressions=node.expressions, safe=True)
8764 ]
8765 node.set("expressions", concat_exprs)
8766 return node
8767 if len(exprs) == 1:
8768 return exprs[0]
8769 return self.expression(exp.Concat, expressions=exprs, safe=True)
8770
8771 args = self._parse_csv(self._parse_lambda)
8772
8773 if args:
8774 order = args[-1] if isinstance(args[-1], exp.Order) else None
8775
8776 if order:
8777 # Order By is the last (or only) expression in the list and has consumed the 'expr' before it,
8778 # remove 'expr' from exp.Order and add it back to args
8779 args[-1] = order.this
8780 order.set("this", concat_exprs(order.this, args))
8781
8782 this = order or concat_exprs(args[0], args)
8783 else:
8784 this = None
8785
8786 separator = self._parse_field() if self._match(TokenType.SEPARATOR) else None
8787
8788 return self.expression(exp.GroupConcat, this=this, separator=separator)
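A minimal usage sketch (not part of the module) of what `_parse_group_concat` produces; it assumes the MySQL dialect routes GROUP_CONCAT through this method, which is how the trailing ORDER BY and SEPARATOR end up on the node:

from sqlglot import parse_one

# GROUP_CONCAT(expr ORDER BY ... SEPARATOR ...) parses into exp.GroupConcat:
# `this` holds the aggregated expression (wrapped in exp.Order when an ORDER BY
# was consumed) and `separator` holds the SEPARATOR literal.
agg = parse_one(
    "SELECT GROUP_CONCAT(name ORDER BY name SEPARATOR ', ') FROM t",
    read="mysql",
).selects[0]
print(type(agg).__name__)     # GroupConcat
print(agg.args["separator"])  # ', '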
107def build_mod(args: t.List) -> exp.Mod: 108 this = seq_get(args, 0) 109 expression = seq_get(args, 1) 110 111 # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7 112 this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this 113 expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression 114 115 return exp.Mod(this=this, expression=expression)
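A short sketch (default dialect, illustrative input) of why build_mod parenthesizes binary operands:

from sqlglot import parse_one

# Without the exp.Paren wrapper, MOD(a + 1, 7) would render as a + 1 % 7, which
# re-binds as a + (1 % 7); the wrapper preserves the intended grouping.
print(parse_one("SELECT MOD(a + 1, 7)").sql())  # expected: SELECT (a + 1) % 7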
127def build_array_constructor( 128 exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect 129) -> exp.Expression: 130 array_exp = exp_class(expressions=args) 131 132 if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS: 133 array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET) 134 135 return array_exp
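A direct-call sketch (illustrative; uses the base Dialect, whose HAS_DISTINCT_ARRAY_CONSTRUCTORS is False, so no bracket_notation flag is set):

from sqlglot import exp
from sqlglot.dialects.dialect import Dialect
from sqlglot.parser import build_array_constructor
from sqlglot.tokens import TokenType

# The bracket kind only matters for dialects that generate ARRAY(...) and [...]
# differently; here the node renders the same either way.
node = build_array_constructor(
    exp.Array, [exp.Literal.number(1), exp.Literal.number(2)], TokenType.L_BRACKET, Dialect()
)
print(node.sql())  # e.g. ARRAY(1, 2)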
138def build_convert_timezone( 139 args: t.List, default_source_tz: t.Optional[str] = None 140) -> t.Union[exp.ConvertTimezone, exp.Anonymous]: 141 if len(args) == 2: 142 source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None 143 return exp.ConvertTimezone( 144 source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1) 145 ) 146 147 return exp.ConvertTimezone.from_arg_list(args)
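A sketch of the two-argument path (values illustrative; some dialect hooks pass a default source timezone when their CONVERT_TIMEZONE assumes one):

from sqlglot import exp
from sqlglot.parser import build_convert_timezone

# With exactly two args they are read as (target_tz, timestamp), and the source
# timezone is backfilled from default_source_tz when one is provided.
node = build_convert_timezone(
    [exp.Literal.string("America/New_York"), exp.column("ts")],
    default_source_tz="UTC",
)
print(node.args["source_tz"])  # 'UTC'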
182class Parser(metaclass=_Parser): 183 """ 184 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 185 186 Args: 187 error_level: The desired error level. 188 Default: ErrorLevel.IMMEDIATE 189 error_message_context: The amount of context to capture from a query string when displaying 190 the error message (in number of characters). 191 Default: 100 192 max_errors: Maximum number of error messages to include in a raised ParseError. 193 This is only relevant if error_level is ErrorLevel.RAISE. 194 Default: 3 195 """ 196 197 FUNCTIONS: t.Dict[str, t.Callable] = { 198 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 199 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 200 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 201 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 202 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 203 ), 204 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 205 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 206 ), 207 "CHAR": lambda args: exp.Chr(expressions=args), 208 "CHR": lambda args: exp.Chr(expressions=args), 209 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 210 "CONCAT": lambda args, dialect: exp.Concat( 211 expressions=args, 212 safe=not dialect.STRICT_STRING_CONCAT, 213 coalesce=dialect.CONCAT_COALESCE, 214 ), 215 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 216 expressions=args, 217 safe=not dialect.STRICT_STRING_CONCAT, 218 coalesce=dialect.CONCAT_COALESCE, 219 ), 220 "CONVERT_TIMEZONE": build_convert_timezone, 221 "DATE_TO_DATE_STR": lambda args: exp.Cast( 222 this=seq_get(args, 0), 223 to=exp.DataType(this=exp.DataType.Type.TEXT), 224 ), 225 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 226 start=seq_get(args, 0), 227 end=seq_get(args, 1), 228 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")), 229 ), 230 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 231 "HEX": build_hex, 232 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 233 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 234 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 235 "LIKE": build_like, 236 "LOG": build_logarithm, 237 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 238 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 239 "LOWER": build_lower, 240 "LPAD": lambda args: build_pad(args), 241 "LEFTPAD": lambda args: build_pad(args), 242 "LTRIM": lambda args: build_trim(args), 243 "MOD": build_mod, 244 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 245 "RPAD": lambda args: build_pad(args, is_left=False), 246 "RTRIM": lambda args: build_trim(args, is_left=False), 247 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 248 if len(args) != 2 249 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 250 "STRPOS": exp.StrPosition.from_arg_list, 251 "CHARINDEX": lambda args: build_locate_strposition(args), 252 "INSTR": exp.StrPosition.from_arg_list, 253 "LOCATE": lambda args: build_locate_strposition(args), 254 "TIME_TO_TIME_STR": lambda args: exp.Cast( 255 this=seq_get(args, 0), 256 to=exp.DataType(this=exp.DataType.Type.TEXT), 257 ), 258 "TO_HEX": build_hex, 259 "TS_OR_DS_TO_DATE_STR": 
lambda args: exp.Substring(
260 this=exp.Cast(
261 this=seq_get(args, 0),
262 to=exp.DataType(this=exp.DataType.Type.TEXT),
263 ),
264 start=exp.Literal.number(1),
265 length=exp.Literal.number(10),
266 ),
267 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))),
268 "UPPER": build_upper,
269 "VAR_MAP": build_var_map,
270 }
271
272 NO_PAREN_FUNCTIONS = {
273 TokenType.CURRENT_DATE: exp.CurrentDate,
274 TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
275 TokenType.CURRENT_TIME: exp.CurrentTime,
276 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
277 TokenType.CURRENT_USER: exp.CurrentUser,
278 }
279
280 STRUCT_TYPE_TOKENS = {
281 TokenType.NESTED,
282 TokenType.OBJECT,
283 TokenType.STRUCT,
284 TokenType.UNION,
285 }
286
287 NESTED_TYPE_TOKENS = {
288 TokenType.ARRAY,
289 TokenType.LIST,
290 TokenType.LOWCARDINALITY,
291 TokenType.MAP,
292 TokenType.NULLABLE,
293 TokenType.RANGE,
294 *STRUCT_TYPE_TOKENS,
295 }
296
297 ENUM_TYPE_TOKENS = {
298 TokenType.DYNAMIC,
299 TokenType.ENUM,
300 TokenType.ENUM8,
301 TokenType.ENUM16,
302 }
303
304 AGGREGATE_TYPE_TOKENS = {
305 TokenType.AGGREGATEFUNCTION,
306 TokenType.SIMPLEAGGREGATEFUNCTION,
307 }
308
309 TYPE_TOKENS = {
310 TokenType.BIT,
311 TokenType.BOOLEAN,
312 TokenType.TINYINT,
313 TokenType.UTINYINT,
314 TokenType.SMALLINT,
315 TokenType.USMALLINT,
316 TokenType.INT,
317 TokenType.UINT,
318 TokenType.BIGINT,
319 TokenType.UBIGINT,
320 TokenType.INT128,
321 TokenType.UINT128,
322 TokenType.INT256,
323 TokenType.UINT256,
324 TokenType.MEDIUMINT,
325 TokenType.UMEDIUMINT,
326 TokenType.FIXEDSTRING,
327 TokenType.FLOAT,
328 TokenType.DOUBLE,
329 TokenType.UDOUBLE,
330 TokenType.CHAR,
331 TokenType.NCHAR,
332 TokenType.VARCHAR,
333 TokenType.NVARCHAR,
334 TokenType.BPCHAR,
335 TokenType.TEXT,
336 TokenType.MEDIUMTEXT,
337 TokenType.LONGTEXT,
338 TokenType.BLOB,
339 TokenType.MEDIUMBLOB,
340 TokenType.LONGBLOB,
341 TokenType.BINARY,
342 TokenType.VARBINARY,
343 TokenType.JSON,
344 TokenType.JSONB,
345 TokenType.INTERVAL,
346 TokenType.TINYBLOB,
347 TokenType.TINYTEXT,
348 TokenType.TIME,
349 TokenType.TIMETZ,
350 TokenType.TIMESTAMP,
351 TokenType.TIMESTAMP_S,
352 TokenType.TIMESTAMP_MS,
353 TokenType.TIMESTAMP_NS,
354 TokenType.TIMESTAMPTZ,
355 TokenType.TIMESTAMPLTZ,
356 TokenType.TIMESTAMPNTZ,
357 TokenType.DATETIME,
358 TokenType.DATETIME2,
359 TokenType.DATETIME64,
360 TokenType.SMALLDATETIME,
361 TokenType.DATE,
362 TokenType.DATE32,
363 TokenType.INT4RANGE,
364 TokenType.INT4MULTIRANGE,
365 TokenType.INT8RANGE,
366 TokenType.INT8MULTIRANGE,
367 TokenType.NUMRANGE,
368 TokenType.NUMMULTIRANGE,
369 TokenType.TSRANGE,
370 TokenType.TSMULTIRANGE,
371 TokenType.TSTZRANGE,
372 TokenType.TSTZMULTIRANGE,
373 TokenType.DATERANGE,
374 TokenType.DATEMULTIRANGE,
375 TokenType.DECIMAL,
376 TokenType.DECIMAL32,
377 TokenType.DECIMAL64,
378 TokenType.DECIMAL128,
379 TokenType.DECIMAL256,
380 TokenType.UDECIMAL,
381 TokenType.BIGDECIMAL,
382 TokenType.UUID,
383 TokenType.GEOGRAPHY,
384 TokenType.GEOGRAPHYPOINT,
385 TokenType.GEOMETRY,
386 TokenType.POINT,
387 TokenType.RING,
388 TokenType.LINESTRING,
389 TokenType.MULTILINESTRING,
390 TokenType.POLYGON,
391 TokenType.MULTIPOLYGON,
392 TokenType.HLLSKETCH,
393 TokenType.HSTORE,
394 TokenType.PSEUDO_TYPE,
395 TokenType.SUPER,
396 TokenType.SERIAL,
397 TokenType.SMALLSERIAL,
398 TokenType.BIGSERIAL,
399 TokenType.XML,
400 TokenType.YEAR,
401 TokenType.USERDEFINED,
402 TokenType.MONEY,
403 TokenType.SMALLMONEY,
404 TokenType.ROWVERSION,
405 TokenType.IMAGE,
406 TokenType.VARIANT,
407
TokenType.VECTOR, 408 TokenType.VOID, 409 TokenType.OBJECT, 410 TokenType.OBJECT_IDENTIFIER, 411 TokenType.INET, 412 TokenType.IPADDRESS, 413 TokenType.IPPREFIX, 414 TokenType.IPV4, 415 TokenType.IPV6, 416 TokenType.UNKNOWN, 417 TokenType.NOTHING, 418 TokenType.NULL, 419 TokenType.NAME, 420 TokenType.TDIGEST, 421 TokenType.DYNAMIC, 422 *ENUM_TYPE_TOKENS, 423 *NESTED_TYPE_TOKENS, 424 *AGGREGATE_TYPE_TOKENS, 425 } 426 427 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 428 TokenType.BIGINT: TokenType.UBIGINT, 429 TokenType.INT: TokenType.UINT, 430 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 431 TokenType.SMALLINT: TokenType.USMALLINT, 432 TokenType.TINYINT: TokenType.UTINYINT, 433 TokenType.DECIMAL: TokenType.UDECIMAL, 434 TokenType.DOUBLE: TokenType.UDOUBLE, 435 } 436 437 SUBQUERY_PREDICATES = { 438 TokenType.ANY: exp.Any, 439 TokenType.ALL: exp.All, 440 TokenType.EXISTS: exp.Exists, 441 TokenType.SOME: exp.Any, 442 } 443 444 RESERVED_TOKENS = { 445 *Tokenizer.SINGLE_TOKENS.values(), 446 TokenType.SELECT, 447 } - {TokenType.IDENTIFIER} 448 449 DB_CREATABLES = { 450 TokenType.DATABASE, 451 TokenType.DICTIONARY, 452 TokenType.FILE_FORMAT, 453 TokenType.MODEL, 454 TokenType.NAMESPACE, 455 TokenType.SCHEMA, 456 TokenType.SEMANTIC_VIEW, 457 TokenType.SEQUENCE, 458 TokenType.SINK, 459 TokenType.SOURCE, 460 TokenType.STAGE, 461 TokenType.STORAGE_INTEGRATION, 462 TokenType.STREAMLIT, 463 TokenType.TABLE, 464 TokenType.TAG, 465 TokenType.VIEW, 466 TokenType.WAREHOUSE, 467 } 468 469 CREATABLES = { 470 TokenType.COLUMN, 471 TokenType.CONSTRAINT, 472 TokenType.FOREIGN_KEY, 473 TokenType.FUNCTION, 474 TokenType.INDEX, 475 TokenType.PROCEDURE, 476 *DB_CREATABLES, 477 } 478 479 ALTERABLES = { 480 TokenType.INDEX, 481 TokenType.TABLE, 482 TokenType.VIEW, 483 } 484 485 # Tokens that can represent identifiers 486 ID_VAR_TOKENS = { 487 TokenType.ALL, 488 TokenType.ATTACH, 489 TokenType.VAR, 490 TokenType.ANTI, 491 TokenType.APPLY, 492 TokenType.ASC, 493 TokenType.ASOF, 494 TokenType.AUTO_INCREMENT, 495 TokenType.BEGIN, 496 TokenType.BPCHAR, 497 TokenType.CACHE, 498 TokenType.CASE, 499 TokenType.COLLATE, 500 TokenType.COMMAND, 501 TokenType.COMMENT, 502 TokenType.COMMIT, 503 TokenType.CONSTRAINT, 504 TokenType.COPY, 505 TokenType.CUBE, 506 TokenType.CURRENT_SCHEMA, 507 TokenType.DEFAULT, 508 TokenType.DELETE, 509 TokenType.DESC, 510 TokenType.DESCRIBE, 511 TokenType.DETACH, 512 TokenType.DICTIONARY, 513 TokenType.DIV, 514 TokenType.END, 515 TokenType.EXECUTE, 516 TokenType.EXPORT, 517 TokenType.ESCAPE, 518 TokenType.FALSE, 519 TokenType.FIRST, 520 TokenType.FILTER, 521 TokenType.FINAL, 522 TokenType.FORMAT, 523 TokenType.FULL, 524 TokenType.GET, 525 TokenType.IDENTIFIER, 526 TokenType.IS, 527 TokenType.ISNULL, 528 TokenType.INTERVAL, 529 TokenType.KEEP, 530 TokenType.KILL, 531 TokenType.LEFT, 532 TokenType.LIMIT, 533 TokenType.LOAD, 534 TokenType.MERGE, 535 TokenType.NATURAL, 536 TokenType.NEXT, 537 TokenType.OFFSET, 538 TokenType.OPERATOR, 539 TokenType.ORDINALITY, 540 TokenType.OVERLAPS, 541 TokenType.OVERWRITE, 542 TokenType.PARTITION, 543 TokenType.PERCENT, 544 TokenType.PIVOT, 545 TokenType.PRAGMA, 546 TokenType.PUT, 547 TokenType.RANGE, 548 TokenType.RECURSIVE, 549 TokenType.REFERENCES, 550 TokenType.REFRESH, 551 TokenType.RENAME, 552 TokenType.REPLACE, 553 TokenType.RIGHT, 554 TokenType.ROLLUP, 555 TokenType.ROW, 556 TokenType.ROWS, 557 TokenType.SEMI, 558 TokenType.SET, 559 TokenType.SETTINGS, 560 TokenType.SHOW, 561 TokenType.TEMPORARY, 562 TokenType.TOP, 563 TokenType.TRUE, 564 TokenType.TRUNCATE, 565 
TokenType.UNIQUE, 566 TokenType.UNNEST, 567 TokenType.UNPIVOT, 568 TokenType.UPDATE, 569 TokenType.USE, 570 TokenType.VOLATILE, 571 TokenType.WINDOW, 572 *CREATABLES, 573 *SUBQUERY_PREDICATES, 574 *TYPE_TOKENS, 575 *NO_PAREN_FUNCTIONS, 576 } 577 ID_VAR_TOKENS.remove(TokenType.UNION) 578 579 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 580 TokenType.ANTI, 581 TokenType.ASOF, 582 TokenType.FULL, 583 TokenType.LEFT, 584 TokenType.LOCK, 585 TokenType.NATURAL, 586 TokenType.RIGHT, 587 TokenType.SEMI, 588 TokenType.WINDOW, 589 } 590 591 ALIAS_TOKENS = ID_VAR_TOKENS 592 593 COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS 594 595 ARRAY_CONSTRUCTORS = { 596 "ARRAY": exp.Array, 597 "LIST": exp.List, 598 } 599 600 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 601 602 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 603 604 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 605 606 FUNC_TOKENS = { 607 TokenType.COLLATE, 608 TokenType.COMMAND, 609 TokenType.CURRENT_DATE, 610 TokenType.CURRENT_DATETIME, 611 TokenType.CURRENT_SCHEMA, 612 TokenType.CURRENT_TIMESTAMP, 613 TokenType.CURRENT_TIME, 614 TokenType.CURRENT_USER, 615 TokenType.FILTER, 616 TokenType.FIRST, 617 TokenType.FORMAT, 618 TokenType.GET, 619 TokenType.GLOB, 620 TokenType.IDENTIFIER, 621 TokenType.INDEX, 622 TokenType.ISNULL, 623 TokenType.ILIKE, 624 TokenType.INSERT, 625 TokenType.LIKE, 626 TokenType.MERGE, 627 TokenType.NEXT, 628 TokenType.OFFSET, 629 TokenType.PRIMARY_KEY, 630 TokenType.RANGE, 631 TokenType.REPLACE, 632 TokenType.RLIKE, 633 TokenType.ROW, 634 TokenType.UNNEST, 635 TokenType.VAR, 636 TokenType.LEFT, 637 TokenType.RIGHT, 638 TokenType.SEQUENCE, 639 TokenType.DATE, 640 TokenType.DATETIME, 641 TokenType.TABLE, 642 TokenType.TIMESTAMP, 643 TokenType.TIMESTAMPTZ, 644 TokenType.TRUNCATE, 645 TokenType.WINDOW, 646 TokenType.XOR, 647 *TYPE_TOKENS, 648 *SUBQUERY_PREDICATES, 649 } 650 651 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 652 TokenType.AND: exp.And, 653 } 654 655 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 656 TokenType.COLON_EQ: exp.PropertyEQ, 657 } 658 659 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 660 TokenType.OR: exp.Or, 661 } 662 663 EQUALITY = { 664 TokenType.EQ: exp.EQ, 665 TokenType.NEQ: exp.NEQ, 666 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 667 } 668 669 COMPARISON = { 670 TokenType.GT: exp.GT, 671 TokenType.GTE: exp.GTE, 672 TokenType.LT: exp.LT, 673 TokenType.LTE: exp.LTE, 674 } 675 676 BITWISE = { 677 TokenType.AMP: exp.BitwiseAnd, 678 TokenType.CARET: exp.BitwiseXor, 679 TokenType.PIPE: exp.BitwiseOr, 680 } 681 682 TERM = { 683 TokenType.DASH: exp.Sub, 684 TokenType.PLUS: exp.Add, 685 TokenType.MOD: exp.Mod, 686 TokenType.COLLATE: exp.Collate, 687 } 688 689 FACTOR = { 690 TokenType.DIV: exp.IntDiv, 691 TokenType.LR_ARROW: exp.Distance, 692 TokenType.SLASH: exp.Div, 693 TokenType.STAR: exp.Mul, 694 } 695 696 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 697 698 TIMES = { 699 TokenType.TIME, 700 TokenType.TIMETZ, 701 } 702 703 TIMESTAMPS = { 704 TokenType.TIMESTAMP, 705 TokenType.TIMESTAMPNTZ, 706 TokenType.TIMESTAMPTZ, 707 TokenType.TIMESTAMPLTZ, 708 *TIMES, 709 } 710 711 SET_OPERATIONS = { 712 TokenType.UNION, 713 TokenType.INTERSECT, 714 TokenType.EXCEPT, 715 } 716 717 JOIN_METHODS = { 718 TokenType.ASOF, 719 TokenType.NATURAL, 720 TokenType.POSITIONAL, 721 } 722 723 JOIN_SIDES = { 724 TokenType.LEFT, 725 TokenType.RIGHT, 726 TokenType.FULL, 727 } 728 729 JOIN_KINDS = { 730 TokenType.ANTI, 731 TokenType.CROSS, 732 TokenType.INNER, 733 
TokenType.OUTER, 734 TokenType.SEMI, 735 TokenType.STRAIGHT_JOIN, 736 } 737 738 JOIN_HINTS: t.Set[str] = set() 739 740 LAMBDAS = { 741 TokenType.ARROW: lambda self, expressions: self.expression( 742 exp.Lambda, 743 this=self._replace_lambda( 744 self._parse_assignment(), 745 expressions, 746 ), 747 expressions=expressions, 748 ), 749 TokenType.FARROW: lambda self, expressions: self.expression( 750 exp.Kwarg, 751 this=exp.var(expressions[0].name), 752 expression=self._parse_assignment(), 753 ), 754 } 755 756 COLUMN_OPERATORS = { 757 TokenType.DOT: None, 758 TokenType.DOTCOLON: lambda self, this, to: self.expression( 759 exp.JSONCast, 760 this=this, 761 to=to, 762 ), 763 TokenType.DCOLON: lambda self, this, to: self.build_cast( 764 strict=self.STRICT_CAST, this=this, to=to 765 ), 766 TokenType.ARROW: lambda self, this, path: self.expression( 767 exp.JSONExtract, 768 this=this, 769 expression=self.dialect.to_json_path(path), 770 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 771 ), 772 TokenType.DARROW: lambda self, this, path: self.expression( 773 exp.JSONExtractScalar, 774 this=this, 775 expression=self.dialect.to_json_path(path), 776 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 777 ), 778 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 779 exp.JSONBExtract, 780 this=this, 781 expression=path, 782 ), 783 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 784 exp.JSONBExtractScalar, 785 this=this, 786 expression=path, 787 ), 788 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 789 exp.JSONBContains, 790 this=this, 791 expression=key, 792 ), 793 } 794 795 CAST_COLUMN_OPERATORS = { 796 TokenType.DOTCOLON, 797 TokenType.DCOLON, 798 } 799 800 EXPRESSION_PARSERS = { 801 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 802 exp.Column: lambda self: self._parse_column(), 803 exp.Condition: lambda self: self._parse_assignment(), 804 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 805 exp.Expression: lambda self: self._parse_expression(), 806 exp.From: lambda self: self._parse_from(joins=True), 807 exp.Group: lambda self: self._parse_group(), 808 exp.Having: lambda self: self._parse_having(), 809 exp.Hint: lambda self: self._parse_hint_body(), 810 exp.Identifier: lambda self: self._parse_id_var(), 811 exp.Join: lambda self: self._parse_join(), 812 exp.Lambda: lambda self: self._parse_lambda(), 813 exp.Lateral: lambda self: self._parse_lateral(), 814 exp.Limit: lambda self: self._parse_limit(), 815 exp.Offset: lambda self: self._parse_offset(), 816 exp.Order: lambda self: self._parse_order(), 817 exp.Ordered: lambda self: self._parse_ordered(), 818 exp.Properties: lambda self: self._parse_properties(), 819 exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(), 820 exp.Qualify: lambda self: self._parse_qualify(), 821 exp.Returning: lambda self: self._parse_returning(), 822 exp.Select: lambda self: self._parse_select(), 823 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 824 exp.Table: lambda self: self._parse_table_parts(), 825 exp.TableAlias: lambda self: self._parse_table_alias(), 826 exp.Tuple: lambda self: self._parse_value(values=False), 827 exp.Whens: lambda self: self._parse_when_matched(), 828 exp.Where: lambda self: self._parse_where(), 829 exp.Window: lambda self: self._parse_named_window(), 830 exp.With: lambda self: self._parse_with(), 831 "JOIN_TYPE": lambda self: self._parse_join_parts(), 832 } 833 834 STATEMENT_PARSERS = { 835 
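# Statement dispatch keyed on the leading token; tokens without an entry here
# fall through to the generic expression / command parsing path.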
TokenType.ALTER: lambda self: self._parse_alter(), 836 TokenType.ANALYZE: lambda self: self._parse_analyze(), 837 TokenType.BEGIN: lambda self: self._parse_transaction(), 838 TokenType.CACHE: lambda self: self._parse_cache(), 839 TokenType.COMMENT: lambda self: self._parse_comment(), 840 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 841 TokenType.COPY: lambda self: self._parse_copy(), 842 TokenType.CREATE: lambda self: self._parse_create(), 843 TokenType.DELETE: lambda self: self._parse_delete(), 844 TokenType.DESC: lambda self: self._parse_describe(), 845 TokenType.DESCRIBE: lambda self: self._parse_describe(), 846 TokenType.DROP: lambda self: self._parse_drop(), 847 TokenType.GRANT: lambda self: self._parse_grant(), 848 TokenType.REVOKE: lambda self: self._parse_revoke(), 849 TokenType.INSERT: lambda self: self._parse_insert(), 850 TokenType.KILL: lambda self: self._parse_kill(), 851 TokenType.LOAD: lambda self: self._parse_load(), 852 TokenType.MERGE: lambda self: self._parse_merge(), 853 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 854 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 855 TokenType.REFRESH: lambda self: self._parse_refresh(), 856 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 857 TokenType.SET: lambda self: self._parse_set(), 858 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 859 TokenType.UNCACHE: lambda self: self._parse_uncache(), 860 TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True), 861 TokenType.UPDATE: lambda self: self._parse_update(), 862 TokenType.USE: lambda self: self._parse_use(), 863 TokenType.SEMICOLON: lambda self: exp.Semicolon(), 864 } 865 866 UNARY_PARSERS = { 867 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 868 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 869 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 870 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 871 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 872 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 873 } 874 875 STRING_PARSERS = { 876 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 877 exp.RawString, this=token.text 878 ), 879 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 880 exp.National, this=token.text 881 ), 882 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 883 TokenType.STRING: lambda self, token: self.expression( 884 exp.Literal, this=token.text, is_string=True 885 ), 886 TokenType.UNICODE_STRING: lambda self, token: self.expression( 887 exp.UnicodeString, 888 this=token.text, 889 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 890 ), 891 } 892 893 NUMERIC_PARSERS = { 894 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 895 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 896 TokenType.HEX_STRING: lambda self, token: self.expression( 897 exp.HexString, 898 this=token.text, 899 is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None, 900 ), 901 TokenType.NUMBER: lambda self, token: self.expression( 902 exp.Literal, this=token.text, is_string=False 903 ), 904 } 905 906 PRIMARY_PARSERS = { 907 **STRING_PARSERS, 908 **NUMERIC_PARSERS, 909 
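# Like the string and numeric parsers above, each handler is called with the
# token that was just consumed; constant handlers simply ignore it.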
TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 910 TokenType.NULL: lambda self, _: self.expression(exp.Null), 911 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 912 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 913 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 914 TokenType.STAR: lambda self, _: self._parse_star_ops(), 915 } 916 917 PLACEHOLDER_PARSERS = { 918 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 919 TokenType.PARAMETER: lambda self: self._parse_parameter(), 920 TokenType.COLON: lambda self: ( 921 self.expression(exp.Placeholder, this=self._prev.text) 922 if self._match_set(self.COLON_PLACEHOLDER_TOKENS) 923 else None 924 ), 925 } 926 927 RANGE_PARSERS = { 928 TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll), 929 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 930 TokenType.GLOB: binary_range_parser(exp.Glob), 931 TokenType.ILIKE: binary_range_parser(exp.ILike), 932 TokenType.IN: lambda self, this: self._parse_in(this), 933 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 934 TokenType.IS: lambda self, this: self._parse_is(this), 935 TokenType.LIKE: binary_range_parser(exp.Like), 936 TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True), 937 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 938 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 939 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 940 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 941 } 942 943 PIPE_SYNTAX_TRANSFORM_PARSERS = { 944 "AGGREGATE": lambda self, query: self._parse_pipe_syntax_aggregate(query), 945 "AS": lambda self, query: self._build_pipe_cte( 946 query, [exp.Star()], self._parse_table_alias() 947 ), 948 "EXTEND": lambda self, query: self._parse_pipe_syntax_extend(query), 949 "LIMIT": lambda self, query: self._parse_pipe_syntax_limit(query), 950 "ORDER BY": lambda self, query: query.order_by( 951 self._parse_order(), append=False, copy=False 952 ), 953 "PIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 954 "SELECT": lambda self, query: self._parse_pipe_syntax_select(query), 955 "TABLESAMPLE": lambda self, query: self._parse_pipe_syntax_tablesample(query), 956 "UNPIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 957 "WHERE": lambda self, query: query.where(self._parse_where(), copy=False), 958 } 959 960 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 961 "ALLOWED_VALUES": lambda self: self.expression( 962 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 963 ), 964 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 965 "AUTO": lambda self: self._parse_auto_property(), 966 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 967 "BACKUP": lambda self: self.expression( 968 exp.BackupProperty, this=self._parse_var(any_token=True) 969 ), 970 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 971 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 972 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 973 "CHECKSUM": lambda self: self._parse_checksum(), 974 "CLUSTER BY": lambda self: self._parse_cluster(), 975 "CLUSTERED": lambda self: self._parse_clustered_by(), 976 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 977 exp.CollateProperty, **kwargs 978 ), 979 "COMMENT": lambda self: 
self._parse_property_assignment(exp.SchemaCommentProperty), 980 "CONTAINS": lambda self: self._parse_contains_property(), 981 "COPY": lambda self: self._parse_copy_property(), 982 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 983 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 984 "DEFINER": lambda self: self._parse_definer(), 985 "DETERMINISTIC": lambda self: self.expression( 986 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 987 ), 988 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 989 "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty), 990 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 991 "DISTKEY": lambda self: self._parse_distkey(), 992 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 993 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 994 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 995 "ENVIRONMENT": lambda self: self.expression( 996 exp.EnviromentProperty, expressions=self._parse_wrapped_csv(self._parse_assignment) 997 ), 998 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 999 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 1000 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 1001 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1002 "FREESPACE": lambda self: self._parse_freespace(), 1003 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 1004 "HEAP": lambda self: self.expression(exp.HeapProperty), 1005 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 1006 "IMMUTABLE": lambda self: self.expression( 1007 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 1008 ), 1009 "INHERITS": lambda self: self.expression( 1010 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 1011 ), 1012 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 1013 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 1014 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 1015 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 1016 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 1017 "LIKE": lambda self: self._parse_create_like(), 1018 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 1019 "LOCK": lambda self: self._parse_locking(), 1020 "LOCKING": lambda self: self._parse_locking(), 1021 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 1022 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 1023 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 1024 "MODIFIES": lambda self: self._parse_modifies_property(), 1025 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 1026 "NO": lambda self: self._parse_no_property(), 1027 "ON": lambda self: self._parse_on_property(), 1028 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 1029 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 1030 "PARTITION": lambda self: self._parse_partitioned_of(), 1031 "PARTITION BY": lambda self: self._parse_partitioned_by(), 1032 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 1033 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 1034 "PRIMARY KEY": lambda self: 
self._parse_primary_key(in_props=True), 1035 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 1036 "READS": lambda self: self._parse_reads_property(), 1037 "REMOTE": lambda self: self._parse_remote_with_connection(), 1038 "RETURNS": lambda self: self._parse_returns(), 1039 "STRICT": lambda self: self.expression(exp.StrictProperty), 1040 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 1041 "ROW": lambda self: self._parse_row(), 1042 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 1043 "SAMPLE": lambda self: self.expression( 1044 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 1045 ), 1046 "SECURE": lambda self: self.expression(exp.SecureProperty), 1047 "SECURITY": lambda self: self._parse_security(), 1048 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 1049 "SETTINGS": lambda self: self._parse_settings_property(), 1050 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 1051 "SORTKEY": lambda self: self._parse_sortkey(), 1052 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 1053 "STABLE": lambda self: self.expression( 1054 exp.StabilityProperty, this=exp.Literal.string("STABLE") 1055 ), 1056 "STORED": lambda self: self._parse_stored(), 1057 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 1058 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 1059 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 1060 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 1061 "TO": lambda self: self._parse_to_table(), 1062 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 1063 "TRANSFORM": lambda self: self.expression( 1064 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 1065 ), 1066 "TTL": lambda self: self._parse_ttl(), 1067 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1068 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 1069 "VOLATILE": lambda self: self._parse_volatile_property(), 1070 "WITH": lambda self: self._parse_with_property(), 1071 } 1072 1073 CONSTRAINT_PARSERS = { 1074 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 1075 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 1076 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 1077 "CHARACTER SET": lambda self: self.expression( 1078 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 1079 ), 1080 "CHECK": lambda self: self.expression( 1081 exp.CheckColumnConstraint, 1082 this=self._parse_wrapped(self._parse_assignment), 1083 enforced=self._match_text_seq("ENFORCED"), 1084 ), 1085 "COLLATE": lambda self: self.expression( 1086 exp.CollateColumnConstraint, 1087 this=self._parse_identifier() or self._parse_column(), 1088 ), 1089 "COMMENT": lambda self: self.expression( 1090 exp.CommentColumnConstraint, this=self._parse_string() 1091 ), 1092 "COMPRESS": lambda self: self._parse_compress(), 1093 "CLUSTERED": lambda self: self.expression( 1094 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1095 ), 1096 "NONCLUSTERED": lambda self: self.expression( 1097 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1098 ), 1099 "DEFAULT": lambda self: self.expression( 1100 exp.DefaultColumnConstraint, this=self._parse_bitwise() 1101 ), 1102 "ENCODE": lambda self: 
self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 1103 "EPHEMERAL": lambda self: self.expression( 1104 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1105 ), 1106 "EXCLUDE": lambda self: self.expression( 1107 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1108 ), 1109 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1110 "FORMAT": lambda self: self.expression( 1111 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1112 ), 1113 "GENERATED": lambda self: self._parse_generated_as_identity(), 1114 "IDENTITY": lambda self: self._parse_auto_increment(), 1115 "INLINE": lambda self: self._parse_inline(), 1116 "LIKE": lambda self: self._parse_create_like(), 1117 "NOT": lambda self: self._parse_not_constraint(), 1118 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1119 "ON": lambda self: ( 1120 self._match(TokenType.UPDATE) 1121 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1122 ) 1123 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1124 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1125 "PERIOD": lambda self: self._parse_period_for_system_time(), 1126 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1127 "REFERENCES": lambda self: self._parse_references(match=False), 1128 "TITLE": lambda self: self.expression( 1129 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1130 ), 1131 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1132 "UNIQUE": lambda self: self._parse_unique(), 1133 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1134 "WATERMARK": lambda self: self.expression( 1135 exp.WatermarkColumnConstraint, 1136 this=self._match(TokenType.FOR) and self._parse_column(), 1137 expression=self._match(TokenType.ALIAS) and self._parse_disjunction(), 1138 ), 1139 "WITH": lambda self: self.expression( 1140 exp.Properties, expressions=self._parse_wrapped_properties() 1141 ), 1142 "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1143 "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1144 } 1145 1146 def _parse_partitioned_by_bucket_or_truncate(self) -> t.Optional[exp.Expression]: 1147 if not self._match(TokenType.L_PAREN, advance=False): 1148 # Partitioning by bucket or truncate follows the syntax: 1149 # PARTITION BY (BUCKET(..) 
| TRUNCATE(..)) 1150 # If we don't have parenthesis after each keyword, we should instead parse this as an identifier 1151 self._retreat(self._index - 1) 1152 return None 1153 1154 klass = ( 1155 exp.PartitionedByBucket 1156 if self._prev.text.upper() == "BUCKET" 1157 else exp.PartitionByTruncate 1158 ) 1159 1160 args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column()) 1161 this, expression = seq_get(args, 0), seq_get(args, 1) 1162 1163 if isinstance(this, exp.Literal): 1164 # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order 1165 # - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)` 1166 # - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)` 1167 # Both variants are canonicalized in the latter i.e `bucket(<col name>, <num buckets>)` 1168 # 1169 # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning 1170 # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties 1171 this, expression = expression, this 1172 1173 return self.expression(klass, this=this, expression=expression) 1174 1175 ALTER_PARSERS = { 1176 "ADD": lambda self: self._parse_alter_table_add(), 1177 "AS": lambda self: self._parse_select(), 1178 "ALTER": lambda self: self._parse_alter_table_alter(), 1179 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1180 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 1181 "DROP": lambda self: self._parse_alter_table_drop(), 1182 "RENAME": lambda self: self._parse_alter_table_rename(), 1183 "SET": lambda self: self._parse_alter_table_set(), 1184 "SWAP": lambda self: self.expression( 1185 exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True) 1186 ), 1187 } 1188 1189 ALTER_ALTER_PARSERS = { 1190 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1191 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1192 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1193 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1194 } 1195 1196 SCHEMA_UNNAMED_CONSTRAINTS = { 1197 "CHECK", 1198 "EXCLUDE", 1199 "FOREIGN KEY", 1200 "LIKE", 1201 "PERIOD", 1202 "PRIMARY KEY", 1203 "UNIQUE", 1204 "WATERMARK", 1205 "BUCKET", 1206 "TRUNCATE", 1207 } 1208 1209 NO_PAREN_FUNCTION_PARSERS = { 1210 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1211 "CASE": lambda self: self._parse_case(), 1212 "CONNECT_BY_ROOT": lambda self: self.expression( 1213 exp.ConnectByRoot, this=self._parse_column() 1214 ), 1215 "IF": lambda self: self._parse_if(), 1216 } 1217 1218 INVALID_FUNC_NAME_TOKENS = { 1219 TokenType.IDENTIFIER, 1220 TokenType.STRING, 1221 } 1222 1223 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1224 1225 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1226 1227 FUNCTION_PARSERS = { 1228 **{ 1229 name: lambda self: self._parse_max_min_by(exp.ArgMax) for name in exp.ArgMax.sql_names() 1230 }, 1231 **{ 1232 name: lambda self: self._parse_max_min_by(exp.ArgMin) for name in exp.ArgMin.sql_names() 1233 }, 1234 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1235 "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil), 1236 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1237 "DECODE": lambda self: self._parse_decode(), 1238 "EXTRACT": lambda self: self._parse_extract(), 1239 
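# Handlers in FUNCTION_PARSERS take over right after the function name and
# opening parenthesis are consumed, which is what allows keyword-style argument
# grammars such as EXTRACT(part FROM expr) or TRIM(BOTH x FROM y).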
"FLOOR": lambda self: self._parse_ceil_floor(exp.Floor), 1240 "GAP_FILL": lambda self: self._parse_gap_fill(), 1241 "JSON_OBJECT": lambda self: self._parse_json_object(), 1242 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1243 "JSON_TABLE": lambda self: self._parse_json_table(), 1244 "MATCH": lambda self: self._parse_match_against(), 1245 "NORMALIZE": lambda self: self._parse_normalize(), 1246 "OPENJSON": lambda self: self._parse_open_json(), 1247 "OVERLAY": lambda self: self._parse_overlay(), 1248 "POSITION": lambda self: self._parse_position(), 1249 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1250 "STRING_AGG": lambda self: self._parse_string_agg(), 1251 "SUBSTRING": lambda self: self._parse_substring(), 1252 "TRIM": lambda self: self._parse_trim(), 1253 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1254 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1255 "XMLELEMENT": lambda self: self.expression( 1256 exp.XMLElement, 1257 this=self._match_text_seq("NAME") and self._parse_id_var(), 1258 expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression), 1259 ), 1260 "XMLTABLE": lambda self: self._parse_xml_table(), 1261 } 1262 1263 QUERY_MODIFIER_PARSERS = { 1264 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1265 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1266 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1267 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1268 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1269 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1270 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1271 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1272 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1273 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1274 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1275 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1276 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1277 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1278 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1279 TokenType.CLUSTER_BY: lambda self: ( 1280 "cluster", 1281 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1282 ), 1283 TokenType.DISTRIBUTE_BY: lambda self: ( 1284 "distribute", 1285 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1286 ), 1287 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1288 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1289 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1290 } 1291 QUERY_MODIFIER_TOKENS = set(QUERY_MODIFIER_PARSERS) 1292 1293 SET_PARSERS = { 1294 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1295 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1296 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1297 "TRANSACTION": lambda self: self._parse_set_transaction(), 1298 } 1299 1300 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1301 1302 TYPE_LITERAL_PARSERS = { 1303 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1304 } 1305 1306 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, 
t.Callable[[exp.DataType], exp.DataType]] = {}
1307
1308 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}
1309
1310 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}
1311
1312 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
1313 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
1314 "ISOLATION": (
1315 ("LEVEL", "REPEATABLE", "READ"),
1316 ("LEVEL", "READ", "COMMITTED"),
1317 ("LEVEL", "READ", "UNCOMMITTED"),
1318 ("LEVEL", "SERIALIZABLE"),
1319 ),
1320 "READ": ("WRITE", "ONLY"),
1321 }
1322
1323 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
1324 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
1325 )
1326 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")
1327
1328 CREATE_SEQUENCE: OPTIONS_TYPE = {
1329 "SCALE": ("EXTEND", "NOEXTEND"),
1330 "SHARD": ("EXTEND", "NOEXTEND"),
1331 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
1332 **dict.fromkeys(
1333 (
1334 "SESSION",
1335 "GLOBAL",
1336 "KEEP",
1337 "NOKEEP",
1338 "ORDER",
1339 "NOORDER",
1340 "NOCACHE",
1341 "CYCLE",
1342 "NOCYCLE",
1343 "NOMINVALUE",
1344 "NOMAXVALUE",
1345 "NOSCALE",
1346 "NOSHARD",
1347 ),
1348 tuple(),
1349 ),
1350 }
1351
1352 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}
1353
1354 USABLES: OPTIONS_TYPE = dict.fromkeys(
1355 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple()
1356 )
1357
1358 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))
1359
1360 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = {
1361 "TYPE": ("EVOLUTION",),
1362 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()),
1363 }
1364
1365 PROCEDURE_OPTIONS: OPTIONS_TYPE = {}
1366
1367 EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple())
1368
1369 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = {
1370 "NOT": ("ENFORCED",),
1371 "MATCH": (
1372 "FULL",
1373 "PARTIAL",
1374 "SIMPLE",
1375 ),
1376 "INITIALLY": ("DEFERRED", "IMMEDIATE"),
1377 "USING": (
1378 "BTREE",
1379 "HASH",
1380 ),
1381 **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()),
1382 }
1383
1384 WINDOW_EXCLUDE_OPTIONS: OPTIONS_TYPE = {
1385 "NO": ("OTHERS",),
1386 "CURRENT": ("ROW",),
1387 **dict.fromkeys(("GROUP", "TIES"), tuple()),
1388 }
1389
1390 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}
1391
1392 CLONE_KEYWORDS = {"CLONE", "COPY"}
1393 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"}
1394 HISTORICAL_DATA_KIND = {"OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"}
1395
1396 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}
1397
1398 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}
1399
1400 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}
1401
1402 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}
1403
1404 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
1405 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
1406 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}
1407
1408 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}
1409
1410 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}
1411
1412 ADD_CONSTRAINT_TOKENS = {
1413 TokenType.CONSTRAINT,
1414 TokenType.FOREIGN_KEY,
1415 TokenType.INDEX,
1416 TokenType.KEY,
1417 TokenType.PRIMARY_KEY,
1418 TokenType.UNIQUE,
1419 }
1420
1421 DISTINCT_TOKENS = {TokenType.DISTINCT}
1422
1423 NULL_TOKENS = {TokenType.NULL}
1424
1425 UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS
1426
1427
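# Tokens that can start a SELECT-like query.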
SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1428 1429 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1430 1431 IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} 1432 1433 ODBC_DATETIME_LITERALS = { 1434 "d": exp.Date, 1435 "t": exp.Time, 1436 "ts": exp.Timestamp, 1437 } 1438 1439 ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"} 1440 1441 PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN} 1442 1443 # The style options for the DESCRIBE statement 1444 DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"} 1445 1446 # The style options for the ANALYZE statement 1447 ANALYZE_STYLES = { 1448 "BUFFER_USAGE_LIMIT", 1449 "FULL", 1450 "LOCAL", 1451 "NO_WRITE_TO_BINLOG", 1452 "SAMPLE", 1453 "SKIP_LOCKED", 1454 "VERBOSE", 1455 } 1456 1457 ANALYZE_EXPRESSION_PARSERS = { 1458 "ALL": lambda self: self._parse_analyze_columns(), 1459 "COMPUTE": lambda self: self._parse_analyze_statistics(), 1460 "DELETE": lambda self: self._parse_analyze_delete(), 1461 "DROP": lambda self: self._parse_analyze_histogram(), 1462 "ESTIMATE": lambda self: self._parse_analyze_statistics(), 1463 "LIST": lambda self: self._parse_analyze_list(), 1464 "PREDICATE": lambda self: self._parse_analyze_columns(), 1465 "UPDATE": lambda self: self._parse_analyze_histogram(), 1466 "VALIDATE": lambda self: self._parse_analyze_validate(), 1467 } 1468 1469 PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"} 1470 1471 AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET) 1472 1473 OPERATION_MODIFIERS: t.Set[str] = set() 1474 1475 RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"} 1476 1477 MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows) 1478 1479 STRICT_CAST = True 1480 1481 PREFIXED_PIVOT_COLUMNS = False 1482 IDENTIFY_PIVOT_STRINGS = False 1483 1484 LOG_DEFAULTS_TO_LN = False 1485 1486 # Whether the table sample clause expects CSV syntax 1487 TABLESAMPLE_CSV = False 1488 1489 # The default method used for table sampling 1490 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1491 1492 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1493 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1494 1495 # Whether the TRIM function expects the characters to trim as its first argument 1496 TRIM_PATTERN_FIRST = False 1497 1498 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1499 STRING_ALIASES = False 1500 1501 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1502 MODIFIERS_ATTACHED_TO_SET_OP = True 1503 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1504 1505 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1506 NO_PAREN_IF_COMMANDS = True 1507 1508 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1509 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1510 1511 # Whether the `:` operator is used to extract a value from a VARIANT column 1512 COLON_IS_VARIANT_EXTRACT = False 1513 1514 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1515 # If this is True and '(' is not found, the keyword will be treated as an identifier 1516 VALUES_FOLLOWED_BY_PAREN = True 1517 1518 # Whether implicit unnesting is supported, e.g. 
SELECT 1 FROM y.z AS z, z.a (Redshift) 1519 SUPPORTS_IMPLICIT_UNNEST = False 1520 1521 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1522 INTERVAL_SPANS = True 1523 1524 # Whether a PARTITION clause can follow a table reference 1525 SUPPORTS_PARTITION_SELECTION = False 1526 1527 # Whether the `name AS expr` schema/column constraint requires parentheses around `expr` 1528 WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True 1529 1530 # Whether the 'AS' keyword is optional in the CTE definition syntax 1531 OPTIONAL_ALIAS_TOKEN_CTE = True 1532 1533 # Whether renaming a column with an ALTER statement requires the presence of the COLUMN keyword 1534 ALTER_RENAME_REQUIRES_COLUMN = True 1535 1536 # Whether all join types have the same precedence, i.e., they "naturally" produce a left-deep tree. 1537 # In standard SQL, joins that use the JOIN keyword take higher precedence than comma-joins. That is 1538 # to say, JOIN operators happen before comma operators. This is not the case in some dialects, such 1539 # as BigQuery, where all joins have the same precedence. 1540 JOINS_HAVE_EQUAL_PRECEDENCE = False 1541 1542 # Whether TIMESTAMP <literal> can produce a zone-aware timestamp 1543 ZONE_AWARE_TIMESTAMP_CONSTRUCTOR = False 1544 1545 # Whether map literals support arbitrary expressions as keys. 1546 # When True, allows complex keys like arrays or literals: {[1, 2]: 3}, {1: 2} (e.g. DuckDB). 1547 # When False, keys are typically restricted to identifiers. 1548 MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS = False 1549 1550 # Whether JSON_EXTRACT requires a JSON expression as the first argument, e.g this 1551 # is true for Snowflake but not for BigQuery which can also process strings 1552 JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = False 1553 1554 __slots__ = ( 1555 "error_level", 1556 "error_message_context", 1557 "max_errors", 1558 "dialect", 1559 "sql", 1560 "errors", 1561 "_tokens", 1562 "_index", 1563 "_curr", 1564 "_next", 1565 "_prev", 1566 "_prev_comments", 1567 "_pipe_cte_counter", 1568 ) 1569 1570 # Autofilled 1571 SHOW_TRIE: t.Dict = {} 1572 SET_TRIE: t.Dict = {} 1573 1574 def __init__( 1575 self, 1576 error_level: t.Optional[ErrorLevel] = None, 1577 error_message_context: int = 100, 1578 max_errors: int = 3, 1579 dialect: DialectType = None, 1580 ): 1581 from sqlglot.dialects import Dialect 1582 1583 self.error_level = error_level or ErrorLevel.IMMEDIATE 1584 self.error_message_context = error_message_context 1585 self.max_errors = max_errors 1586 self.dialect = Dialect.get_or_raise(dialect) 1587 self.reset() 1588 1589 def reset(self): 1590 self.sql = "" 1591 self.errors = [] 1592 self._tokens = [] 1593 self._index = 0 1594 self._curr = None 1595 self._next = None 1596 self._prev = None 1597 self._prev_comments = None 1598 self._pipe_cte_counter = 0 1599 1600 def parse( 1601 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1602 ) -> t.List[t.Optional[exp.Expression]]: 1603 """ 1604 Parses a list of tokens and returns a list of syntax trees, one tree 1605 per parsed SQL statement. 1606 1607 Args: 1608 raw_tokens: The list of tokens. 1609 sql: The original SQL string, used to produce helpful debug messages. 1610 1611 Returns: 1612 The list of the produced syntax trees. 
    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if token.comments:
                    chunks.append([token])

                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )
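    # Illustrative doctest-style sketch: the top-level sqlglot API funnels into parse()
    # and parse_into(). Assumes only the documented public functions.
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> len(sqlglot.parse("SELECT 1; SELECT 2"))  # one tree per statement
    #     2
    #     >>> sqlglot.parse_one("UPDATE t SET x = 1", into=exp.Update).key
    #     'update'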
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error to the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        self._warn_unsupported()
        return self.expression(
            exp.Command,
            comments=self._prev_comments,
            this=self._prev.text.upper(),
            expression=self._parse_string(),
        )
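    # Illustrative sketch: the error level set in __init__ decides whether raise_error()
    # raises immediately, defers to check_errors(), or only logs. The kwarg is forwarded
    # to the Parser constructor by the top-level API.
    #
    #     >>> import sqlglot
    #     >>> from sqlglot.errors import ErrorLevel
    #     >>> sqlglot.parse_one("SELECT 1 +", error_level=ErrorLevel.WARN)
    #     # logs the problem via logger.error instead of raising ParseError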
    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
        This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
        solve this by setting & resetting the parser state accordingly.
        """
        index = self._index
        error_level = self.error_level

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(self) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            comments = self._prev_comments
            stmt = self.STATEMENT_PARSERS[self._prev.token_type](self)
            stmt.add_comments(comments, prepend=True)
            return stmt

        if self._match_set(self.dialect.tokenizer_class.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)
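    # Illustrative sketch: COMMENT ON statements land in _parse_comment. Assumes the
    # default dialect accepts this form.
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> expr = sqlglot.parse_one("COMMENT ON TABLE db.t IS 'fact table'")
    #     >>> isinstance(expr, exp.Comment), expr.args["kind"]
    #     (True, 'TABLE')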
    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()
        if not kind:
            return self._parse_as_command(start)

        concurrently = self._match_text_seq("CONCURRENTLY")
        if_exists = exists or self._parse_exists()

        if kind == "COLUMN":
            this = self._parse_column()
        else:
            this = self._parse_table_parts(
                schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
            )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            exists=if_exists,
            this=this,
            expressions=expressions,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
            cluster=cluster,
            concurrently=concurrently,
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )
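    # Illustrative sketch of _parse_drop's output; flags like IF EXISTS and CASCADE end
    # up as plain args on the exp.Drop node.
    #
    #     >>> import sqlglot
    #     >>> drop = sqlglot.parse_one("DROP TABLE IF EXISTS db.t CASCADE")
    #     >>> drop.args["exists"], drop.args["cascade"]
    #     (True, True)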
    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )
        refresh = self._match_pair(TokenType.OR, TokenType.REFRESH)

        unique = self._match(TokenType.UNIQUE)

        if self._match_text_seq("CLUSTERED", "COLUMNSTORE"):
            clustered = True
        elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq(
            "COLUMNSTORE"
        ):
            clustered = False
        else:
            clustered = None

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        concurrently = self._match_text_seq("CONCURRENTLY")
        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()
            extend_props(self._parse_properties())

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_user_defined_function_expression()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c)
            if not self._match(TokenType.ON):
                index = self._parse_id_var()
                anonymous = False
            else:
                index = None
                anonymous = True

            this = self._parse_index(index=index, anonymous=anonymous)
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            has_alias = self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            if create_token.token_type == TokenType.SEQUENCE:
                expression = self._parse_types()
                props = self._parse_properties()
                if props:
                    sequence_props = exp.SequenceProperties()
                    options = []
                    for prop in props:
                        if isinstance(prop, exp.SequenceProperties):
                            for arg, value in prop.args.items():
                                if arg == "options":
                                    options.extend(value)
                                else:
                                    sequence_props.set(arg, value)
                            prop.pop()

                    if options:
                        sequence_props.set("options", options)

                    props.append("expressions", sequence_props)
                    extend_props(props)
            else:
                expression = self._parse_ddl_select()
            # Some dialects also support using a table as an alias instead of a SELECT.
            # Here we fall back to this as an alternative.
            if not expression and has_alias:
                expression = self._try_parse(self._parse_table_parts)

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())
                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True
            elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE):
                extend_props(self._parse_properties())

            shallow = self._match_text_seq("SHALLOW")

            if self._match_texts(self.CLONE_KEYWORDS):
                copy = self._prev.text.lower() == "copy"
                clone = self.expression(
                    exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
                )

        if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False):
            return self._parse_as_command(start)

        create_kind_text = create_token.text.upper()
        return self.expression(
            exp.Create,
            this=this,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text,
            replace=replace,
            refresh=refresh,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
            concurrently=concurrently,
            clustered=clustered,
        )

    def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
        seq = exp.SequenceProperties()

        options = []
        index = self._index

        while self._curr:
            self._match(TokenType.COMMA)
            if self._match_text_seq("INCREMENT"):
                self._match_text_seq("BY")
                self._match_text_seq("=")
                seq.set("increment", self._parse_term())
            elif self._match_text_seq("MINVALUE"):
                seq.set("minvalue", self._parse_term())
            elif self._match_text_seq("MAXVALUE"):
                seq.set("maxvalue", self._parse_term())
            elif self._match(TokenType.START_WITH) or self._match_text_seq("START"):
                self._match_text_seq("=")
                seq.set("start", self._parse_term())
            elif self._match_text_seq("CACHE"):
                # T-SQL allows empty CACHE which is initialized dynamically
                seq.set("cache", self._parse_number() or True)
            elif self._match_text_seq("OWNED", "BY"):
                # "OWNED BY NONE" is the default
                seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column())
            else:
                opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False)
                if opt:
                    options.append(opt)
                else:
                    break

        seq.set("options", options if options else None)
        return None if self._index == index else seq
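    # Illustrative sketch: CREATE SEQUENCE options collected by _parse_sequence_properties
    # surface as an exp.SequenceProperties node. Assumes the default dialect.
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> create = sqlglot.parse_one("CREATE SEQUENCE s START WITH 1 INCREMENT BY 2")
    #     >>> create.find(exp.SequenceProperties) is not None
    #     True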
    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # Only used by Teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_wrapped_properties(self) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_property)

    def _parse_property(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        index = self._index

        seq_props = self._parse_sequence_properties()
        if seq_props:
            return seq_props

        self._retreat(index)
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            self._retreat(index)
            return None

        # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise
        if isinstance(key, exp.Column):
            key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name)

        value = self._parse_bitwise() or self._parse_var(any_token=True)

        # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier())
        if isinstance(value, exp.Column):
            value = exp.var(value.name)

        return self.expression(exp.Property, this=key, value=value)

    def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]:
        if self._match_text_seq("BY"):
            return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string())

        self._match(TokenType.ALIAS)
        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=(
                self.expression(
                    exp.InputOutputFormat,
                    input_format=input_format,
                    output_format=output_format,
                )
                if input_format or output_format
                else self._parse_var_or_string() or self._parse_number() or self._parse_id_var()
            ),
            hive_format=True,
        )

    def _parse_unquoted_field(self) -> t.Optional[exp.Expression]:
        field = self._parse_field()
        if isinstance(field, exp.Identifier) and not field.quoted:
            field = exp.var(field)

        return field

    def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E:
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)

        return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs)
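    # Illustrative sketch: a generic key = value pair that no specialized property parser
    # claims falls through to the exp.Property branch of _parse_property. The Postgres
    # storage parameter below is just an example input.
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> create = sqlglot.parse_one(
    #     ...     "CREATE TABLE t (a INT) WITH (fillfactor = 70)", read="postgres"
    #     ... )
    #     >>> create.find(exp.Property) is not None
    #     True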
    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()
            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_security(self) -> t.Optional[exp.SecurityProperty]:
        if self._match_texts(("NONE", "DEFINER", "INVOKER")):
            security_specifier = self._prev.text.upper()
            return self.expression(exp.SecurityProperty, this=security_specifier)
        return None

    def _parse_settings_property(self) -> exp.SettingsProperty:
        return self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment)
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_retention_period(self) -> exp.Var:
        # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...}
        number = self._parse_number()
        number_str = f"{number} " if number else ""
        unit = self._parse_var(any_token=True)
        return exp.var(f"{number_str}{unit}")

    def _parse_system_versioning_property(
        self, with_: bool = False
    ) -> exp.WithSystemVersioningProperty:
        self._match(TokenType.EQ)
        prop = self.expression(
            exp.WithSystemVersioningProperty,
            **{  # type: ignore
                "on": True,
                "with": with_,
            },
        )

        if self._match_text_seq("OFF"):
            prop.set("on", False)
            return prop

        self._match(TokenType.ON)
        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("HISTORY_TABLE", "="):
                    prop.set("this", self._parse_table_parts())
                elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="):
                    prop.set("data_consistency", self._advance_any() and self._prev.text.upper())
                elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_data_deletion_property(self) -> exp.DataDeletionProperty:
        self._match(TokenType.EQ)
        on = self._match_text_seq("ON") or not self._match_text_seq("OFF")
        prop = self.expression(exp.DataDeletionProperty, on=on)

        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("FILTER_COLUMN", "="):
                    prop.set("filter_column", self._parse_column())
                elif self._match_text_seq("RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop
    def _parse_distributed_property(self) -> exp.DistributedByProperty:
        kind = "HASH"
        expressions: t.Optional[t.List[exp.Expression]] = None
        if self._match_text_seq("BY", "HASH"):
            expressions = self._parse_wrapped_csv(self._parse_id_var)
        elif self._match_text_seq("BY", "RANDOM"):
            kind = "RANDOM"

        # If the BUCKETS keyword is not present, the number of buckets is AUTO
        buckets: t.Optional[exp.Expression] = None
        if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"):
            buckets = self._parse_number()

        return self.expression(
            exp.DistributedByProperty,
            expressions=expressions,
            kind=kind,
            buckets=buckets,
            order=self._parse_order(),
        )

    def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E:
        self._match_text_seq("KEY")
        expressions = self._parse_wrapped_id_vars()
        return self.expression(expr_type, expressions=expressions)

    def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        if self._match_text_seq("(", "SYSTEM_VERSIONING"):
            prop = self._parse_system_versioning_property(with_=True)
            self._match_r_paren()
            return prop

        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_properties()

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_texts(self.VIEW_ATTRIBUTES):
            return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper())

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if self._match(TokenType.SERDE_PROPERTIES, advance=False):
            return self._parse_serde_properties(with_=True)

        if self._match(TokenType.SCHEMA):
            return self.expression(
                exp.WithSchemaBindingProperty,
                this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS),
            )

        if self._match_texts(self.PROCEDURE_OPTIONS, advance=False):
            return self.expression(
                exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option)
            )

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    def _parse_procedure_option(self) -> exp.Expression | None:
        if self._match_text_seq("EXECUTE", "AS"):
            return self.expression(
                exp.ExecuteAsProperty,
                this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False)
                or self._parse_string(),
            )

        return self._parse_var_from_options(self.PROCEDURE_OPTIONS)

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))
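    # Illustrative sketch: MySQL's DEFINER clause parses into exp.DefinerProperty via
    # _parse_definer. Exact identifier quoting is simplified here.
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> view = sqlglot.parse_one(
    #     ...     "CREATE DEFINER=admin@localhost VIEW v AS SELECT 1", read="mysql"
    #     ... )
    #     >>> view.find(exp.DefinerProperty).this
    #     'admin@localhost'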
    def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster:
        return self.expression(
            exp.Cluster,
            expressions=(
                self._parse_wrapped_csv(self._parse_ordered)
                if wrapped
                else self._parse_csv(self._parse_ordered)
            ),
        )

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]:
        index = self._index
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")

        if not self._match_text_seq("ISOLATED", "LOADING"):
            self._retreat(index)
            return None

        target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False)
        return self.expression(
            exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target
        )

    def _parse_locking(self) -> exp.LockingProperty:
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_assignment)
        return []

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )

    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        if not self._match_text_seq("OF"):
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )
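    # Illustrative sketch: Postgres declarative partitioning round-trips through
    # _parse_partitioned_of and _parse_partition_bound_spec.
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> part = sqlglot.parse_one(
    #     ...     "CREATE TABLE p PARTITION OF t FOR VALUES FROM (1) TO (10)", read="postgres"
    #     ... )
    #     >>> part.find(exp.PartitionBoundSpec) is not None
    #     True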
    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL")
        return None

    def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA")
        return None

    def _parse_no_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="NO SQL")
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA")
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )
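    # Illustrative sketch: CREATE TABLE ... (LIKE ...) with INCLUDING/EXCLUDING options
    # parses into exp.LikeProperty via _parse_create_like.
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> like = sqlglot.parse_one(
    #     ...     "CREATE TABLE t1 (LIKE t2 INCLUDING DEFAULTS)", read="postgres"
    #     ... )
    #     >>> like.find(exp.LikeProperty) is not None
    #     True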
    def _parse_returns(self) -> exp.ReturnsProperty:
        value: t.Optional[exp.Expression]
        null = None
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"):
            null = True
            value = None
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper()
        if self._match(TokenType.DOT):
            style = None
            self._retreat(self._index - 2)

        format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None

        if self._match_set(self.STATEMENT_PARSERS, advance=False):
            this = self._parse_statement()
        else:
            this = self._parse_table(schema=True)

        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        partition = self._parse_partition()
        return self.expression(
            exp.Describe,
            this=this,
            style=style,
            kind=kind,
            expressions=expressions,
            partition=partition,
            format=format,
        )
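    # Illustrative sketch: a DESCRIBE style keyword is captured as a plain string arg on
    # the exp.Describe node. Assumes a dialect where DESCRIBE EXTENDED is valid (e.g. Hive).
    #
    #     >>> import sqlglot
    #     >>> desc = sqlglot.parse_one("DESCRIBE EXTENDED db.t", read="hive")
    #     >>> desc.args["style"]
    #     'EXTENDED'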
    def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts:
        kind = self._prev.text.upper()
        expressions = []

        def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]:
            if self._match(TokenType.WHEN):
                expression = self._parse_disjunction()
                self._match(TokenType.THEN)
            else:
                expression = None

            else_ = self._match(TokenType.ELSE)

            if not self._match(TokenType.INTO):
                return None

            return self.expression(
                exp.ConditionalInsert,
                this=self.expression(
                    exp.Insert,
                    this=self._parse_table(schema=True),
                    expression=self._parse_derived_table_values(),
                ),
                expression=expression,
                else_=else_,
            )

        expression = parse_conditional_insert()
        while expression is not None:
            expressions.append(expression)
            expression = parse_conditional_insert()

        return self.expression(
            exp.MultitableInserts,
            kind=kind,
            comments=comments,
            expressions=expressions,
            source=self._parse_table(),
        )

    def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]:
        comments = []
        hint = self._parse_hint()
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None
        is_function = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match_set((TokenType.FIRST, TokenType.ALL)):
                comments += ensure_list(self._prev_comments)
                return self._parse_multitable_inserts(comments)

            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            is_function = self._match(TokenType.FUNCTION)

            this = (
                self._parse_table(schema=True, parse_partition=True)
                if not is_function
                else self._parse_function()
            )
            if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False):
                this.set("alias", self._parse_table_alias())

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            hint=hint,
            is_function=is_function,
            this=this,
            stored=self._match_text_seq("STORED") and self._parse_stored(),
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(),
            partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(),
            settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
            source=self._match(TokenType.TABLE) and self._parse_table(),
        )

    def _parse_kill(self) -> exp.Kill:
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        conflict_keys = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            elif self._match(TokenType.L_PAREN):
                conflict_keys = self._parse_csv(self._parse_id_var)
                self._match_r_paren()

        action = self._parse_var_from_options(self.CONFLICT_ACTIONS)
        if self._prev.token_type == TokenType.UPDATE:
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)
        else:
            expressions = None

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            action=action,
            conflict_keys=conflict_keys,
            constraint=constraint,
            where=self._parse_where(),
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )
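    # Illustrative sketch: both ON CONFLICT (Postgres) and ON DUPLICATE KEY (MySQL)
    # normalize to an exp.OnConflict node attached to the insert.
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> ins = sqlglot.parse_one(
    #     ...     "INSERT INTO t (a) VALUES (1) ON CONFLICT (a) DO UPDATE SET a = 2",
    #     ...     read="postgres",
    #     ... )
    #     >>> isinstance(ins.args["conflict"], exp.OnConflict)
    #     True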
    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]:
        index = self._index
        with_ = with_ or self._match_text_seq("WITH")

        if not self._match(TokenType.SERDE_PROPERTIES):
            self._retreat(index)
            return None
        return self.expression(
            exp.SerdeProperties,
            **{  # type: ignore
                "expressions": self._parse_wrapped_properties(),
                "with": with_,
            },
        )

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = self._parse_serde_properties()

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            cluster=self._match(TokenType.ON) and self._parse_on_property(),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_use(self) -> exp.Use:
        return self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
            this=self._parse_table(schema=False),
        )

    def _parse_uncache(self) -> exp.Uncache:
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )
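    # Illustrative sketch: MySQL's multiple-table DELETE populates the "tables" arg,
    # while the target of the FROM clause lands in "this".
    #
    #     >>> import sqlglot
    #     >>> stmt = sqlglot.parse_one(
    #     ...     "DELETE t1 FROM t1 JOIN t2 ON t1.id = t2.id", read="mysql"
    #     ... )
    #     >>> [t.name for t in stmt.args["tables"]]
    #     ['t1']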
    def _parse_cache(self) -> exp.Cache:
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        if not self._match_texts(self.PARTITION_KEYWORDS):
            return None

        return self.expression(
            exp.Partition,
            subpartition=self._prev.text.upper() == "SUBPARTITION",
            expressions=self._parse_wrapped_csv(self._parse_assignment),
        )

    def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]:
        def _parse_value_expression() -> t.Optional[exp.Expression]:
            if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT):
                return exp.var(self._prev.text.upper())
            return self._parse_expression()

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(_parse_value_expression)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows.
        expression = self._parse_expression()
        if expression:
            return self.expression(exp.Tuple, expressions=[expression])
        return None

    def _parse_projections(self) -> t.List[exp.Expression]:
        return self._parse_expressions()
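    # Illustrative sketch: each parenthesized row parsed by _parse_value becomes an
    # exp.Tuple inside the exp.Values node.
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> q = sqlglot.parse_one("SELECT * FROM (VALUES (1), (2)) AS t(a)")
    #     >>> len(q.find(exp.Values).expressions)
    #     2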
    def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]:
        if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)):
            this: t.Optional[exp.Expression] = self._parse_simplified_pivot(
                is_unpivot=self._prev.token_type == TokenType.UNPIVOT
            )
        elif self._match(TokenType.FROM):
            from_ = self._parse_from(skip_from_token=True, consume_pipe=True)
            # Support parentheses for duckdb FROM-first syntax
            select = self._parse_select()
            if select:
                select.set("from", from_)
                this = select
            else:
                this = exp.select("*").from_(t.cast(exp.From, from_))
        else:
            this = (
                self._parse_table(consume_pipe=True)
                if table
                else self._parse_select(nested=True, parse_set_operation=False)
            )

            # Transform exp.Values into an exp.Table to pass through parse_query_modifiers
            # in case a modifier (e.g. a join) follows
            if table and isinstance(this, exp.Values) and this.alias:
                alias = this.args["alias"].pop()
                this = exp.Table(this=this, alias=alias)

        this = self._parse_query_modifiers(self._parse_set_operations(this))

        return this

    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
        consume_pipe: bool = True,
    ) -> t.Optional[exp.Expression]:
        query = self._parse_select_query(
            nested=nested,
            table=table,
            parse_subquery_alias=parse_subquery_alias,
            parse_set_operation=parse_set_operation,
        )

        if (
            consume_pipe
            and self._match(TokenType.PIPE_GT, advance=False)
            and isinstance(query, exp.Query)
        ):
            query = self._parse_pipe_syntax_query(query)
            query = query.subquery(copy=False) if query and table else query

        return query
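    # Illustrative sketch: DuckDB's FROM-first syntax is handled by the leading-FROM
    # branch below and expands to a star projection.
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("FROM t", read="duckdb").sql()
    #     'SELECT * FROM t'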
        elif self._match(TokenType.VALUES, advance=False):
            this = self._parse_derived_table_values()
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        elif self._match(TokenType.SUMMARIZE):
            table = self._match(TokenType.TABLE)
            this = self._parse_select() or self._parse_string() or self._parse_table()
            return self.expression(exp.Summarize, this=this, table=table)
        elif self._match(TokenType.DESCRIBE):
            this = self._parse_describe()
        elif self._match_text_seq("STREAM"):
            this = self._parse_function()
            if this:
                this = self.expression(exp.Stream, this=this)
            else:
                self._retreat(self._index - 1)
        else:
            this = None

        return self._parse_set_operations(this) if parse_set_operation else this

    def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]:
        self._match_text_seq("SEARCH")

        kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper()

        if not kind:
            return None

        self._match_text_seq("FIRST", "BY")

        return self.expression(
            exp.RecursiveWithSearch,
            kind=kind,
            this=self._parse_id_var(),
            expression=self._match_text_seq("SET") and self._parse_id_var(),
            using=self._match_text_seq("USING") and self._parse_id_var(),
        )

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        last_comments = None
        expressions = []
        while True:
            cte = self._parse_cte()
            if isinstance(cte, exp.CTE):
                expressions.append(cte)
                if last_comments:
                    cte.add_comments(last_comments)

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

            last_comments = self._prev_comments

        return self.expression(
            exp.With,
            comments=comments,
            expressions=expressions,
            recursive=recursive,
            search=self._parse_recursive_with_search(),
        )

    def _parse_cte(self) -> t.Optional[exp.CTE]:
        index = self._index

        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE:
            self._retreat(index)
            return None

        comments = self._prev_comments

        if self._match_text_seq("NOT", "MATERIALIZED"):
            materialized = False
        elif self._match_text_seq("MATERIALIZED"):
            materialized = True
        else:
            materialized = None

        cte = self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
            materialized=materialized,
            comments=comments,
        )

        values = cte.this
        if isinstance(values, exp.Values):
            if values.alias:
                cte.set("this", exp.select("*").from_(values))
            else:
                cte.set("this", exp.select("*").from_(exp.alias_(values, "_values", table=True)))

        return cte
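    # Editor's note: an illustrative sketch (not part of the parser) of what the CTE
    # machinery above produces, assuming only the public sqlglot API:
    #
    #   import sqlglot
    #   ast = sqlglot.parse_one("WITH x AS (SELECT 1 AS a) SELECT a FROM x")
    #   assert ast.args["with"].expressions[0].alias == "x"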
    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses),
        # so this section tries to parse the clause version and, if it fails, treats the token
        # as an identifier (alias)
        if self._can_parse_limit_or_offset():
            return None

        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        table_alias = self.expression(exp.TableAlias, this=alias, columns=columns)

        # We bubble up comments from the Identifier to the TableAlias
        if isinstance(alias, exp.Identifier):
            table_alias.add_comments(alias.pop_comments())

        return table_alias

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
            sample=self._parse_table_sample(),
        )

    def _implicit_unnests_to_explicit(self, this: E) -> E:
        from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm

        refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name}
        for i, join in enumerate(this.args.get("joins") or []):
            table = join.this
            normalized_table = table.copy()
            normalized_table.meta["maybe_column"] = True
            normalized_table = _norm(normalized_table, dialect=self.dialect)

            if isinstance(table, exp.Table) and not join.args.get("on"):
                if normalized_table.parts[0].name in refs:
                    table_as_column = table.to_column()
                    unnest = exp.Unnest(expressions=[table_as_column])

                    # Table.to_column creates a parent Alias node that we want to convert to
                    # a TableAlias and attach to the Unnest, so it matches the parser's output
                    if isinstance(table.args.get("alias"), exp.TableAlias):
                        table_as_column.replace(table_as_column.this)
                        exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False)

                    table.replace(unnest)

            refs.add(normalized_table.alias_or_name)

        return this

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if isinstance(this, self.MODIFIABLES):
            for join in self._parse_joins():
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    modifier_token = self._curr
                    parser = self.QUERY_MODIFIER_PARSERS[modifier_token.token_type]
                    key, expression = parser(self)

                    if expression:
                        if this.args.get(key):
                            self.raise_error(
                                f"Found multiple '{modifier_token.text.upper()}' clauses",
                                token=modifier_token,
                            )

                        this.set(key, expression)
                        if key == "limit":
                            offset = expression.args.pop("offset", None)

                            if offset:
                                offset = exp.Offset(expression=offset)
                                this.set("offset", offset)

                                limit_by_expressions = expression.expressions
                                expression.set("expressions", None)
                                offset.set("expressions", limit_by_expressions)
                        continue
                break

        if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"):
            this = self._implicit_unnests_to_explicit(this)

        return this

    def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]:
        start = self._curr
        while self._curr:
            self._advance()

        end = self._tokens[self._index - 1]
        return exp.Hint(expressions=[self._find_sql(start, end)])

    def _parse_hint_function_call(self) -> t.Optional[exp.Expression]:
        return self._parse_function_call()

    def _parse_hint_body(self) -> t.Optional[exp.Hint]:
        start_index = self._index
        should_fallback_to_string = False

        hints = []
        try:
            for hint in iter(
                lambda: self._parse_csv(
                    lambda: self._parse_hint_function_call() or self._parse_var(upper=True),
                ),
                [],
            ):
                hints.extend(hint)
        except ParseError:
            should_fallback_to_string = True

        if should_fallback_to_string or self._curr:
            self._retreat(start_index)
            return self._parse_hint_fallback_to_string()

        return self.expression(exp.Hint, expressions=hints)

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        if self._match(TokenType.HINT) and self._prev_comments:
            return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self,
        joins: bool = False,
        skip_from_token: bool = False,
        consume_pipe: bool = False,
    ) -> t.Optional[exp.From]:
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From,
            comments=self._prev_comments,
            this=self._parse_table(joins=joins, consume_pipe=consume_pipe),
        )

    def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure:
        return self.expression(
            exp.MatchRecognizeMeasure,
            window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(),
            this=self._parse_expression(),
        )
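    # Editor's note: a hedged usage sketch for the row-pattern matching parser below,
    # assuming only the public sqlglot API (MATCH_RECOGNIZE is supported by e.g. Snowflake);
    # the exact SQL here is an assumption on my part:
    #
    #   import sqlglot
    #   from sqlglot import exp
    #   sql = "SELECT * FROM t MATCH_RECOGNIZE (PARTITION BY a ORDER BY b PATTERN (x+) DEFINE x AS x > 0)"
    #   assert sqlglot.parse_one(sql, read="snowflake").find(exp.MatchRecognize) is not None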
    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()

        measures = (
            self._parse_csv(self._parse_match_recognize_measure)
            if self._match_text_seq("MEASURES")
            else None
        )

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
        if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY):
            cross_apply = False

        if cross_apply is not None:
            this = self._parse_select(table=True)
            view = None
            outer = None
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        ordinality: t.Optional[bool] = None

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)
            table_alias = self._parse_table_alias()

        return self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
            cross_apply=cross_apply,
            ordinality=ordinality,
        )

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )
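    # Editor's note: illustrative only. _parse_join_parts above splits a compound join
    # keyword into (method, side, kind) tokens, so "NATURAL FULL OUTER JOIN" yields an
    # exp.Join carrying method="NATURAL", side="FULL", kind="OUTER":
    #
    #   import sqlglot
    #   join = sqlglot.parse_one("SELECT * FROM a NATURAL FULL OUTER JOIN b").args["joins"][0]
    #   assert (join.method, join.side, join.kind) == ("NATURAL", "FULL", "OUTER")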
    def _parse_using_identifiers(self) -> t.List[exp.Expression]:
        def _parse_column_as_identifier() -> t.Optional[exp.Expression]:
            this = self._parse_column()
            if isinstance(this, exp.Column):
                return this.this
            return this

        return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True)

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        if self._match(TokenType.COMMA):
            table = self._try_parse(self._parse_table)
            cross_join = self.expression(exp.Join, this=table) if table else None

            if cross_join and self.JOINS_HAVE_EQUAL_PRECEDENCE:
                cross_join.set("kind", "CROSS")

            return cross_join

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN)
        join_comments = self._prev_comments

        if not skip_join_token and not join:
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}
        if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA):
            kwargs["expressions"] = self._parse_csv(
                lambda: self._parse_table(parse_bracket=parse_bracket)
            )

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.MATCH_CONDITION):
            kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_assignment()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_using_identifiers()
        elif (
            not method
            and not (outer_apply or cross_apply)
            and not isinstance(kwargs["this"], exp.Unnest)
            and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY))
        ):
            index = self._index
            joins: t.Optional[list] = list(self._parse_joins())

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_assignment()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_using_identifiers()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins if joins else None)

        kwargs["pivots"] = self._parse_pivots()

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        comments = (join_comments or []) + comments
        return self.expression(exp.Join, comments=comments, **kwargs)

    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        this = self._parse_assignment()

        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this
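    # Editor's note: a hedged sketch of what the index machinery below handles; the
    # exact SQL is an assumption on my part (Postgres-style CREATE INDEX options):
    #
    #   import sqlglot
    #   sqlglot.parse_one("CREATE INDEX idx ON t USING btree (a) WHERE a > 0", read="postgres")
    #
    # The USING method, the wrapped column list, and the partial-index WHERE all land
    # in exp.IndexParameters via _parse_index_params.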
    def _parse_index_params(self) -> exp.IndexParameters:
        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_with_operator)
        else:
            columns = None

        include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None
        partition_by = self._parse_partition_by()
        with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties()
        tablespace = (
            self._parse_var(any_token=True)
            if self._match_text_seq("USING", "INDEX", "TABLESPACE")
            else None
        )
        where = self._parse_where()

        on = self._parse_field() if self._match(TokenType.ON) else None

        return self.expression(
            exp.IndexParameters,
            using=using,
            columns=columns,
            include=include,
            partition_by=partition_by,
            where=where,
            with_storage=with_storage,
            tablespace=tablespace,
            on=on,
        )

    def _parse_index(
        self, index: t.Optional[exp.Expression] = None, anonymous: bool = False
    ) -> t.Optional[exp.Index]:
        if index or anonymous:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        params = self._parse_index_params()

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            unique=unique,
            primary=primary,
            amp=amp,
            params=params,
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_set((TokenType.INDEX, TokenType.KEY))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )
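    # Editor's note: illustrative sketch of the dotted-name resolution implemented
    # below, assuming only the public API. "c.d.t" is folded right-to-left into
    # Table(this=t, db=d, catalog=c):
    #
    #   import sqlglot
    #   table = sqlglot.parse_one("SELECT * FROM c.d.t").args["from"].this
    #   assert (table.catalog, table.db, table.name) == ("c", "d", "t")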
    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                # "" used for tsql FROM a..b case
                table = self._parse_table_part(schema=schema) or ""

        if (
            wildcard
            and self._is_connected()
            and (isinstance(table, exp.Identifier) or not table)
            and self._match(TokenType.STAR)
        ):
            if isinstance(table, exp.Identifier):
                table.args["this"] += "*"
            else:
                table = exp.Identifier(this="*")

        # We bubble up comments from the Identifier to the Table
        comments = table.pop_comments() if isinstance(table, exp.Expression) else None

        if is_db_reference:
            catalog = db
            db = table
            table = None

        if not table and not is_db_reference:
            self.raise_error(f"Expected table name but got {self._curr}")
        if not db and is_db_reference:
            self.raise_error(f"Expected database name but got {self._curr}")

        table = self.expression(
            exp.Table,
            comments=comments,
            this=table,
            db=db,
            catalog=catalog,
        )

        changes = self._parse_changes()
        if changes:
            table.set("changes", changes)

        at_before = self._parse_historical_data()
        if at_before:
            table.set("when", at_before)

        pivots = self._parse_pivots()
        if pivots:
            table.set("pivots", pivots)

        return table

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
        parse_partition: bool = False,
        consume_pipe: bool = False,
    ) -> t.Optional[exp.Expression]:
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True, consume_pipe=consume_pipe)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None

        rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv(
            self._parse_table
        )
        rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None

        only = self._match(TokenType.ONLY)

        this = t.cast(
            exp.Expression,
            bracket
            or rows_from
            or self._parse_bracket(
                self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            ),
        )

        if only:
            this.set("only", only)

        # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
        self._match_text_seq("*")

        parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION
        if parse_partition and self._match(TokenType.PARTITION, advance=False):
            this.set("partition", self._parse_partition())

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        if self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if isinstance(this, exp.Table) and self._match_text_seq("AT"):
            return self.expression(
                exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
            )
        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        if joins:
            for join in self._parse_joins():
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this

    def _parse_version(self) -> t.Optional[exp.Version]:
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)

    def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]:
        # https://docs.snowflake.com/en/sql-reference/constructs/at-before
        index = self._index
        historical_data = None
        if self._match_texts(self.HISTORICAL_DATA_PREFIX):
            this = self._prev.text.upper()
            kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.HISTORICAL_DATA_KIND)
                and self._prev.text.upper()
            )
            expression = self._match(TokenType.FARROW) and self._parse_bitwise()

            if expression:
                self._match_r_paren()
                historical_data = self.expression(
                    exp.HistoricalData, this=this, kind=kind, expression=expression
                )
            else:
                self._retreat(index)

        return historical_data

    def _parse_changes(self) -> t.Optional[exp.Changes]:
        if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"):
            return None

        information = self._parse_var(any_token=True)
        self._match_r_paren()

        return self.expression(
            exp.Changes,
            information=information,
            at_before=self._parse_historical_data(),
            end=self._parse_historical_data(),
        )
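    # Editor's note: a hedged sketch for the UNNEST parser below, assuming the public
    # API and BigQuery's WITH OFFSET syntax:
    #
    #   import sqlglot
    #   from sqlglot import exp
    #   sql = "SELECT * FROM UNNEST([1, 2]) AS x WITH OFFSET AS pos"
    #   unnest = sqlglot.parse_one(sql, read="bigquery").find(exp.Unnest)
    #   assert unnest.args["offset"].name == "pos"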
    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_equality)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.dialect.UNNEST_COLUMN_ONLY:
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            if offset and len(expressions) < len(columns):
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var(
                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
            ) or exp.to_identifier("offset")

        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not (
            # ClickHouse's `FORMAT Values` is equivalent to `VALUES`
            self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES")
        ):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        size = None
        seed = None

        method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
        matched_l_paren = self._match(TokenType.L_PAREN)

        if self.TABLESAMPLE_CSV:
            num = None
            expressions = self._parse_csv(self._parse_primary)
        else:
            expressions = None
            num = (
                self._parse_factor()
                if self._match(TokenType.NUMBER, advance=False)
                else self._parse_primary() or self._parse_placeholder()
            )

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
            size = num
        else:
            percent = num

        if matched_l_paren:
            self._match_r_paren()

        if self._match(TokenType.L_PAREN):
            method = self._parse_var(upper=True)
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        if not method and self.DEFAULT_SAMPLING_METHOD:
            method = exp.var(self.DEFAULT_SAMPLING_METHOD)

        return self.expression(
            exp.TableSample,
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            size=size,
            seed=seed,
        )

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Iterator[exp.Join]:
        return iter(self._parse_join, None)

    def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]:
        if not self._match(TokenType.INTO):
            return None

        return self.expression(
            exp.UnpivotColumns,
            this=self._match_text_seq("NAME") and self._parse_column(),
            expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column),
        )
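    # Editor's note: a hedged sketch for the simplified PIVOT parser below, using the
    # example shape from DuckDB's docs (illustrative, not exhaustive):
    #
    #   import sqlglot
    #   sqlglot.parse_one("PIVOT cities ON year USING sum(population)", read="duckdb")
    #
    # The ON list, the USING aggregations, and an optional GROUP BY all become args
    # of a single exp.Pivot node.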
    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match(TokenType.IN):
                # PIVOT ... ON col IN (row_val1, row_val2)
                return self._parse_in(this)
            if self._match(TokenType.ALIAS, advance=False):
                # UNPIVOT ... ON (col1, col2, col3) AS row_val
                return self._parse_alias(this)

            return this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        into = self._parse_unpivot_columns()
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()

        return self.expression(
            exp.Pivot,
            this=this,
            expressions=expressions,
            using=using,
            group=group,
            unpivot=is_unpivot,
            into=into,
        )

    def _parse_pivot_in(self) -> exp.In:
        def _parse_aliased_expression() -> t.Optional[exp.Expression]:
            this = self._parse_select_or_expression()

            self._match(TokenType.ALIAS)
            alias = self._parse_bitwise()
            if alias:
                if isinstance(alias, exp.Column) and not alias.db:
                    alias = alias.this
                return self.expression(exp.PivotAlias, this=this, alias=alias)

            return this

        value = self._parse_column()

        if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
            self.raise_error("Expecting IN (")

        if self._match(TokenType.ANY):
            exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order()))
        else:
            exprs = self._parse_csv(_parse_aliased_expression)

        self._match_r_paren()
        return self.expression(exp.In, this=value, expressions=exprs)

    def _parse_pivot_aggregation(self) -> t.Optional[exp.Expression]:
        func = self._parse_function()
        if not func:
            self.raise_error("Expecting an aggregation function in PIVOT")

        return self._parse_alias(func)

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(self._parse_pivot_aggregation)

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        fields = []
        while True:
            field = self._try_parse(self._parse_pivot_in)
            if not field:
                break
            fields.append(field)

        default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped(
            self._parse_bitwise
        )

        group = self._parse_group()

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            fields=fields,
            unpivot=unpivot,
            include_nulls=include_nulls,
            default_on_null=default_on_null,
            group=group,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            all_fields = []
            for pivot_field in pivot.fields:
                pivot_field_expressions = pivot_field.expressions

                # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case.
                if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny):
                    continue

                all_fields.append(
                    [
                        fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                        for fld in pivot_field_expressions
                    ]
                )

            if all_fields:
                if names:
                    all_fields.append(names)

                # Generate all possible combinations of the pivot columns
                # e.g PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US'))
                # generates the product between [[2000, 2010], ['NL', 'US'], ['total']]
                for fld_parts_tuple in itertools.product(*all_fields):
                    fld_parts = list(fld_parts_tuple)

                    if names and self.PREFIXED_PIVOT_COLUMNS:
                        # Move the "name" to the front of the list
                        fld_parts.insert(0, fld_parts.pop(-1))

                    columns.append(exp.to_identifier("_".join(fld_parts)))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        return [agg.alias for agg in aggregations if agg.alias]

    def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]:
        if not skip_where_token and not self._match(TokenType.PREWHERE):
            return None

        return self.expression(
            exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        comments = self._prev_comments

        elements: t.Dict[str, t.Any] = defaultdict(list)

        if self._match(TokenType.ALL):
            elements["all"] = True
        elif self._match(TokenType.DISTINCT):
            elements["all"] = False

        if self._match_set(self.QUERY_MODIFIER_TOKENS, advance=False):
            return self.expression(exp.Group, comments=comments, **elements)  # type: ignore

        while True:
            index = self._index

            elements["expressions"].extend(
                self._parse_csv(
                    lambda: None
                    if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False)
                    else self._parse_assignment()
                )
            )

            before_with_index = self._index
            with_prefix = self._match(TokenType.WITH)

            if self._match(TokenType.ROLLUP):
                elements["rollup"].append(
                    self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix)
                )
            elif self._match(TokenType.CUBE):
                elements["cube"].append(
                    self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix)
                )
            elif self._match(TokenType.GROUPING_SETS):
                elements["grouping_sets"].append(
                    self.expression(
                        exp.GroupingSets,
                        expressions=self._parse_wrapped_csv(self._parse_grouping_set),
                    )
                )
            elif self._match_text_seq("TOTALS"):
                elements["totals"] = True  # type: ignore

            if before_with_index <= self._index <= before_with_index + 1:
                self._retreat(before_with_index)
                break

            if index == self._index:
                break

        return self.expression(exp.Group, comments=comments, **elements)  # type: ignore

    def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E:
        return self.expression(
            kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column)
        )

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(
            exp.Having, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_assignment())

    def _parse_connect_with_prior(self) -> t.Optional[exp.Expression]:
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_assignment()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")
        return connect

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_assignment()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        nocycle = self._match_text_seq("NOCYCLE")
        connect = self._parse_connect_with_prior()

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_assignment()

        return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle)

    def _parse_name_as_expression(self) -> t.Optional[exp.Expression]:
        this = self._parse_id_var(any_token=True)
        if self._match(TokenType.ALIAS):
            this = self.expression(exp.Alias, alias=this, this=self._parse_assignment())
        return this

    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
        if self._match_text_seq("INTERPOLATE"):
            return self._parse_wrapped_csv(self._parse_name_as_expression)
        return None

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        siblings = None
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            if not self._match(TokenType.ORDER_SIBLINGS_BY):
                return this

            siblings = True

        return self.expression(
            exp.Order,
            comments=self._prev_comments,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            siblings=siblings,
        )
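    # Editor's note: a hedged sketch of the NULL-ordering normalization implemented
    # in _parse_ordered below. Under the default "nulls_are_small" setting, a plain
    # ascending ORDER BY is recorded with nulls_first=True:
    #
    #   import sqlglot
    #   from sqlglot import exp
    #   ordered = sqlglot.parse_one("SELECT a FROM t ORDER BY a").find(exp.Ordered)
    #   assert ordered.args.get("nulls_first") is True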
    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(
        self, parse_method: t.Optional[t.Callable] = None
    ) -> t.Optional[exp.Ordered]:
        this = parse_method() if parse_method else self._parse_assignment()
        if not this:
            return None

        if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL:
            this = exp.var("ALL")

        asc = self._match(TokenType.ASC)
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        if self._match_text_seq("WITH", "FILL"):
            with_fill = self.expression(
                exp.WithFill,
                **{  # type: ignore
                    "from": self._match(TokenType.FROM) and self._parse_bitwise(),
                    "to": self._match_text_seq("TO") and self._parse_bitwise(),
                    "step": self._match_text_seq("STEP") and self._parse_bitwise(),
                    "interpolate": self._parse_interpolate(),
                },
            )
        else:
            with_fill = None

        return self.expression(
            exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
        )

    def _parse_limit_options(self) -> exp.LimitOptions:
        percent = self._match(TokenType.PERCENT)
        rows = self._match_set((TokenType.ROW, TokenType.ROWS))
        self._match_text_seq("ONLY")
        with_ties = self._match_text_seq("WITH", "TIES")
        return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties)

    def _parse_limit(
        self,
        this: t.Optional[exp.Expression] = None,
        top: bool = False,
        skip_limit_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()

                limit_options = self._parse_limit_options()
            else:
                limit_options = None
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit,
                this=this,
                expression=expression,
                offset=offset,
                comments=comments,
                limit_options=limit_options,
                expressions=self._parse_limit_by(),
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text.upper() if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                limit_options=self._parse_limit_options(),
            )

        return this
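    # Editor's note: illustrative only. MySQL's "LIMIT <offset>, <count>" form is
    # captured above as Limit(offset=..., expression=...); _parse_query_modifiers
    # later splits the offset out into a standalone exp.Offset node:
    #
    #   import sqlglot
    #   ast = sqlglot.parse_one("SELECT a FROM t LIMIT 5, 10", read="mysql")
    #   assert ast.args["offset"].expression.this == "5"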
    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))

        return self.expression(
            exp.Offset, this=this, expression=count, expressions=self._parse_limit_by()
        )

    def _can_parse_limit_or_offset(self) -> bool:
        if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False):
            return False

        index = self._index
        result = bool(
            self._try_parse(self._parse_limit, retreat=True)
            or self._try_parse(self._parse_offset, retreat=True)
        )
        self._retreat(index)
        return result

    def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise)

    def _parse_locks(self) -> t.List[exp.Lock]:
        locks = []
        while True:
            update, key = None, None
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            elif self._match_text_seq("FOR", "KEY", "SHARE"):
                update, key = False, True
            elif self._match_text_seq("FOR", "NO", "KEY", "UPDATE"):
                update, key = True, True
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(
                    exp.Lock, update=update, expressions=expressions, wait=wait, key=key
                )
            )

        return locks

    def parse_set_operation(
        self, this: t.Optional[exp.Expression], consume_pipe: bool = False
    ) -> t.Optional[exp.Expression]:
        start = self._index
        _, side_token, kind_token = self._parse_join_parts()

        side = side_token.text if side_token else None
        kind = kind_token.text if kind_token else None

        if not self._match_set(self.SET_OPERATIONS):
            self._retreat(start)
            return None

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            operation: t.Type[exp.SetOperation] = exp.Union
        elif token_type == TokenType.EXCEPT:
            operation = exp.Except
        else:
            operation = exp.Intersect

        comments = self._prev.comments

        if self._match(TokenType.DISTINCT):
            distinct: t.Optional[bool] = True
        elif self._match(TokenType.ALL):
            distinct = False
        else:
            distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation]
            if distinct is None:
                self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}")

        by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq(
            "STRICT", "CORRESPONDING"
        )
        if self._match_text_seq("CORRESPONDING"):
            by_name = True
            if not side and not kind:
                kind = "INNER"

        on_column_list = None
        if by_name and self._match_texts(("ON", "BY")):
            on_column_list = self._parse_wrapped_csv(self._parse_column)

        expression = self._parse_select(
            nested=True, parse_set_operation=False, consume_pipe=consume_pipe
        )

        return self.expression(
            operation,
            comments=comments,
            this=this,
            distinct=distinct,
            by_name=by_name,
            expression=expression,
            side=side,
            kind=kind,
            on=on_column_list,
        )
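    # Editor's note: illustrative sketch of the set-operation defaults above. In the
    # default dialect UNION is DISTINCT unless ALL is given, and that decision is
    # recorded on the node:
    #
    #   import sqlglot
    #   assert sqlglot.parse_one("SELECT 1 UNION SELECT 2").args["distinct"] is True
    #   assert sqlglot.parse_one("SELECT 1 UNION ALL SELECT 2").args["distinct"] is False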
    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        while this:
            setop = self.parse_set_operation(this)
            if not setop:
                break
            this = setop

        if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP:
            expression = this.expression

            if expression:
                for arg in self.SET_OP_MODIFIERS:
                    expr = expression.args.get(arg)
                    if expr:
                        this.set(arg, expr.pop())

        return this

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_alias(self._parse_assignment())

    def _parse_assignment(self) -> t.Optional[exp.Expression]:
        this = self._parse_disjunction()
        if not this and self._next and self._next.token_type in self.ASSIGNMENT:
            # This allows us to parse <non-identifier token> := <expr>
            this = exp.column(
                t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text)
            )

        while self._match_set(self.ASSIGNMENT):
            if isinstance(this, exp.Column) and len(this.parts) == 1:
                this = this.this

            this = self.expression(
                self.ASSIGNMENT[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=self._parse_assignment(),
            )

        return this

    def _parse_disjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION)

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)
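    # Editor's note: the ladder above is a precedence climb; each level parses its
    # operands with the next-tighter level, so AND binds tighter than OR:
    #
    #   import sqlglot
    #   from sqlglot import exp
    #   where = sqlglot.parse_one("SELECT 1 WHERE a OR b AND c").args["where"]
    #   assert isinstance(where.this, exp.Or) and isinstance(where.this.expression, exp.And)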
    def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        this = this or self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self._negate_range(this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not this:
            return this

        return self.expression(exp.Not, this=this)

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_bitwise())

        if self._match(TokenType.JSON):
            kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper()

            if self._match_text_seq("WITH"):
                _with = True
            elif self._match_text_seq("WITHOUT"):
                _with = False
            else:
                _with = None

            unique = self._match(TokenType.UNIQUE)
            self._match_text_seq("KEYS")
            expression: t.Optional[exp.Expression] = self.expression(
                exp.JSON, **{"this": kind, "with": _with, "unique": unique}
            )
        else:
            expression = self._parse_primary() or self._parse_null()
            if not expression:
                self._retreat(index)
                return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Query):
                this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False))
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            this = self.expression(exp.In, this=this, field=self._parse_column())

        return this

    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        symmetric = None
        if self._match_text_seq("SYMMETRIC"):
            symmetric = True
        elif self._match_text_seq("ASYMMETRIC"):
            symmetric = False

        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()

        return self.expression(
            exp.Between,
            this=this,
            low=low,
            high=high,
            symmetric=symmetric,
        )

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())
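    # Editor's note: a hedged sketch of the canonicalization performed by
    # _parse_interval below; both spellings should end up as the same node,
    # Interval(this='5', unit=Var(DAY)):
    #
    #   import sqlglot
    #   from sqlglot import exp
    #   a = sqlglot.parse_one("SELECT INTERVAL 5 DAY").find(exp.Interval)
    #   b = sqlglot.parse_one("SELECT INTERVAL '5' DAY").find(exp.Interval)
    #   assert a == b and a.this.is_string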
    def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]:
        index = self._index

        if not self._match(TokenType.INTERVAL) and match_interval:
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and this.name.upper() == "IS"
        ):
            self._retreat(index)
            return None

        unit = self._parse_function() or (
            not self._match(TokenType.ALIAS, advance=False)
            and self._parse_var(any_token=True, upper=True)
        )

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.to_py())
        elif this and this.is_string:
            parts = exp.INTERVAL_STRING_RE.findall(this.name)
            if parts and unit:
                # Unconsume the eagerly-parsed unit, since the real unit was part of the string
                unit = None
                self._retreat(self._index - 1)

            if len(parts) == 1:
                this = exp.Literal.string(parts[0][0])
                unit = self.expression(exp.Var, this=parts[0][1].upper())

        if self.INTERVAL_SPANS and self._match_text_seq("TO"):
            unit = self.expression(
                exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True)
            )

        interval = self.expression(exp.Interval, this=this, unit=unit)

        index = self._index
        self._match(TokenType.PLUS)

        # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals
        if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
            return self.expression(
                exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
            )

        self._retreat(index)
        return interval

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
                this = self.expression(
                    exp.DPipe,
                    this=this,
                    expression=self._parse_term(),
                    safe=not self.dialect.STRICT_STRING_CONCAT,
                )
            elif self._match(TokenType.DQMARK):
                this = self.expression(
                    exp.Coalesce, this=this, expressions=ensure_list(self._parse_term())
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this
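    # Editor's note: a hedged sketch of the dialect-dependent "||" handling above.
    # Where DPIPE_IS_STRING_CONCAT holds (e.g. Postgres), "||" becomes exp.DPipe:
    #
    #   import sqlglot
    #   from sqlglot import exp
    #   assert sqlglot.parse_one("SELECT 'a' || 'b'", read="postgres").find(exp.DPipe)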

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
                this = self.expression(
                    exp.DPipe,
                    this=this,
                    expression=self._parse_term(),
                    safe=not self.dialect.STRICT_STRING_CONCAT,
                )
            elif self._match(TokenType.DQMARK):
                this = self.expression(
                    exp.Coalesce, this=this, expressions=ensure_list(self._parse_term())
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        this = self._parse_factor()

        while self._match_set(self.TERM):
            klass = self.TERM[self._prev.token_type]
            comments = self._prev_comments
            expression = self._parse_factor()

            this = self.expression(klass, this=this, comments=comments, expression=expression)

            if isinstance(this, exp.Collate):
                expr = this.expression

                # Preserve collations such as pg_catalog."default" (Postgres) as columns,
                # otherwise fall back to Identifier / Var
                if isinstance(expr, exp.Column) and len(expr.parts) == 1:
                    ident = expr.this
                    if isinstance(ident, exp.Identifier):
                        this.set("expression", ident if ident.quoted else exp.var(ident.name))

        return this

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
        this = parse_method()

        while self._match_set(self.FACTOR):
            klass = self.FACTOR[self._prev.token_type]
            comments = self._prev_comments
            expression = parse_method()

            if not expression and klass is exp.IntDiv and self._prev.text.isalpha():
                self._retreat(self._index - 1)
                return this

            this = self.expression(klass, this=this, comments=comments, expression=expression)

            if isinstance(this, exp.Div):
                this.args["typed"] = self.dialect.TYPED_DIVISION
                this.args["safe"] = self.dialect.SAFE_DIVISION

        return this

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())
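
    # Illustrative sketch (comment only): _parse_bitwise -> _parse_term -> _parse_factor
    # -> _parse_exponent -> _parse_unary is a classic precedence-climbing cascade. The
    # Div flags set in _parse_factor surface on the parsed tree, e.g.:
    #
    #   import sqlglot
    #   div = sqlglot.parse_one("a / 2").assert_is(sqlglot.exp.Div)
    #   div.args.get("typed"), div.args.get("safe")  # dialect-dependent booleans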

    def _parse_type(
        self, parse_interval: bool = True, fallback_to_identifier: bool = False
    ) -> t.Optional[exp.Expression]:
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)

        # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g.
        # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>)
        if isinstance(data_type, exp.Cast):
            # This constructor can contain ops directly after it, for instance struct unnesting:
            # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).*
            return self._parse_column_ops(data_type)

        if data_type:
            index2 = self._index
            this = self._parse_primary()

            if isinstance(this, exp.Literal):
                literal = this.name
                this = self._parse_column_ops(this)

                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)

                if (
                    self.ZONE_AWARE_TIMESTAMP_CONSTRUCTOR
                    and data_type.is_type(exp.DataType.Type.TIMESTAMP)
                    and TIME_ZONE_RE.search(literal)
                ):
                    data_type = exp.DataType.build("TIMESTAMPTZ")

                return self.expression(exp.Cast, this=this, to=data_type)

            # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
            # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
            #
            # If the index difference here is greater than 1, that means the parser itself must have
            # consumed additional tokens such as the DECIMAL scale and precision in the above example.
            #
            # If it's not greater than 1, then it must be 1, because we've consumed at least the type
            # keyword, meaning that the expressions arg of the DataType must have gotten set by a
            # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
            # DECIMAL(38, 0) in order to facilitate the data type's transpilation.
            #
            # In these cases, we don't really want to return the converted type, but instead retreat
            # and try to parse a Column or Identifier in the section below.
            if data_type.expressions and index2 - index > 1:
                self._retreat(index2)
                return self._parse_column_ops(data_type)

        self._retreat(index)

        if fallback_to_identifier:
            return self._parse_id_var()

        this = self._parse_column()
        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        this = self._parse_type()
        if not this:
            return None

        if isinstance(this, exp.Column) and not this.table:
            this = exp.var(this.name.upper())

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_user_defined_type(self, identifier: exp.Identifier) -> t.Optional[exp.Expression]:
        type_name = identifier.name

        while self._match(TokenType.DOT):
            type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

        return exp.DataType.build(type_name, dialect=self.dialect, udt=True)
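
    # Illustrative sketch (comment only): _parse_type is what canonicalizes a typed
    # literal into a Cast. Assuming the default dialect:
    #
    #   import sqlglot
    #   ast = sqlglot.parse_one("SELECT DATE '2020-01-01'")
    #   ast.find(sqlglot.exp.Cast).sql()  # e.g. "CAST('2020-01-01' AS DATE)"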

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        index = self._index

        this: t.Optional[exp.Expression] = None
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if isinstance(identifier, exp.Identifier):
                tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect))

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    this = self._parse_user_defined_type(identifier)
                else:
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

        # https://materialize.com/docs/sql/types/map/
        if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET):
            key_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.FARROW):
                self._retreat(index)
                return None

            value_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.R_BRACKET):
                self._retreat(index)
                return None

            return exp.DataType(
                this=exp.DataType.Type.MAP,
                expressions=[key_type, value_type],
                nested=True,
                prefix=prefix,
            )

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                if type_token == TokenType.NULLABLE and len(expressions) == 1:
                    this = expressions[0]
                    this.set("nullable", True)
                    self._match_r_paren()
                    return this
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            elif is_aggregate:
                func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
                    any_token=False, tokens=(TokenType.VAR, TokenType.ANY)
                )
                if not func_or_ident:
                    return None
                expressions = [func_or_ident]
                if self._match(TokenType.COMMA):
                    expressions.extend(
                        self._parse_csv(
                            lambda: self._parse_types(
                                check_func=check_func,
                                schema=schema,
                                allow_identifiers=allow_identifiers,
                            )
                        )
                    )
            else:
                expressions = self._parse_csv(self._parse_type_size)

                # https://docs.snowflake.com/en/sql-reference/data-types-vector
                if type_token == TokenType.VECTOR and len(expressions) == 2:
                    expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            maybe_func = True

        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_assignment)
                if not values and is_struct:
                    values = None
                    self._retreat(self._index - 1)
                else:
                    self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var(upper=True)
            if unit:
                if self._match_text_seq("TO"):
                    unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True))

                this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))
            else:
                this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
        elif type_token == TokenType.VOID:
            this = exp.DataType(this=exp.DataType.Type.NULL)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                prefix=prefix,
            )

            # Empty arrays/structs are allowed
            if values is not None:
                cls = exp.Struct if is_struct else exp.Array
                this = exp.cast(cls(expressions=values), this, copy=False)

        elif expressions:
            this.set("expressions", expressions)

        # https://materialize.com/docs/sql/types/list/#type-name
        while self._match(TokenType.LIST):
            this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True)

        index = self._index

        # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3]
        matched_array = self._match(TokenType.ARRAY)

        while self._curr:
            datatype_token = self._prev.token_type
            matched_l_bracket = self._match(TokenType.L_BRACKET)

            if (not matched_l_bracket and not matched_array) or (
                datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET)
            ):
                # Postgres allows casting empty arrays such as ARRAY[]::INT[],
                # not to be confused with the fixed size array parsing
                break

            matched_array = False
            values = self._parse_csv(self._parse_assignment) or None
            if (
                values
                and not schema
                and (
                    not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY
                )
            ):
                # Retreating here means that we should not parse the following values as part of the
                # data type, e.g. in DuckDB ARRAY[1] should retreat and instead be parsed into
                # exp.Array, in contrast to INT[x][y] which denotes a fixed-size array data type
                self._retreat(index)
                break

            this = exp.DataType(
                this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True
            )
            self._match(TokenType.R_BRACKET)

        if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type):
            converter = self.TYPE_CONVERTERS.get(this.this)
            if converter:
                this = converter(t.cast(exp.DataType, this))

        return this
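
    # Illustrative sketch (comment only): the same machinery backs exp.DataType.build,
    # so parameterized and nested types come back as DataType nodes with their
    # expressions populated:
    #
    #   from sqlglot import exp
    #   exp.DataType.build("DECIMAL(38, 0)").sql()           # "DECIMAL(38, 0)"
    #   exp.DataType.build("ARRAY<INT>").args.get("nested")  # True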

    def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
        index = self._index

        if (
            self._curr
            and self._next
            and self._curr.token_type in self.TYPE_TOKENS
            and self._next.token_type in self.TYPE_TOKENS
        ):
            # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is
            # also a type token. Without this, the list will be parsed as a type and we'll
            # eventually crash
            this = self._parse_id_var()
        else:
            this = (
                self._parse_type(parse_interval=False, fallback_to_identifier=True)
                or self._parse_id_var()
            )

        self._match(TokenType.COLON)

        if (
            type_required
            and not isinstance(this, exp.DataType)
            and not self._match_set(self.TYPE_TOKENS, advance=False)
        ):
            self._retreat(index)
            return self._parse_types()

        return self._parse_column_def(this)
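
    # Illustrative sketch (comment only): struct fields are parsed as column defs, so
    # both the `name TYPE` and `name: TYPE` spellings land in the same shape:
    #
    #   from sqlglot import exp
    #   dt = exp.DataType.build("STRUCT<a INT, b TEXT>")
    #   [e.sql() for e in dt.expressions]  # e.g. ["a INT", "b TEXT"]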

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        this = self._parse_column_reference()
        column = self._parse_column_ops(this) if this else self._parse_bracket(this)

        if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column:
            column.set("join_mark", self._match(TokenType.JOIN_MARKER))

        return column

    def _parse_column_reference(self) -> t.Optional[exp.Expression]:
        this = self._parse_field()
        if (
            not this
            and self._match(TokenType.VALUES, advance=False)
            and self.VALUES_FOLLOWED_BY_PAREN
            and (not self._next or self._next.token_type != TokenType.L_PAREN)
        ):
            this = self._parse_id_var()

        if isinstance(this, exp.Identifier):
            # We bubble up comments from the Identifier to the Column
            this = self.expression(exp.Column, comments=this.pop_comments(), this=this)

        return this

    def _parse_colon_as_variant_extract(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        casts = []
        json_path = []
        escape = None

        while self._match(TokenType.COLON):
            start_index = self._index

            # Snowflake allows reserved keywords as json keys but advance_any() excludes
            # TokenType.SELECT from any_tokens=True
            path = self._parse_column_ops(
                self._parse_field(any_token=True, tokens=(TokenType.SELECT,))
            )

            # The cast :: operator has a lower precedence than the extraction operator :, so
            # we rearrange the AST appropriately to avoid casting the JSON path
            while isinstance(path, exp.Cast):
                casts.append(path.to)
                path = path.this

            if casts:
                dcolon_offset = next(
                    i
                    for i, t in enumerate(self._tokens[start_index:])
                    if t.token_type == TokenType.DCOLON
                )
                end_token = self._tokens[start_index + dcolon_offset - 1]
            else:
                end_token = self._prev

            if path:
                # Escape single quotes from Snowflake's colon extraction (e.g. col:"a'b"),
                # as it'll roundtrip to a string literal in GET_PATH
                if isinstance(path, exp.Identifier) and path.quoted:
                    escape = True

                json_path.append(self._find_sql(self._tokens[start_index], end_token))

        # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses
        # the json_path in GET_PATH() while Databricks transforms it back to the colon/dot notation
        if json_path:
            json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path)))

            if json_path_expr:
                json_path_expr.set("escape", escape)

            this = self.expression(
                exp.JSONExtract,
                this=this,
                expression=json_path_expr,
                variant_extract=True,
                requires_json=self.JSON_EXTRACT_REQUIRES_JSON_EXPRESSION,
            )

            while casts:
                this = self.expression(exp.Cast, this=this, to=casts.pop())

        return this

    def _parse_dcolon(self) -> t.Optional[exp.Expression]:
        return self._parse_types()

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token in self.CAST_COLUMN_OPERATORS:
                field = self._parse_dcolon()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                field = self._parse_column_reference() or self._parse_bracket()
                if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False):
                    field = self._parse_column_ops(field)
            else:
                field = self._parse_field(any_token=True, anonymous_func=True)

            # Function calls can be qualified, e.g., x.y.FOO()
            # This converts the final AST to a series of Dots leading to the function call
            # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
            if isinstance(field, (exp.Func, exp.Window)) and this:
                this = this.transform(
                    lambda n: n.to_dot(include_dots=False) if isinstance(n, exp.Column) else n
                )

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                this = self.expression(
                    exp.Column,
                    comments=this.comments,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            elif isinstance(field, exp.Window):
                # Move the exp.Dot's to the window's function
                window_func = self.expression(exp.Dot, this=this, expression=field.this)
                field.set("this", window_func)
                this = field
            else:
                this = self.expression(exp.Dot, this=this, expression=field)

            if field and field.comments:
                t.cast(exp.Expression, this).add_comments(field.pop_comments())

            this = self._parse_bracket(this)

        return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this
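
    # Illustrative sketch (comment only): in a dialect where COLON_IS_VARIANT_EXTRACT
    # holds (e.g. Snowflake), the colon operator above becomes a JSONExtract node:
    #
    #   import sqlglot
    #   ast = sqlglot.parse_one("SELECT col:a.b FROM t", read="snowflake")
    #   ast.find(sqlglot.exp.JSONExtract) is not None  # True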

    def _parse_paren(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.L_PAREN):
            return None

        comments = self._prev_comments
        query = self._parse_select()

        if query:
            expressions = [query]
        else:
            expressions = self._parse_expressions()

        this = self._parse_query_modifiers(seq_get(expressions, 0))

        if not this and self._match(TokenType.R_PAREN, advance=False):
            this = self.expression(exp.Tuple)
        elif isinstance(this, exp.UNWRAPPED_QUERIES):
            this = self._parse_subquery(this=this, parse_alias=False)
        elif isinstance(this, exp.Subquery):
            this = self._parse_subquery(this=self._parse_set_operations(this), parse_alias=False)
        elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA:
            this = self.expression(exp.Tuple, expressions=expressions)
        else:
            this = self.expression(exp.Paren, this=this)

        if this:
            this.add_comments(comments)

        self._match_r_paren(expression=this)
        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        return self._parse_paren()

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        if anonymous_func:
            field = (
                self._parse_function(anonymous=anonymous_func, any_token=any_token)
                or self._parse_primary()
            )
        else:
            field = self._parse_primary() or self._parse_function(
                anonymous=anonymous_func, any_token=any_token
            )
        return field or self._parse_id_var(any_token=any_token, tokens=tokens)

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions,
            anonymous=anonymous,
            optional_parens=optional_parens,
            any_token=any_token,
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func
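
    # Illustrative sketch (comment only): the {fn ...} ODBC escape is stripped while
    # parsing, so the wrapped call round-trips as a plain function. Assuming a dialect
    # that supports the syntax (e.g. MySQL):
    #
    #   import sqlglot
    #   sqlglot.parse_one("SELECT {fn CONCAT('a', 'b')}", read="mysql").sql()
    #   # e.g. "SELECT CONCAT('a', 'b')"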

    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        if not self._curr:
            return None

        comments = self._curr.comments
        prev = self._prev
        token = self._curr
        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return self._parse_window(parser(self))

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if any_token:
            if token_type in self.RESERVED_TOKENS:
                return None
        elif token_type not in self.FUNC_TOKENS:
            return None

        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate:
                expr = None
                if self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                    expr = self._parse_select()
                    self._match_r_paren()
                elif prev and prev.token_type in (TokenType.LIKE, TokenType.ILIKE):
                    # Backtrack one token since we've consumed the L_PAREN here. Instead, we'd like
                    # to parse "LIKE [ANY | ALL] (...)" as a whole into an exp.Tuple or exp.Paren
                    self._advance(-1)
                    expr = self._parse_bitwise()

                if expr:
                    return self.expression(subquery_predicate, comments=comments, this=expr)

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)
            known_function = function and not anonymous

            alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            post_func_comments = self._curr and self._curr.comments
            if known_function and post_func_comments:
                # If the user-inputted comment "/* sqlglot.anonymous */" is following the function
                # call we'll construct it as exp.Anonymous, even if it's "known"
                if any(
                    comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS)
                    for comment in post_func_comments
                ):
                    known_function = False

            if alias and known_function:
                args = self._kv_to_prop_eq(args)

            if known_function:
                func_builder = t.cast(t.Callable, function)

                if "dialect" in func_builder.__code__.co_varnames:
                    func = func_builder(args, dialect=self.dialect)
                else:
                    func = func_builder(args)

                func = self.validate_expression(func, args)
                if self.dialect.PRESERVE_ORIGINAL_NAMES:
                    func.meta["name"] = this

                this = func
            else:
                if token_type == TokenType.IDENTIFIER:
                    this = exp.Identifier(this=this, quoted=True).update_positions(token)

                this = self.expression(exp.Anonymous, this=this, expressions=args)
                this = this.update_positions(token)

        if isinstance(this, exp.Expression):
            this.add_comments(comments)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression:
        return expression

    def _kv_to_prop_eq(
        self, expressions: t.List[exp.Expression], parse_map: bool = False
    ) -> t.List[exp.Expression]:
        transformed = []

        for index, e in enumerate(expressions):
            if isinstance(e, self.KEY_VALUE_DEFINITIONS):
                if isinstance(e, exp.Alias):
                    e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this)

                if not isinstance(e, exp.PropertyEQ):
                    e = self.expression(
                        exp.PropertyEQ,
                        this=e.this if parse_map else exp.to_identifier(e.this.name),
                        expression=e.expression,
                    )

                if isinstance(e.this, exp.Column):
                    e.this.replace(e.this.this)
            else:
                e = self._to_prop_eq(e, index)

            transformed.append(e)

        return transformed

    def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_statement()
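
    # Illustrative sketch (comment only): the "/* sqlglot.anonymous */" escape hatch
    # handled above forces a "known" function to stay exp.Anonymous (i.e. untranslated).
    # Assuming the comment attaches to the token that closes the argument list:
    #
    #   import sqlglot
    #   ast = sqlglot.parse_one("SELECT LOWER(x /* sqlglot.anonymous */)")
    #   isinstance(ast.selects[0], sqlglot.exp.Anonymous)  # True, despite LOWER being known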

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(this=self._parse_id_var(), computed_column=False)

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_table_parts(schema=True)

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self._identifier_expression(token)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
        return self._parse_id_var()

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_lambda_arg()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_assignment)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(
            self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this)))
        )
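
    # Illustrative sketch (comment only): function arguments are parsed via
    # _parse_lambda, so higher-order calls yield exp.Lambda nodes. Assuming a dialect
    # with arrow lambdas (e.g. DuckDB):
    #
    #   import sqlglot
    #   ast = sqlglot.parse_one("SELECT LIST_TRANSFORM(xs, x -> x + 1)", read="duckdb")
    #   ast.find(sqlglot.exp.Lambda).sql()  # e.g. "x -> x + 1"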

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        index = self._index
        if not self._match(TokenType.L_PAREN):
            return this

        # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>),
        # expr can be of both types
        if self._match_set(self.SELECT_START_TOKENS):
            self._retreat(index)
            return this
        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(
        self, this: t.Optional[exp.Expression], computed_column: bool = True
    ) -> t.Optional[exp.Expression]:
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        if not computed_column:
            self._match(TokenType.ALIAS)

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if (not kind and self._match(TokenType.ALIAS)) or self._match_texts(
            ("ALIAS", "MATERIALIZED")
        ):
            persisted = self._prev.text.upper() == "MATERIALIZED"
            constraint_kind = exp.ComputedColumnConstraint(
                this=self._parse_assignment(),
                persisted=persisted or self._match_text_seq("PERSISTED"),
                not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
            )
            constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind))
        elif (
            kind
            and self._match(TokenType.ALIAS, advance=False)
            and (
                not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
                or (self._next and self._next.token_type == TokenType.L_PAREN)
            )
        ):
            self._advance()
            constraints.append(
                self.expression(
                    exp.ColumnConstraint,
                    kind=exp.ComputedColumnConstraint(
                        this=self._parse_disjunction(),
                        persisted=self._match_texts(("STORED", "VIRTUAL"))
                        and self._prev.text.upper() == "STORED",
                    ),
                )
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        start = None
        increment = None
        order = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()
            if self._match_text_seq("ORDER"):
                order = True
            elif self._match_text_seq("NOORDER"):
                order = False

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(
                start=start, increment=increment, this=False, order=order
            )

        return exp.AutoIncrementColumnConstraint()

    def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]:
        if not self._match_text_seq("REFRESH"):
            self._retreat(self._index - 1)
            return None
        return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True))
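
    # Illustrative sketch (comment only): a column definition parses into exp.ColumnDef
    # with its type under "kind" and trailing constraints collected in order:
    #
    #   import sqlglot
    #   col = sqlglot.parse_one("CREATE TABLE t (x INT NOT NULL)").find(sqlglot.exp.ColumnDef)
    #   col.args["kind"].sql(), [c.sql() for c in col.args["constraints"]]
    #   # e.g. ("INT", ["NOT NULL"])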

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        if self._match_text_seq("ROW"):
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                this.set("expression", self._parse_range())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)

        # Unconsume the `NOT` token
        self._retreat(self._index - 1)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        this = self._match(TokenType.CONSTRAINT) and self._parse_id_var()

        procedure_option_follows = (
            self._match(TokenType.WITH, advance=False)
            and self._next
            and self._next.text.upper() in self.PROCEDURE_OPTIONS
        )

        if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this
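
    # Illustrative sketch (comment only): GENERATED ... AS IDENTITY options are folded
    # into a single constraint node:
    #
    #   import sqlglot
    #   sql = "CREATE TABLE t (id INT GENERATED ALWAYS AS IDENTITY (START WITH 1 INCREMENT BY 2))"
    #   gen = sqlglot.parse_one(sql).find(sqlglot.exp.GeneratedAsIdentityColumnConstraint)
    #   gen.args.get("start").sql(), gen.args.get("increment").sql()  # e.g. ("1", "2")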

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        return self.expression(
            exp.Constraint,
            this=self._parse_id_var(),
            expressions=self._parse_unnamed_constraints(),
        )

    def _parse_unnamed_constraints(self) -> t.List[exp.Expression]:
        constraints = []
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            constraints.append(constraint)

        return constraints

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique_key(self) -> t.Optional[exp.Expression]:
        return self._parse_id_var(any_token=False)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        self._match_texts(("KEY", "INDEX"))
        return self.expression(
            exp.UniqueColumnConstraint,
            nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"),
            this=self._parse_schema(self._parse_unique_key()),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
            on_conflict=self._parse_on_conflict(),
            options=self._parse_key_constraint_options(),
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            else:
                var = self._parse_var_from_options(
                    self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False
                )
                if not var:
                    break
                options.append(var.name)

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        expressions = (
            self._parse_wrapped_id_vars()
            if not self._match(TokenType.REFERENCES, advance=False)
            else None
        )
        reference = self._parse_references()
        on_options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            on_options[kind] = action

        return self.expression(
            exp.ForeignKey,
            expressions=expressions,
            reference=reference,
            options=self._parse_key_constraint_options(),
            **on_options,  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        return self._parse_ordered() or self._parse_field()

    def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
        if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
            self._retreat(self._index - 1)
            return None

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.PrimaryKeyColumnConstraint,
                desc=desc,
                options=self._parse_key_constraint_options(),
            )

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )

        return self.expression(
            exp.PrimaryKey,
            expressions=expressions,
            include=self._parse_index_params(),
            options=self._parse_key_constraint_options(),
        )

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))
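
    # Illustrative sketch (comment only): a table-level key parses into exp.PrimaryKey,
    # whereas the column-level form becomes a PrimaryKeyColumnConstraint:
    #
    #   import sqlglot
    #   ast = sqlglot.parse_one("CREATE TABLE t (a INT, b INT, PRIMARY KEY (a, b))")
    #   [e.sql() for e in ast.find(sqlglot.exp.PrimaryKey).expressions]  # ["a", "b"]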

    def _parse_odbc_datetime_literal(self) -> exp.Expression:
        """
        Parses a datetime literal in ODBC format. We parse the literal into the corresponding
        type; for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` expression, exactly the
        same as we do for `DATE('yyyy-mm-dd')`.

        Reference:
            https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals
        """
        self._match(TokenType.VAR)
        exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()]
        expression = self.expression(exp_class=exp_class, this=self._parse_string())
        if not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")
        return expression

    def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        if self.MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS:
            map_token = seq_get(self._tokens, self._index - 2)
            parse_map = map_token is not None and map_token.text.upper() == "MAP"
        else:
            parse_map = False

        bracket_kind = self._prev.token_type
        if (
            bracket_kind == TokenType.L_BRACE
            and self._curr
            and self._curr.token_type == TokenType.VAR
            and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS
        ):
            return self._parse_odbc_datetime_literal()

        expressions = self._parse_csv(
            lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
        )

        if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET):
            self.raise_error("Expected ]")
        elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(
                exp.Struct,
                expressions=self._kv_to_prop_eq(expressions=expressions, parse_map=parse_map),
            )
        elif not this:
            this = build_array_constructor(
                exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect
            )
        else:
            constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper())
            if constructor_type:
                return build_array_constructor(
                    constructor_type,
                    args=expressions,
                    bracket_kind=bracket_kind,
                    dialect=self.dialect,
                )

            expressions = apply_index_offset(
                this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect
            )
            this = self.expression(
                exp.Bracket,
                this=this,
                expressions=expressions,
                comments=this.pop_comments(),
            )

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_assignment())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_assignment()

        while self._match(TokenType.WHEN):
            this = self._parse_assignment()
            self._match(TokenType.THEN)
            then = self._parse_assignment()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_assignment()

        if not self._match(TokenType.END):
            if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
                default = exp.column("interval")
            else:
                self.raise_error("Expected END after CASE", self._prev)

        return self.expression(
            exp.Case, comments=comments, this=expression, ifs=ifs, default=default
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(
                lambda: self._parse_alias(self._parse_assignment(), explicit=True)
            )
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1

            if self.NO_PAREN_IF_COMMANDS and index == 0:
                return self._parse_as_command(self._prev)

            condition = self._parse_assignment()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_assignment()
            false = self._parse_assignment() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return this

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract | exp.Anonymous:
        this = self._parse_function() or self._parse_var_or_string(upper=True)

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_gap_fill(self) -> exp.GapFill:
        self._match(TokenType.TABLE)
        this = self._parse_table()

        self._match(TokenType.COMMA)
        args = [this, *self._parse_csv(self._parse_lambda)]

        gap_fill = exp.GapFill.from_arg_list(args)
        return self.validate_expression(gap_fill, args)

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        this = self._parse_assignment()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        default = self._match(TokenType.DEFAULT)
        if default:
            default = self._parse_bitwise()
            self._match_text_seq("ON", "CONVERSION", "ERROR")

        if self._match_set((TokenType.FORMAT, TokenType.COMMA)):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)
            if to.this in exp.DataType.TEMPORAL_TYPES:
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                    safe=safe,
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])
                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            to = exp.DataType.build(to.name, dialect=self.dialect, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.build_cast(
            strict=strict,
            this=this,
            to=to,
            format=fmt,
            safe=safe,
            action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False),
            default=default,
        )
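
    # Illustrative sketch (comment only): a FORMAT clause on a temporal cast is
    # canonicalized into StrToDate / StrToTime instead of a plain Cast. Assuming a
    # dialect with CAST ... FORMAT support (e.g. Teradata):
    #
    #   import sqlglot
    #   ast = sqlglot.parse_one("SELECT CAST(x AS DATE FORMAT 'YYYY-MM-DD')", read="teradata")
    #   ast.selects[0].__class__.__name__  # e.g. "StrToDate"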

    def _parse_string_agg(self) -> exp.GroupConcat:
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_assignment()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_assignment))
        else:
            args = self._parse_csv(self._parse_assignment)  # type: ignore

        if self._match_text_seq("ON", "OVERFLOW"):
            # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior])
            if self._match_text_seq("ERROR"):
                on_overflow: t.Optional[exp.Expression] = exp.var("ERROR")
            else:
                self._match_text_seq("TRUNCATE")
                on_overflow = self.expression(
                    exp.OverflowTruncateBehavior,
                    this=self._parse_string(),
                    with_count=(
                        self._match_text_seq("WITH", "COUNT")
                        or not self._match_text_seq("WITHOUT", "COUNT")
                    ),
                )
        else:
            on_overflow = None

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            # The order is parsed through `this` as a canonicalization for WITHIN GROUPs
            args[0] = self._parse_limit(this=self._parse_order(this=args[0]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        # The corresponding match_r_paren will be called in parse_function (caller)
        self._match_l_paren()

        return self.expression(
            exp.GroupConcat,
            this=self._parse_order(this=seq_get(args, 0)),
            separator=seq_get(args, 1),
            on_overflow=on_overflow,
        )

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.build_cast(strict=strict, this=this, to=to, safe=safe)

    def _parse_xml_table(self) -> exp.XMLTable:
        namespaces = None
        passing = None
        columns = None

        if self._match_text_seq("XMLNAMESPACES", "("):
            namespaces = self._parse_xml_namespace()
            self._match_text_seq(")", ",")

        this = self._parse_string()

        if self._match_text_seq("PASSING"):
            # The BY VALUE keywords are optional and are provided for semantic clarity
            self._match_text_seq("BY", "VALUE")
            passing = self._parse_csv(self._parse_column)

        by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF")

        if self._match_text_seq("COLUMNS"):
            columns = self._parse_csv(self._parse_field_def)

        return self.expression(
            exp.XMLTable,
            this=this,
            namespaces=namespaces,
            passing=passing,
            columns=columns,
            by_ref=by_ref,
        )

    def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]:
        namespaces = []

        while True:
            if self._match(TokenType.DEFAULT):
                uri = self._parse_string()
            else:
                uri = self._parse_alias(self._parse_string())
            namespaces.append(self.expression(exp.XMLNamespace, this=uri))
            if not self._match(TokenType.COMMA):
                break

        return namespaces

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.DecodeCase]:
        args = self._parse_csv(self._parse_assignment)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        return self.expression(exp.DecodeCase, expressions=args)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS)
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson, this=this)
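
    # Illustrative sketch (comment only): both spellings funnel into exp.GroupConcat,
    # which is what makes STRING_AGG <-> GROUP_CONCAT transpilation possible:
    #
    #   import sqlglot
    #   a = sqlglot.parse_one("SELECT STRING_AGG(x, ',')", read="postgres").selects[0]
    #   b = sqlglot.parse_one("SELECT GROUP_CONCAT(x SEPARATOR ',')", read="mysql").selects[0]
    #   type(a) is type(b) is sqlglot.exp.GroupConcat  # True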

    def _parse_on_condition(self) -> t.Optional[exp.OnCondition]:
        # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the
        # opposite (e.g. JSON_EXISTS)
        if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR:
            empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS)
            error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS)
        else:
            error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS)
            empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS)

        null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS)

        if not empty and not error and not null:
            return None

        return self.expression(
            exp.OnCondition,
            empty=empty,
            error=error,
            null=null,
        )

    def _parse_on_handling(
        self, on: str, *values: str
    ) -> t.Optional[str] | t.Optional[exp.Expression]:
        # Parses the "X ON Y" or "DEFAULT <expr> ON Y" syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        index = self._index
        if self._match(TokenType.DEFAULT):
            default_value = self._parse_bitwise()
            if self._match_text_seq("ON", on):
                return default_value

            self._retreat(index)

        return None

    @t.overload
    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

    @t.overload
    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

    def _parse_json_object(self, agg=False):
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObjectAgg if agg else exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    def _parse_json_table(self) -> exp.JSONTable:
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            haystack = seq_get(args, 1)
            needle = seq_get(args, 0)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                if len(args) == 1:
                    args.append(exp.Literal.number(1))
                args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)
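
    # Illustrative sketch: _parse_substring normalizes the Postgres FROM/FOR form into
    # the same positional argument list as the comma form, so both spellings yield an
    # exp.Substring (hedged example, standard dialect semantics assumed):
    #
    #   import sqlglot
    #   from sqlglot import exp
    #   ast = sqlglot.parse_one("SELECT SUBSTRING('abc' FROM 2 FOR 1)", read="postgres")
    #   assert ast.find(exp.Substring) is not None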

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None
        expression = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            max = self._prev.text.upper() != "MIN"
            return self.expression(
                exp.HavingMax, this=this, expression=self._parse_column(), max=max
            )

        return this
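
    # Illustrative sketch of _parse_trim: in "TRIM(LEADING 'x' FROM col)" the FROM swaps
    # the operands, so the column ends up as "this", the trim character as "expression",
    # and the position is recorded as a plain string:
    #
    #   import sqlglot
    #   from sqlglot import exp
    #   trim = sqlglot.parse_one("SELECT TRIM(LEADING 'x' FROM col)").find(exp.Trim)
    #   assert trim is not None and trim.args.get("position") == "LEADING"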

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        func = this
        comments = func.comments if isinstance(func, exp.Expression) else None

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if isinstance(this, exp.AggFunc):
            ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls)

            if ignore_respect and ignore_respect is not this:
                ignore_respect.replace(ignore_respect.this)
                this = self.expression(ignore_respect.__class__, this=this)

        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if comments and isinstance(func, exp.Expression):
            func.pop_comments()

        if not self._match(TokenType.L_PAREN):
            return self.expression(
                exp.Window,
                comments=comments,
                this=this,
                alias=self._parse_id_var(False),
                over=over,
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()
            exclude = (
                self._parse_var_from_options(self.WINDOW_EXCLUDE_OPTIONS)
                if self._match_text_seq("EXCLUDE")
                else None
            )

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
                exclude=exclude,
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            comments=comments,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }
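
    # Illustrative sketch: an explicit frame clause is captured by _parse_window_spec as
    # plain strings (or expressions, for numeric bounds) on an exp.WindowSpec:
    #
    #   import sqlglot
    #   from sqlglot import exp
    #   sql = "SELECT SUM(x) OVER (ORDER BY y ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM t"
    #   spec = sqlglot.parse_one(sql).find(exp.WindowSpec)
    #   assert spec is not None and spec.args["start"] == "UNBOUNDED"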

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses)
        # so this section tries to parse the clause version and if it fails, it treats the token
        # as an identifier (alias)
        if self._can_parse_limit_or_offset():
            return this

        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments or []

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or (
            self.STRING_ALIASES and self._parse_string_as_identifier()
        )

        if alias:
            comments.extend(alias.pop_comments())
            this = self.expression(exp.Alias, comments=comments, this=this, alias=alias)
            column = this.this

            # Moves the comment next to the alias in `expr /* comment */ AS alias`
            if not this.comments and column and column.comments:
                this.comments = column.pop_comments()

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_identifier()
        if not expression and (
            (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS)
        ):
            quoted = self._prev.token_type == TokenType.STRING
            expression = self._identifier_expression(quoted=quoted)

        return expression

    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.STRING_PARSERS):
            return self.STRING_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        output = exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)
        if output:
            output.update_positions(self._prev)
        return output

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NUMERIC_PARSERS):
            return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self._identifier_expression(quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        upper: bool = False,
    ) -> t.Optional[exp.Expression]:
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(
                exp.Var, this=self._prev.text.upper() if upper else self._prev.text
            )
        return self._parse_placeholder()

    def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]:
        if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS):
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_string() or self._parse_var(any_token=True, upper=upper)

    def _parse_primary_or_var(self) -> t.Optional[exp.Expression]:
        return self._parse_primary() or self._parse_var(any_token=True)

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()
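
    # Illustrative sketch: _parse_alias wraps a parsed expression in exp.Alias when an
    # AS token or a bare identifier follows it:
    #
    #   import sqlglot
    #   from sqlglot import exp
    #   proj = sqlglot.parse_one("SELECT 1 AS x").expressions[0]
    #   assert isinstance(proj, exp.Alias) and proj.alias == "x"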

    def _parse_parameter(self) -> exp.Parameter:
        this = self._parse_identifier() or self._parse_primary_or_var()
        return self.expression(exp.Parameter, this=this)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]:
        if not self._match_texts(keywords):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        expression = self._parse_expression()
        return [expression] if expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_select() or self._parse_set_operations(
            self._parse_alias(self._parse_assignment(), explicit=True)
            if alias
            else self._parse_assignment()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)
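
    # Note on the combinators above: _parse_csv collects separator-delimited items, while
    # _parse_tokens folds "a OP b OP c" left-associatively. E.g. driving it with a token
    # map like {TokenType.DASH: exp.Sub} turns "a - b - c" into
    # Sub(this=Sub(this=a, expression=b), expression=c), which preserves the grouping
    # that subtraction requires.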

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_refresh(self) -> exp.Refresh:
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

    def _parse_column_def_with_exists(self):
        start = self._index
        self._match(TokenType.COLUMN)

        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if not isinstance(expression, exp.ColumnDef):
            self._retreat(start)
            return None

        expression.set("exists", exists_column)

        return expression

    def _parse_add_column(self) -> t.Optional[exp.ColumnDef]:
        if self._prev.text.upper() != "ADD":
            return None

        expression = self._parse_column_def_with_exists()
        if not expression:
            return None

        # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
        if self._match_texts(("FIRST", "AFTER")):
            position = self._prev.text
            column_position = self.expression(
                exp.ColumnPosition, this=self._parse_column(), position=position
            )
            expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        def _parse_add_alteration() -> t.Optional[exp.Expression]:
            self._match_text_seq("ADD")
            if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
                return self.expression(
                    exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint)
                )

            column_def = self._parse_add_column()
            if isinstance(column_def, exp.ColumnDef):
                return column_def

            exists = self._parse_exists(not_=True)
            if self._match_pair(TokenType.PARTITION, TokenType.L_PAREN, advance=False):
                return self.expression(
                    exp.AddPartition,
                    exists=exists,
                    this=self._parse_field(any_token=True),
                    location=self._match_text_seq("LOCATION", advance=False)
                    and self._parse_property(),
                )

            return None

        if not self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False) and (
            not self.dialect.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
            or self._match_text_seq("COLUMNS")
        ):
            schema = self._parse_schema()

            return (
                ensure_list(schema)
                if schema
                else self._parse_csv(self._parse_column_def_with_exists)
            )

        return self._parse_csv(_parse_add_alteration)
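
    # Illustrative sketch (assumes ALTER ... ADD dispatches to _parse_alter_table_add via
    # the ALTER_PARSERS table): a plain column addition surfaces as an exp.ColumnDef
    # action on the resulting exp.Alter node:
    #
    #   import sqlglot
    #   from sqlglot import exp
    #   alter = sqlglot.parse_one("ALTER TABLE t ADD COLUMN c INT")
    #   assert isinstance(alter, exp.Alter)
    #   assert isinstance(alter.args["actions"][0], exp.ColumnDef)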

    def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.ALTER_ALTER_PARSERS):
            return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self)

        # Many dialects support the ALTER [COLUMN] syntax, so if there is no
        # keyword after ALTER we default to parsing this statement
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment())
        if self._match(TokenType.COMMENT):
            return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())
        if self._match_text_seq("DROP", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                drop=True,
                allow_null=True,
            )
        if self._match_text_seq("SET", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                allow_null=False,
            )

        if self._match_text_seq("SET", "VISIBLE"):
            return self.expression(exp.AlterColumn, this=column, visible="VISIBLE")
        if self._match_text_seq("SET", "INVISIBLE"):
            return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE")

        self._match_text_seq("SET", "DATA")
        self._match_text_seq("TYPE")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_assignment(),
        )

    def _parse_alter_diststyle(self) -> exp.AlterDistStyle:
        if self._match_texts(("ALL", "EVEN", "AUTO")):
            return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper()))

        self._match_text_seq("KEY", "DISTKEY")
        return self.expression(exp.AlterDistStyle, this=self._parse_column())

    def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey:
        if compound:
            self._match_text_seq("SORTKEY")

        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound
            )

        self._match_texts(("AUTO", "NONE"))
        return self.expression(
            exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]:
        if self._match(TokenType.COLUMN) or not self.ALTER_RENAME_REQUIRES_COLUMN:
            exists = self._parse_exists()
            old_column = self._parse_column()
            to = self._match_text_seq("TO")
            new_column = self._parse_column()

            if old_column is None or to is None or new_column is None:
                return None

            return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists)

        self._match_text_seq("TO")
        return self.expression(exp.AlterRename, this=self._parse_table(schema=True))
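
    # Illustrative sketch: because the SET DATA / TYPE keywords are consumed optionally
    # in the fallthrough above, "ALTER COLUMN c TYPE TEXT" and
    # "ALTER COLUMN c SET DATA TYPE TEXT" normalize to the same exp.AlterColumn:
    #
    #   import sqlglot
    #   from sqlglot import exp
    #   sql = "ALTER TABLE t ALTER COLUMN c SET DATA TYPE TEXT"
    #   col = sqlglot.parse_one(sql, read="postgres").find(exp.AlterColumn)
    #   assert col is not None and col.args.get("dtype") is not None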

    def _parse_alter_table_set(self) -> exp.AlterSet:
        alter_set = self.expression(exp.AlterSet)

        if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq(
            "TABLE", "PROPERTIES"
        ):
            alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment))
        elif self._match_text_seq("FILESTREAM_ON", advance=False):
            alter_set.set("expressions", [self._parse_assignment()])
        elif self._match_texts(("LOGGED", "UNLOGGED")):
            alter_set.set("option", exp.var(self._prev.text.upper()))
        elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")):
            alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}"))
        elif self._match_text_seq("LOCATION"):
            alter_set.set("location", self._parse_field())
        elif self._match_text_seq("ACCESS", "METHOD"):
            alter_set.set("access_method", self._parse_field())
        elif self._match_text_seq("TABLESPACE"):
            alter_set.set("tablespace", self._parse_field())
        elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"):
            alter_set.set("file_format", [self._parse_field()])
        elif self._match_text_seq("STAGE_FILE_FORMAT"):
            alter_set.set("file_format", self._parse_wrapped_options())
        elif self._match_text_seq("STAGE_COPY_OPTIONS"):
            alter_set.set("copy_options", self._parse_wrapped_options())
        elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"):
            alter_set.set("tag", self._parse_csv(self._parse_assignment))
        else:
            if self._match_text_seq("SERDE"):
                alter_set.set("serde", self._parse_field())

            properties = self._parse_wrapped(self._parse_properties, optional=True)
            alter_set.set("expressions", [properties])

        return alter_set

    def _parse_alter(self) -> exp.Alter | exp.Command:
        start = self._prev

        alter_token = self._match_set(self.ALTERABLES) and self._prev
        if not alter_token:
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)
        check = self._match_text_seq("WITH", "CHECK")
        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))
            not_valid = self._match_text_seq("NOT", "VALID")
            options = self._parse_csv(self._parse_property)

            if not self._curr and actions:
                return self.expression(
                    exp.Alter,
                    this=this,
                    kind=alter_token.text.upper(),
                    exists=exists,
                    actions=actions,
                    only=only,
                    options=options,
                    cluster=cluster,
                    not_valid=not_valid,
                    check=check,
                )

        return self._parse_as_command(start)

    def _parse_analyze(self) -> exp.Analyze | exp.Command:
        start = self._prev
        # https://duckdb.org/docs/sql/statements/analyze
        if not self._curr:
            return self.expression(exp.Analyze)

        options = []
        while self._match_texts(self.ANALYZE_STYLES):
            if self._prev.text.upper() == "BUFFER_USAGE_LIMIT":
                options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}")
            else:
                options.append(self._prev.text.upper())

        this: t.Optional[exp.Expression] = None
        inner_expression: t.Optional[exp.Expression] = None

        kind = self._curr and self._curr.text.upper()

        if self._match(TokenType.TABLE) or self._match(TokenType.INDEX):
            this = self._parse_table_parts()
        elif self._match_text_seq("TABLES"):
            if self._match_set((TokenType.FROM, TokenType.IN)):
                kind = f"{kind} {self._prev.text.upper()}"
            this = self._parse_table(schema=True, is_db_reference=True)
        elif self._match_text_seq("DATABASE"):
            this = self._parse_table(schema=True, is_db_reference=True)
        elif self._match_text_seq("CLUSTER"):
            this = self._parse_table()
        # Try matching inner expr keywords before fallback to parse table.
        elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS):
            kind = None
            inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self)
        else:
            # Empty kind https://prestodb.io/docs/current/sql/analyze.html
            kind = None
            this = self._parse_table_parts()

        partition = self._try_parse(self._parse_partition)
        if not partition and self._match_texts(self.PARTITION_KEYWORDS):
            return self._parse_as_command(start)

        # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/
        if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq(
            "WITH", "ASYNC", "MODE"
        ):
            mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE"
        else:
            mode = None

        if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS):
            inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self)

        properties = self._parse_properties()
        return self.expression(
            exp.Analyze,
            kind=kind,
            this=this,
            mode=mode,
            partition=partition,
            properties=properties,
            expression=inner_expression,
            options=options,
        )

    # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html
    def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics:
        this = None
        kind = self._prev.text.upper()
        option = self._prev.text.upper() if self._match_text_seq("DELTA") else None
        expressions = []

        if not self._match_text_seq("STATISTICS"):
            self.raise_error("Expecting token STATISTICS")

        if self._match_text_seq("NOSCAN"):
            this = "NOSCAN"
        elif self._match(TokenType.FOR):
            if self._match_text_seq("ALL", "COLUMNS"):
                this = "FOR ALL COLUMNS"
            if self._match_texts("COLUMNS"):
                this = "FOR COLUMNS"
                expressions = self._parse_csv(self._parse_column_reference)
        elif self._match_text_seq("SAMPLE"):
            sample = self._parse_number()
            expressions = [
                self.expression(
                    exp.AnalyzeSample,
                    sample=sample,
                    kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None,
                )
            ]

        return self.expression(
            exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions
        )

    # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html
    def _parse_analyze_validate(self) -> exp.AnalyzeValidate:
        kind = None
        this = None
        expression: t.Optional[exp.Expression] = None
        if self._match_text_seq("REF", "UPDATE"):
            kind = "REF"
            this = "UPDATE"
            if self._match_text_seq("SET", "DANGLING", "TO", "NULL"):
                this = "UPDATE SET DANGLING TO NULL"
        elif self._match_text_seq("STRUCTURE"):
            kind = "STRUCTURE"
            if self._match_text_seq("CASCADE", "FAST"):
                this = "CASCADE FAST"
            elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts(
                ("ONLINE", "OFFLINE")
            ):
                this = f"CASCADE COMPLETE {self._prev.text.upper()}"
                expression = self._parse_into()

        return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression)

    def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]:
        this = self._prev.text.upper()
        if self._match_text_seq("COLUMNS"):
            return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}")
        return None

    def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]:
        kind = self._prev.text.upper() if self._match_text_seq("SYSTEM") else None
        if self._match_text_seq("STATISTICS"):
            return self.expression(exp.AnalyzeDelete, kind=kind)
        return None

    def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]:
        if self._match_text_seq("CHAINED", "ROWS"):
            return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into())
        return None

    # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html
    def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram:
        this = self._prev.text.upper()
        expression: t.Optional[exp.Expression] = None
        expressions = []
        update_options = None

        if self._match_text_seq("HISTOGRAM", "ON"):
            expressions = self._parse_csv(self._parse_column_reference)
            with_expressions = []
            while self._match(TokenType.WITH):
                # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/
                if self._match_texts(("SYNC", "ASYNC")):
                    if self._match_text_seq("MODE", advance=False):
                        with_expressions.append(f"{self._prev.text.upper()} MODE")
                        self._advance()
                else:
                    buckets = self._parse_number()
                    if self._match_text_seq("BUCKETS"):
                        with_expressions.append(f"{buckets} BUCKETS")
            if with_expressions:
                expression = self.expression(exp.AnalyzeWith, expressions=with_expressions)

            if self._match_texts(("MANUAL", "AUTO")) and self._match(
                TokenType.UPDATE, advance=False
            ):
                update_options = self._prev.text.upper()
                self._advance()
        elif self._match_text_seq("USING", "DATA"):
            expression = self.expression(exp.UsingData, this=self._parse_string())

        return self.expression(
            exp.AnalyzeHistogram,
            this=this,
            expressions=expressions,
            expression=expression,
            update_options=update_options,
        )

    def _parse_merge(self) -> exp.Merge:
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_assignment()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            whens=self._parse_when_matched(),
            returning=self._parse_returning(),
        )
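
    # Illustrative sketch: _parse_merge delegates the WHEN branches to _parse_when_matched
    # below, producing an exp.Merge whose "whens" arg holds one exp.When per branch:
    #
    #   import sqlglot
    #   from sqlglot import exp
    #   sql = "MERGE INTO t USING s ON t.id = s.id WHEN MATCHED THEN UPDATE SET t.v = s.v"
    #   merge = sqlglot.parse_one(sql)
    #   assert isinstance(merge, exp.Merge) and merge.args["whens"] is not None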

    def _parse_when_matched(self) -> exp.Whens:
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_assignment() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                this = self._parse_star()
                if this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=exp.var("ROW")
                        if self._match_text_seq("ROW")
                        else self._parse_value(values=False),
                        expression=self._match_text_seq("VALUES") and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = self._parse_var_from_options(self.CONFLICT_ACTIONS)

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return self.expression(exp.Whens, expressions=whens)

    def _parse_show(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_column()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        if isinstance(right, (exp.Column, exp.Identifier)):
            right = exp.var(right.name)

        this = self.expression(exp.EQ, this=left, expression=right)
        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(
        self, options: OPTIONS_TYPE, raise_unmatched: bool = True
    ) -> t.Optional[exp.Var]:
        start = self._curr
        if not start:
            return None

        option = start.text.upper()
        continuations = options.get(option)

        index = self._index
        self._advance()
        for keywords in continuations or []:
            if isinstance(keywords, str):
                keywords = (keywords,)

            if self._match_text_seq(*keywords):
                option = f"{option} {' '.join(keywords)}"
                break
        else:
            if continuations or continuations is None:
                if raise_unmatched:
                    self.raise_error(f"Unknown option {option}")

                self._retreat(index)
                return None

        return exp.var(option)

    def _parse_as_command(self, start: Token) -> exp.Command:
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        self._warn_unsupported()
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()
                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Comprehension]:
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_assignment() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _parse_heredoc(self) -> t.Optional[exp.Heredoc]:
        if self._match(TokenType.HEREDOC_STRING):
            return self.expression(exp.Heredoc, this=self._prev.text)

        if not self._match_text_seq("$"):
            return None

        tags = ["$"]
        tag_text = None

        if self._is_connected():
            self._advance()
            tags.append(self._prev.text.upper())
        else:
            self.raise_error("No closing $ found")

        if tags[-1] != "$":
            if self._is_connected() and self._match_text_seq("$"):
                tag_text = tags[-1]
                tags.append("$")
            else:
                self.raise_error("No closing $ found")

        heredoc_start = self._curr

        while self._curr:
            if self._match_text_seq(*tags, advance=False):
                this = self._find_sql(heredoc_start, self._prev)
                self._advance(len(tags))
                return self.expression(exp.Heredoc, this=this, tag=tag_text)

            self._advance()

        self.raise_error(f"No closing {''.join(tags)} found")
        return None
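
    # Sketch of the OPTIONS_TYPE contract consumed by _parse_var_from_options above: keys
    # are leading keywords and values list permitted continuations, an empty sequence
    # meaning the keyword may stand alone. Given the hypothetical table
    #
    #   opts: OPTIONS_TYPE = {"READ": ("ONLY", "WRITE"), "LOCAL": ()}
    #
    # the parser accepts "READ ONLY", "READ WRITE" and bare "LOCAL" (returning e.g.
    # Var(this="READ ONLY")), while "READ ELSEWHERE" raises or backtracks depending on
    # raise_unmatched.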

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        if (
            self._curr
            and self._curr.token_type != TokenType.STRING
            and self._curr.text.upper() in texts
        ):
            if advance:
                self._advance()
            return True
        return None

    def _match_text_seq(self, *texts, advance=True):
        index = self._index
        for text in texts:
            if (
                self._curr
                and self._curr.token_type != TokenType.STRING
                and self._curr.text.upper() == text
            ):
                self._advance()
            else:
                self._retreat(index)
                return None

        if not advance:
            self._retreat(index)

        return True

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if not node:
            return node

        lambda_types = {e.name: e.args.get("to") or False for e in expressions}

        for column in node.find_all(exp.Column):
            typ = lambda_types.get(column.parts[0].name)
            if typ is not None:
                dot_or_id = column.to_dot() if column.table else column.this

                if typ:
                    dot_or_id = self.expression(
                        exp.Cast,
                        this=dot_or_id,
                        to=typ,
                    )

                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node
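
    # Note on the matching helpers above: _match/_match_set/_match_pair test token types,
    # while _match_texts/_match_text_seq compare upper-cased token text and deliberately
    # skip STRING tokens so quoted literals are never mistaken for keywords.
    # _match_text_seq is atomic: either the whole keyword sequence matches (pure
    # lookahead when advance=False) or the cursor is restored via _retreat.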

    def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression:
        start = self._prev

        # Not to be confused with TRUNCATE(number, decimals) function call
        if self._match(TokenType.L_PAREN):
            self._retreat(self._index - 2)
            return self._parse_function()

        # Clickhouse supports TRUNCATE DATABASE as well
        is_database = self._match(TokenType.DATABASE)

        self._match(TokenType.TABLE)

        exists = self._parse_exists(not_=False)

        expressions = self._parse_csv(
            lambda: self._parse_table(schema=True, is_db_reference=is_database)
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match_text_seq("RESTART", "IDENTITY"):
            identity = "RESTART"
        elif self._match_text_seq("CONTINUE", "IDENTITY"):
            identity = "CONTINUE"
        else:
            identity = None

        if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"):
            option = self._prev.text
        else:
            option = None

        partition = self._parse_partition()

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.TruncateTable,
            expressions=expressions,
            is_database=is_database,
            exists=exists,
            cluster=cluster,
            identity=identity,
            option=option,
            partition=partition,
        )

    def _parse_with_operator(self) -> t.Optional[exp.Expression]:
        this = self._parse_ordered(self._parse_opclass)

        if not self._match(TokenType.WITH):
            return this

        op = self._parse_var(any_token=True)

        return self.expression(exp.WithOperator, this=this, op=op)

    def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]:
        self._match(TokenType.EQ)
        self._match(TokenType.L_PAREN)

        opts: t.List[t.Optional[exp.Expression]] = []
        option: exp.Expression | None
        while self._curr and not self._match(TokenType.R_PAREN):
            if self._match_text_seq("FORMAT_NAME", "="):
                # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL
                option = self._parse_format_name()
            else:
                option = self._parse_property()

            if option is None:
                self.raise_error("Unable to parse option")
                break

            opts.append(option)

        return opts
expr.set("region", self._parse_field()) 8314 8315 return expr 8316 8317 def _parse_file_location(self) -> t.Optional[exp.Expression]: 8318 return self._parse_field() 8319 8320 def _parse_copy(self) -> exp.Copy | exp.Command: 8321 start = self._prev 8322 8323 self._match(TokenType.INTO) 8324 8325 this = ( 8326 self._parse_select(nested=True, parse_subquery_alias=False) 8327 if self._match(TokenType.L_PAREN, advance=False) 8328 else self._parse_table(schema=True) 8329 ) 8330 8331 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 8332 8333 files = self._parse_csv(self._parse_file_location) 8334 credentials = self._parse_credentials() 8335 8336 self._match_text_seq("WITH") 8337 8338 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 8339 8340 # Fallback case 8341 if self._curr: 8342 return self._parse_as_command(start) 8343 8344 return self.expression( 8345 exp.Copy, 8346 this=this, 8347 kind=kind, 8348 credentials=credentials, 8349 files=files, 8350 params=params, 8351 ) 8352 8353 def _parse_normalize(self) -> exp.Normalize: 8354 return self.expression( 8355 exp.Normalize, 8356 this=self._parse_bitwise(), 8357 form=self._match(TokenType.COMMA) and self._parse_var(), 8358 ) 8359 8360 def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor: 8361 args = self._parse_csv(lambda: self._parse_lambda()) 8362 8363 this = seq_get(args, 0) 8364 decimals = seq_get(args, 1) 8365 8366 return expr_type( 8367 this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var() 8368 ) 8369 8370 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 8371 star_token = self._prev 8372 8373 if self._match_text_seq("COLUMNS", "(", advance=False): 8374 this = self._parse_function() 8375 if isinstance(this, exp.Columns): 8376 this.set("unpack", True) 8377 return this 8378 8379 return self.expression( 8380 exp.Star, 8381 **{ # type: ignore 8382 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 8383 "replace": self._parse_star_op("REPLACE"), 8384 "rename": self._parse_star_op("RENAME"), 8385 }, 8386 ).update_positions(star_token) 8387 8388 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 8389 privilege_parts = [] 8390 8391 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 8392 # (end of privilege list) or L_PAREN (start of column list) are met 8393 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 8394 privilege_parts.append(self._curr.text.upper()) 8395 self._advance() 8396 8397 this = exp.var(" ".join(privilege_parts)) 8398 expressions = ( 8399 self._parse_wrapped_csv(self._parse_column) 8400 if self._match(TokenType.L_PAREN, advance=False) 8401 else None 8402 ) 8403 8404 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 8405 8406 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 8407 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 8408 principal = self._parse_id_var() 8409 8410 if not principal: 8411 return None 8412 8413 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 8414 8415 def _parse_grant_revoke_common( 8416 self, 8417 ) -> t.Tuple[t.Optional[t.List], t.Optional[str], t.Optional[exp.Expression]]: 8418 privileges = self._parse_csv(self._parse_grant_privilege) 8419 8420 self._match(TokenType.ON) 8421 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 8422 8423 # Attempt to parse the securable e.g. 

    def _parse_normalize(self) -> exp.Normalize:
        return self.expression(
            exp.Normalize,
            this=self._parse_bitwise(),
            form=self._match(TokenType.COMMA) and self._parse_var(),
        )

    def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor:
        args = self._parse_csv(lambda: self._parse_lambda())

        this = seq_get(args, 0)
        decimals = seq_get(args, 1)

        return expr_type(
            this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var()
        )

    def _parse_star_ops(self) -> t.Optional[exp.Expression]:
        star_token = self._prev

        if self._match_text_seq("COLUMNS", "(", advance=False):
            this = self._parse_function()
            if isinstance(this, exp.Columns):
                this.set("unpack", True)
            return this

        return self.expression(
            exp.Star,
            **{  # type: ignore
                "except": self._parse_star_op("EXCEPT", "EXCLUDE"),
                "replace": self._parse_star_op("REPLACE"),
                "rename": self._parse_star_op("RENAME"),
            },
        ).update_positions(star_token)

    def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]:
        privilege_parts = []

        # Keep consuming consecutive keywords until comma (end of this privilege) or ON
        # (end of privilege list) or L_PAREN (start of column list) are met
        while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False):
            privilege_parts.append(self._curr.text.upper())
            self._advance()

        this = exp.var(" ".join(privilege_parts))
        expressions = (
            self._parse_wrapped_csv(self._parse_column)
            if self._match(TokenType.L_PAREN, advance=False)
            else None
        )

        return self.expression(exp.GrantPrivilege, this=this, expressions=expressions)

    def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]:
        kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper()
        principal = self._parse_id_var()

        if not principal:
            return None

        return self.expression(exp.GrantPrincipal, this=principal, kind=kind)

    def _parse_grant_revoke_common(
        self,
    ) -> t.Tuple[t.Optional[t.List], t.Optional[str], t.Optional[exp.Expression]]:
        privileges = self._parse_csv(self._parse_grant_privilege)

        self._match(TokenType.ON)
        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()

        # Attempt to parse the securable e.g. MySQL allows names
        # such as "foo.*", "*.*" which are not easily parseable yet
        securable = self._try_parse(self._parse_table_parts)

        return privileges, kind, securable

    def _parse_grant(self) -> exp.Grant | exp.Command:
        start = self._prev

        privileges, kind, securable = self._parse_grant_revoke_common()

        if not securable or not self._match_text_seq("TO"):
            return self._parse_as_command(start)

        principals = self._parse_csv(self._parse_grant_principal)

        grant_option = self._match_text_seq("WITH", "GRANT", "OPTION")

        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Grant,
            privileges=privileges,
            kind=kind,
            securable=securable,
            principals=principals,
            grant_option=grant_option,
        )

    def _parse_revoke(self) -> exp.Revoke | exp.Command:
        start = self._prev

        grant_option = self._match_text_seq("GRANT", "OPTION", "FOR")

        privileges, kind, securable = self._parse_grant_revoke_common()

        if not securable or not self._match_text_seq("FROM"):
            return self._parse_as_command(start)

        principals = self._parse_csv(self._parse_grant_principal)

        cascade = None
        if self._match_texts(("CASCADE", "RESTRICT")):
            cascade = self._prev.text.upper()

        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Revoke,
            privileges=privileges,
            kind=kind,
            securable=securable,
            principals=principals,
            grant_option=grant_option,
            cascade=cascade,
        )

    def _parse_overlay(self) -> exp.Overlay:
        return self.expression(
            exp.Overlay,
            **{  # type: ignore
                "this": self._parse_bitwise(),
                "expression": self._match_text_seq("PLACING") and self._parse_bitwise(),
                "from": self._match_text_seq("FROM") and self._parse_bitwise(),
                "for": self._match_text_seq("FOR") and self._parse_bitwise(),
            },
        )

    def _parse_format_name(self) -> exp.Property:
        # Note: Although not specified in the docs, Snowflake does accept a string/identifier
        # for FILE_FORMAT = <format_name>
        return self.expression(
            exp.Property,
            this=exp.var("FORMAT_NAME"),
            value=self._parse_string() or self._parse_table_parts(),
        )

    def _parse_max_min_by(self, expr_type: t.Type[exp.AggFunc]) -> exp.AggFunc:
        args: t.List[exp.Expression] = []

        if self._match(TokenType.DISTINCT):
            args.append(self.expression(exp.Distinct, expressions=[self._parse_assignment()]))
            self._match(TokenType.COMMA)

        args.extend(self._parse_csv(self._parse_assignment))

        return self.expression(
            expr_type, this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2)
        )

    def _identifier_expression(
        self, token: t.Optional[Token] = None, **kwargs: t.Any
    ) -> exp.Identifier:
        token = token or self._prev
        expression = self.expression(exp.Identifier, this=token.text, **kwargs)
        expression.update_positions(token)
        return expression
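
    # Illustrative sketch: _parse_grant only commits to a structured exp.Grant when the
    # securable and TO principals parse cleanly and no tokens remain; anything else falls
    # back to an opaque exp.Command via _parse_as_command:
    #
    #   import sqlglot
    #   from sqlglot import exp
    #   grant = sqlglot.parse_one("GRANT SELECT ON TABLE t TO admin")
    #   assert isinstance(grant, exp.Grant)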
query.args.get("with") 8537 ctes = with_.pop() if with_ else None 8538 8539 new_select = exp.select(*expressions, copy=False).from_(new_cte, copy=False) 8540 if ctes: 8541 new_select.set("with", ctes) 8542 8543 return new_select.with_(new_cte, as_=query, copy=False) 8544 8545 def _parse_pipe_syntax_select(self, query: exp.Select) -> exp.Select: 8546 select = self._parse_select(consume_pipe=False) 8547 if not select: 8548 return query 8549 8550 return self._build_pipe_cte( 8551 query=query.select(*select.expressions, append=False), expressions=[exp.Star()] 8552 ) 8553 8554 def _parse_pipe_syntax_limit(self, query: exp.Select) -> exp.Select: 8555 limit = self._parse_limit() 8556 offset = self._parse_offset() 8557 if limit: 8558 curr_limit = query.args.get("limit", limit) 8559 if curr_limit.expression.to_py() >= limit.expression.to_py(): 8560 query.limit(limit, copy=False) 8561 if offset: 8562 curr_offset = query.args.get("offset") 8563 curr_offset = curr_offset.expression.to_py() if curr_offset else 0 8564 query.offset(exp.Literal.number(curr_offset + offset.expression.to_py()), copy=False) 8565 8566 return query 8567 8568 def _parse_pipe_syntax_aggregate_fields(self) -> t.Optional[exp.Expression]: 8569 this = self._parse_assignment() 8570 if self._match_text_seq("GROUP", "AND", advance=False): 8571 return this 8572 8573 this = self._parse_alias(this) 8574 8575 if self._match_set((TokenType.ASC, TokenType.DESC), advance=False): 8576 return self._parse_ordered(lambda: this) 8577 8578 return this 8579 8580 def _parse_pipe_syntax_aggregate_group_order_by( 8581 self, query: exp.Select, group_by_exists: bool = True 8582 ) -> exp.Select: 8583 expr = self._parse_csv(self._parse_pipe_syntax_aggregate_fields) 8584 aggregates_or_groups, orders = [], [] 8585 for element in expr: 8586 if isinstance(element, exp.Ordered): 8587 this = element.this 8588 if isinstance(this, exp.Alias): 8589 element.set("this", this.args["alias"]) 8590 orders.append(element) 8591 else: 8592 this = element 8593 aggregates_or_groups.append(this) 8594 8595 if group_by_exists: 8596 query.select(*aggregates_or_groups, copy=False).group_by( 8597 *[projection.args.get("alias", projection) for projection in aggregates_or_groups], 8598 copy=False, 8599 ) 8600 else: 8601 query.select(*aggregates_or_groups, append=False, copy=False) 8602 8603 if orders: 8604 return query.order_by(*orders, append=False, copy=False) 8605 8606 return query 8607 8608 def _parse_pipe_syntax_aggregate(self, query: exp.Select) -> exp.Select: 8609 self._match_text_seq("AGGREGATE") 8610 query = self._parse_pipe_syntax_aggregate_group_order_by(query, group_by_exists=False) 8611 8612 if self._match(TokenType.GROUP_BY) or ( 8613 self._match_text_seq("GROUP", "AND") and self._match(TokenType.ORDER_BY) 8614 ): 8615 query = self._parse_pipe_syntax_aggregate_group_order_by(query) 8616 8617 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8618 8619 def _parse_pipe_syntax_set_operator(self, query: exp.Query) -> t.Optional[exp.Query]: 8620 first_setop = self.parse_set_operation(this=query) 8621 if not first_setop: 8622 return None 8623 8624 def _parse_and_unwrap_query() -> t.Optional[exp.Select]: 8625 expr = self._parse_paren() 8626 return expr.assert_is(exp.Subquery).unnest() if expr else None 8627 8628 first_setop.this.pop() 8629 8630 setops = [ 8631 first_setop.expression.pop().assert_is(exp.Subquery).unnest(), 8632 *self._parse_csv(_parse_and_unwrap_query), 8633 ] 8634 8635 query = self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8636 
with_ = query.args.get("with") 8637 ctes = with_.pop() if with_ else None 8638 8639 if isinstance(first_setop, exp.Union): 8640 query = query.union(*setops, copy=False, **first_setop.args) 8641 elif isinstance(first_setop, exp.Except): 8642 query = query.except_(*setops, copy=False, **first_setop.args) 8643 else: 8644 query = query.intersect(*setops, copy=False, **first_setop.args) 8645 8646 query.set("with", ctes) 8647 8648 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8649 8650 def _parse_pipe_syntax_join(self, query: exp.Query) -> t.Optional[exp.Query]: 8651 join = self._parse_join() 8652 if not join: 8653 return None 8654 8655 if isinstance(query, exp.Select): 8656 return query.join(join, copy=False) 8657 8658 return query 8659 8660 def _parse_pipe_syntax_pivot(self, query: exp.Select) -> exp.Select: 8661 pivots = self._parse_pivots() 8662 if not pivots: 8663 return query 8664 8665 from_ = query.args.get("from") 8666 if from_: 8667 from_.this.set("pivots", pivots) 8668 8669 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8670 8671 def _parse_pipe_syntax_extend(self, query: exp.Select) -> exp.Select: 8672 self._match_text_seq("EXTEND") 8673 query.select(*[exp.Star(), *self._parse_expressions()], append=False, copy=False) 8674 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8675 8676 def _parse_pipe_syntax_tablesample(self, query: exp.Select) -> exp.Select: 8677 sample = self._parse_table_sample() 8678 8679 with_ = query.args.get("with") 8680 if with_: 8681 with_.expressions[-1].this.set("sample", sample) 8682 else: 8683 query.set("sample", sample) 8684 8685 return query 8686 8687 def _parse_pipe_syntax_query(self, query: exp.Query) -> t.Optional[exp.Query]: 8688 if isinstance(query, exp.Subquery): 8689 query = exp.select("*").from_(query, copy=False) 8690 8691 if not query.args.get("from"): 8692 query = exp.select("*").from_(query.subquery(copy=False), copy=False) 8693 8694 while self._match(TokenType.PIPE_GT): 8695 start = self._curr 8696 parser = self.PIPE_SYNTAX_TRANSFORM_PARSERS.get(self._curr.text.upper()) 8697 if not parser: 8698 # The set operators (UNION, etc) and the JOIN operator have a few common starting 8699 # keywords, making it tricky to disambiguate them without lookahead. The approach 8700 # here is to try and parse a set operation and if that fails, then try to parse a 8701 # join operator. If that fails as well, then the operator is not supported. 
8702 parsed_query = self._parse_pipe_syntax_set_operator(query) 8703 parsed_query = parsed_query or self._parse_pipe_syntax_join(query) 8704 if not parsed_query: 8705 self._retreat(start) 8706 self.raise_error(f"Unsupported pipe syntax operator: '{start.text.upper()}'.") 8707 break 8708 query = parsed_query 8709 else: 8710 query = parser(self, query) 8711 8712 return query 8713 8714 def _parse_declareitem(self) -> t.Optional[exp.DeclareItem]: 8715 vars = self._parse_csv(self._parse_id_var) 8716 if not vars: 8717 return None 8718 8719 return self.expression( 8720 exp.DeclareItem, 8721 this=vars, 8722 kind=self._parse_types(), 8723 default=self._match(TokenType.DEFAULT) and self._parse_bitwise(), 8724 ) 8725 8726 def _parse_declare(self) -> exp.Declare | exp.Command: 8727 start = self._prev 8728 expressions = self._try_parse(lambda: self._parse_csv(self._parse_declareitem)) 8729 8730 if not expressions or self._curr: 8731 return self._parse_as_command(start) 8732 8733 return self.expression(exp.Declare, expressions=expressions) 8734 8735 def build_cast(self, strict: bool, **kwargs) -> exp.Cast: 8736 exp_class = exp.Cast if strict else exp.TryCast 8737 8738 if exp_class == exp.TryCast: 8739 kwargs["requires_string"] = self.dialect.TRY_CAST_REQUIRES_STRING 8740 8741 return self.expression(exp_class, **kwargs) 8742 8743 def _parse_json_value(self) -> exp.JSONValue: 8744 this = self._parse_bitwise() 8745 self._match(TokenType.COMMA) 8746 path = self._parse_bitwise() 8747 8748 returning = self._match(TokenType.RETURNING) and self._parse_type() 8749 8750 return self.expression( 8751 exp.JSONValue, 8752 this=this, 8753 path=self.dialect.to_json_path(path), 8754 returning=returning, 8755 on_condition=self._parse_on_condition(), 8756 ) 8757 8758 def _parse_group_concat(self) -> t.Optional[exp.Expression]: 8759 def concat_exprs( 8760 node: t.Optional[exp.Expression], exprs: t.List[exp.Expression] 8761 ) -> exp.Expression: 8762 if isinstance(node, exp.Distinct) and len(node.expressions) > 1: 8763 concat_exprs = [ 8764 self.expression(exp.Concat, expressions=node.expressions, safe=True) 8765 ] 8766 node.set("expressions", concat_exprs) 8767 return node 8768 if len(exprs) == 1: 8769 return exprs[0] 8770 return self.expression(exp.Concat, expressions=args, safe=True) 8771 8772 args = self._parse_csv(self._parse_lambda) 8773 8774 if args: 8775 order = args[-1] if isinstance(args[-1], exp.Order) else None 8776 8777 if order: 8778 # Order By is the last (or only) expression in the list and has consumed the 'expr' before it, 8779 # remove 'expr' from exp.Order and add it back to args 8780 args[-1] = order.this 8781 order.set("this", concat_exprs(order.this, args)) 8782 8783 this = order or concat_exprs(args[0], args) 8784 else: 8785 this = None 8786 8787 separator = self._parse_field() if self._match(TokenType.SEPARATOR) else None 8788 8789 return self.expression(exp.GroupConcat, this=this, separator=separator)
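The pipe-syntax helpers above fold every |> stage into a generated CTE (__tmp1, __tmp2, ...) via _build_pipe_cte, so a pipe query lowers to an ordinary nested SELECT. A minimal sketch of the effect, assuming a sqlglot version whose BigQuery dialect enables these PIPE_SYNTAX_TRANSFORM_PARSERS:

import sqlglot

# Each |> stage is rewritten into a __tmp<N> CTE, so the parsed tree is a
# regular SELECT that can be generated for dialects without pipe syntax.
sql = "FROM t |> WHERE x > 0 |> SELECT x"
print(sqlglot.transpile(sql, read="bigquery", write="duckdb")[0])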
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()
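A minimal sketch of constructing a standalone parser with these settings (in normal use, sqlglot.parse or a Dialect instance wires the Tokenizer and Parser together):

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser

# Accumulate errors and raise them together at the end, including at most
# five messages, with dialect-specific behavior taken from MySQL.
parser = Parser(error_level=ErrorLevel.RAISE, max_errors=5, dialect="mysql")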
    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
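As a sketch, tokenizing and parsing through a Dialect keeps the tokenizer and parser consistent (duckdb is just an arbitrary choice here):

from sqlglot.dialects.dialect import Dialect

sql = "SELECT a FROM t; SELECT b FROM u"
dialect = Dialect.get_or_raise("duckdb")

# One syntax tree is produced per semicolon-separated statement.
trees = dialect.parser().parse(dialect.tokenize(sql), sql=sql)
print([tree.sql("duckdb") for tree in trees])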
    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
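As a sketch, this is how a bare condition (not a full statement) can be parsed directly into exp.Condition, one of the types registered in EXPRESSION_PARSERS:

from sqlglot import exp
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "x > 1 AND y < 2"
# parse_into returns the same one-tree-per-statement list as parse().
condition = Parser().parse_into(exp.Condition, Tokenizer().tokenize(sql), sql=sql)[0]
assert isinstance(condition, exp.Condition)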
    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )
Logs or raises any found errors, depending on the chosen error level setting.
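A rough sketch of the WARN behavior, assuming the malformed query below does trigger a parse error (it is missing the right operand of +):

import logging

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

logging.basicConfig()

# With WARN, errors are logged through the "sqlglot" logger and a partial
# tree is still returned; with RAISE they would be raised together instead.
parser = Parser(error_level=ErrorLevel.WARN)
parser.parse(Tokenizer().tokenize("SELECT 1 +"), sql="SELECT 1 +")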
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
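The structured fields built here surface on ParseError, which a caller can inspect; a sketch with an intentionally malformed query:

import sqlglot
from sqlglot.errors import ParseError

try:
    sqlglot.parse_one("SELECT 1 +")
except ParseError as e:
    err = e.errors[0]
    # description/line/col/highlight mirror the kwargs passed to ParseError.new
    print(err["description"], err["line"], err["col"], repr(err["highlight"]))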
    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
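Dialect parsers build nodes through this method rather than instantiating expression classes directly, so comment attachment and validation are handled uniformly; a small sketch:

from sqlglot import exp
from sqlglot.parser import Parser

parser = Parser()
# Equivalent to exp.Is(this=..., expression=...) plus comment attachment
# and mandatory-argument validation.
node = parser.expression(exp.Is, this=exp.column("x"), expression=exp.Null())
print(node.sql())  # x IS NULL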
    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
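A sketch of how the error level interacts with validation (exp.Not declares "this" as a mandatory argument):

from sqlglot import exp
from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser

# Under IGNORE, validation is skipped and the incomplete node is returned;
# under the default IMMEDIATE level, the missing "this" would raise instead.
parser = Parser(error_level=ErrorLevel.IGNORE)
incomplete = parser.validate_expression(exp.Not())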
    def parse_set_operation(
        self, this: t.Optional[exp.Expression], consume_pipe: bool = False
    ) -> t.Optional[exp.Expression]:
        start = self._index
        _, side_token, kind_token = self._parse_join_parts()

        side = side_token.text if side_token else None
        kind = kind_token.text if kind_token else None

        if not self._match_set(self.SET_OPERATIONS):
            self._retreat(start)
            return None

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            operation: t.Type[exp.SetOperation] = exp.Union
        elif token_type == TokenType.EXCEPT:
            operation = exp.Except
        else:
            operation = exp.Intersect

        comments = self._prev.comments

        if self._match(TokenType.DISTINCT):
            distinct: t.Optional[bool] = True
        elif self._match(TokenType.ALL):
            distinct = False
        else:
            distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation]
            if distinct is None:
                self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}")

        by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq(
            "STRICT", "CORRESPONDING"
        )
        if self._match_text_seq("CORRESPONDING"):
            by_name = True

            if not side and not kind:
                kind = "INNER"

        on_column_list = None
        if by_name and self._match_texts(("ON", "BY")):
            on_column_list = self._parse_wrapped_csv(self._parse_column)

        expression = self._parse_select(
            nested=True, parse_set_operation=False, consume_pipe=consume_pipe
        )

        return self.expression(
            operation,
            comments=comments,
            this=this,
            distinct=distinct,
            by_name=by_name,
            expression=expression,
            side=side,
            kind=kind,
            on=on_column_list,
        )
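For example, UNION ALL resolves to an exp.Union node with distinct=False, while a bare UNION falls back to the dialect's SET_OP_DISTINCT_BY_DEFAULT entry (a sketch):

import sqlglot
from sqlglot import exp

union = sqlglot.parse_one("SELECT a FROM x UNION ALL SELECT a FROM y")
assert isinstance(union, exp.Union)
assert union.args.get("distinct") is False  # ALL => distinct=False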