# sqlglot.dialects.bigquery
"""BigQuery dialect: tokenizer, parser and SQL generator for Google BigQuery."""

from __future__ import annotations

import logging
import re
import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot._typing import E
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    arg_max_or_min_no_count,
    binary_from_function,
    date_add_interval_sql,
    datestrtodate_sql,
    format_time_lambda,
    if_sql,
    inline_array_sql,
    max_or_greatest,
    min_or_least,
    no_ilike_sql,
    parse_date_delta_with_interval,
    path_to_jsonpath,
    regexp_replace_sql,
    rename_func,
    timestrtotime_sql,
    ts_or_ds_add_cast,
)
from sqlglot.helper import seq_get, split_num_words
from sqlglot.tokens import TokenType

if t.TYPE_CHECKING:
    from typing_extensions import Literal

logger = logging.getLogger("sqlglot")


def _derived_table_values_to_unnest(self: BigQuery.Generator, expression: exp.Values) -> str:
    """Transpile a VALUES derived table into BigQuery's UNNEST([STRUCT(...), ...]) form."""
    if not expression.find_ancestor(exp.From, exp.Join):
        return self.values_sql(expression)

    alias = expression.args.get("alias")

    # NOTE: the loop variable is named `tup` (not `t`) to avoid shadowing the
    # module-level `import typing as t` alias.
    structs = [
        exp.Struct(
            expressions=[
                exp.alias_(value, column_name)
                for value, column_name in zip(
                    tup.expressions,
                    alias.columns
                    if alias and alias.columns
                    else (f"_c{i}" for i in range(len(tup.expressions))),
                )
            ]
        )
        for tup in expression.find_all(exp.Tuple)
    ]

    return self.unnest_sql(exp.Unnest(expressions=[exp.Array(expressions=structs)]))


def _returnsproperty_sql(self: BigQuery.Generator, expression: exp.ReturnsProperty) -> str:
    """Render a RETURNS clause; schema returns use BigQuery's `TABLE <...>` syntax."""
    this = expression.this
    if isinstance(this, exp.Schema):
        this = f"{this.this} <{self.expressions(this)}>"
    else:
        this = self.sql(this)
    return f"RETURNS {this}"


def _create_sql(self: BigQuery.Generator, expression: exp.Create) -> str:
    """Generate CREATE statements, rewriting table-valued functions to TABLE FUNCTION."""
    kind = expression.args["kind"]
    returns = expression.find(exp.ReturnsProperty)

    if kind.upper() == "FUNCTION" and returns and returns.args.get("is_table"):
        expression.set("kind", "TABLE FUNCTION")

        if isinstance(expression.expression, (exp.Subquery, exp.Literal)):
            expression.set("expression", expression.expression.this)

    return self.create_sql(expression)


def _unqualify_unnest(expression: exp.Expression) -> exp.Expression:
    """Remove references to unnest table aliases since bigquery doesn't allow them.

    These are added by the optimizer's qualify_column step.
    """
    from sqlglot.optimizer.scope import find_all_in_scope

    if isinstance(expression, exp.Select):
        unnest_aliases = {
            unnest.alias
            for unnest in find_all_in_scope(expression, exp.Unnest)
            if isinstance(unnest.parent, (exp.From, exp.Join))
        }
        if unnest_aliases:
            for column in expression.find_all(exp.Column):
                if column.table in unnest_aliases:
                    column.set("table", None)
                elif column.db in unnest_aliases:
                    column.set("db", None)

    return expression


# https://issuetracker.google.com/issues/162294746
# workaround for bigquery bug when grouping by an expression and then ordering
# WITH x AS (SELECT 1 y)
# SELECT y + 1 z
# FROM x
# GROUP BY x + 1
# ORDER by z
def _alias_ordered_group(expression: exp.Expression) -> exp.Expression:
    if isinstance(expression, exp.Select):
        group = expression.args.get("group")
        order = expression.args.get("order")

        if group and order:
            aliases = {
                select.this: select.args["alias"]
                for select in expression.selects
                if isinstance(select, exp.Alias)
            }

            for e in group.expressions:
                alias = aliases.get(e)

                if alias:
                    e.replace(exp.column(alias))

    return expression


def _pushdown_cte_column_names(expression: exp.Expression) -> exp.Expression:
    """BigQuery doesn't allow column names when defining a CTE, so we try to push them down."""
    if isinstance(expression, exp.CTE) and expression.alias_column_names:
        cte_query = expression.this

        if cte_query.is_star:
            logger.warning(
                "Can't push down CTE column names for star queries. Run the query through"
                " the optimizer or use 'qualify' to expand the star projections first."
            )
            return expression

        column_names = expression.alias_column_names
        expression.args["alias"].set("columns", None)

        for name, select in zip(column_names, cte_query.selects):
            to_replace = select

            if isinstance(select, exp.Alias):
                select = select.this

            # Inner aliases are shadowed by the CTE column names
            to_replace.replace(exp.alias_(select, name))

    return expression


def _parse_timestamp(args: t.List) -> exp.StrToTime:
    """Parse PARSE_TIMESTAMP(fmt, value[, zone]) into StrToTime with an optional zone."""
    this = format_time_lambda(exp.StrToTime, "bigquery")([seq_get(args, 1), seq_get(args, 0)])
    this.set("zone", seq_get(args, 2))
    return this


def _parse_date(args: t.List) -> exp.Date | exp.DateFromParts:
    """DATE(y, m, d) becomes DateFromParts; any other arity stays a plain Date."""
    expr_type = exp.DateFromParts if len(args) == 3 else exp.Date
    return expr_type.from_arg_list(args)


def _parse_to_hex(args: t.List) -> exp.Hex | exp.MD5:
    # TO_HEX(MD5(..)) is common in BigQuery, so it's parsed into MD5 to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.MD5(this=arg.this) if isinstance(arg, exp.MD5Digest) else exp.Hex(this=arg)


def _array_contains_sql(self: BigQuery.Generator, expression: exp.ArrayContains) -> str:
    """Rewrite ARRAY_CONTAINS as an EXISTS subquery over UNNEST of the array."""
    return self.sql(
        exp.Exists(
            this=exp.select("1")
            .from_(exp.Unnest(expressions=[expression.left]).as_("_unnest", table=["_col"]))
            .where(exp.column("_col").eq(expression.right))
        )
    )


def _ts_or_ds_add_sql(self: BigQuery.Generator, expression: exp.TsOrDsAdd) -> str:
    return date_add_interval_sql("DATE", "ADD")(self, ts_or_ds_add_cast(expression))


def _ts_or_ds_diff_sql(self: BigQuery.Generator, expression: exp.TsOrDsDiff) -> str:
    """Render TsOrDsDiff as DATE_DIFF, casting both operands to TIMESTAMP first."""
    expression.this.replace(exp.cast(expression.this, "TIMESTAMP", copy=True))
    expression.expression.replace(exp.cast(expression.expression, "TIMESTAMP", copy=True))
    unit = expression.args.get("unit") or "DAY"
    return self.func("DATE_DIFF", expression.this, expression.expression, unit)


def _unix_to_time_sql(self: BigQuery.Generator, expression: exp.UnixToTime) -> str:
    """Map UnixToTime onto TIMESTAMP_SECONDS/MILLIS/MICROS, scaling other precisions."""
    scale = expression.args.get("scale")
    timestamp = self.sql(expression, "this")
    if scale in (None, exp.UnixToTime.SECONDS):
        return f"TIMESTAMP_SECONDS({timestamp})"
    if scale == exp.UnixToTime.MILLIS:
        return f"TIMESTAMP_MILLIS({timestamp})"
    if scale == exp.UnixToTime.MICROS:
        return f"TIMESTAMP_MICROS({timestamp})"

    return f"TIMESTAMP_SECONDS(CAST({timestamp} / POW(10, {scale}) AS INT64))"


def _parse_time(args: t.List) -> exp.Func:
    """TIME(x) -> TsOrDsToTime, TIME(h, m, s) -> TimeFromParts, else an anonymous call."""
    if len(args) == 1:
        return exp.TsOrDsToTime(this=args[0])
    if len(args) == 3:
        return exp.TimeFromParts.from_arg_list(args)

    return exp.Anonymous(this="TIME", expressions=args)


class BigQuery(Dialect):
    WEEK_OFFSET = -1
    UNNEST_COLUMN_ONLY = True
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    LOG_BASE_FIRST = False

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#case_sensitivity
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    # bigquery udfs are case sensitive
    NORMALIZE_FUNCTIONS = False

    TIME_MAPPING = {
        "%D": "%m/%d/%y",
    }

    ESCAPE_SEQUENCES = {
        "\\a": "\a",
        "\\b": "\b",
        "\\f": "\f",
        "\\n": "\n",
        "\\r": "\r",
        "\\t": "\t",
        "\\v": "\v",
    }

    FORMAT_MAPPING = {
        "DD": "%d",
        "MM": "%m",
        "MON": "%b",
        "MONTH": "%B",
        "YYYY": "%Y",
        "YY": "%y",
        "HH": "%I",
        "HH12": "%I",
        "HH24": "%H",
        "MI": "%M",
        "SS": "%S",
        "SSSSS": "%f",
        "TZH": "%z",
    }

    # The _PARTITIONTIME and _PARTITIONDATE pseudo-columns are not returned by a SELECT * statement
    # https://cloud.google.com/bigquery/docs/querying-partitioned-tables#query_an_ingestion-time_partitioned_table
    PSEUDOCOLUMNS = {"_PARTITIONTIME", "_PARTITIONDATE"}

    def normalize_identifier(self, expression: E) -> E:
        if isinstance(expression, exp.Identifier):
            parent = expression.parent
            while isinstance(parent, exp.Dot):
                parent = parent.parent

            # In BigQuery, CTEs aren't case-sensitive, but table names are (by default, at least).
            # The following check is essentially a heuristic to detect tables based on whether or
            # not they're qualified. It also avoids normalizing UDFs, because they're case-sensitive.
            if (
                not isinstance(parent, exp.UserDefinedFunction)
                and not (isinstance(parent, exp.Table) and parent.db)
                and not expression.meta.get("is_table")
            ):
                expression.set("this", expression.this.lower())

        return expression

    class Tokenizer(tokens.Tokenizer):
        QUOTES = ["'", '"', '"""', "'''"]
        COMMENTS = ["--", "#", ("/*", "*/")]
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["\\"]

        HEX_STRINGS = [("0x", ""), ("0X", "")]

        BYTE_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B")
        ]

        RAW_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R")
        ]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ANY TYPE": TokenType.VARIANT,
            "BEGIN": TokenType.COMMAND,
            "BEGIN TRANSACTION": TokenType.BEGIN,
            "BYTES": TokenType.BINARY,
            "CURRENT_DATETIME": TokenType.CURRENT_DATETIME,
            "DECLARE": TokenType.COMMAND,
            "FLOAT64": TokenType.DOUBLE,
            "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT,
            "MODEL": TokenType.MODEL,
            "NOT DETERMINISTIC": TokenType.VOLATILE,
            "RECORD": TokenType.STRUCT,
            "TIMESTAMP": TokenType.TIMESTAMPTZ,
        }
        KEYWORDS.pop("DIV")

    class Parser(parser.Parser):
        PREFIXED_PIVOT_COLUMNS = True

        LOG_DEFAULTS_TO_LN = True

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "DATE": _parse_date,
            "DATE_ADD": parse_date_delta_with_interval(exp.DateAdd),
            "DATE_SUB": parse_date_delta_with_interval(exp.DateSub),
            "DATE_TRUNC": lambda args: exp.DateTrunc(
                unit=exp.Literal.string(str(seq_get(args, 1))),
                this=seq_get(args, 0),
            ),
            "DATETIME_ADD": parse_date_delta_with_interval(exp.DatetimeAdd),
            "DATETIME_SUB": parse_date_delta_with_interval(exp.DatetimeSub),
            "DIV": binary_from_function(exp.IntDiv),
            "FORMAT_DATE": lambda args: exp.TimeToStr(
                this=exp.TsOrDsToDate(this=seq_get(args, 1)), format=seq_get(args, 0)
            ),
            "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list,
            "JSON_EXTRACT_SCALAR": lambda args: exp.JSONExtractScalar(
                this=seq_get(args, 0), expression=seq_get(args, 1) or exp.Literal.string("$")
            ),
            "MD5": exp.MD5Digest.from_arg_list,
            "TO_HEX": _parse_to_hex,
            "PARSE_DATE": lambda args: format_time_lambda(exp.StrToDate, "bigquery")(
                [seq_get(args, 1), seq_get(args, 0)]
            ),
            "PARSE_TIMESTAMP": _parse_timestamp,
            "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list,
            "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                position=seq_get(args, 2),
                occurrence=seq_get(args, 3),
                group=exp.Literal.number(1) if re.compile(args[1].name).groups == 1 else None,
            ),
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
            "SPLIT": lambda args: exp.Split(
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split
                this=seq_get(args, 0),
                expression=seq_get(args, 1) or exp.Literal.string(","),
            ),
            "TIME": _parse_time,
            "TIME_ADD": parse_date_delta_with_interval(exp.TimeAdd),
            "TIME_SUB": parse_date_delta_with_interval(exp.TimeSub),
            "TIMESTAMP_ADD": parse_date_delta_with_interval(exp.TimestampAdd),
            "TIMESTAMP_SUB": parse_date_delta_with_interval(exp.TimestampSub),
            "TIMESTAMP_MICROS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MICROS
            ),
            "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
            ),
            "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)),
            "TO_JSON_STRING": exp.JSONFormat.from_arg_list,
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]),
        }
        FUNCTION_PARSERS.pop("TRIM")

        NO_PAREN_FUNCTIONS = {
            **parser.Parser.NO_PAREN_FUNCTIONS,
            TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
        }

        NESTED_TYPE_TOKENS = {
            *parser.Parser.NESTED_TYPE_TOKENS,
            TokenType.TABLE,
        }

        ID_VAR_TOKENS = {
            *parser.Parser.ID_VAR_TOKENS,
            TokenType.VALUES,
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "NOT DETERMINISTIC": lambda self: self.expression(
                exp.StabilityProperty, this=exp.Literal.string("VOLATILE")
            ),
            "OPTIONS": lambda self: self._parse_with_property(),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()),
        }

        RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy()
        RANGE_PARSERS.pop(TokenType.OVERLAPS, None)

        NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN}

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.END: lambda self: self._parse_as_command(self._prev),
            TokenType.FOR: lambda self: self._parse_for_in(),
        }

        # name -> (index offset, SAFE_ variant) for array subscript operators
        BRACKET_OFFSETS = {
            "OFFSET": (0, False),
            "ORDINAL": (1, False),
            "SAFE_OFFSET": (0, True),
            "SAFE_ORDINAL": (1, True),
        }

        def _parse_for_in(self) -> exp.ForIn:
            this = self._parse_range()
            self._match_text_seq("DO")
            return self.expression(exp.ForIn, this=this, expression=self._parse_statement())

        def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
            this = super()._parse_table_part(schema=schema) or self._parse_number()

            # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names
            if isinstance(this, exp.Identifier):
                table_name = this.name
                while self._match(TokenType.DASH, advance=False) and self._next:
                    self._advance(2)
                    table_name += f"-{self._prev.text}"

                this = exp.Identifier(this=table_name, quoted=this.args.get("quoted"))
            elif isinstance(this, exp.Literal):
                table_name = this.name

                if self._is_connected() and self._parse_var(any_token=True):
                    table_name += self._prev.text

                this = exp.Identifier(this=table_name, quoted=True)

            return this

        def _parse_table_parts(self, schema: bool = False) -> exp.Table:
            table = super()._parse_table_parts(schema=schema)
            if isinstance(table.this, exp.Identifier) and "." in table.name:
                catalog, db, this, *rest = (
                    t.cast(t.Optional[exp.Expression], exp.to_identifier(x))
                    for x in split_num_words(table.name, ".", 3)
                )

                if rest and this:
                    this = exp.Dot.build(t.cast(t.List[exp.Expression], [this, *rest]))

                table = exp.Table(this=this, db=db, catalog=catalog)

            return table

        @t.overload
        def _parse_json_object(self, agg: Literal[False]) -> exp.JSONObject:
            ...

        @t.overload
        def _parse_json_object(self, agg: Literal[True]) -> exp.JSONObjectAgg:
            ...

        def _parse_json_object(self, agg=False):
            json_object = super()._parse_json_object()
            array_kv_pair = seq_get(json_object.expressions, 0)

            # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation
            # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2
            if (
                array_kv_pair
                and isinstance(array_kv_pair.this, exp.Array)
                and isinstance(array_kv_pair.expression, exp.Array)
            ):
                keys = array_kv_pair.this.expressions
                values = array_kv_pair.expression.expressions

                json_object.set(
                    "expressions",
                    [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)],
                )

            return json_object

        def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
            bracket = super()._parse_bracket(this)

            if this is bracket:
                return bracket

            if isinstance(bracket, exp.Bracket):
                for expression in bracket.expressions:
                    name = expression.name.upper()

                    if name not in self.BRACKET_OFFSETS:
                        break

                    offset, safe = self.BRACKET_OFFSETS[name]
                    bracket.set("offset", offset)
                    bracket.set("safe", safe)
                    expression.replace(expression.expressions[0])

            return bracket

    class Generator(generator.Generator):
        EXPLICIT_UNION = True
        INTERVAL_ALLOWS_PLURAL_FORM = False
        JOIN_HINTS = False
        QUERY_HINTS = False
        TABLE_HINTS = False
        LIMIT_FETCH = "LIMIT"
        RENAME_TABLE_WITH_DB = False
        NVL2_SUPPORTED = False
        UNNEST_WITH_ORDINALITY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        SUPPORTS_TABLE_ALIAS_COLUMNS = False
        UNPIVOT_ALIASES_ARE_IDENTIFIERS = False
        JSON_KEY_VALUE_PAIR_SEP = ","
        NULL_ORDERING_SUPPORTED = False

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
            exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
            exp.ArrayContains: _array_contains_sql,
            exp.ArraySize: rename_func("ARRAY_LENGTH"),
            exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]),
            exp.CollateProperty: lambda self, e: f"DEFAULT COLLATE {self.sql(e, 'this')}"
            if e.args.get("default")
            else f"COLLATE {self.sql(e, 'this')}",
            exp.CountIf: rename_func("COUNTIF"),
            exp.Create: _create_sql,
            exp.CTE: transforms.preprocess([_pushdown_cte_column_names]),
            exp.DateAdd: date_add_interval_sql("DATE", "ADD"),
            exp.DateDiff: lambda self, e: f"DATE_DIFF({self.sql(e, 'this')}, {self.sql(e, 'expression')}, {self.sql(e.args.get('unit', 'DAY'))})",
            exp.DateFromParts: rename_func("DATE"),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DateSub: date_add_interval_sql("DATE", "SUB"),
            exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"),
            exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"),
            exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")),
            exp.FromTimeZone: lambda self, e: self.func(
                "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'"
            ),
            exp.GenerateSeries: rename_func("GENERATE_ARRAY"),
            exp.GetPath: path_to_jsonpath(),
            exp.GroupConcat: rename_func("STRING_AGG"),
            exp.Hex: rename_func("TO_HEX"),
            exp.If: if_sql(false_value="NULL"),
            exp.ILike: no_ilike_sql,
            exp.IntDiv: rename_func("DIV"),
            exp.JSONFormat: rename_func("TO_JSON_STRING"),
            exp.Max: max_or_greatest,
            exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)),
            exp.MD5Digest: rename_func("MD5"),
            exp.Min: min_or_least,
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.RegexpExtract: lambda self, e: self.func(
                "REGEXP_EXTRACT",
                e.this,
                e.expression,
                e.args.get("position"),
                e.args.get("occurrence"),
            ),
            exp.RegexpReplace: regexp_replace_sql,
            exp.RegexpLike: rename_func("REGEXP_CONTAINS"),
            exp.ReturnsProperty: _returnsproperty_sql,
            exp.Select: transforms.preprocess(
                [
                    transforms.explode_to_unnest(),
                    _unqualify_unnest,
                    transforms.eliminate_distinct_on,
                    _alias_ordered_group,
                    transforms.eliminate_semi_and_anti_joins,
                ]
            ),
            exp.SHA2: lambda self, e: self.func(
                "SHA256" if e.text("length") == "256" else "SHA512", e.this
            ),
            exp.StabilityProperty: lambda self, e: "DETERMINISTIC"
            if e.name == "IMMUTABLE"
            else "NOT DETERMINISTIC",
            exp.StrToDate: lambda self, e: f"PARSE_DATE({self.format_time(e)}, {self.sql(e, 'this')})",
            exp.StrToTime: lambda self, e: self.func(
                "PARSE_TIMESTAMP", self.format_time(e), e.this, e.args.get("zone")
            ),
            exp.TimeAdd: date_add_interval_sql("TIME", "ADD"),
            exp.TimeFromParts: rename_func("TIME"),
            exp.TimeSub: date_add_interval_sql("TIME", "SUB"),
            exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"),
            exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
            exp.TsOrDsAdd: _ts_or_ds_add_sql,
            exp.TsOrDsDiff: _ts_or_ds_diff_sql,
            exp.TsOrDsToTime: rename_func("TIME"),
            exp.Unhex: rename_func("FROM_HEX"),
            exp.UnixDate: rename_func("UNIX_DATE"),
            exp.UnixToTime: _unix_to_time_sql,
            exp.Values: _derived_table_values_to_unnest,
            exp.VariancePop: rename_func("VAR_POP"),
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC",
            exp.DataType.Type.BIGINT: "INT64",
            exp.DataType.Type.BINARY: "BYTES",
            exp.DataType.Type.BOOLEAN: "BOOL",
            exp.DataType.Type.CHAR: "STRING",
            exp.DataType.Type.DECIMAL: "NUMERIC",
            exp.DataType.Type.DOUBLE: "FLOAT64",
            exp.DataType.Type.FLOAT: "FLOAT64",
            exp.DataType.Type.INT: "INT64",
            exp.DataType.Type.NCHAR: "STRING",
            exp.DataType.Type.NVARCHAR: "STRING",
            exp.DataType.Type.SMALLINT: "INT64",
            exp.DataType.Type.TEXT: "STRING",
            exp.DataType.Type.TIMESTAMP: "DATETIME",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP",
            exp.DataType.Type.TINYINT: "INT64",
            exp.DataType.Type.VARBINARY: "BYTES",
            exp.DataType.Type.VARCHAR: "STRING",
            exp.DataType.Type.VARIANT: "ANY TYPE",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords
        RESERVED_KEYWORDS = {
            *generator.Generator.RESERVED_KEYWORDS,
            "all",
            "and",
            "any",
            "array",
            "as",
            "asc",
            "assert_rows_modified",
            "at",
            "between",
            "by",
            "case",
            "cast",
            "collate",
            "contains",
            "create",
            "cross",
            "cube",
            "current",
            "default",
            "define",
            "desc",
            "distinct",
            "else",
            "end",
            "enum",
            "escape",
            "except",
            "exclude",
            "exists",
            "extract",
            "false",
            "fetch",
            "following",
            "for",
            "from",
            "full",
            "group",
            "grouping",
            "groups",
            "hash",
            "having",
            "if",
            "ignore",
            "in",
            "inner",
            "intersect",
            "interval",
            "into",
            "is",
            "join",
            "lateral",
            "left",
            "like",
            "limit",
            "lookup",
            "merge",
            "natural",
            "new",
            "no",
            "not",
            "null",
            "nulls",
            "of",
            "on",
            "or",
            "order",
            "outer",
            "over",
            "partition",
            "preceding",
            "proto",
            "qualify",
            "range",
            "recursive",
            "respect",
            "right",
            "rollup",
            "rows",
            "select",
            "set",
            "some",
            "struct",
            "tablesample",
            "then",
            "to",
            "treat",
            "true",
            "unbounded",
            "union",
            "unnest",
            "using",
            "when",
            "where",
            "window",
            "with",
            "within",
        }

        def timetostr_sql(self, expression: exp.TimeToStr) -> str:
            if isinstance(expression.this, exp.TsOrDsToDate):
                this: exp.Expression = expression.this
            else:
                this = expression

            return f"FORMAT_DATE({self.format_time(expression)}, {self.sql(this, 'this')})"

        def struct_sql(self, expression: exp.Struct) -> str:
            args = []
            for expr in expression.expressions:
                if isinstance(expr, self.KEY_VALUE_DEFINITIONS):
                    arg = f"{self.sql(expr, 'expression')} AS {expr.this.name}"
                else:
                    arg = self.sql(expr)

                args.append(arg)

            return self.func("STRUCT", *args)

        def eq_sql(self, expression: exp.EQ) -> str:
            # Operands of = cannot be NULL in BigQuery
            if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null):
                if not isinstance(expression.parent, exp.Update):
                    return "NULL"

            return self.binary(expression, "=")

        def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
            parent = expression.parent

            # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
            # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included.
            if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"):
                return self.func(
                    "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
                )

            return super().attimezone_sql(expression)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            return self.cast_sql(expression, safe_prefix="SAFE_")

        def cte_sql(self, expression: exp.CTE) -> str:
            if expression.alias_column_names:
                self.unsupported("Column names in CTE definition are not supported.")
            return super().cte_sql(expression)

        def array_sql(self, expression: exp.Array) -> str:
            first_arg = seq_get(expression.expressions, 0)
            if isinstance(first_arg, exp.Subqueryable):
                return f"ARRAY{self.wrap(self.sql(first_arg))}"

            return inline_array_sql(self, expression)

        def bracket_sql(self, expression: exp.Bracket) -> str:
            this = self.sql(expression, "this")
            expressions = expression.expressions

            if len(expressions) == 1:
                arg = expressions[0]
                if arg.type is None:
                    from sqlglot.optimizer.annotate_types import annotate_types

                    arg = annotate_types(arg)

                if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
                    # BQ doesn't support bracket syntax with string values
                    return f"{this}.{arg.name}"

            expressions_sql = ", ".join(self.sql(e) for e in expressions)
            offset = expression.args.get("offset")

            if offset == 0:
                expressions_sql = f"OFFSET({expressions_sql})"
            elif offset == 1:
                expressions_sql = f"ORDINAL({expressions_sql})"
            elif offset is not None:
                self.unsupported(f"Unsupported array offset: {offset}")

            if expression.args.get("safe"):
                expressions_sql = f"SAFE_{expressions_sql}"

            return f"{this}[{expressions_sql}]"

        def transaction_sql(self, *_) -> str:
            return "BEGIN TRANSACTION"

        def commit_sql(self, *_) -> str:
            return "COMMIT TRANSACTION"

        def rollback_sql(self, *_) -> str:
            return "ROLLBACK TRANSACTION"

        def in_unnest_op(self, expression: exp.Unnest) -> str:
            return self.sql(expression)

        def except_op(self, expression: exp.Except) -> str:
            if not expression.args.get("distinct", False):
                self.unsupported("EXCEPT without DISTINCT is not supported in BigQuery")
            return f"EXCEPT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

        def intersect_op(self, expression: exp.Intersect) -> str:
            if not expression.args.get("distinct", False):
                self.unsupported("INTERSECT without DISTINCT is not supported in BigQuery")
            return f"INTERSECT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

        def with_properties(self, properties: exp.Properties) -> str:
            return self.properties(properties, prefix=self.seg("OPTIONS"))

        def version_sql(self, expression: exp.Version) -> str:
            if expression.name == "TIMESTAMP":
                expression.set("this", "SYSTEM_TIME")
            return super().version_sql(expression)
225class BigQuery(Dialect): 226 WEEK_OFFSET = -1 227 UNNEST_COLUMN_ONLY = True 228 SUPPORTS_USER_DEFINED_TYPES = False 229 SUPPORTS_SEMI_ANTI_JOIN = False 230 LOG_BASE_FIRST = False 231 232 # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#case_sensitivity 233 NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE 234 235 # bigquery udfs are case sensitive 236 NORMALIZE_FUNCTIONS = False 237 238 TIME_MAPPING = { 239 "%D": "%m/%d/%y", 240 } 241 242 ESCAPE_SEQUENCES = { 243 "\\a": "\a", 244 "\\b": "\b", 245 "\\f": "\f", 246 "\\n": "\n", 247 "\\r": "\r", 248 "\\t": "\t", 249 "\\v": "\v", 250 } 251 252 FORMAT_MAPPING = { 253 "DD": "%d", 254 "MM": "%m", 255 "MON": "%b", 256 "MONTH": "%B", 257 "YYYY": "%Y", 258 "YY": "%y", 259 "HH": "%I", 260 "HH12": "%I", 261 "HH24": "%H", 262 "MI": "%M", 263 "SS": "%S", 264 "SSSSS": "%f", 265 "TZH": "%z", 266 } 267 268 # The _PARTITIONTIME and _PARTITIONDATE pseudo-columns are not returned by a SELECT * statement 269 # https://cloud.google.com/bigquery/docs/querying-partitioned-tables#query_an_ingestion-time_partitioned_table 270 PSEUDOCOLUMNS = {"_PARTITIONTIME", "_PARTITIONDATE"} 271 272 def normalize_identifier(self, expression: E) -> E: 273 if isinstance(expression, exp.Identifier): 274 parent = expression.parent 275 while isinstance(parent, exp.Dot): 276 parent = parent.parent 277 278 # In BigQuery, CTEs aren't case-sensitive, but table names are (by default, at least). 279 # The following check is essentially a heuristic to detect tables based on whether or 280 # not they're qualified. It also avoids normalizing UDFs, because they're case-sensitive. 
281 if ( 282 not isinstance(parent, exp.UserDefinedFunction) 283 and not (isinstance(parent, exp.Table) and parent.db) 284 and not expression.meta.get("is_table") 285 ): 286 expression.set("this", expression.this.lower()) 287 288 return expression 289 290 class Tokenizer(tokens.Tokenizer): 291 QUOTES = ["'", '"', '"""', "'''"] 292 COMMENTS = ["--", "#", ("/*", "*/")] 293 IDENTIFIERS = ["`"] 294 STRING_ESCAPES = ["\\"] 295 296 HEX_STRINGS = [("0x", ""), ("0X", "")] 297 298 BYTE_STRINGS = [ 299 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B") 300 ] 301 302 RAW_STRINGS = [ 303 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R") 304 ] 305 306 KEYWORDS = { 307 **tokens.Tokenizer.KEYWORDS, 308 "ANY TYPE": TokenType.VARIANT, 309 "BEGIN": TokenType.COMMAND, 310 "BEGIN TRANSACTION": TokenType.BEGIN, 311 "BYTES": TokenType.BINARY, 312 "CURRENT_DATETIME": TokenType.CURRENT_DATETIME, 313 "DECLARE": TokenType.COMMAND, 314 "FLOAT64": TokenType.DOUBLE, 315 "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT, 316 "MODEL": TokenType.MODEL, 317 "NOT DETERMINISTIC": TokenType.VOLATILE, 318 "RECORD": TokenType.STRUCT, 319 "TIMESTAMP": TokenType.TIMESTAMPTZ, 320 } 321 KEYWORDS.pop("DIV") 322 323 class Parser(parser.Parser): 324 PREFIXED_PIVOT_COLUMNS = True 325 326 LOG_DEFAULTS_TO_LN = True 327 328 FUNCTIONS = { 329 **parser.Parser.FUNCTIONS, 330 "DATE": _parse_date, 331 "DATE_ADD": parse_date_delta_with_interval(exp.DateAdd), 332 "DATE_SUB": parse_date_delta_with_interval(exp.DateSub), 333 "DATE_TRUNC": lambda args: exp.DateTrunc( 334 unit=exp.Literal.string(str(seq_get(args, 1))), 335 this=seq_get(args, 0), 336 ), 337 "DATETIME_ADD": parse_date_delta_with_interval(exp.DatetimeAdd), 338 "DATETIME_SUB": parse_date_delta_with_interval(exp.DatetimeSub), 339 "DIV": binary_from_function(exp.IntDiv), 340 "FORMAT_DATE": lambda args: exp.TimeToStr( 341 this=exp.TsOrDsToDate(this=seq_get(args, 1)), format=seq_get(args, 0) 342 ), 343 
"GENERATE_ARRAY": exp.GenerateSeries.from_arg_list, 344 "JSON_EXTRACT_SCALAR": lambda args: exp.JSONExtractScalar( 345 this=seq_get(args, 0), expression=seq_get(args, 1) or exp.Literal.string("$") 346 ), 347 "MD5": exp.MD5Digest.from_arg_list, 348 "TO_HEX": _parse_to_hex, 349 "PARSE_DATE": lambda args: format_time_lambda(exp.StrToDate, "bigquery")( 350 [seq_get(args, 1), seq_get(args, 0)] 351 ), 352 "PARSE_TIMESTAMP": _parse_timestamp, 353 "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list, 354 "REGEXP_EXTRACT": lambda args: exp.RegexpExtract( 355 this=seq_get(args, 0), 356 expression=seq_get(args, 1), 357 position=seq_get(args, 2), 358 occurrence=seq_get(args, 3), 359 group=exp.Literal.number(1) if re.compile(args[1].name).groups == 1 else None, 360 ), 361 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 362 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 363 "SPLIT": lambda args: exp.Split( 364 # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split 365 this=seq_get(args, 0), 366 expression=seq_get(args, 1) or exp.Literal.string(","), 367 ), 368 "TIME": _parse_time, 369 "TIME_ADD": parse_date_delta_with_interval(exp.TimeAdd), 370 "TIME_SUB": parse_date_delta_with_interval(exp.TimeSub), 371 "TIMESTAMP_ADD": parse_date_delta_with_interval(exp.TimestampAdd), 372 "TIMESTAMP_SUB": parse_date_delta_with_interval(exp.TimestampSub), 373 "TIMESTAMP_MICROS": lambda args: exp.UnixToTime( 374 this=seq_get(args, 0), scale=exp.UnixToTime.MICROS 375 ), 376 "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime( 377 this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS 378 ), 379 "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)), 380 "TO_JSON_STRING": exp.JSONFormat.from_arg_list, 381 } 382 383 FUNCTION_PARSERS = { 384 **parser.Parser.FUNCTION_PARSERS, 385 "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]), 386 } 387 
FUNCTION_PARSERS.pop("TRIM") 388 389 NO_PAREN_FUNCTIONS = { 390 **parser.Parser.NO_PAREN_FUNCTIONS, 391 TokenType.CURRENT_DATETIME: exp.CurrentDatetime, 392 } 393 394 NESTED_TYPE_TOKENS = { 395 *parser.Parser.NESTED_TYPE_TOKENS, 396 TokenType.TABLE, 397 } 398 399 ID_VAR_TOKENS = { 400 *parser.Parser.ID_VAR_TOKENS, 401 TokenType.VALUES, 402 } 403 404 PROPERTY_PARSERS = { 405 **parser.Parser.PROPERTY_PARSERS, 406 "NOT DETERMINISTIC": lambda self: self.expression( 407 exp.StabilityProperty, this=exp.Literal.string("VOLATILE") 408 ), 409 "OPTIONS": lambda self: self._parse_with_property(), 410 } 411 412 CONSTRAINT_PARSERS = { 413 **parser.Parser.CONSTRAINT_PARSERS, 414 "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()), 415 } 416 417 RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy() 418 RANGE_PARSERS.pop(TokenType.OVERLAPS, None) 419 420 NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN} 421 422 STATEMENT_PARSERS = { 423 **parser.Parser.STATEMENT_PARSERS, 424 TokenType.END: lambda self: self._parse_as_command(self._prev), 425 TokenType.FOR: lambda self: self._parse_for_in(), 426 } 427 428 BRACKET_OFFSETS = { 429 "OFFSET": (0, False), 430 "ORDINAL": (1, False), 431 "SAFE_OFFSET": (0, True), 432 "SAFE_ORDINAL": (1, True), 433 } 434 435 def _parse_for_in(self) -> exp.ForIn: 436 this = self._parse_range() 437 self._match_text_seq("DO") 438 return self.expression(exp.ForIn, this=this, expression=self._parse_statement()) 439 440 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 441 this = super()._parse_table_part(schema=schema) or self._parse_number() 442 443 # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names 444 if isinstance(this, exp.Identifier): 445 table_name = this.name 446 while self._match(TokenType.DASH, advance=False) and self._next: 447 self._advance(2) 448 table_name += f"-{self._prev.text}" 449 450 this = exp.Identifier(this=table_name, quoted=this.args.get("quoted")) 
451 elif isinstance(this, exp.Literal): 452 table_name = this.name 453 454 if self._is_connected() and self._parse_var(any_token=True): 455 table_name += self._prev.text 456 457 this = exp.Identifier(this=table_name, quoted=True) 458 459 return this 460 461 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 462 table = super()._parse_table_parts(schema=schema) 463 if isinstance(table.this, exp.Identifier) and "." in table.name: 464 catalog, db, this, *rest = ( 465 t.cast(t.Optional[exp.Expression], exp.to_identifier(x)) 466 for x in split_num_words(table.name, ".", 3) 467 ) 468 469 if rest and this: 470 this = exp.Dot.build(t.cast(t.List[exp.Expression], [this, *rest])) 471 472 table = exp.Table(this=this, db=db, catalog=catalog) 473 474 return table 475 476 @t.overload 477 def _parse_json_object(self, agg: Literal[False]) -> exp.JSONObject: 478 ... 479 480 @t.overload 481 def _parse_json_object(self, agg: Literal[True]) -> exp.JSONObjectAgg: 482 ... 483 484 def _parse_json_object(self, agg=False): 485 json_object = super()._parse_json_object() 486 array_kv_pair = seq_get(json_object.expressions, 0) 487 488 # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation 489 # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2 490 if ( 491 array_kv_pair 492 and isinstance(array_kv_pair.this, exp.Array) 493 and isinstance(array_kv_pair.expression, exp.Array) 494 ): 495 keys = array_kv_pair.this.expressions 496 values = array_kv_pair.expression.expressions 497 498 json_object.set( 499 "expressions", 500 [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)], 501 ) 502 503 return json_object 504 505 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 506 bracket = super()._parse_bracket(this) 507 508 if this is bracket: 509 return bracket 510 511 if isinstance(bracket, exp.Bracket): 512 for expression in bracket.expressions: 513 name 
# NOTE(review): this span opens mid-way through Parser._parse_bracket; the lines
# below are the tail of that method's loop (continuing the `name` assignment
# started in the previous span), kept verbatim.
= expression.name.upper()

                if name not in self.BRACKET_OFFSETS:
                    break

                offset, safe = self.BRACKET_OFFSETS[name]
                bracket.set("offset", offset)
                bracket.set("safe", safe)
                expression.replace(expression.expressions[0])

        return bracket

class Generator(generator.Generator):
    """SQL generator for BigQuery's Standard SQL dialect.

    Flags below toggle base-generator behavior; TRANSFORMS maps expression
    types to BigQuery-specific renderings.
    """

    EXPLICIT_UNION = True
    INTERVAL_ALLOWS_PLURAL_FORM = False
    JOIN_HINTS = False
    QUERY_HINTS = False
    TABLE_HINTS = False
    LIMIT_FETCH = "LIMIT"
    RENAME_TABLE_WITH_DB = False
    NVL2_SUPPORTED = False
    UNNEST_WITH_ORDINALITY = False
    COLLATE_IS_FUNC = True
    LIMIT_ONLY_LITERALS = True
    SUPPORTS_TABLE_ALIAS_COLUMNS = False
    UNPIVOT_ALIASES_ARE_IDENTIFIERS = False
    JSON_KEY_VALUE_PAIR_SEP = ","
    NULL_ORDERING_SUPPORTED = False

    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
        exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
        exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
        exp.ArrayContains: _array_contains_sql,
        exp.ArraySize: rename_func("ARRAY_LENGTH"),
        exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]),
        exp.CollateProperty: lambda self, e: f"DEFAULT COLLATE {self.sql(e, 'this')}"
        if e.args.get("default")
        else f"COLLATE {self.sql(e, 'this')}",
        exp.CountIf: rename_func("COUNTIF"),
        exp.Create: _create_sql,
        exp.CTE: transforms.preprocess([_pushdown_cte_column_names]),
        exp.DateAdd: date_add_interval_sql("DATE", "ADD"),
        exp.DateDiff: lambda self, e: f"DATE_DIFF({self.sql(e, 'this')}, {self.sql(e, 'expression')}, {self.sql(e.args.get('unit', 'DAY'))})",
        exp.DateFromParts: rename_func("DATE"),
        exp.DateStrToDate: datestrtodate_sql,
        exp.DateSub: date_add_interval_sql("DATE", "SUB"),
        exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"),
        exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"),
        exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")),
        # FROM_TZ-style conversion: build a TIMESTAMP in the source zone, then
        # render it as a DATETIME in UTC.
        exp.FromTimeZone: lambda self, e: self.func(
            "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'"
        ),
        exp.GenerateSeries: rename_func("GENERATE_ARRAY"),
        exp.GetPath: path_to_jsonpath(),
        exp.GroupConcat: rename_func("STRING_AGG"),
        exp.Hex: rename_func("TO_HEX"),
        exp.If: if_sql(false_value="NULL"),
        exp.ILike: no_ilike_sql,
        exp.IntDiv: rename_func("DIV"),
        exp.JSONFormat: rename_func("TO_JSON_STRING"),
        exp.Max: max_or_greatest,
        # MD5 returns BYTES in BigQuery; wrap in TO_HEX to match the canonical
        # hex-string MD5 semantics.
        exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)),
        exp.MD5Digest: rename_func("MD5"),
        exp.Min: min_or_least,
        exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
        exp.RegexpExtract: lambda self, e: self.func(
            "REGEXP_EXTRACT",
            e.this,
            e.expression,
            e.args.get("position"),
            e.args.get("occurrence"),
        ),
        exp.RegexpReplace: regexp_replace_sql,
        exp.RegexpLike: rename_func("REGEXP_CONTAINS"),
        exp.ReturnsProperty: _returnsproperty_sql,
        exp.Select: transforms.preprocess(
            [
                transforms.explode_to_unnest(),
                _unqualify_unnest,
                transforms.eliminate_distinct_on,
                _alias_ordered_group,
                transforms.eliminate_semi_and_anti_joins,
            ]
        ),
        exp.SHA2: lambda self, e: self.func(
            f"SHA256" if e.text("length") == "256" else "SHA512", e.this
        ),
        exp.StabilityProperty: lambda self, e: f"DETERMINISTIC"
        if e.name == "IMMUTABLE"
        else "NOT DETERMINISTIC",
        exp.StrToDate: lambda self, e: f"PARSE_DATE({self.format_time(e)}, {self.sql(e, 'this')})",
        exp.StrToTime: lambda self, e: self.func(
            "PARSE_TIMESTAMP", self.format_time(e), e.this, e.args.get("zone")
        ),
        exp.TimeAdd: date_add_interval_sql("TIME", "ADD"),
        exp.TimeFromParts: rename_func("TIME"),
        exp.TimeSub: date_add_interval_sql("TIME", "SUB"),
        exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"),
        exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"),
        exp.TimeStrToTime: timestrtotime_sql,
        exp.Trim: lambda self, e: self.func(f"TRIM", e.this, e.expression),
        exp.TsOrDsAdd: _ts_or_ds_add_sql,
        exp.TsOrDsDiff: _ts_or_ds_diff_sql,
        exp.TsOrDsToTime: rename_func("TIME"),
        exp.Unhex: rename_func("FROM_HEX"),
        exp.UnixDate: rename_func("UNIX_DATE"),
        exp.UnixToTime: _unix_to_time_sql,
        exp.Values: _derived_table_values_to_unnest,
        exp.VariancePop: rename_func("VAR_POP"),
    }

    # Canonical type -> BigQuery type name. Note that the naive TIMESTAMP maps
    # to DATETIME, while the timezone-aware flavors map to BigQuery TIMESTAMP.
    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC",
        exp.DataType.Type.BIGINT: "INT64",
        exp.DataType.Type.BINARY: "BYTES",
        exp.DataType.Type.BOOLEAN: "BOOL",
        exp.DataType.Type.CHAR: "STRING",
        exp.DataType.Type.DECIMAL: "NUMERIC",
        exp.DataType.Type.DOUBLE: "FLOAT64",
        exp.DataType.Type.FLOAT: "FLOAT64",
        exp.DataType.Type.INT: "INT64",
        exp.DataType.Type.NCHAR: "STRING",
        exp.DataType.Type.NVARCHAR: "STRING",
        exp.DataType.Type.SMALLINT: "INT64",
        exp.DataType.Type.TEXT: "STRING",
        exp.DataType.Type.TIMESTAMP: "DATETIME",
        exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
        exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP",
        exp.DataType.Type.TINYINT: "INT64",
        exp.DataType.Type.VARBINARY: "BYTES",
        exp.DataType.Type.VARCHAR: "STRING",
        exp.DataType.Type.VARIANT: "ANY TYPE",
    }

    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
    }

    # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords
    RESERVED_KEYWORDS = {
        *generator.Generator.RESERVED_KEYWORDS,
        "all", "and", "any", "array", "as", "asc", "assert_rows_modified", "at",
        "between", "by", "case", "cast", "collate", "contains", "create", "cross",
        "cube", "current", "default", "define", "desc", "distinct", "else", "end",
        "enum", "escape", "except", "exclude", "exists", "extract", "false",
        "fetch", "following", "for", "from", "full", "group", "grouping", "groups",
        "hash", "having", "if", "ignore", "in", "inner", "intersect", "interval",
        "into", "is", "join", "lateral", "left", "like", "limit", "lookup",
        "merge", "natural", "new", "no", "not", "null", "nulls", "of", "on", "or",
        "order", "outer", "over", "partition", "preceding", "proto", "qualify",
        "range", "recursive", "respect", "right", "rollup", "rows", "select",
        "set", "some", "struct", "tablesample", "then", "to", "treat", "true",
        "unbounded", "union", "unnest", "using", "when", "where", "window", "with",
        "within",
    }

    def timetostr_sql(self, expression: exp.TimeToStr) -> str:
        """Render TimeToStr as FORMAT_DATE(fmt, date).

        When the operand is already a TsOrDsToDate wrapper, unwrap it so the
        inner date expression is formatted directly.
        """
        if isinstance(expression.this, exp.TsOrDsToDate):
            this: exp.Expression = expression.this
        else:
            this = expression

        return f"FORMAT_DATE({self.format_time(expression)}, {self.sql(this, 'this')})"

    def struct_sql(self, expression: exp.Struct) -> str:
        """Render a STRUCT literal, emitting `value AS name` for key/value pairs."""
        args = []
        for expr in expression.expressions:
            if isinstance(expr, self.KEY_VALUE_DEFINITIONS):
                arg = f"{self.sql(expr, 'expression')} AS {expr.this.name}"
            else:
                arg = self.sql(expr)

            args.append(arg)

        return self.func("STRUCT", *args)

    def eq_sql(self, expression: exp.EQ) -> str:
        # Operands of = cannot be NULL in BigQuery
        if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null):
            if not isinstance(expression.parent, exp.Update):
                return "NULL"

        return self.binary(expression, "=")

    def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
        parent = expression.parent

        # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
        # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included.
        if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"):
            return self.func(
                "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
            )

        return super().attimezone_sql(expression)

    def trycast_sql(self, expression: exp.TryCast) -> str:
        # TRY_CAST is spelled SAFE_CAST in BigQuery.
        return self.cast_sql(expression, safe_prefix="SAFE_")

    def cte_sql(self, expression: exp.CTE) -> str:
        # BigQuery has no WITH cte(col1, col2) AS ... column-list syntax.
        if expression.alias_column_names:
            self.unsupported("Column names in CTE definition are not supported.")
        return super().cte_sql(expression)

    def array_sql(self, expression: exp.Array) -> str:
        # ARRAY(<subquery>) keeps the function form; plain arrays use [..] syntax.
        first_arg = seq_get(expression.expressions, 0)
        if isinstance(first_arg, exp.Subqueryable):
            return f"ARRAY{self.wrap(self.sql(first_arg))}"

        return inline_array_sql(self, expression)

    def bracket_sql(self, expression: exp.Bracket) -> str:
        """Render subscript access, mapping offsets to OFFSET/ORDINAL wrappers
        and string subscripts to dot access (BQ lacks string bracket syntax)."""
        this = self.sql(expression, "this")
        expressions = expression.expressions

        if len(expressions) == 1:
            arg = expressions[0]
            if arg.type is None:
                from sqlglot.optimizer.annotate_types import annotate_types

                arg = annotate_types(arg)

            if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
                # BQ doesn't support bracket syntax with string values
                return f"{this}.{arg.name}"

        expressions_sql = ", ".join(self.sql(e) for e in expressions)
        offset = expression.args.get("offset")

        if offset == 0:
            expressions_sql = f"OFFSET({expressions_sql})"
        elif offset == 1:
            expressions_sql = f"ORDINAL({expressions_sql})"
        elif offset is not None:
            self.unsupported(f"Unsupported array offset: {offset}")

        if expression.args.get("safe"):
            expressions_sql = f"SAFE_{expressions_sql}"

        return f"{this}[{expressions_sql}]"

    def transaction_sql(self, *_) -> str:
        return "BEGIN TRANSACTION"

    def commit_sql(self, *_) -> str:
        return "COMMIT TRANSACTION"

    def rollback_sql(self, *_) -> str:
        return "ROLLBACK TRANSACTION"

    def in_unnest_op(self, expression: exp.Unnest) -> str:
        # `x IN UNNEST(arr)` does not parenthesize the UNNEST operand.
        return self.sql(expression)

    def except_op(self, expression: exp.Except) -> str:
        if not expression.args.get("distinct", False):
            self.unsupported("EXCEPT without DISTINCT is not supported in BigQuery")
        return f"EXCEPT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

    def intersect_op(self, expression: exp.Intersect) -> str:
        if not expression.args.get("distinct", False):
            self.unsupported("INTERSECT without DISTINCT is not supported in BigQuery")
        return f"INTERSECT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

    def with_properties(self, properties: exp.Properties) -> str:
        # Table/column properties are emitted in an OPTIONS(...) clause.
        return self.properties(properties, prefix=self.seg("OPTIONS"))

    def version_sql(self, expression: exp.Version) -> str:
        # FOR SYSTEM_TIME AS OF is BigQuery's time-travel spelling.
        if expression.name == "TIMESTAMP":
            expression.set("this", "SYSTEM_TIME")
        return super().version_sql(expression)
Determines the day of week of DATE_TRUNC(week). Defaults to 0 (Monday). -1 would be Sunday.
Determines whether or not UNNEST table aliases are treated as column aliases.
Determines whether or not user-defined data types are supported.
Specifies the strategy according to which identifiers should be normalized.
Associates this dialect's time formats with their equivalent Python strftime format.
Mapping of an unescaped escape sequence to the corresponding character.
Helper which is used for parsing the special syntax CAST(x AS DATE FORMAT 'yyyy').
If empty, the corresponding trie will be constructed off of TIME_MAPPING.
Columns that are auto-generated by the engine corresponding to this dialect. For example, such columns may be excluded from SELECT * queries.
272 def normalize_identifier(self, expression: E) -> E: 273 if isinstance(expression, exp.Identifier): 274 parent = expression.parent 275 while isinstance(parent, exp.Dot): 276 parent = parent.parent 277 278 # In BigQuery, CTEs aren't case-sensitive, but table names are (by default, at least). 279 # The following check is essentially a heuristic to detect tables based on whether or 280 # not they're qualified. It also avoids normalizing UDFs, because they're case-sensitive. 281 if ( 282 not isinstance(parent, exp.UserDefinedFunction) 283 and not (isinstance(parent, exp.Table) and parent.db) 284 and not expression.meta.get("is_table") 285 ): 286 expression.set("this", expression.this.lower()) 287 288 return expression
Transforms an identifier in a way that resembles how it'd be resolved by this dialect. For example, an identifier like FoO would be resolved as foo in Postgres, because it lowercases all unquoted identifiers. On the other hand, Snowflake uppercases them, so it would resolve it as FOO. If it was quoted, it'd need to be treated as case-sensitive, and so any normalization would be prohibited in order to avoid "breaking" the identifier.
There are also dialects like Spark, which are case-insensitive even when quotes are present, and dialects like MySQL, whose resolution rules match those employed by the underlying operating system, for example they may always be case-sensitive in Linux.
Finally, the normalization behavior of some engines can even be controlled through flags, like in Redshift's case, where users can explicitly set enable_case_sensitive_identifier.
SQLGlot aims to understand and handle all of these different behaviors gracefully, so that it can analyze queries in the optimizer and successfully capture their semantics.
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- NULL_ORDERING
- TYPED_DIVISION
- SAFE_DIVISION
- CONCAT_COALESCE
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- PREFER_CTE_ALIAS_COLUMN
- get_or_raise
- format_time
- case_sensitive
- can_identify
- quote_identifier
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- parser
- generator
290 class Tokenizer(tokens.Tokenizer): 291 QUOTES = ["'", '"', '"""', "'''"] 292 COMMENTS = ["--", "#", ("/*", "*/")] 293 IDENTIFIERS = ["`"] 294 STRING_ESCAPES = ["\\"] 295 296 HEX_STRINGS = [("0x", ""), ("0X", "")] 297 298 BYTE_STRINGS = [ 299 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B") 300 ] 301 302 RAW_STRINGS = [ 303 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R") 304 ] 305 306 KEYWORDS = { 307 **tokens.Tokenizer.KEYWORDS, 308 "ANY TYPE": TokenType.VARIANT, 309 "BEGIN": TokenType.COMMAND, 310 "BEGIN TRANSACTION": TokenType.BEGIN, 311 "BYTES": TokenType.BINARY, 312 "CURRENT_DATETIME": TokenType.CURRENT_DATETIME, 313 "DECLARE": TokenType.COMMAND, 314 "FLOAT64": TokenType.DOUBLE, 315 "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT, 316 "MODEL": TokenType.MODEL, 317 "NOT DETERMINISTIC": TokenType.VOLATILE, 318 "RECORD": TokenType.STRUCT, 319 "TIMESTAMP": TokenType.TIMESTAMPTZ, 320 } 321 KEYWORDS.pop("DIV")
class Parser(parser.Parser):
    """Parser for BigQuery's Standard SQL dialect."""

    PREFIXED_PIVOT_COLUMNS = True

    LOG_DEFAULTS_TO_LN = True  # single-argument LOG(x) means natural log

    FUNCTIONS = {
        **parser.Parser.FUNCTIONS,
        "DATE": _parse_date,
        "DATE_ADD": parse_date_delta_with_interval(exp.DateAdd),
        "DATE_SUB": parse_date_delta_with_interval(exp.DateSub),
        "DATE_TRUNC": lambda args: exp.DateTrunc(
            unit=exp.Literal.string(str(seq_get(args, 1))),
            this=seq_get(args, 0),
        ),
        "DATETIME_ADD": parse_date_delta_with_interval(exp.DatetimeAdd),
        "DATETIME_SUB": parse_date_delta_with_interval(exp.DatetimeSub),
        "DIV": binary_from_function(exp.IntDiv),
        # FORMAT_DATE(fmt, date): args are swapped into the canonical
        # TimeToStr(this, format) layout.
        "FORMAT_DATE": lambda args: exp.TimeToStr(
            this=exp.TsOrDsToDate(this=seq_get(args, 1)), format=seq_get(args, 0)
        ),
        "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list,
        # Missing path argument defaults to the JSONPath root "$".
        "JSON_EXTRACT_SCALAR": lambda args: exp.JSONExtractScalar(
            this=seq_get(args, 0), expression=seq_get(args, 1) or exp.Literal.string("$")
        ),
        "MD5": exp.MD5Digest.from_arg_list,
        "TO_HEX": _parse_to_hex,
        # PARSE_DATE(fmt, value): swap into the (value, fmt) order StrToDate expects.
        "PARSE_DATE": lambda args: format_time_lambda(exp.StrToDate, "bigquery")(
            [seq_get(args, 1), seq_get(args, 0)]
        ),
        "PARSE_TIMESTAMP": _parse_timestamp,
        "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list,
        "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
            this=seq_get(args, 0),
            expression=seq_get(args, 1),
            position=seq_get(args, 2),
            occurrence=seq_get(args, 3),
            # BigQuery returns the capture group when the pattern has exactly one.
            group=exp.Literal.number(1) if re.compile(args[1].name).groups == 1 else None,
        ),
        "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
        "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
        "SPLIT": lambda args: exp.Split(
            # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split
            this=seq_get(args, 0),
            expression=seq_get(args, 1) or exp.Literal.string(","),
        ),
        "TIME": _parse_time,
        "TIME_ADD": parse_date_delta_with_interval(exp.TimeAdd),
        "TIME_SUB": parse_date_delta_with_interval(exp.TimeSub),
        "TIMESTAMP_ADD": parse_date_delta_with_interval(exp.TimestampAdd),
        "TIMESTAMP_SUB": parse_date_delta_with_interval(exp.TimestampSub),
        "TIMESTAMP_MICROS": lambda args: exp.UnixToTime(
            this=seq_get(args, 0), scale=exp.UnixToTime.MICROS
        ),
        "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime(
            this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
        ),
        "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)),
        "TO_JSON_STRING": exp.JSONFormat.from_arg_list,
    }

    FUNCTION_PARSERS = {
        **parser.Parser.FUNCTION_PARSERS,
        # ARRAY(<subquery>) takes a statement rather than an expression list.
        "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]),
    }
    # TRIM is parsed as an ordinary function in BigQuery (no FROM/BOTH syntax).
    FUNCTION_PARSERS.pop("TRIM")

    NO_PAREN_FUNCTIONS = {
        **parser.Parser.NO_PAREN_FUNCTIONS,
        TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
    }

    NESTED_TYPE_TOKENS = {
        *parser.Parser.NESTED_TYPE_TOKENS,
        TokenType.TABLE,
    }

    ID_VAR_TOKENS = {
        *parser.Parser.ID_VAR_TOKENS,
        TokenType.VALUES,
    }

    PROPERTY_PARSERS = {
        **parser.Parser.PROPERTY_PARSERS,
        "NOT DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("VOLATILE")
        ),
        "OPTIONS": lambda self: self._parse_with_property(),
    }

    CONSTRAINT_PARSERS = {
        **parser.Parser.CONSTRAINT_PARSERS,
        "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()),
    }

    RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy()
    RANGE_PARSERS.pop(TokenType.OVERLAPS, None)

    NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN}

    STATEMENT_PARSERS = {
        **parser.Parser.STATEMENT_PARSERS,
        TokenType.END: lambda self: self._parse_as_command(self._prev),
        TokenType.FOR: lambda self: self._parse_for_in(),
    }

    # Subscript wrappers: name -> (index base, SAFE_ i.e. NULL-on-out-of-bounds).
    BRACKET_OFFSETS = {
        "OFFSET": (0, False),
        "ORDINAL": (1, False),
        "SAFE_OFFSET": (0, True),
        "SAFE_ORDINAL": (1, True),
    }

    def _parse_for_in(self) -> exp.ForIn:
        """Parse BigQuery's procedural `FOR x IN (...) DO <stmt>` construct."""
        this = self._parse_range()
        self._match_text_seq("DO")
        return self.expression(exp.ForIn, this=this, expression=self._parse_statement())

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        """Parse one dotted table-name component, allowing dashes and leading digits."""
        this = super()._parse_table_part(schema=schema) or self._parse_number()

        # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names
        if isinstance(this, exp.Identifier):
            table_name = this.name
            # Fold `foo-bar-baz` style project/table names into one identifier.
            while self._match(TokenType.DASH, advance=False) and self._next:
                self._advance(2)
                table_name += f"-{self._prev.text}"

            this = exp.Identifier(this=table_name, quoted=this.args.get("quoted"))
        elif isinstance(this, exp.Literal):
            # A leading number (e.g. `123abc`) tokenizes as a literal; glue the
            # adjacent var back on and force quoting.
            table_name = this.name

            if self._is_connected() and self._parse_var(any_token=True):
                table_name += self._prev.text

            this = exp.Identifier(this=table_name, quoted=True)

        return this

    def _parse_table_parts(self, schema: bool = False) -> exp.Table:
        """Re-split a single quoted identifier like `proj.dataset.tbl` into parts."""
        table = super()._parse_table_parts(schema=schema)
        if isinstance(table.this, exp.Identifier) and "." in table.name:
            catalog, db, this, *rest = (
                t.cast(t.Optional[exp.Expression], exp.to_identifier(x))
                for x in split_num_words(table.name, ".", 3)
            )

            if rest and this:
                this = exp.Dot.build(t.cast(t.List[exp.Expression], [this, *rest]))

            table = exp.Table(this=this, db=db, catalog=catalog)

        return table

    @t.overload
    def _parse_json_object(self, agg: Literal[False]) -> exp.JSONObject:
        ...

    @t.overload
    def _parse_json_object(self, agg: Literal[True]) -> exp.JSONObjectAgg:
        ...

    def _parse_json_object(self, agg=False):
        json_object = super()._parse_json_object()
        array_kv_pair = seq_get(json_object.expressions, 0)

        # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation
        # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2
        if (
            array_kv_pair
            and isinstance(array_kv_pair.this, exp.Array)
            and isinstance(array_kv_pair.expression, exp.Array)
        ):
            keys = array_kv_pair.this.expressions
            values = array_kv_pair.expression.expressions

            json_object.set(
                "expressions",
                [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)],
            )

        return json_object

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Rewrite OFFSET/ORDINAL/SAFE_* subscript wrappers into bracket args."""
        bracket = super()._parse_bracket(this)

        if this is bracket:
            return bracket

        if isinstance(bracket, exp.Bracket):
            for expression in bracket.expressions:
                name = expression.name.upper()

                if name not in self.BRACKET_OFFSETS:
                    break

                offset, safe = self.BRACKET_OFFSETS[name]
                bracket.set("offset", offset)
                bracket.set("safe", safe)
                # Replace the wrapper call with its single argument.
                expression.replace(expression.expressions[0])

        return bracket
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
Inherited Members
- sqlglot.parser.Parser
- Parser
- STRUCT_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- INTERVAL_VARS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- MODIFIABLES
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- STRICT_CAST
- IDENTIFY_PIVOT_STRINGS
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_UNION
- UNION_MODIFIERS
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
525 class Generator(generator.Generator): 526 EXPLICIT_UNION = True 527 INTERVAL_ALLOWS_PLURAL_FORM = False 528 JOIN_HINTS = False 529 QUERY_HINTS = False 530 TABLE_HINTS = False 531 LIMIT_FETCH = "LIMIT" 532 RENAME_TABLE_WITH_DB = False 533 NVL2_SUPPORTED = False 534 UNNEST_WITH_ORDINALITY = False 535 COLLATE_IS_FUNC = True 536 LIMIT_ONLY_LITERALS = True 537 SUPPORTS_TABLE_ALIAS_COLUMNS = False 538 UNPIVOT_ALIASES_ARE_IDENTIFIERS = False 539 JSON_KEY_VALUE_PAIR_SEP = "," 540 NULL_ORDERING_SUPPORTED = False 541 542 TRANSFORMS = { 543 **generator.Generator.TRANSFORMS, 544 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 545 exp.ArgMax: arg_max_or_min_no_count("MAX_BY"), 546 exp.ArgMin: arg_max_or_min_no_count("MIN_BY"), 547 exp.ArrayContains: _array_contains_sql, 548 exp.ArraySize: rename_func("ARRAY_LENGTH"), 549 exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]), 550 exp.CollateProperty: lambda self, e: f"DEFAULT COLLATE {self.sql(e, 'this')}" 551 if e.args.get("default") 552 else f"COLLATE {self.sql(e, 'this')}", 553 exp.CountIf: rename_func("COUNTIF"), 554 exp.Create: _create_sql, 555 exp.CTE: transforms.preprocess([_pushdown_cte_column_names]), 556 exp.DateAdd: date_add_interval_sql("DATE", "ADD"), 557 exp.DateDiff: lambda self, e: f"DATE_DIFF({self.sql(e, 'this')}, {self.sql(e, 'expression')}, {self.sql(e.args.get('unit', 'DAY'))})", 558 exp.DateFromParts: rename_func("DATE"), 559 exp.DateStrToDate: datestrtodate_sql, 560 exp.DateSub: date_add_interval_sql("DATE", "SUB"), 561 exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"), 562 exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"), 563 exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")), 564 exp.FromTimeZone: lambda self, e: self.func( 565 "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'" 566 ), 567 exp.GenerateSeries: rename_func("GENERATE_ARRAY"), 568 exp.GetPath: path_to_jsonpath(), 569 exp.GroupConcat: 
rename_func("STRING_AGG"), 570 exp.Hex: rename_func("TO_HEX"), 571 exp.If: if_sql(false_value="NULL"), 572 exp.ILike: no_ilike_sql, 573 exp.IntDiv: rename_func("DIV"), 574 exp.JSONFormat: rename_func("TO_JSON_STRING"), 575 exp.Max: max_or_greatest, 576 exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)), 577 exp.MD5Digest: rename_func("MD5"), 578 exp.Min: min_or_least, 579 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 580 exp.RegexpExtract: lambda self, e: self.func( 581 "REGEXP_EXTRACT", 582 e.this, 583 e.expression, 584 e.args.get("position"), 585 e.args.get("occurrence"), 586 ), 587 exp.RegexpReplace: regexp_replace_sql, 588 exp.RegexpLike: rename_func("REGEXP_CONTAINS"), 589 exp.ReturnsProperty: _returnsproperty_sql, 590 exp.Select: transforms.preprocess( 591 [ 592 transforms.explode_to_unnest(), 593 _unqualify_unnest, 594 transforms.eliminate_distinct_on, 595 _alias_ordered_group, 596 transforms.eliminate_semi_and_anti_joins, 597 ] 598 ), 599 exp.SHA2: lambda self, e: self.func( 600 f"SHA256" if e.text("length") == "256" else "SHA512", e.this 601 ), 602 exp.StabilityProperty: lambda self, e: f"DETERMINISTIC" 603 if e.name == "IMMUTABLE" 604 else "NOT DETERMINISTIC", 605 exp.StrToDate: lambda self, e: f"PARSE_DATE({self.format_time(e)}, {self.sql(e, 'this')})", 606 exp.StrToTime: lambda self, e: self.func( 607 "PARSE_TIMESTAMP", self.format_time(e), e.this, e.args.get("zone") 608 ), 609 exp.TimeAdd: date_add_interval_sql("TIME", "ADD"), 610 exp.TimeFromParts: rename_func("TIME"), 611 exp.TimeSub: date_add_interval_sql("TIME", "SUB"), 612 exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"), 613 exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"), 614 exp.TimeStrToTime: timestrtotime_sql, 615 exp.Trim: lambda self, e: self.func(f"TRIM", e.this, e.expression), 616 exp.TsOrDsAdd: _ts_or_ds_add_sql, 617 exp.TsOrDsDiff: _ts_or_ds_diff_sql, 618 exp.TsOrDsToTime: rename_func("TIME"), 619 exp.Unhex: 
rename_func("FROM_HEX"), 620 exp.UnixDate: rename_func("UNIX_DATE"), 621 exp.UnixToTime: _unix_to_time_sql, 622 exp.Values: _derived_table_values_to_unnest, 623 exp.VariancePop: rename_func("VAR_POP"), 624 } 625 626 TYPE_MAPPING = { 627 **generator.Generator.TYPE_MAPPING, 628 exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC", 629 exp.DataType.Type.BIGINT: "INT64", 630 exp.DataType.Type.BINARY: "BYTES", 631 exp.DataType.Type.BOOLEAN: "BOOL", 632 exp.DataType.Type.CHAR: "STRING", 633 exp.DataType.Type.DECIMAL: "NUMERIC", 634 exp.DataType.Type.DOUBLE: "FLOAT64", 635 exp.DataType.Type.FLOAT: "FLOAT64", 636 exp.DataType.Type.INT: "INT64", 637 exp.DataType.Type.NCHAR: "STRING", 638 exp.DataType.Type.NVARCHAR: "STRING", 639 exp.DataType.Type.SMALLINT: "INT64", 640 exp.DataType.Type.TEXT: "STRING", 641 exp.DataType.Type.TIMESTAMP: "DATETIME", 642 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 643 exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP", 644 exp.DataType.Type.TINYINT: "INT64", 645 exp.DataType.Type.VARBINARY: "BYTES", 646 exp.DataType.Type.VARCHAR: "STRING", 647 exp.DataType.Type.VARIANT: "ANY TYPE", 648 } 649 650 PROPERTIES_LOCATION = { 651 **generator.Generator.PROPERTIES_LOCATION, 652 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 653 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 654 } 655 656 # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords 657 RESERVED_KEYWORDS = { 658 *generator.Generator.RESERVED_KEYWORDS, 659 "all", 660 "and", 661 "any", 662 "array", 663 "as", 664 "asc", 665 "assert_rows_modified", 666 "at", 667 "between", 668 "by", 669 "case", 670 "cast", 671 "collate", 672 "contains", 673 "create", 674 "cross", 675 "cube", 676 "current", 677 "default", 678 "define", 679 "desc", 680 "distinct", 681 "else", 682 "end", 683 "enum", 684 "escape", 685 "except", 686 "exclude", 687 "exists", 688 "extract", 689 "false", 690 "fetch", 691 "following", 692 "for", 693 "from", 694 "full", 695 "group", 696 
"grouping", 697 "groups", 698 "hash", 699 "having", 700 "if", 701 "ignore", 702 "in", 703 "inner", 704 "intersect", 705 "interval", 706 "into", 707 "is", 708 "join", 709 "lateral", 710 "left", 711 "like", 712 "limit", 713 "lookup", 714 "merge", 715 "natural", 716 "new", 717 "no", 718 "not", 719 "null", 720 "nulls", 721 "of", 722 "on", 723 "or", 724 "order", 725 "outer", 726 "over", 727 "partition", 728 "preceding", 729 "proto", 730 "qualify", 731 "range", 732 "recursive", 733 "respect", 734 "right", 735 "rollup", 736 "rows", 737 "select", 738 "set", 739 "some", 740 "struct", 741 "tablesample", 742 "then", 743 "to", 744 "treat", 745 "true", 746 "unbounded", 747 "union", 748 "unnest", 749 "using", 750 "when", 751 "where", 752 "window", 753 "with", 754 "within", 755 } 756 757 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 758 if isinstance(expression.this, exp.TsOrDsToDate): 759 this: exp.Expression = expression.this 760 else: 761 this = expression 762 763 return f"FORMAT_DATE({self.format_time(expression)}, {self.sql(this, 'this')})" 764 765 def struct_sql(self, expression: exp.Struct) -> str: 766 args = [] 767 for expr in expression.expressions: 768 if isinstance(expr, self.KEY_VALUE_DEFINITIONS): 769 arg = f"{self.sql(expr, 'expression')} AS {expr.this.name}" 770 else: 771 arg = self.sql(expr) 772 773 args.append(arg) 774 775 return self.func("STRUCT", *args) 776 777 def eq_sql(self, expression: exp.EQ) -> str: 778 # Operands of = cannot be NULL in BigQuery 779 if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null): 780 if not isinstance(expression.parent, exp.Update): 781 return "NULL" 782 783 return self.binary(expression, "=") 784 785 def attimezone_sql(self, expression: exp.AtTimeZone) -> str: 786 parent = expression.parent 787 788 # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]). 789 # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included. 
790 if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"): 791 return self.func( 792 "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone")) 793 ) 794 795 return super().attimezone_sql(expression) 796 797 def trycast_sql(self, expression: exp.TryCast) -> str: 798 return self.cast_sql(expression, safe_prefix="SAFE_") 799 800 def cte_sql(self, expression: exp.CTE) -> str: 801 if expression.alias_column_names: 802 self.unsupported("Column names in CTE definition are not supported.") 803 return super().cte_sql(expression) 804 805 def array_sql(self, expression: exp.Array) -> str: 806 first_arg = seq_get(expression.expressions, 0) 807 if isinstance(first_arg, exp.Subqueryable): 808 return f"ARRAY{self.wrap(self.sql(first_arg))}" 809 810 return inline_array_sql(self, expression) 811 812 def bracket_sql(self, expression: exp.Bracket) -> str: 813 this = self.sql(expression, "this") 814 expressions = expression.expressions 815 816 if len(expressions) == 1: 817 arg = expressions[0] 818 if arg.type is None: 819 from sqlglot.optimizer.annotate_types import annotate_types 820 821 arg = annotate_types(arg) 822 823 if arg.type and arg.type.this in exp.DataType.TEXT_TYPES: 824 # BQ doesn't support bracket syntax with string values 825 return f"{this}.{arg.name}" 826 827 expressions_sql = ", ".join(self.sql(e) for e in expressions) 828 offset = expression.args.get("offset") 829 830 if offset == 0: 831 expressions_sql = f"OFFSET({expressions_sql})" 832 elif offset == 1: 833 expressions_sql = f"ORDINAL({expressions_sql})" 834 elif offset is not None: 835 self.unsupported(f"Unsupported array offset: {offset}") 836 837 if expression.args.get("safe"): 838 expressions_sql = f"SAFE_{expressions_sql}" 839 840 return f"{this}[{expressions_sql}]" 841 842 def transaction_sql(self, *_) -> str: 843 return "BEGIN TRANSACTION" 844 845 def commit_sql(self, *_) -> str: 846 return "COMMIT TRANSACTION" 847 848 def rollback_sql(self, *_) -> str: 849 return 
"ROLLBACK TRANSACTION" 850 851 def in_unnest_op(self, expression: exp.Unnest) -> str: 852 return self.sql(expression) 853 854 def except_op(self, expression: exp.Except) -> str: 855 if not expression.args.get("distinct", False): 856 self.unsupported("EXCEPT without DISTINCT is not supported in BigQuery") 857 return f"EXCEPT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}" 858 859 def intersect_op(self, expression: exp.Intersect) -> str: 860 if not expression.args.get("distinct", False): 861 self.unsupported("INTERSECT without DISTINCT is not supported in BigQuery") 862 return f"INTERSECT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}" 863 864 def with_properties(self, properties: exp.Properties) -> str: 865 return self.properties(properties, prefix=self.seg("OPTIONS")) 866 867 def version_sql(self, expression: exp.Version) -> str: 868 if expression.name == "TIMESTAMP": 869 expression.set("this", "SYSTEM_TIME") 870 return super().version_sql(expression)
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether or not to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case-insensitive.
- normalize: Whether or not to normalize identifiers to lowercase. Default: False.
- pad: Determines the pad size in a formatted string. Default: 2.
- indent: Determines the indentation size in a formatted string. Default: 2.
- normalize_functions: Whether or not to normalize all function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3.
- leading_comma: Determines whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False.
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether or not to preserve comments in the output SQL code. Default: True.
765 def struct_sql(self, expression: exp.Struct) -> str: 766 args = [] 767 for expr in expression.expressions: 768 if isinstance(expr, self.KEY_VALUE_DEFINITIONS): 769 arg = f"{self.sql(expr, 'expression')} AS {expr.this.name}" 770 else: 771 arg = self.sql(expr) 772 773 args.append(arg) 774 775 return self.func("STRUCT", *args)
785 def attimezone_sql(self, expression: exp.AtTimeZone) -> str: 786 parent = expression.parent 787 788 # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]). 789 # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included. 790 if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"): 791 return self.func( 792 "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone")) 793 ) 794 795 return super().attimezone_sql(expression)
812 def bracket_sql(self, expression: exp.Bracket) -> str: 813 this = self.sql(expression, "this") 814 expressions = expression.expressions 815 816 if len(expressions) == 1: 817 arg = expressions[0] 818 if arg.type is None: 819 from sqlglot.optimizer.annotate_types import annotate_types 820 821 arg = annotate_types(arg) 822 823 if arg.type and arg.type.this in exp.DataType.TEXT_TYPES: 824 # BQ doesn't support bracket syntax with string values 825 return f"{this}.{arg.name}" 826 827 expressions_sql = ", ".join(self.sql(e) for e in expressions) 828 offset = expression.args.get("offset") 829 830 if offset == 0: 831 expressions_sql = f"OFFSET({expressions_sql})" 832 elif offset == 1: 833 expressions_sql = f"ORDINAL({expressions_sql})" 834 elif offset is not None: 835 self.unsupported(f"Unsupported array offset: {offset}") 836 837 if expression.args.get("safe"): 838 expressions_sql = f"SAFE_{expressions_sql}" 839 840 return f"{this}[{expressions_sql}]"
Inherited Members
- sqlglot.generator.Generator
- Generator
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- COLUMN_JOIN_MARKS_SUPPORTED
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- SELECT_KINDS
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- STAR_MAPPING
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- PARAMETER_TOKEN
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- KEY_VALUE_DEFINITIONS
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- fetch_sql
- filter_sql
- hint_sql
- index_sql
- identifier_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_sql
- tablesample_sql
- pivot_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognize_sql
- query_modifiers
- offset_limit_modifiers
- after_having_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- union_sql
- union_op
- unnest_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- fromtimezone_sql
- add_sql
- and_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- altercolumn_sql
- renametable_sql
- renamecolumn_sql
- altertable_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- or_sql
- slice_sql
- sub_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- text_width
- format_time
- expressions
- op_expressions
- naked_property
- set_operation
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- operator_sql
- toarray_sql
- tsordstotime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql