Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion sqlglot-integration-tests
6 changes: 5 additions & 1 deletion sqlglot/expressions/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,11 @@ class Flatten(Expression, Func):

class StringToArray(Expression, Func):
    """Function expression exposed under ``STRING_TO_ARRAY`` / ``SPLIT_BY_STRING``.

    ``STRTOK_TO_ARRAY`` is deliberately absent from ``_sql_names``: that name
    is modelled by the separate ``StrtokToArray`` expression, so listing it
    here as well would leave two expressions claiming the same SQL name.
    """

    # Argument slots accepted by this node. ``True`` presumably marks a slot
    # as required and ``False`` as optional -- confirm against ``Expression``.
    arg_types = {"this": True, "expression": False, "null": False}

    # Single, effective assignment (a preceding assignment that also listed
    # "STRTOK_TO_ARRAY" was a dead store, immediately overwritten by this one).
    _sql_names = ["STRING_TO_ARRAY", "SPLIT_BY_STRING"]


class StrtokToArray(Expression, Func):
    """Function expression with a ``this`` slot and an ``expression`` slot.

    ``True`` / ``False`` in ``arg_types`` presumably mean required / optional
    -- confirm against ``Expression``.
    """

    arg_types = {
        "this": True,
        "expression": False,
    }


# Higher-order / lambda
Expand Down
29 changes: 29 additions & 0 deletions sqlglot/generators/duckdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -2112,6 +2112,16 @@ class DuckDBGenerator(generator.Generator):
"""
)

# Template for STRTOK_TO_ARRAY function transpilation
#
# Placeholders (filled in by `strtoktoarray_sql`):
#   :string    - the value to tokenize
#   :delimiter - the delimiter argument
#   :escaped   - the delimiter after regex-escaping, wrapped in '[' ... ']' below
#
# Shape of the generated expression:
#   - a NULL delimiter yields NULL;
#   - otherwise :string is split with REGEXP_SPLIT_TO_ARRAY, using the bracketed
#     :escaped value as the pattern, or '.^' when the delimiter is the empty string
#     (NOTE(review): '.^' is presumably a pattern that can never match, so the
#     string is left unsplit -- confirm);
#   - LIST_FILTER then drops every element equal to ''.
STRTOK_TO_ARRAY_TEMPLATE: exp.Expr = exp.maybe_parse(
"""
CASE WHEN :delimiter IS NULL THEN NULL
ELSE LIST_FILTER(
REGEXP_SPLIT_TO_ARRAY(:string, CASE WHEN :delimiter = '' THEN '.^' ELSE CONCAT('[', :escaped, ']') END),
x -> NOT x = ''
) END
"""
)

# Template for STRTOK function transpilation
#
# DuckDB itself doesn't have a strtok function. This handles the transpilation from Snowflake to DuckDB.
Expand Down Expand Up @@ -4345,6 +4355,25 @@ def strtok_sql(self, expression: exp.Strtok) -> str:

return self.function_fallback_sql(expression)

def strtoktoarray_sql(self, expression: exp.StrtokToArray) -> str:
    """Render ``exp.StrtokToArray`` by filling in ``STRTOK_TO_ARRAY_TEMPLATE``.

    Args:
        expression: The node to generate SQL for. ``this`` is the value to
            tokenize; the optional ``expression`` arg is the delimiter and
            falls back to a single-space string literal when missing.

    Returns:
        The SQL produced by ``self.sql`` for the populated template.
    """
    delimiters = expression.args.get("expression")
    if not delimiters:
        delimiters = exp.Literal.string(" ")

    # The template places this value between '[' and ']', so every regex
    # metacharacter in the delimiter is backslash-escaped first. A copy of the
    # delimiter node is used because the same node is also passed separately
    # as the `delimiter` placeholder below.
    escaped_delimiters = exp.RegexpReplace(
        this=delimiters.copy(),
        expression=exp.Literal.string(r"([\[\]^.\-*+?(){}|$\\])"),
        replacement=exp.Literal.string(r"\\\1"),
        modifiers=exp.Literal.string("g"),
    )

    template = self.STRTOK_TO_ARRAY_TEMPLATE.copy()
    populated = exp.replace_placeholders(
        template,
        string=expression.this,
        delimiter=delimiters,
        escaped=escaped_delimiters,
    )
    return self.sql(populated)

def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str:
result = self.func("APPROX_QUANTILE", expression.this, expression.args.get("quantile"))

Expand Down
1 change: 1 addition & 0 deletions sqlglot/generators/snowflake.py
Original file line number Diff line number Diff line change
Expand Up @@ -570,6 +570,7 @@ class SnowflakeGenerator(generator.Generator):
),
exp.StrToDate: lambda self, e: self.func("DATE", e.this, self.format_time(e)),
exp.StringToArray: rename_func("STRTOK_TO_ARRAY"),
exp.StrtokToArray: rename_func("STRTOK_TO_ARRAY"),
Comment thread
georgesittas marked this conversation as resolved.
exp.Stuff: rename_func("INSERT"),
exp.StPoint: rename_func("ST_MAKEPOINT"),
exp.TimeAdd: date_delta_sql("TIMEADD"),
Expand Down
4 changes: 4 additions & 0 deletions sqlglot/parsers/snowflake.py
Original file line number Diff line number Diff line change
Expand Up @@ -713,6 +713,10 @@ class SnowflakeParser(parser.Parser):
delimiter=seq_get(args, 1) or exp.Literal.string(" "),
part_index=seq_get(args, 2) or exp.Literal.number("1"),
),
"STRTOK_TO_ARRAY": lambda args: exp.StrtokToArray(
this=seq_get(args, 0),
expression=seq_get(args, 1) or exp.Literal.string(" "),
),
Comment thread
georgesittas marked this conversation as resolved.
"SYSTIMESTAMP": exp.CurrentTimestamp.from_arg_list,
"UNICODE": lambda args: exp.Unicode(this=seq_get(args, 0), empty_is_zero=True),
"WEEKISO": exp.WeekOfYear.from_arg_list,
Expand Down
1 change: 1 addition & 0 deletions sqlglot/typing/snowflake.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,7 @@ def _annotate_str_to_time(self: TypeAnnotator, expression: exp.StrToTime) -> exp
exp.RegexpExtractAll,
exp.Split,
exp.StringToArray,
exp.StrtokToArray,
)
},
**{
Expand Down
3 changes: 0 additions & 3 deletions tests/dialects/test_duckdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -975,9 +975,6 @@ def test_duckdb(self):
)
self.validate_all(
"STRING_TO_ARRAY(x, 'a')",
read={
"snowflake": "STRTOK_TO_ARRAY(x, 'a')",
},
Comment thread
georgesittas marked this conversation as resolved.
write={
"duckdb": "STR_SPLIT(x, 'a')",
"presto": "SPLIT(x, 'a')",
Expand Down
2 changes: 1 addition & 1 deletion tests/dialects/test_snowflake.py
Original file line number Diff line number Diff line change
Expand Up @@ -445,7 +445,7 @@ def test_snowflake(self):
self.validate_identity("SELECT {* EXCLUDE (col1)} FROM my_table")
self.validate_identity("SELECT {* EXCLUDE (col1, col2)} FROM my_table")
self.validate_identity("SELECT a, b, COUNT(*) FROM x GROUP BY ALL LIMIT 100")
self.validate_identity("STRTOK_TO_ARRAY('a b c')")
self.validate_identity("STRTOK_TO_ARRAY('a b c')", "STRTOK_TO_ARRAY('a b c', ' ')")
self.validate_identity("STRTOK_TO_ARRAY('a.b.c', '.')")
self.validate_identity("GET(a, b)")
self.validate_identity("INSERT INTO test VALUES (x'48FAF43B0AFCEF9B63EE3A93EE2AC2')")
Expand Down
Loading