Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion sqlglot-integration-tests
6 changes: 5 additions & 1 deletion sqlglot/expressions/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,11 @@ class Flatten(Expression, Func):

# Function expression that splits a string into an array.
# `arg_types` declares three argument slots: `this`, `expression` and `null`
# (presumably True marks a required slot and False an optional one — TODO confirm).
class StringToArray(Expression, Func):
arg_types = {"this": True, "expression": False, "null": False}
# NOTE(review): `_sql_names` is assigned twice in a row below. Read as plain
# Python, the second assignment wins, so "STRTOK_TO_ARRAY" is no longer one of
# this class's SQL names; only "STRING_TO_ARRAY" and "SPLIT_BY_STRING" remain.
# This looks like the removed/added pair of a diff — confirm which line is live.
_sql_names = ["STRING_TO_ARRAY", "SPLIT_BY_STRING", "STRTOK_TO_ARRAY"]
_sql_names = ["STRING_TO_ARRAY", "SPLIT_BY_STRING"]


class StrtokToArray(Expression, Func):
    # Expression node built for the STRTOK_TO_ARRAY function.
    #   this       -- the string to tokenize
    #   expression -- the delimiter argument; may be absent
    arg_types = {
        "this": True,
        "expression": False,
    }


# Higher-order / lambda
Expand Down
29 changes: 29 additions & 0 deletions sqlglot/generators/duckdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -2112,6 +2112,16 @@ class DuckDBGenerator(generator.Generator):
"""
)

# Parsed SQL template used to render STRTOK_TO_ARRAY.
#
# Placeholders:
#   :string    -- the string being tokenized
#   :delimiter -- the delimiter argument
#   :escaped   -- the delimiter after regex escaping, placed inside `[...]`
#
# Shape of the generated expression:
#   * a NULL delimiter yields NULL;
#   * otherwise the string is split with REGEXP_SPLIT_TO_ARRAY, using the
#     pattern '.^' when the delimiter is the empty string (presumably chosen
#     because it can never match, leaving the string unsplit — TODO confirm)
#     and the character class '[' || :escaped || ']' in every other case;
#   * LIST_FILTER then drops every element equal to ''.
STRTOK_TO_ARRAY_TEMPLATE: exp.Expr = exp.maybe_parse(
"""
CASE WHEN :delimiter IS NULL THEN NULL
ELSE LIST_FILTER(
REGEXP_SPLIT_TO_ARRAY(:string, CASE WHEN :delimiter = '' THEN '.^' ELSE CONCAT('[', :escaped, ']') END),
x -> NOT x = ''
) END
"""
)

# Template for STRTOK function transpilation
#
# DuckDB itself doesn't have a strtok function. This handles the transpilation from Snowflake to DuckDB.
Expand Down Expand Up @@ -4345,6 +4355,25 @@ def strtok_sql(self, expression: exp.Strtok) -> str:

return self.function_fallback_sql(expression)

def strtoktoarray_sql(self, expression: exp.StrtokToArray) -> str:
    """Render a `StrtokToArray` node by filling `STRTOK_TO_ARRAY_TEMPLATE`.

    Args:
        expression: The node to render. `this` is the input string and the
            optional `expression` arg is the delimiter; a single space is
            used when the delimiter is missing.

    Returns:
        The SQL string generated from the filled-in template.
    """
    delimiter = expression.args.get("expression")
    if not delimiter:
        delimiter = exp.Literal.string(" ")

    # The template places the delimiter inside a regex character class, so
    # wrap it in a global REGEXP_REPLACE intended to backslash-escape the
    # characters listed in the pattern. A copy is used so the delimiter node
    # itself stays available for the `:delimiter` placeholder.
    escaped_delimiter = exp.RegexpReplace(
        this=delimiter.copy(),
        expression=exp.Literal.string(r"([\[\]^.\-*+?(){}|$\\])"),
        replacement=exp.Literal.string(r"\\\1"),
        modifiers=exp.Literal.string("g"),
    )

    # Work on a copy of the template so the shared class attribute is untouched.
    filled_template = exp.replace_placeholders(
        self.STRTOK_TO_ARRAY_TEMPLATE.copy(),
        string=expression.this,
        delimiter=delimiter,
        escaped=escaped_delimiter,
    )
    return self.sql(filled_template)

def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str:
result = self.func("APPROX_QUANTILE", expression.this, expression.args.get("quantile"))

Expand Down
1 change: 1 addition & 0 deletions sqlglot/generators/snowflake.py
Original file line number Diff line number Diff line change
Expand Up @@ -570,6 +570,7 @@ class SnowflakeGenerator(generator.Generator):
),
exp.StrToDate: lambda self, e: self.func("DATE", e.this, self.format_time(e)),
exp.StringToArray: rename_func("STRTOK_TO_ARRAY"),
exp.StrtokToArray: rename_func("STRTOK_TO_ARRAY"),
Comment thread
georgesittas marked this conversation as resolved.
exp.Stuff: rename_func("INSERT"),
exp.StPoint: rename_func("ST_MAKEPOINT"),
exp.TimeAdd: date_delta_sql("TIMEADD"),
Expand Down
4 changes: 4 additions & 0 deletions sqlglot/parsers/snowflake.py
Original file line number Diff line number Diff line change
Expand Up @@ -713,6 +713,10 @@ class SnowflakeParser(parser.Parser):
delimiter=seq_get(args, 1) or exp.Literal.string(" "),
part_index=seq_get(args, 2) or exp.Literal.number("1"),
),
"STRTOK_TO_ARRAY": lambda args: exp.StrtokToArray(
this=seq_get(args, 0),
expression=seq_get(args, 1) or exp.Literal.string(" "),
),
Comment thread
georgesittas marked this conversation as resolved.
"SYSTIMESTAMP": exp.CurrentTimestamp.from_arg_list,
"UNICODE": lambda args: exp.Unicode(this=seq_get(args, 0), empty_is_zero=True),
"WEEKISO": exp.WeekOfYear.from_arg_list,
Expand Down
1 change: 1 addition & 0 deletions sqlglot/typing/snowflake.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,7 @@ def _annotate_str_to_time(self: TypeAnnotator, expression: exp.StrToTime) -> exp
exp.RegexpExtractAll,
exp.Split,
exp.StringToArray,
exp.StrtokToArray,
)
},
**{
Expand Down
3 changes: 0 additions & 3 deletions tests/dialects/test_duckdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -975,9 +975,6 @@ def test_duckdb(self):
)
self.validate_all(
"STRING_TO_ARRAY(x, 'a')",
read={
"snowflake": "STRTOK_TO_ARRAY(x, 'a')",
},
Comment thread
georgesittas marked this conversation as resolved.
write={
"duckdb": "STR_SPLIT(x, 'a')",
"presto": "SPLIT(x, 'a')",
Expand Down
2 changes: 1 addition & 1 deletion tests/dialects/test_snowflake.py
Original file line number Diff line number Diff line change
Expand Up @@ -445,7 +445,7 @@ def test_snowflake(self):
self.validate_identity("SELECT {* EXCLUDE (col1)} FROM my_table")
self.validate_identity("SELECT {* EXCLUDE (col1, col2)} FROM my_table")
self.validate_identity("SELECT a, b, COUNT(*) FROM x GROUP BY ALL LIMIT 100")
self.validate_identity("STRTOK_TO_ARRAY('a b c')")
self.validate_identity("STRTOK_TO_ARRAY('a b c')", "STRTOK_TO_ARRAY('a b c', ' ')")
self.validate_identity("STRTOK_TO_ARRAY('a.b.c', '.')")
self.validate_identity("GET(a, b)")
self.validate_identity("INSERT INTO test VALUES (x'48FAF43B0AFCEF9B63EE3A93EE2AC2')")
Expand Down
Loading