diff --git a/sqlglot/expressions/properties.py b/sqlglot/expressions/properties.py
index e229153273..9c8c61b3ce 100644
--- a/sqlglot/expressions/properties.py
+++ b/sqlglot/expressions/properties.py
@@ -481,6 +481,12 @@ class StorageHandlerProperty(Property):
     arg_types = {"this": True}
 
 
+class UsingProperty(Property):
+    # Resource clause such as Hive's CREATE FUNCTION ... USING {JAR | FILE | ARCHIVE} '<path>'.
+    # kind: JAR, FILE, or ARCHIVE; this: the resource path (string literal)
+    arg_types = {"this": True, "kind": True}
+
+
 class TemporaryProperty(Property):
     arg_types = {"this": False}
 
diff --git a/sqlglot/generator.py b/sqlglot/generator.py
index fb5f6ff737..0f0b6513e1 100644
--- a/sqlglot/generator.py
+++ b/sqlglot/generator.py
@@ -743,6 +743,7 @@ class Generator:
         exp.TransformModelProperty: exp.Properties.Location.POST_SCHEMA,
         exp.MergeTreeTTL: exp.Properties.Location.POST_SCHEMA,
         exp.UnloggedProperty: exp.Properties.Location.POST_CREATE,
+        exp.UsingProperty: exp.Properties.Location.POST_EXPRESSION,
         exp.UsingTemplateProperty: exp.Properties.Location.POST_SCHEMA,
         exp.ViewAttributeProperty: exp.Properties.Location.POST_SCHEMA,
         exp.VirtualProperty: exp.Properties.Location.POST_CREATE,
diff --git a/sqlglot/generators/hive.py b/sqlglot/generators/hive.py
index b6d33b4d68..826934336d 100644
--- a/sqlglot/generators/hive.py
+++ b/sqlglot/generators/hive.py
@@ -531,6 +531,13 @@ def timetostr_sql(self, expression: exp.TimeToStr) -> str:
 
         return self.func("DATE_FORMAT", this, self.format_time(expression))
 
+    def usingproperty_sql(self, expression: exp.UsingProperty) -> str:
+        """Render a resource clause as ``USING <KIND> '<path>'``."""
+        # Go through self.sql so an unset kind renders as "" instead of the text "None".
+        kind = self.sql(expression, "kind").upper()
+        this = self.sql(expression, "this")
+        return " ".join(part for part in ("USING", kind, this) if part)
+
     def fileformatproperty_sql(self, expression: exp.FileFormatProperty) -> str:
         if isinstance(expression.this, exp.InputOutputFormat):
             this = self.sql(expression, "this")
diff --git a/sqlglot/parsers/hive.py b/sqlglot/parsers/hive.py
index 2652edd1e0..51a8a2cf45 100644
--- a/sqlglot/parsers/hive.py
+++ b/sqlglot/parsers/hive.py
@@ -120,6 +120,7 @@ class HiveParser(parser.Parser):
         "SERDEPROPERTIES": lambda self: exp.SerdeProperties(
             expressions=self._parse_wrapped_csv(self._parse_property)
         ),
+        "USING": lambda self: self._parse_using_property(),
     }
 
     ALTER_PARSERS = {
@@ -237,6 +238,15 @@ def _parse_alter_table_change(self) -> t.Optional[exp.Expr]:
             exp.AlterColumn(this=this, rename_to=column_new, dtype=dtype, comment=comment)
         )
 
+    def _parse_using_property(self) -> exp.Property:
+        """Parse what follows USING: a JAR/FILE/ARCHIVE resource or a file format."""
+        if self._match_texts(("JAR", "FILE", "ARCHIVE")):
+            kind = self._prev.text.upper()
+            return exp.UsingProperty(this=self._parse_string(), kind=kind)
+
+        # Anything else after USING is parsed as a FileFormatProperty assignment.
+        return self._parse_property_assignment(exp.FileFormatProperty)
+
     def _parse_partition_and_order(
         self,
     ) -> t.Tuple[t.List[exp.Expr], t.Optional[exp.Expr]]:
diff --git a/tests/dialects/test_hive.py b/tests/dialects/test_hive.py
index 551e4d5b5d..20cfafa26f 100644
--- a/tests/dialects/test_hive.py
+++ b/tests/dialects/test_hive.py
@@ -1108,3 +1108,66 @@ def test_percentile(self):
         quantile_expr.assert_is(exp.Quantile)
         quantile_expr.this.assert_is(exp.Column)
         quantile_expr.args.get("quantile").assert_is(exp.Literal)
+
+    def test_create_function_using(self):
+        """Round-trip and AST checks for CREATE FUNCTION ... USING {JAR | FILE | ARCHIVE}."""
+        # USING JAR
+        self.validate_identity(
+            "CREATE FUNCTION my_func AS 'com.example.MyFunc' USING JAR 'hdfs://path/to/my.jar'"
+        )
+
+        # OR REPLACE TEMPORARY with USING JAR
+        self.validate_identity(
+            "CREATE OR REPLACE TEMPORARY FUNCTION some_func AS 'my_jar.SomeFunctionUDF' USING JAR 's3://bucket/my.jar'"
+        )
+
+        # USING FILE
+        self.validate_identity(
+            "CREATE FUNCTION my_func AS 'com.example.MyFunc' USING FILE 'hdfs://path/to/file.py'"
+        )
+
+        # USING ARCHIVE
+        self.validate_identity(
+            "CREATE FUNCTION my_func AS 'com.example.MyFunc' USING ARCHIVE 'hdfs://path/to/archive.zip'"
+        )
+
+        # Verify the AST node is a Create with UsingProperty
+        expr = self.parse_one(
+            "CREATE FUNCTION my_func AS 'com.example.MyFunc' USING JAR 'hdfs://path/to/my.jar'"
+        )
+        self.assertIsInstance(expr, exp.Create)
+        using_prop = expr.find(exp.UsingProperty)
+        self.assertIsNotNone(using_prop)
+        self.assertEqual(using_prop.args["kind"], "JAR")
+        self.assertEqual(using_prop.this.this, "hdfs://path/to/my.jar")
+
+        # Verify programmatic construction
+        create = exp.Create(
+            this=exp.Table(this=exp.to_identifier("my_func")),
+            kind="FUNCTION",
+            expression=exp.Literal.string("com.example.MyFunc"),
+            properties=exp.Properties(
+                expressions=[
+                    exp.UsingProperty(this=exp.Literal.string("s3://bucket/new.jar"), kind="JAR")
+                ]
+            ),
+        )
+        self.assertEqual(
+            create.sql(dialect="hive"),
+            "CREATE FUNCTION my_func AS 'com.example.MyFunc' USING JAR 's3://bucket/new.jar'",
+        )
+
+        # Verify programmatic modification of the JAR path
+        expr = self.parse_one(
+            "CREATE FUNCTION my_func AS 'com.example.MyFunc' USING JAR 'hdfs://old/path.jar'"
+        )
+        using_prop = expr.find(exp.UsingProperty)
+        using_prop.set("this", exp.Literal.string("hdfs://new/path.jar"))
+        self.assertEqual(
+            expr.sql(dialect="hive"),
+            "CREATE FUNCTION my_func AS 'com.example.MyFunc' USING JAR 'hdfs://new/path.jar'",
+        )
+
+        # Verify a programmatically supplied lower-case kind is rendered upper-case
+        lower_kind = exp.UsingProperty(this=exp.Literal.string("s3://bucket/a.jar"), kind="jar")
+        self.assertEqual(lower_kind.sql(dialect="hive"), "USING JAR 's3://bucket/a.jar'")