-
Notifications
You must be signed in to change notification settings - Fork 29.3k
[SPARK-57459][SQL] Support nanosecond-precision timestamp types in the Avro datasource (v1 and v2) #56825
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
[SPARK-57459][SQL] Support nanosecond-precision timestamp types in the Avro datasource (v1 and v2) #56825
Changes from all commits
1d66513
c030188
90867e5
af1bc84
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -213,6 +213,28 @@ private[sql] class AvroDeserializer( | |
| s"Avro logical type $other cannot be converted to SQL type ${TimeType().sql}.") | ||
| } | ||
|
|
||
| case (LONG, t: TimestampLTZNanosType) => avroType.getLogicalType match { | ||
| // The timestamp-nanos logical type stores epoch-nanoseconds (Long), while the value is | ||
| // represented internally as (epochMicros, nanosWithinMicro). Floor semantics keep | ||
| // nanosWithinMicro in [0, 999] for pre-epoch values. Nanos timestamps are always proleptic | ||
| // Gregorian, so they are exempt from datetime rebasing. | ||
| case _: LogicalTypes.TimestampNanos => (updater, ordinal, value) => | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Do we really need to use fully qualified names in these cases? Same for AvroSerializer? |
||
| updater.set(ordinal, | ||
| DateTimeUtils.epochNanosToTimestampNanos(value.asInstanceOf[Long], t.precision)) | ||
| case other => throw new IncompatibleSchemaException(errorPrefix + | ||
| s"Avro logical type $other cannot be converted to SQL type " + | ||
| s"${TimestampLTZNanosType().sql}.") | ||
| } | ||
|
|
||
| case (LONG, t: TimestampNTZNanosType) => avroType.getLogicalType match { | ||
| case _: LogicalTypes.LocalTimestampNanos => (updater, ordinal, value) => | ||
| updater.set(ordinal, | ||
| DateTimeUtils.epochNanosToTimestampNanos(value.asInstanceOf[Long], t.precision)) | ||
| case other => throw new IncompatibleSchemaException(errorPrefix + | ||
| s"Avro logical type $other cannot be converted to SQL type " + | ||
| s"${TimestampNTZNanosType().sql}.") | ||
| } | ||
|
|
||
| // Before we upgrade Avro to 1.8 for logical type support, spark-avro converts Long to Date. | ||
| // For backward compatibility, we still keep this conversion. | ||
| case (LONG, DateType) => (updater, ordinal, value) => | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -123,9 +123,6 @@ private[sql] object AvroUtils extends Logging { | |
|
|
||
| case _: GeometryType | _: GeographyType => false | ||
|
|
||
| // Nanosecond-capable timestamps are not yet supported by this datasource. | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Shall we also update |
||
| case _: AnyTimestampNanoType => false | ||
|
|
||
| case _: AtomicType => true | ||
|
|
||
| case st: StructType => st.forall { f => supportsDataType(f.dataType) } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Test coverage is a bit narrower than for ORC/Parquet; for example, we're missing cases like: