 #include "core/block/block.h"
 #include "core/block/column_numbers.h"
 #include "core/column/column_nullable.h"
+#include "core/column/column_struct.h"
+#include "core/column/column_vector.h"
 #include "core/custom_allocator.h"
 #include "exec/operator/operator.h"
 #include "exprs/table_function/table_function_factory.h"
@@ -264,30 +266,156 @@ Status TableFunctionLocalState::_get_expanded_block_block_fast_path(
264266 const auto & offsets = *_block_fast_path_ctx.offsets_ptr ;
265267 const auto child_rows = cast_set<int64_t >(offsets.size ());
266268
-    std::vector<uint32_t> row_ids;
-    row_ids.reserve(remaining_capacity);
-    uint64_t first_nested_idx = 0;
-    uint64_t expected_next_nested_idx = 0;
-    bool found_nested_range = false;
-
273269 int64_t child_row = _block_fast_path_row;
274270 uint64_t in_row_offset = _block_fast_path_in_row_offset;
275271 int produced_rows = 0 ;
276272
-    while (produced_rows < remaining_capacity && child_row < child_rows) {
-        if (_block_fast_path_ctx.array_nullmap_data &&
-            _block_fast_path_ctx.array_nullmap_data[child_row]) {
-            // NULL array row: skip it here. Slow path will handle output semantics if needed.
-            child_row++;
-            in_row_offset = 0;
-            continue;
273+ const bool is_outer = _fns[0 ]->is_outer ();
274+ const bool is_posexplode = _block_fast_path_ctx.generate_row_index ;
275+ auto & out_col = columns[p._child_slots .size ()];
276+
277+ // Decompose posexplode struct output column if needed
278+ ColumnStruct* struct_col_ptr = nullptr ;
279+ ColumnUInt8* outer_struct_nullmap_ptr = nullptr ;
280+ IColumn* value_col_ptr = nullptr ;
281+ ColumnInt32* pos_col_ptr = nullptr ;
282+ if (is_posexplode) {
283+ if (out_col->is_nullable ()) {
284+ auto * nullable = assert_cast<ColumnNullable*>(out_col.get ());
285+ struct_col_ptr = assert_cast<ColumnStruct*>(nullable->get_nested_column_ptr ().get ());
286+ outer_struct_nullmap_ptr =
287+ assert_cast<ColumnUInt8*>(nullable->get_null_map_column_ptr ().get ());
288+ } else {
289+ struct_col_ptr = assert_cast<ColumnStruct*>(out_col.get ());
290+ }
291+ pos_col_ptr = assert_cast<ColumnInt32*>(&struct_col_ptr->get_column (0 ));
292+ value_col_ptr = &struct_col_ptr->get_column (1 );
293+ }
294+ // Segment tracking: accumulate contiguous nested ranges, flush on boundaries.
295+ // Array column offsets are monotonically non-decreasing, so nested data across child rows
296+ // is always contiguous (even with NULL/empty rows that contribute zero elements).
297+ struct ExpandSegmentContext {
298+ std::vector<uint32_t >
299+ seg_row_ids; // row ids of non table-function columns to replicate for this segment
300+ std::vector<int32_t >
301+ seg_positions; // for posexplode, the position values to write for this segment
302+ int64_t seg_nested_start = -1 ; // start offset in the nested column of this segment
303+ int seg_nested_count =
304+ 0 ; // number of nested rows in this segment (can be > child row count due to multiple elements per row)
305+ };
306+ ExpandSegmentContext segment_ctx;
307+ segment_ctx.seg_row_ids .reserve (remaining_capacity);
308+ if (is_posexplode) {
309+ segment_ctx.seg_positions .reserve (remaining_capacity);
310+ }
311+
312+ auto reset_expand_segment_ctx = [&segment_ctx, is_posexplode]() {
313+ segment_ctx.seg_nested_start = -1 ;
314+ segment_ctx.seg_nested_count = 0 ;
315+ segment_ctx.seg_row_ids .clear ();
316+ if (is_posexplode) {
317+ segment_ctx.seg_positions .clear ();
318+ }
319+ };
320+
321+ // Flush accumulated contiguous segment to output columns
322+ auto flush_segment = [&]() {
323+ if (segment_ctx.seg_nested_count == 0 ) {
324+ return ;
325+ }
326+
327+ // Non-TF columns: replicate each child row for every output element
328+ for (auto index : p._output_slot_indexs ) {
329+ auto src_column = _child_block->get_by_position (index).column ;
330+ columns[index]->insert_indices_from (
331+ *src_column, segment_ctx.seg_row_ids .data (),
332+ segment_ctx.seg_row_ids .data () + segment_ctx.seg_row_ids .size ());
333+ }
334+
335+ if (is_posexplode) {
336+ // Write positions
337+ pos_col_ptr->insert_many_raw_data (
338+ reinterpret_cast <const char *>(segment_ctx.seg_positions .data ()),
339+ segment_ctx.seg_positions .size ());
340+ // Write nested values to the struct's value sub-column
341+ DCHECK (value_col_ptr->is_nullable ())
342+ << " posexplode fast path requires nullable value column" ;
343+ auto * val_nullable = assert_cast<ColumnNullable*>(value_col_ptr);
344+ val_nullable->get_nested_column_ptr ()->insert_range_from (
345+ *_block_fast_path_ctx.nested_col , segment_ctx.seg_nested_start ,
346+ segment_ctx.seg_nested_count );
347+ auto * val_nullmap =
348+ assert_cast<ColumnUInt8*>(val_nullable->get_null_map_column_ptr ().get ());
349+ auto & val_nullmap_data = val_nullmap->get_data ();
350+ const size_t old_size = val_nullmap_data.size ();
351+ val_nullmap_data.resize (old_size + segment_ctx.seg_nested_count );
352+ if (_block_fast_path_ctx.nested_nullmap_data != nullptr ) {
353+ memcpy (val_nullmap_data.data () + old_size,
354+ _block_fast_path_ctx.nested_nullmap_data + segment_ctx.seg_nested_start ,
355+ segment_ctx.seg_nested_count * sizeof (UInt8));
356+ } else {
357+ memset (val_nullmap_data.data () + old_size, 0 ,
358+ segment_ctx.seg_nested_count * sizeof (UInt8));
359+ }
360+ // Struct-level null map: these rows are not null
361+ if (outer_struct_nullmap_ptr) {
362+ outer_struct_nullmap_ptr->insert_many_defaults (segment_ctx.seg_nested_count );
363+ }
364+ } else if (out_col->is_nullable ()) {
365+ auto * out_nullable = assert_cast<ColumnNullable*>(out_col.get ());
366+ out_nullable->get_nested_column_ptr ()->insert_range_from (
367+ *_block_fast_path_ctx.nested_col , segment_ctx.seg_nested_start ,
368+ segment_ctx.seg_nested_count );
369+ auto * nullmap_column =
370+ assert_cast<ColumnUInt8*>(out_nullable->get_null_map_column_ptr ().get ());
371+ auto & nullmap_data = nullmap_column->get_data ();
372+ const size_t old_size = nullmap_data.size ();
373+ nullmap_data.resize (old_size + segment_ctx.seg_nested_count );
374+ if (_block_fast_path_ctx.nested_nullmap_data != nullptr ) {
375+ memcpy (nullmap_data.data () + old_size,
376+ _block_fast_path_ctx.nested_nullmap_data + segment_ctx.seg_nested_start ,
377+ segment_ctx.seg_nested_count * sizeof (UInt8));
378+ } else {
379+ memset (nullmap_data.data () + old_size, 0 ,
380+ segment_ctx.seg_nested_count * sizeof (UInt8));
381+ }
382+ } else {
383+ out_col->insert_range_from (*_block_fast_path_ctx.nested_col ,
384+ segment_ctx.seg_nested_start , segment_ctx.seg_nested_count );
284385 }
386+ reset_expand_segment_ctx ();
387+ };
388+
389+ // Emit one NULL output row for an outer-null/empty child row
390+ auto emit_outer_null = [&](int64_t cr) {
391+ for (auto index : p._output_slot_indexs ) {
392+ auto src_column = _child_block->get_by_position (index).column ;
393+ columns[index]->insert_from (*src_column, cr);
394+ }
395+ out_col->insert_default ();
396+ };
397+ // Walk through child rows, accumulating contiguous segments into the output,
398+ // then when hitting a null/empty row or reaching the end,
399+ // flush the segment using bulk operations.
400+ // For outer-null rows, insert a NULL and copy the non-table-function columns directly.
401+ // This naturally handles both outer and non-outer modes since non-outer mode
402+ // just won't produce any null outputs.
403+ // For posexplode, generate position indices alongside this.
404+ while (produced_rows < remaining_capacity && child_row < child_rows) {
405+ const bool is_null_row = _block_fast_path_ctx.array_nullmap_data &&
406+ _block_fast_path_ctx.array_nullmap_data [child_row];
285407
286408 const uint64_t prev_off = child_row == 0 ? 0 : offsets[child_row - 1 ];
287- const uint64_t cur_off = offsets[child_row];
409+ const uint64_t cur_off = is_null_row ? prev_off : offsets[child_row];
288410 const uint64_t nested_len = cur_off - prev_off;
289411
290- if (in_row_offset >= nested_len) {
412+ if (is_null_row || in_row_offset >= nested_len) {
413+ // for outer functions, emit null row for NULL or empty array rows
414+ if (is_outer && in_row_offset == 0 && (is_null_row || nested_len == 0 )) {
415+ flush_segment ();
416+ emit_outer_null (child_row);
417+ produced_rows++;
418+ }
291419 child_row++;
292420 in_row_offset = 0 ;
293421 continue ;
@@ -301,57 +429,37 @@ Status TableFunctionLocalState::_get_expanded_block_block_fast_path(
301429 DCHECK_LE (nested_start + take_count, cur_off);
302430 DCHECK_LE (nested_start + take_count, _block_fast_path_ctx.nested_col ->size ());
303431
304- if (!found_nested_range) {
305- found_nested_range = true ;
306- first_nested_idx = nested_start;
307- expected_next_nested_idx = nested_start;
432+ if (segment_ctx.seg_nested_count == 0 ) {
433+ segment_ctx.seg_nested_start = nested_start;
434+ } else {
435+ // Nested data from an array column is always contiguous: offsets are monotonically
436+ // non-decreasing, so skipping NULL/empty rows doesn't create gaps.
437+ DCHECK_EQ (static_cast <uint64_t >(segment_ctx.seg_nested_start +
438+ segment_ctx.seg_nested_count ),
439+ nested_start)
440+ << " nested data must be contiguous across child rows" ;
308441 }
309- DCHECK_EQ (nested_start, expected_next_nested_idx);
310442
311443 // Map each produced output row back to its source child row for copying non-table-function
312444 // columns via insert_indices_from().
313445 for (int j = 0 ; j < take_count; ++j) {
314- row_ids.push_back (cast_set<uint32_t >(child_row));
446+ segment_ctx.seg_row_ids .push_back (cast_set<uint32_t >(child_row));
447+ if (is_posexplode) {
448+ segment_ctx.seg_positions .push_back (cast_set<int32_t >(in_row_offset + j));
449+ }
315450 }
316451
452+ segment_ctx.seg_nested_count += take_count;
317453 produced_rows += take_count;
318- expected_next_nested_idx += take_count;
319454 in_row_offset += take_count;
320455 if (in_row_offset >= nested_len) {
321456 child_row++;
322457 in_row_offset = 0 ;
323458 }
324459 }
325460
-    if (produced_rows > 0) {
-        for (auto index : p._output_slot_indexs) {
-            auto src_column = _child_block->get_by_position(index).column;
-            columns[index]->insert_indices_from(*src_column, row_ids.data(),
-                                                row_ids.data() + produced_rows);
-        }
-
-        auto& out_col = columns[p._child_slots.size()];
-        if (out_col->is_nullable()) {
-            auto* out_nullable = assert_cast<ColumnNullable*>(out_col.get());
-            out_nullable->get_nested_column_ptr()->insert_range_from(
-                    *_block_fast_path_ctx.nested_col, first_nested_idx, produced_rows);
-            auto* nullmap_column =
-                    assert_cast<ColumnUInt8*>(out_nullable->get_null_map_column_ptr().get());
-            auto& nullmap_data = nullmap_column->get_data();
-            const size_t old_size = nullmap_data.size();
-            nullmap_data.resize(old_size + produced_rows);
-            if (_block_fast_path_ctx.nested_nullmap_data != nullptr) {
-                memcpy(nullmap_data.data() + old_size,
-                       _block_fast_path_ctx.nested_nullmap_data + first_nested_idx,
-                       produced_rows * sizeof(UInt8));
-            } else {
-                memset(nullmap_data.data() + old_size, 0, produced_rows * sizeof(UInt8));
-            }
-        } else {
-            out_col->insert_range_from(*_block_fast_path_ctx.nested_col, first_nested_idx,
-                                       produced_rows);
-        }
-    }
+    // Flush any remaining segment
+    flush_segment();
355463
356464 _block_fast_path_row = child_row;
357465 _block_fast_path_in_row_offset = in_row_offset;
0 commit comments