Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 37 additions & 39 deletions src/main/cpp/benchmarks/common/generate_input.cu
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,15 @@
#include <cudf/column/column_factories.hpp>
#include <cudf/detail/gather.hpp>
#include <cudf/detail/utilities/integer_utils.hpp>
#include <cudf/detail/valid_if.cuh>
#include <cudf/filling.hpp>
#include <cudf/null_mask.hpp>
#include <cudf/scalar/scalar_factories.hpp>
#include <cudf/table/table.hpp>
#include <cudf/transform.hpp>
#include <cudf/types.hpp>
#include <cudf/utilities/default_stream.hpp>
#include <cudf/utilities/error.hpp>
#include <cudf/utilities/span.hpp>

#include <rmm/device_buffer.hpp>
#include <rmm/device_uvector.hpp>
Expand Down Expand Up @@ -430,18 +431,17 @@ std::unique_ptr<cudf::column> create_random_column(data_profile const& profile,
}

auto [result_bitmask, null_count] =
cudf::detail::valid_if(null_mask.begin(),
null_mask.end(),
cuda::std::identity{},
cudf::get_default_stream(),
rmm::mr::get_current_device_resource_ref());

return std::make_unique<cudf::column>(
cudf::data_type{cudf::type_to_id<T>()},
num_rows,
data.release(),
profile.get_null_frequency().has_value() ? std::move(result_bitmask) : rmm::device_buffer{},
null_count);
cudf::bools_to_mask(cudf::device_span<bool const>(null_mask),
cudf::get_default_stream(),
rmm::mr::get_current_device_resource_ref());

return std::make_unique<cudf::column>(cudf::data_type{cudf::type_to_id<T>()},
num_rows,
data.release(),
profile.get_null_frequency().has_value()
? std::move(*result_bitmask.release())
: rmm::device_buffer{},
null_count);
}

struct valid_or_zero {
Expand Down Expand Up @@ -513,18 +513,17 @@ std::unique_ptr<cudf::column> create_random_utf8_string_column(data_profile cons
num_rows,
string_generator{chars.data(), engine});
auto [result_bitmask, null_count] =
cudf::detail::valid_if(null_mask.begin(),
null_mask.end() - 1,
cuda::std::identity{},
cudf::get_default_stream(),
rmm::mr::get_current_device_resource_ref());
cudf::bools_to_mask(cudf::device_span<bool const>(null_mask.data(), num_rows),
cudf::get_default_stream(),
rmm::mr::get_current_device_resource_ref());

return cudf::make_strings_column(
num_rows,
std::make_unique<cudf::column>(std::move(offsets), rmm::device_buffer{}, 0),
chars.release(),
null_count,
profile.get_null_frequency().has_value() ? std::move(result_bitmask) : rmm::device_buffer{});
profile.get_null_frequency().has_value() ? std::move(*result_bitmask.release())
: rmm::device_buffer{});
}

/**
Expand Down Expand Up @@ -637,13 +636,12 @@ std::unique_ptr<cudf::column> create_random_column<cudf::struct_view>(data_profi
auto [null_mask, null_count] = [&]() {
if (profile.get_null_frequency().has_value()) {
auto valids = valid_dist(engine, num_rows);
return cudf::detail::valid_if(valids.begin(),
valids.end(),
cuda::std::identity{},
cudf::get_default_stream(),
rmm::mr::get_current_device_resource_ref());
return cudf::bools_to_mask(cudf::device_span<bool const>(valids),
cudf::get_default_stream(),
rmm::mr::get_current_device_resource_ref());
}
return std::pair<rmm::device_buffer, cudf::size_type>{};
return std::pair<std::unique_ptr<rmm::device_buffer>, cudf::size_type>{
std::make_unique<rmm::device_buffer>(), cudf::size_type{0}};
}();

// Adopt remaining children as evenly as possible
Expand All @@ -658,7 +656,7 @@ std::unique_ptr<cudf::column> create_random_column<cudf::struct_view>(data_profi
current_child += children_to_adopt.size();

*current_parent = cudf::make_structs_column(
num_rows, std::move(children_to_adopt), null_count, std::move(null_mask));
num_rows, std::move(children_to_adopt), null_count, std::move(*null_mask.release()));
}

if (lvl == 1) {
Expand Down Expand Up @@ -727,18 +725,16 @@ std::unique_ptr<cudf::column> create_random_column<cudf::list_view>(data_profile
rmm::device_buffer{},
0);

auto [null_mask, null_count] =
cudf::detail::valid_if(valids.begin(),
valids.end(),
cuda::std::identity{},
cudf::get_default_stream(),
rmm::mr::get_current_device_resource_ref());
list_column = cudf::make_lists_column(
auto [null_mask, null_count] = cudf::bools_to_mask(cudf::device_span<bool const>(valids),
cudf::get_default_stream(),
rmm::mr::get_current_device_resource_ref());
list_column = cudf::make_lists_column(
num_rows,
std::move(offsets_column),
std::move(current_child_column),
profile.get_null_frequency().has_value() ? null_count : 0, // cudf::UNKNOWN_NULL_COUNT,
profile.get_null_frequency().has_value() ? std::move(null_mask) : rmm::device_buffer{});
profile.get_null_frequency().has_value() ? std::move(*null_mask.release())
: rmm::device_buffer{});
}
return list_column; // return the top-level column
}
Expand Down Expand Up @@ -848,11 +844,13 @@ std::pair<rmm::device_buffer, cudf::size_type> create_random_null_mask(
} else if (*null_probability == 1.0) {
return {cudf::create_null_mask(size, cudf::mask_state::ALL_NULL), size};
} else {
return cudf::detail::valid_if(thrust::make_counting_iterator<cudf::size_type>(0),
thrust::make_counting_iterator<cudf::size_type>(size),
bool_generator{seed, 1.0 - *null_probability},
cudf::get_default_stream(),
rmm::mr::get_current_device_resource_ref());
rmm::device_uvector<bool> valids(size, cudf::get_default_stream());
thrust::tabulate(
thrust::device, valids.begin(), valids.end(), bool_generator{seed, 1.0 - *null_probability});
auto [mask, null_count] = cudf::bools_to_mask(cudf::device_span<bool const>(valids),
cudf::get_default_stream(),
rmm::mr::get_current_device_resource_ref());
return {std::move(*mask.release()), null_count};
}
}

Expand Down
17 changes: 12 additions & 5 deletions src/main/cpp/src/from_json_to_raw_map.cu
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,11 @@

#include <cudf/column/column_factories.hpp>
#include <cudf/detail/utilities/cuda_memcpy.hpp>
#include <cudf/detail/valid_if.cuh>
#include <cudf/io/detail/json.hpp>
#include <cudf/io/detail/tokenize_json.hpp>
#include <cudf/strings/detail/strings_children.cuh>
#include <cudf/strings/strings_column_view.hpp>
#include <cudf/transform.hpp>
#include <cudf/utilities/memory_resource.hpp>
#include <cudf/utilities/span.hpp>

Expand Down Expand Up @@ -770,10 +770,17 @@ std::pair<rmm::device_buffer, cudf::size_type> create_null_mask(
});
}

auto const valid_it = should_be_nullified->view().begin<bool>();
auto [null_mask, null_count] = cudf::detail::valid_if(
valid_it, valid_it + should_be_nullified->size(), thrust::logical_not<bool>{}, stream, mr);
return {null_count > 0 ? std::move(null_mask) : rmm::device_buffer{0, stream, mr}, null_count};
rmm::device_uvector<bool> valids(should_be_nullified->size(), stream);
auto const nullify_it = should_be_nullified->view().begin<bool>();
thrust::transform(rmm::exec_policy_nosync(stream),
nullify_it,
nullify_it + should_be_nullified->size(),
valids.begin(),
thrust::logical_not<bool>{});
auto [null_mask, null_count] =
cudf::bools_to_mask(cudf::device_span<bool const>(valids), stream, mr);
return {null_count > 0 ? std::move(*null_mask.release()) : rmm::device_buffer{0, stream, mr},
null_count};
}

} // namespace
Expand Down
14 changes: 11 additions & 3 deletions src/main/cpp/src/get_json_object.cu
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,11 @@
#include <cudf/detail/utilities/cuda.cuh>
#include <cudf/detail/utilities/integer_utils.hpp>
#include <cudf/detail/utilities/vector_factories.hpp>
#include <cudf/detail/valid_if.cuh>
#include <cudf/scalar/scalar.hpp>
#include <cudf/strings/detail/strings_children.cuh>
#include <cudf/strings/string_view.cuh>
#include <cudf/strings/strings_column_view.hpp>
#include <cudf/transform.hpp>
#include <cudf/types.hpp>
#include <cudf/utilities/error.hpp>
#include <cudf/utilities/span.hpp>
Expand Down Expand Up @@ -1093,8 +1093,16 @@ std::vector<std::unique_ptr<cudf::column>> get_json_object_batch(
if (h_error_check[idx]) {
oob_indices.emplace_back(idx);

out_null_masks_and_null_counts.emplace_back(
cudf::detail::valid_if(out_sview.begin(), out_sview.end(), validator, stream, mr));
auto [mask, null_count] = [&] {
rmm::device_uvector<bool> valids(out_sview.size(), stream);
thrust::transform(rmm::exec_policy_nosync(stream),
out_sview.begin(),
out_sview.end(),
valids.begin(),
validator);
return cudf::bools_to_mask(cudf::device_span<bool const>(valids), stream, mr);
}();
out_null_masks_and_null_counts.emplace_back(std::move(*mask.release()), null_count);

// The string sizes computed in the previous kernel call will be used to allocate a new char
// buffer to store the output.
Expand Down
2 changes: 1 addition & 1 deletion src/main/cpp/src/histogram.cu
Original file line number Diff line number Diff line change
Expand Up @@ -327,7 +327,7 @@ std::unique_ptr<cudf::column> create_histogram_if_valid(cudf::column_view const&
if (frequencies[idx] < 0) { *check_invalid = 1; }
if (frequencies[idx] == 0) { *check_zero = 1; }

check_valid[idx] = static_cast<int8_t>(frequencies[idx] > 0);
check_valid[idx] = frequencies[idx] > 0;
});

auto const h_checks = cudf::detail::make_std_vector(check_invalid_and_zero, stream);
Expand Down