class DuckDB::DataChunk
The DuckDB::DataChunk represents a chunk of data for table function output.
During table function execution, data chunks are used to return rows.
Example:
done = false
table_function.init { |_init_info| done = false }
table_function.execute do |func_info, output|
  if done
    output.size = 0 # Signal completion
  else
    # High-level API
    output.set_value(0, 0, 42)      # column 0, row 0, value 42
    output.set_value(1, 0, 'Alice') # column 1, row 0, value 'Alice'
    output.size = 1
    done = true
  end
end
Public Class Methods
Source
/*
 * call-seq:
 *   DuckDB::DataChunk.new                -> data_chunk
 *   DuckDB::DataChunk.new(logical_types) -> data_chunk
 *
 * With no argument (or nil) the receiver is returned untouched.
 * With an Array of logical type objects, a new DuckDB data chunk with one
 * column per array element is created and stored in the receiver, which
 * then owns it.
 *
 * Raises TypeError when the argument is neither nil nor an Array, and
 * DuckDB::Error when DuckDB fails to create the chunk.
 */
static VALUE data_chunk_initialize(int argc, VALUE *argv, VALUE self) {
    rubyDuckDBDataChunk *ctx;
    VALUE logical_types;
    VALUE types_buf = 0;
    idx_t column_count;
    duckdb_logical_type *types;
    duckdb_data_chunk new_chunk;
    long i;

    TypedData_Get_Struct(self, rubyDuckDBDataChunk, &data_chunk_data_type, ctx);
    rb_scan_args(argc, argv, "01", &logical_types);
    if (NIL_P(logical_types)) {
        return self;
    }
    Check_Type(logical_types, T_ARRAY);

    column_count = (idx_t)RARRAY_LEN(logical_types);

    /*
     * ALLOCV_N hands out a temporary buffer that Ruby reclaims even if an
     * exception unwinds past this frame, so nothing leaks when the element
     * lookup below raises (presumably it does for a non-LogicalType element;
     * the helper is defined elsewhere).
     */
    types = ALLOCV_N(duckdb_logical_type, types_buf, column_count);

    /* Bound by the allocated count so the buffer can never be overrun. */
    for (i = 0; i < (long)column_count; i++) {
        VALUE logical_type = rb_ary_entry(logical_types, i);
        rubyDuckDBLogicalType *logical_type_ctx = rbduckdb_get_struct_logical_type(logical_type);
        types[i] = logical_type_ctx->logical_type;
    }

    new_chunk = duckdb_create_data_chunk(types, column_count);
    ALLOCV_END(types_buf);
    if (!new_chunk) {
        rb_raise(eDuckDBError, "Failed to create data chunk");
    }

    /*
     * Only now release a chunk we previously owned: if anything above raised,
     * the receiver still holds its old, valid chunk.
     */
    if (ctx->owned && ctx->data_chunk) {
        duckdb_destroy_data_chunk(&(ctx->data_chunk));
    }
    ctx->data_chunk = new_chunk;
    ctx->owned = true;
    return self;
}
Public Instance Methods
Source
/*
 * Asks DuckDB for the column count of the wrapped data chunk and returns it
 * as a Ruby Integer.
 */
static VALUE data_chunk_column_count(VALUE self) {
    rubyDuckDBDataChunk *chunk_ctx;

    TypedData_Get_Struct(self, rubyDuckDBDataChunk, &data_chunk_data_type, chunk_ctx);
    return ULL2NUM(duckdb_data_chunk_get_column_count(chunk_ctx->data_chunk));
}
Returns the number of columns in the data chunk.
data_chunk.column_count # => 2
Source
/*
 * Returns a new DuckDB::Vector wrapper around the column vector at col_idx.
 *
 * col_idx is converted with NUM2ULL. When DuckDB returns no vector for that
 * index, DuckDB::Error is raised instead of handing a wrapper around a NULL
 * vector back to Ruby.
 */
static VALUE data_chunk_get_vector(VALUE self, VALUE col_idx) {
    rubyDuckDBDataChunk *ctx;
    idx_t idx;
    duckdb_vector vector;
    VALUE vector_obj;
    rubyDuckDBVector *vector_ctx;

    TypedData_Get_Struct(self, rubyDuckDBDataChunk, &data_chunk_data_type, ctx);
    idx = NUM2ULL(col_idx);

    vector = duckdb_data_chunk_get_vector(ctx->data_chunk, idx);
    if (!vector) {
        /* Checked before allocating the wrapper so no half-built object escapes. */
        rb_raise(eDuckDBError, "Failed to get vector at column index %" PRIsVALUE, col_idx);
    }

    /* Create Vector wrapper and point it at the chunk's vector. */
    vector_obj = rb_class_new_instance(0, NULL, cDuckDBVector);
    vector_ctx = rbduckdb_get_struct_vector(vector_obj);
    vector_ctx->vector = vector;
    return vector_obj;
}
Gets the vector at the specified column index.
vector = data_chunk.get_vector(0)
Source
# File lib/duckdb/data_chunk.rb, line 104
# Calls the low-level _reset and forgets the cached data pointers, then
# returns the receiver so calls can be chained.
def reset
  _reset

  # duckdb_data_chunk_reset may invalidate data pointers handed out earlier,
  # so the data cache is discarded. The vector/type caches stay valid across
  # resets and are left alone.
  @data_cache = nil

  self
end
Resets the data chunk so it can be reused for another batch of rows.
@return [DuckDB::DataChunk] self
Source
# File lib/duckdb/data_chunk.rb, line 48
# Writes +value+ into the cell at (col_idx, row_idx), choosing the writer
# from the column's cached type id. Returns +value+.
#
# Raises ArgumentError when the column's type id has no writer here.
def set_value(col_idx, row_idx, value)
  vector = cached_vector(col_idx)
  type_id = cached_type_id(col_idx, vector)

  # nil is stored as NULL: only the validity flag is touched, no data write.
  if value.nil?
    vector.set_validity(row_idx, false)
    return value
  end

  case type_id
  when :boolean,
       :tinyint, :smallint, :integer, :bigint,
       :utinyint, :usmallint, :uinteger, :ubigint,
       :float, :double,
       :timestamp, :timestamp_tz, :date
    # Each of these types has a MemoryHelper writer named write_<type_id>
    # that takes the column's data pointer, the row index and the value.
    MemoryHelper.public_send(:"write_#{type_id}", cached_data(col_idx, vector), row_idx, value)
  when :varchar
    vector.assign_string_element(row_idx, value.to_s)
  when :blob
    vector.assign_string_element_len(row_idx, value.to_s)
  else
    raise ArgumentError, "Unsupported type for DataChunk#set_value: #{type_id} for value `#{value.inspect}`"
  end

  value
end
Sets a value at the specified column and row index. Type conversion is automatic based on the column’s logical type.
@param col_idx [Integer] Column index (0-based)
@param row_idx [Integer] Row index (0-based)
@param value [Object] Value to set (Integer, Float, String, Time, Date, nil)
@return [Object] The value that was set
@example Set integer value
output.set_value(0, 0, 42)
@example Set string value
output.set_value(1, 0, 'hello')
@example Set NULL value
output.set_value(0, 1, nil)
rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength
Source
/*
 * Asks DuckDB for the current size of the wrapped data chunk and returns it
 * as a Ruby Integer.
 */
static VALUE data_chunk_size(VALUE self) {
    rubyDuckDBDataChunk *chunk_ctx;

    TypedData_Get_Struct(self, rubyDuckDBDataChunk, &data_chunk_data_type, chunk_ctx);
    return ULL2NUM(duckdb_data_chunk_get_size(chunk_ctx->data_chunk));
}
Returns the current number of tuples in the data chunk.
data_chunk.size # => 100
Source
/*
 * Converts the given Ruby number with NUM2ULL, passes it to
 * duckdb_data_chunk_set_size for the wrapped chunk, and returns the
 * argument unchanged.
 */
static VALUE data_chunk_set_size(VALUE self, VALUE size) {
    rubyDuckDBDataChunk *chunk_ctx;

    TypedData_Get_Struct(self, rubyDuckDBDataChunk, &data_chunk_data_type, chunk_ctx);
    duckdb_data_chunk_set_size(chunk_ctx->data_chunk, (idx_t)NUM2ULL(size));
    return size;
}
Sets the number of tuples in the data chunk.
data_chunk.size = 50