class DuckDB::TableFunction
The DuckDB::TableFunction encapsulates a DuckDB table function.
NOTE: DuckDB::TableFunction is currently experimental, and its API may change.
require 'duckdb'

db = DuckDB::Database.new
conn = db.connect

# Low-level API:
tf = DuckDB::TableFunction.new
tf.name = 'my_function'
tf.add_parameter(DuckDB::LogicalType::BIGINT)

tf.bind do |bind_info|
  bind_info.add_result_column('value', DuckDB::LogicalType::BIGINT)
end

tf.execute do |func_info, output|
  # Fill output data...
  0 # Return 0 to signal done
end

conn.register_table_function(tf)

# High-level API (recommended):
tf = DuckDB::TableFunction.create(
  name: 'my_function',
  parameters: [DuckDB::LogicalType::BIGINT],
  columns: { 'value' => DuckDB::LogicalType::BIGINT }
) do |func_info, output|
  # Fill output data...
  0 # Return row count (0 when done)
end
Public Class Methods
Source
# File lib/duckdb/table_function.rb, line 174
#
# Stores +adapter+ in the class-level adapter registry under +klass+,
# replacing any adapter previously stored for that class.
#
# @param klass [Class] the Ruby class the adapter handles (e.g. CSV)
# @param adapter [#call] the adapter object to associate with +klass+
# @return [Object] the value of the assignment, i.e. +adapter+
#   (callers should treat the result as void)
def add_table_adapter(klass, adapter)
  # NOTE(review): @table_adapters is initialised outside this method;
  # presumably a Hash keyed by class -- confirm at the class definition.
  @table_adapters[klass] = adapter
end
Registers a table adapter for a Ruby class.
The adapter is used by +DuckDB::Connection#expose_as_table+ to convert instances of klass into a DuckDB table function. The adapter must respond to +call(object, name, columns: nil)+ and return a DuckDB::TableFunction.
Implementing a Table Adapter
An adapter is any object that responds to +call(object, name, columns: nil)+. The columns: keyword argument allows callers to override the column schema; the adapter should fall back to its own schema detection when it is nil.
The execute block passed to DuckDB::TableFunction.create must:
- Write one batch of rows into +output+ per call
- Return the number of rows written as an Integer
- Return 0 to signal that all data has been exhausted
@example Minimal adapter for CSV objects
class CSVTableAdapter
  def call(csv, name, columns: nil)
    columns ||= infer_columns(csv)

    DuckDB::TableFunction.create(name:, columns:) do |_func_info, output|
      row = csv.readline
      if row
        row.each_with_index { |cell, i| output.set_value(i, 0, cell[1]) }
        1 # wrote one row
      else
        csv.rewind
        0 # signal end of data
      end
    end
  end

  private

  def infer_columns(csv)
    headers = csv.first.headers
    csv.rewind
    headers.each_with_object({}) { |h, hsh| hsh[h] = DuckDB::LogicalType::VARCHAR }
  end
end

# Register and use:
DuckDB::TableFunction.add_table_adapter(CSV, CSVTableAdapter.new)
con.execute('SET threads=1')
con.expose_as_table(csv, 'csv_table')
con.query('SELECT * FROM csv_table()').to_a
@param klass [Class] the Ruby class to register an adapter for (e.g. CSV)
@param adapter [#call] the adapter object
@return [void]
Source
# File lib/duckdb/table_function.rb, line 79
#
# Builds a table function from a declarative description: a name, an
# output column schema, optional parameters and an execute block.
#
# @param name [String] name of the table function
# @param columns [Hash] output columns as name => logical type pairs
# @param parameters [Array, Hash, nil] positional parameter types (Array)
#   or named parameter types (Hash); omitted when nil
# @yield [func_info, output] called for each execute callback invocation
# @yieldreturn [Integer] number of rows written (0 when done)
# @return [TableFunction] the configured table function
# @raise [ArgumentError] if name or columns is nil, no block is given,
#   or parameters is neither an Array nor a Hash
def create(name:, columns:, parameters: nil, &)
  raise ArgumentError, 'name is required' unless name
  raise ArgumentError, 'columns are required' unless columns
  raise ArgumentError, 'block is required' unless block_given?

  new.tap do |table_function|
    table_function.name = name

    # Arrays declare positional parameters, hashes declare named ones.
    case parameters
    when nil, false
      # Nothing to declare.
    when Array
      parameters.each { |type| table_function.add_parameter(type) }
    when Hash
      parameters.each { |key, type| table_function.add_named_parameter(key, type) }
    else
      raise ArgumentError, 'parameters must be Array or Hash'
    end

    # Bind: publish the result schema, one column per entry.
    table_function.bind do |bind_info|
      columns.each { |column_name, column_type| bind_info.add_result_column(column_name, column_type) }
    end

    # Init: DuckDB requires the callback even though there is nothing to set up.
    table_function.init do |_init_info|
      # No-op
    end

    # Execute: the caller's block reports how many rows it wrote, and that
    # count becomes the size of the output chunk.
    table_function.execute do |func_info, output|
      output.size = Integer(yield(func_info, output))
    end
  end
end
Creates a new table function with a declarative API.
@param name [String] The name of the table function
@param parameters [Array<LogicalType>, Hash<String, LogicalType>] Function parameters (optional)
@param columns [Hash<String, LogicalType>] Output columns (required)
@yield [func_info, output] The execute block that generates data
@yieldparam func_info [FunctionInfo] Function execution context
@yieldparam output [DataChunk] Output data chunk to fill
@yieldreturn [Integer] Number of rows generated (0 when done)
@return [TableFunction] The configured table function
@example Simple range function
tf = TableFunction.create(
  name: 'my_range',
  parameters: [LogicalType::BIGINT],
  columns: { 'value' => LogicalType::BIGINT }
) do |func_info, output|
  # Generate data...
  0 # Signal done
end
@example Function that returns data
tf = TableFunction.create(
  name: 'my_function',
  columns: { 'value' => LogicalType::BIGINT }
) do |func_info, output|
  vec = output.get_vector(0)
  # Fill vector...
  3 # Return row count
end
rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity
Source
/*
 * Initializer for DuckDB::TableFunction instances.
 *
 * Allocates the underlying DuckDB table function handle, clears the three
 * Ruby callback slots and registers the wrapper struct as the function's
 * extra info. Raises DuckDB::Error when the handle cannot be created.
 * Returns self.
 */
static VALUE duckdb_table_function_initialize(VALUE self) {
    rubyDuckDBTableFunction *tf;

    TypedData_Get_Struct(self, rubyDuckDBTableFunction, &table_function_data_type, tf);

    tf->table_function = duckdb_create_table_function();
    if (tf->table_function == NULL) {
        rb_raise(eDuckDBError, "Failed to create table function");
    }

    /* No bind/init/execute block has been supplied yet. */
    tf->bind_proc = tf->init_proc = tf->execute_proc = Qnil;

    /*
     * Register the C struct pointer, not `self`, as extra info: per the
     * original note the struct is xmalloc'd and therefore does not move
     * under GC compaction, whereas a VALUE could. The third argument (the
     * destroy callback slot) is NULL.
     */
    duckdb_table_function_set_extra_info(tf->table_function, tf, NULL);

    return self;
}
Creates a new table function.
tf = DuckDB::TableFunction.new
tf.name = "my_function"
# ... configure tf ...
Source
# File lib/duckdb/table_function.rb, line 186
#
# Looks up +klass+ in the class-level adapter registry.
#
# @param klass [Class] the Ruby class to look up
# @return [#call, nil] whatever the registry holds for +klass+
#   (documented as nil when no adapter has been registered)
def table_adapter_for(klass) = @table_adapters[klass]
Returns the table adapter registered for the given class, or nil if none.
@param klass [Class] the Ruby class to look up @return [#call, nil] the registered adapter, or nil if not found
@example
adapter = DuckDB::TableFunction.table_adapter_for(CSV)
Public Instance Methods
Source
/*
 * Declares a named parameter on the table function.
 *
 *   name         - Ruby String; converted with StringValueCStr, which raises
 *                  for non-strings and for strings containing NUL bytes.
 *   logical_type - object unwrapped through get_struct_logical_type.
 *
 * Raises DuckDB::Error when the underlying handle is NULL. Returns self.
 */
static VALUE rbduckdb_table_function_add_named_parameter(VALUE self, VALUE name, VALUE logical_type) {
    rubyDuckDBTableFunction *tf;
    rubyDuckDBLogicalType *type_data;
    const char *key;

    TypedData_Get_Struct(self, rubyDuckDBTableFunction, &table_function_data_type, tf);
    if (tf->table_function == NULL) {
        rb_raise(eDuckDBError, "Table function is destroyed");
    }

    /* Convert the name first, then the type, so argument errors surface in that order. */
    key = StringValueCStr(name);
    type_data = get_struct_logical_type(logical_type);

    duckdb_table_function_add_named_parameter(tf->table_function, key, type_data->logical_type);

    return self;
}
Adds a named parameter to the table function.
tf.add_named_parameter("limit", DuckDB::LogicalType::BIGINT)
Source
/*
 * Appends a positional parameter of the given logical type to the table
 * function.
 *
 *   logical_type - object unwrapped through get_struct_logical_type.
 *
 * Raises DuckDB::Error when the underlying handle is NULL. Returns self.
 */
static VALUE rbduckdb_table_function_add_parameter(VALUE self, VALUE logical_type) {
    rubyDuckDBTableFunction *tf;
    rubyDuckDBLogicalType *type_data;

    TypedData_Get_Struct(self, rubyDuckDBTableFunction, &table_function_data_type, tf);
    if (tf->table_function == NULL) {
        rb_raise(eDuckDBError, "Table function is destroyed");
    }

    type_data = get_struct_logical_type(logical_type);
    duckdb_table_function_add_parameter(tf->table_function, type_data->logical_type);

    return self;
}
Adds a positional parameter to the table function.
tf.add_parameter(DuckDB::LogicalType::BIGINT)
tf.add_parameter(DuckDB::LogicalType::VARCHAR)
Source
/*
 * Installs the bind callback.
 *
 * The block given to the Ruby method is captured as a Proc and stored in
 * the wrapper struct; table_function_bind_callback is then registered with
 * DuckDB as the native bind entry point.
 *
 * Raises ArgumentError when no block is given and DuckDB::Error when the
 * underlying handle is NULL. Returns self.
 */
static VALUE rbduckdb_table_function_set_bind(VALUE self) {
    rubyDuckDBTableFunction *tf;

    if (!rb_block_given_p()) {
        rb_raise(rb_eArgError, "block is required");
    }

    TypedData_Get_Struct(self, rubyDuckDBTableFunction, &table_function_data_type, tf);
    if (tf->table_function == NULL) {
        rb_raise(eDuckDBError, "Table function is destroyed");
    }

    /* Keep the block on the struct, then register the native callback. */
    tf->bind_proc = rb_block_proc();
    duckdb_table_function_set_bind(tf->table_function, table_function_bind_callback);

    return self;
}
Sets the bind callback for the table function. The callback is called when the function is used in a query.
table_function.bind do |bind_info|
  bind_info.add_result_column('id', DuckDB::LogicalType::BIGINT)
  bind_info.add_result_column('name', DuckDB::LogicalType::VARCHAR)
end
Source
/*
 * Installs the execute callback.
 *
 * The block given to the Ruby method is captured as a Proc and stored in
 * the wrapper struct; table_function_execute_callback is then registered
 * with DuckDB as the native main function.
 *
 * Raises ArgumentError when no block is given and DuckDB::Error when the
 * underlying handle is NULL. Returns self.
 */
static VALUE rbduckdb_table_function_set_execute(VALUE self) {
    rubyDuckDBTableFunction *ctx;

    if (!rb_block_given_p()) {
        rb_raise(rb_eArgError, "block is required for execute");
    }

    TypedData_Get_Struct(self, rubyDuckDBTableFunction, &table_function_data_type, ctx);

    /*
     * Same guard as the bind/init/name/parameter setters: never hand a NULL
     * handle to the DuckDB C API, and do not record a proc on a function
     * that can no longer be used.
     */
    if (!ctx->table_function) {
        rb_raise(eDuckDBError, "Table function is destroyed");
    }

    ctx->execute_proc = rb_block_proc();
    duckdb_table_function_set_function(ctx->table_function, table_function_execute_callback);

    return self;
}
Sets the execute callback for the table function. The callback is invoked during query execution to generate output rows.
table_function.execute do |func_info, output|
  output.size = 10
  vec = output.get_vector(0)
  # Write data...
end
Source
/*
 * Installs the init callback.
 *
 * The block given to the Ruby method is captured as a Proc and stored in
 * the wrapper struct; table_function_init_callback is then registered with
 * DuckDB as the native init entry point.
 *
 * Raises ArgumentError when no block is given and DuckDB::Error when the
 * underlying handle is NULL. Returns self.
 */
static VALUE rbduckdb_table_function_set_init(VALUE self) {
    rubyDuckDBTableFunction *tf;

    if (!rb_block_given_p()) {
        rb_raise(rb_eArgError, "block is required for init");
    }

    TypedData_Get_Struct(self, rubyDuckDBTableFunction, &table_function_data_type, tf);
    if (tf->table_function == NULL) {
        rb_raise(eDuckDBError, "Table function is destroyed");
    }

    /* Keep the block on the struct, then register the native callback. */
    tf->init_proc = rb_block_proc();
    duckdb_table_function_set_init(tf->table_function, table_function_init_callback);

    return self;
}
Sets the init callback for the table function. The callback is invoked once during query initialization to set up execution state.
table_function.init do |init_info|
  # Initialize execution state
end
Source
/*
 * Sets the name under which the table function is registered.
 *
 *   name - Ruby String; converted with StringValueCStr, which raises for
 *          non-strings and for strings containing NUL bytes.
 *
 * Raises DuckDB::Error when the underlying handle is NULL. Returns the
 * name argument.
 */
static VALUE rbduckdb_table_function_set_name(VALUE self, VALUE name) {
    rubyDuckDBTableFunction *tf;

    TypedData_Get_Struct(self, rubyDuckDBTableFunction, &table_function_data_type, tf);
    if (tf->table_function == NULL) {
        rb_raise(eDuckDBError, "Table function is destroyed");
    }

    duckdb_table_function_set_name(tf->table_function, StringValueCStr(name));

    return name;
}
Sets the name of the table function.
tf.name = "my_function"