diff --git a/be/src/core/data_type_serde/data_type_quantilestate_serde.h b/be/src/core/data_type_serde/data_type_quantilestate_serde.h index 730a7a64a78726..32a089a58acae6 100644 --- a/be/src/core/data_type_serde/data_type_quantilestate_serde.h +++ b/be/src/core/data_type_serde/data_type_quantilestate_serde.h @@ -64,15 +64,17 @@ class DataTypeQuantileStateSerDe : public DataTypeSerDe { } Status deserialize_one_cell_from_json(IColumn& column, Slice& slice, const FormatOptions& options) const override { - return Status::NotSupported("deserialize_one_cell_from_text with type " + - column.get_name()); + auto& data_column = assert_cast(column); + QuantileState quantile_state(slice); + data_column.insert_value(std::move(quantile_state)); + return Status::OK(); } Status deserialize_column_from_json_vector(IColumn& column, std::vector& slices, uint64_t* num_deserialized, const FormatOptions& options) const override { - return Status::NotSupported("deserialize_column_from_text_vector with type " + - column.get_name()); + DESERIALIZE_COLUMN_FROM_JSON_VECTOR(); + return Status::OK(); } Status write_column_to_pb(const IColumn& column, PValues& result, int64_t start, diff --git a/be/src/exec/connector/jni_connector.h b/be/src/exec/connector/jni_connector.h deleted file mode 100644 index 40549963cfd080..00000000000000 --- a/be/src/exec/connector/jni_connector.h +++ /dev/null @@ -1,482 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include -#include - -#include -#include -#include -#include -#include -#include - -#include "common/status.h" -#include "core/data_type/data_type.h" -#include "core/data_type/define_primitive_type.h" -#include "core/data_type/primitive_type.h" -#include "core/string_ref.h" -#include "exprs/aggregate/aggregate_function.h" -#include "runtime/runtime_profile.h" -#include "storage/olap_scan_common.h" -#include "storage/olap_utils.h" -#include "util/jni-util.h" -#include "util/profile_collector.h" -#include "util/string_util.h" - -namespace doris { -#include "common/compile_check_begin.h" -class RuntimeState; - -class Block; -template -class ColumnDecimal; -template -class ColumnVector; -} // namespace doris - -namespace doris { - -/** - * Connector to java jni scanner, which should extend org.apache.doris.common.jni.JniScanner - */ -class JniConnector : public ProfileCollector { -public: - class TableMetaAddress { - private: - long* _meta_ptr; - int _meta_index; - - public: - TableMetaAddress() { - _meta_ptr = nullptr; - _meta_index = 0; - } - - TableMetaAddress(long meta_addr) { - _meta_ptr = static_cast(reinterpret_cast(meta_addr)); - _meta_index = 0; - } - - void set_meta(long meta_addr) { - _meta_ptr = static_cast(reinterpret_cast(meta_addr)); - _meta_index = 0; - } - - long next_meta_as_long() { return _meta_ptr[_meta_index++]; } - - void* next_meta_as_ptr() { return reinterpret_cast(_meta_ptr[_meta_index++]); } - }; - - /** - * The predicates that can be pushed down to java side. 
- * Reference to java class org.apache.doris.common.jni.vec.ScanPredicate - */ - template - struct ScanPredicate { - ScanPredicate() = default; - ~ScanPredicate() = default; - std::string column_name; - SQLFilterOp op; - std::vector values; - int scale; - - ScanPredicate(const std::string column_name) : column_name(std::move(column_name)) {} - - ScanPredicate(const ScanPredicate& other) - : column_name(other.column_name), op(other.op), scale(other.scale) { - for (auto v : other.values) { - values.emplace_back(v); - } - } - - int length() { - // name_length(4) + column_name + operator(4) + scale(4) + num_values(4) - int len = 4 + static_cast(column_name.size()) + 4 + 4 + 4; - if constexpr (std::is_same_v) { - for (const StringRef* s : values) { - // string_length(4) + string - len += static_cast(4 + s->size); - } - } else { - int type_len = sizeof(CppType); - // value_length(4) + value - len += static_cast((4 + type_len) * values.size()); - } - return len; - } - - /** - * The value ranges can be stored as byte array as following format: - * number_filters(4) | length(4) | column_name | op(4) | scale(4) | num_values(4) | value_length(4) | value | ... 
- * The read method is implemented in org.apache.doris.common.jni.vec.ScanPredicate#parseScanPredicates - */ - int write(std::unique_ptr& predicates, int origin_length) { - int num_filters = 0; - if (origin_length != 0) { - num_filters = *reinterpret_cast(predicates.get()); - } else { - origin_length = 4; - } - num_filters += 1; - int new_length = origin_length + length(); - char* new_bytes = new char[new_length]; - if (origin_length != 4) { - memcpy(new_bytes, predicates.get(), origin_length); - } - *reinterpret_cast(new_bytes) = num_filters; - - char* char_ptr = new_bytes + origin_length; - *reinterpret_cast(char_ptr) = static_cast(column_name.size()); - char_ptr += 4; - memcpy(char_ptr, column_name.data(), column_name.size()); - char_ptr += static_cast(column_name.size()); - *reinterpret_cast(char_ptr) = op; - char_ptr += 4; - *reinterpret_cast(char_ptr) = scale; - char_ptr += 4; - *reinterpret_cast(char_ptr) = static_cast(values.size()); - char_ptr += 4; - if constexpr (std::is_same_v) { - for (const StringRef* s : values) { - *reinterpret_cast(char_ptr) = static_cast(s->size); - char_ptr += 4; - memcpy(char_ptr, s->data, s->size); - char_ptr += static_cast(s->size); - } - } else { - // FIXME: it can not handle decimal type correctly. - // but this logic is deprecated and not used. - // so may be deleted or fixed later. - for (const CppType* v : values) { - int type_len = sizeof(CppType); - *reinterpret_cast(char_ptr) = type_len; - char_ptr += 4; - *reinterpret_cast(char_ptr) = *v; - char_ptr += type_len; - } - } - - predicates.reset(new_bytes); - return new_length; - } - }; - - /** - * Use configuration map to provide scan information. The java side should determine how the parameters - * are parsed. For example, using "required_fields=col0,col1,...,colN" to provide the scan fields. 
- * @param connector_class Java scanner class - * @param scanner_params Provided configuration map - * @param column_names Fields to read, also the required_fields in scanner_params - */ - JniConnector(std::string connector_class, std::map scanner_params, - std::vector column_names, int64_t self_split_weight = -1) - : _connector_class(std::move(connector_class)), - _scanner_params(std::move(scanner_params)), - _column_names(std::move(column_names)), - _self_split_weight(static_cast(self_split_weight)) { - // Use java class name as connector name - _connector_name = split(_connector_class, "/").back(); - } - - /** - * Just use to get the table schema. - * @param connector_class Java scanner class - * @param scanner_params Provided configuration map - */ - JniConnector(std::string connector_class, std::map scanner_params) - : _connector_class(std::move(connector_class)), - _scanner_params(std::move(scanner_params)) { - _is_table_schema = true; - } - - ~JniConnector() override = default; - - /** - * Open java scanner, and get the following scanner methods by jni: - * 1. getNextBatchMeta: read next batch and return the address of meta information - * 2. close: close java scanner, and release jni resources - * 3. releaseColumn: release a single column - * 4. releaseTable: release current batch, which will also release columns and meta information - */ - Status open(RuntimeState* state, RuntimeProfile* profile); - - /** - * Should call before open, parse the pushed down filters. The value ranges can be stored as byte array in heap: - * number_filters(4) | length(4) | column_name | op(4) | scale(4) | num_values(4) | value_length(4) | value | ... - * Then, pass the byte array address in configuration map, like "push_down_predicates=${address}" - */ - Status init(); - - /** - * Call java side function JniScanner.getNextBatchMeta. 
The columns information are stored as long array: - * | number of rows | - * | null indicator start address of fixed length column-A | - * | data column start address of the fixed length column-A | - * | ... | - * | null indicator start address of variable length column-B | - * | offset column start address of the variable length column-B | - * | data column start address of the variable length column-B | - * | ... | - */ - Status get_next_block(Block* block, size_t* read_rows, bool* eof); - - /** - * Get performance metrics from java scanner - */ - Status get_statistics(JNIEnv* env, std::map* result); - - /** - * Call java side function JniScanner.getTableSchema. - * - * The schema information are stored as json format - */ - Status get_table_schema(std::string& table_schema_str); - - /** - * Close scanner and release jni resources. - */ - Status close(); - - /** - * Set column name to block index map from FileScanner to avoid repeated map creation. - */ - void set_col_name_to_block_idx( - const std::unordered_map* col_name_to_block_idx) { - _col_name_to_block_idx = col_name_to_block_idx; - } - - static std::string get_jni_type(const DataTypePtr& data_type); - static std::string get_jni_type_with_different_string(const DataTypePtr& data_type); - - static Status to_java_table(Block* block, size_t num_rows, const ColumnNumbers& arguments, - std::unique_ptr& meta); - - static Status to_java_table(Block* block, std::unique_ptr& meta); - - static std::pair parse_table_schema(Block* block, - const ColumnNumbers& arguments, - bool ignore_column_name = true); - - static std::pair parse_table_schema(Block* block); - - static Status fill_block(Block* block, const ColumnNumbers& arguments, long table_address); - -protected: - void _collect_profile_before_close() override; - -private: - std::string _connector_name; - std::string _connector_class; - std::map _scanner_params; - std::vector _column_names; - int32_t _self_split_weight; - bool _is_table_schema = false; - - 
RuntimeState* _state = nullptr; - RuntimeProfile* _profile = nullptr; - RuntimeProfile::Counter* _open_scanner_time = nullptr; - RuntimeProfile::Counter* _java_scan_time = nullptr; - RuntimeProfile::Counter* _java_append_data_time = nullptr; - RuntimeProfile::Counter* _java_create_vector_table_time = nullptr; - RuntimeProfile::Counter* _fill_block_time = nullptr; - std::map _scanner_profile; - RuntimeProfile::ConditionCounter* _max_time_split_weight_counter = nullptr; - - int64_t _jni_scanner_open_watcher = 0; - int64_t _java_scan_watcher = 0; - int64_t _fill_block_watcher = 0; - - size_t _has_read = 0; - - bool _closed = false; - bool _scanner_opened = false; - - Jni::GlobalClass _jni_scanner_cls; - Jni::GlobalObject _jni_scanner_obj; - Jni::MethodId _jni_scanner_open; - Jni::MethodId _jni_scanner_get_append_data_time; - Jni::MethodId _jni_scanner_get_create_vector_table_time; - Jni::MethodId _jni_scanner_get_next_batch; - Jni::MethodId _jni_scanner_get_table_schema; - Jni::MethodId _jni_scanner_close; - Jni::MethodId _jni_scanner_release_column; - Jni::MethodId _jni_scanner_release_table; - Jni::MethodId _jni_scanner_get_statistics; - - TableMetaAddress _table_meta; - - int _predicates_length = 0; - std::unique_ptr _predicates; - - // Column name to block index map, passed from FileScanner to avoid repeated map creation - const std::unordered_map* _col_name_to_block_idx = nullptr; - - /** - * Set the address of meta information, which is returned by org.apache.doris.common.jni.JniScanner#getNextBatchMeta - */ - void _set_meta(long meta_addr) { _table_meta.set_meta(meta_addr); } - - Status _init_jni_scanner(JNIEnv* env, int batch_size); - - Status _fill_block(Block* block, size_t num_rows); - - static Status _fill_column(TableMetaAddress& address, ColumnPtr& doris_column, - DataTypePtr& data_type, size_t num_rows); - - static Status _fill_string_column(TableMetaAddress& address, MutableColumnPtr& doris_column, - size_t num_rows); - - static Status 
_fill_varbinary_column(TableMetaAddress& address, MutableColumnPtr& doris_column, - size_t num_rows); - - static Status _fill_map_column(TableMetaAddress& address, MutableColumnPtr& doris_column, - DataTypePtr& data_type, size_t num_rows); - - static Status _fill_array_column(TableMetaAddress& address, MutableColumnPtr& doris_column, - DataTypePtr& data_type, size_t num_rows); - - static Status _fill_struct_column(TableMetaAddress& address, MutableColumnPtr& doris_column, - DataTypePtr& data_type, size_t num_rows); - - static Status _fill_column_meta(const ColumnPtr& doris_column, const DataTypePtr& data_type, - std::vector& meta_data); - - template - requires(!std::is_same_v && - !std::is_same_v && - !std::is_same_v && - !std::is_same_v && - !std::is_same_v && - !std::is_same_v) - static Status _fill_fixed_length_column(MutableColumnPtr& doris_column, CPP_TYPE* ptr, - size_t num_rows) { - auto& column_data = assert_cast(*doris_column).get_data(); - size_t origin_size = column_data.size(); - column_data.resize(origin_size + num_rows); - memcpy(column_data.data() + origin_size, ptr, sizeof(CPP_TYPE) * num_rows); - return Status::OK(); - } - - template - requires(std::is_same_v || - std::is_same_v) - static Status _fill_fixed_length_column(MutableColumnPtr& doris_column, CPP_TYPE* ptr, - size_t num_rows) { - auto& column_data = assert_cast(*doris_column).get_data(); - size_t origin_size = column_data.size(); - column_data.resize(origin_size + num_rows); - memcpy((int64_t*)column_data.data() + origin_size, ptr, sizeof(CPP_TYPE) * num_rows); - return Status::OK(); - } - - template - requires(std::is_same_v) - static Status _fill_fixed_length_column(MutableColumnPtr& doris_column, CPP_TYPE* ptr, - size_t num_rows) { - auto& column_data = assert_cast(*doris_column).get_data(); - size_t origin_size = column_data.size(); - column_data.resize(origin_size + num_rows); - memcpy((uint32_t*)column_data.data() + origin_size, ptr, sizeof(CPP_TYPE) * num_rows); - return 
Status::OK(); - } - - template - requires(std::is_same_v || - std::is_same_v) - static Status _fill_fixed_length_column(MutableColumnPtr& doris_column, CPP_TYPE* ptr, - size_t num_rows) { - auto& column_data = assert_cast(*doris_column).get_data(); - size_t origin_size = column_data.size(); - column_data.resize(origin_size + num_rows); - memcpy((uint64_t*)column_data.data() + origin_size, ptr, sizeof(CPP_TYPE) * num_rows); - return Status::OK(); - } - - template - requires(std::is_same_v) - static Status _fill_fixed_length_column(MutableColumnPtr& doris_column, CPP_TYPE* ptr, - size_t num_rows) { - auto& column_data = assert_cast(*doris_column).get_data(); - size_t origin_size = column_data.size(); - column_data.resize(origin_size + num_rows); - for (size_t i = 0; i < num_rows; i++) { - column_data[origin_size + i] = DecimalV2Value(ptr[i]); - } - return Status::OK(); - } - - template - static long _get_fixed_length_column_address(const IColumn& doris_column) { - return (long)assert_cast(doris_column).get_data().data(); - } - - template - void _parse_value_range(const ColumnValueRange& col_val_range, - const std::string& column_name) { - using CppType = std::conditional_t::CppType>; - - if (col_val_range.is_fixed_value_range()) { - ScanPredicate in_predicate(column_name); - in_predicate.op = SQLFilterOp::FILTER_IN; - in_predicate.scale = col_val_range.scale(); - for (const auto& value : col_val_range.get_fixed_value_set()) { - in_predicate.values.emplace_back(&value); - } - if (!in_predicate.values.empty()) { - _predicates_length = in_predicate.write(_predicates, _predicates_length); - } - return; - } - - const CppType high_value = col_val_range.get_range_max_value(); - const CppType low_value = col_val_range.get_range_min_value(); - const SQLFilterOp high_op = col_val_range.get_range_high_op(); - const SQLFilterOp low_op = col_val_range.get_range_low_op(); - - // orc can only push down is_null. 
When col_value_range._contain_null = true, only indicating that - // value can be null, not equals null, so ignore _contain_null in col_value_range - if (col_val_range.is_high_value_maximum() && high_op == SQLFilterOp::FILTER_LESS_OR_EQUAL && - col_val_range.is_low_value_minimum() && low_op == SQLFilterOp::FILTER_LARGER_OR_EQUAL) { - return; - } - - if (low_value < high_value) { - if (!col_val_range.is_low_value_minimum() || - SQLFilterOp::FILTER_LARGER_OR_EQUAL != low_op) { - ScanPredicate low_predicate(column_name); - low_predicate.scale = col_val_range.scale(); - low_predicate.op = low_op; - low_predicate.values.emplace_back(col_val_range.get_range_min_value_ptr()); - _predicates_length = low_predicate.write(_predicates, _predicates_length); - } - if (!col_val_range.is_high_value_maximum() || - SQLFilterOp::FILTER_LESS_OR_EQUAL != high_op) { - ScanPredicate high_predicate(column_name); - high_predicate.scale = col_val_range.scale(); - high_predicate.op = high_op; - high_predicate.values.emplace_back(col_val_range.get_range_max_value_ptr()); - _predicates_length = high_predicate.write(_predicates, _predicates_length); - } - } - } -}; -#include "common/compile_check_end.h" -} // namespace doris diff --git a/be/src/exec/connector/vjdbc_connector.cpp b/be/src/exec/connector/vjdbc_connector.cpp deleted file mode 100644 index 0d9c95c3d33675..00000000000000 --- a/be/src/exec/connector/vjdbc_connector.cpp +++ /dev/null @@ -1,687 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "exec/connector/vjdbc_connector.h" - -#include - -#include -// IWYU pragma: no_include -#include // IWYU pragma: keep -#include -#include -#include - -#include "absl/strings/substitute.h" -#include "cloud/config.h" -#include "common/logging.h" -#include "common/status.h" -#include "core/block/block.h" -#include "core/column/column_nullable.h" -#include "core/data_type/data_type_nullable.h" -#include "core/data_type/data_type_string.h" -#include "exec/connector/jni_connector.h" -#include "exec/table_connector.h" -#include "exprs/function/simple_function_factory.h" -#include "exprs/vexpr.h" -#include "jni.h" -#include "runtime/descriptors.h" -#include "runtime/plugin/cloud_plugin_downloader.h" -#include "runtime/runtime_profile.h" -#include "runtime/runtime_state.h" -#include "runtime/user_function_cache.h" -#include "util/jni-util.h" - -namespace doris { -#include "common/compile_check_begin.h" -const char* JDBC_EXECUTOR_FACTORY_CLASS = "org/apache/doris/jdbc/JdbcExecutorFactory"; -const char* JDBC_EXECUTOR_CTOR_SIGNATURE = "([B)V"; -const char* JDBC_EXECUTOR_STMT_WRITE_SIGNATURE = "(Ljava/util/Map;)I"; -const char* JDBC_EXECUTOR_HAS_NEXT_SIGNATURE = "()Z"; -const char* JDBC_EXECUTOR_CLOSE_SIGNATURE = "()V"; -const char* JDBC_EXECUTOR_TRANSACTION_SIGNATURE = "()V"; - -JdbcConnector::JdbcConnector(const JdbcConnectorParam& param) - : TableConnector(param.tuple_desc, param.use_transaction, param.table_name, - param.query_string), - _conn_param(param), - _closed(false) {} - -JdbcConnector::~JdbcConnector() { - if (!_closed) { - 
static_cast(close()); - } -} - -Status JdbcConnector::close(Status /*unused*/) { - SCOPED_RAW_TIMER(&_jdbc_statistic._connector_close_timer); - if (_closed) { - return Status::OK(); - } - if (!_is_open) { - _closed = true; - return Status::OK(); - } - - // Try to abort transaction and call Java close(), but don't block cleanup - if (_is_in_transaction) { - Status abort_status = abort_trans(); - if (!abort_status.ok()) { - LOG(WARNING) << "Failed to abort transaction: " << abort_status.to_string(); - } - } - - JNIEnv* env = nullptr; - RETURN_IF_ERROR(Jni::Env::Get(&env)); - RETURN_IF_ERROR( - _executor_obj.call_nonvirtual_void_method(env, _executor_clazz, _executor_close_id) - .call()); - _closed = true; - return Status::OK(); -} - -Status JdbcConnector::open(RuntimeState* state, bool read) { - if (_is_open) { - LOG(INFO) << "this scanner of jdbc already opened"; - return Status::OK(); - } - - JNIEnv* env = nullptr; - RETURN_IF_ERROR(Jni::Env::Get(&env)); - - RETURN_IF_ERROR(Jni::Util::get_jni_scanner_class(env, JDBC_EXECUTOR_FACTORY_CLASS, - &_executor_factory_clazz)); - - RETURN_IF_ERROR(_executor_factory_clazz.get_static_method( - env, "getExecutorClass", "(Lorg/apache/doris/thrift/TOdbcTableType;)Ljava/lang/String;", - &_executor_factory_ctor_id)); - - Jni::LocalObject jtable_type; - RETURN_IF_ERROR(_get_java_table_type(env, _conn_param.table_type, &jtable_type)); - - Jni::LocalString executor_name; - RETURN_IF_ERROR( - _executor_factory_clazz.call_static_object_method(env, _executor_factory_ctor_id) - .with_arg(jtable_type) - .call(&executor_name)); - - Jni::LocalStringBufferGuard executor_name_str; - RETURN_IF_ERROR(executor_name.get_string_chars(env, &executor_name_str)); - - RETURN_IF_ERROR( - Jni::Util::get_jni_scanner_class(env, executor_name_str.get(), &_executor_clazz)); - -#undef GET_BASIC_JAVA_CLAZZ - RETURN_IF_ERROR(_register_func_id(env)); - - std::string driver_path; - RETURN_IF_ERROR(_get_real_url(_conn_param.driver_path, &driver_path)); - - 
TJdbcExecutorCtorParams ctor_params; - ctor_params.__set_statement(_sql_str); - ctor_params.__set_catalog_id(_conn_param.catalog_id); - ctor_params.__set_jdbc_url(_conn_param.jdbc_url); - ctor_params.__set_jdbc_user(_conn_param.user); - ctor_params.__set_jdbc_password(_conn_param.passwd); - ctor_params.__set_jdbc_driver_class(_conn_param.driver_class); - ctor_params.__set_driver_path(driver_path); - ctor_params.__set_jdbc_driver_checksum(_conn_param.driver_checksum); - if (state == nullptr) { - ctor_params.__set_batch_size(read ? 1 : 0); - } else { - ctor_params.__set_batch_size(read ? state->batch_size() : 0); - } - ctor_params.__set_op(read ? TJdbcOperation::READ : TJdbcOperation::WRITE); - ctor_params.__set_table_type(_conn_param.table_type); - ctor_params.__set_connection_pool_min_size(_conn_param.connection_pool_min_size); - ctor_params.__set_connection_pool_max_size(_conn_param.connection_pool_max_size); - ctor_params.__set_connection_pool_max_wait_time(_conn_param.connection_pool_max_wait_time); - ctor_params.__set_connection_pool_max_life_time(_conn_param.connection_pool_max_life_time); - ctor_params.__set_connection_pool_cache_clear_time( - config::jdbc_connection_pool_cache_clear_time_sec); - ctor_params.__set_connection_pool_keep_alive(_conn_param.connection_pool_keep_alive); - ctor_params.__set_is_tvf(_conn_param.is_tvf); - - Jni::LocalArray ctor_params_bytes; - RETURN_IF_ERROR(Jni::Util::SerializeThriftMsg(env, &ctor_params, &ctor_params_bytes)); - - { - SCOPED_RAW_TIMER(&_jdbc_statistic._init_connector_timer); - RETURN_IF_ERROR(_executor_clazz.new_object(env, _executor_ctor_id) - .with_arg(ctor_params_bytes) - .call(&_executor_obj)); - } - _is_open = true; - RETURN_IF_ERROR(begin_trans()); - - return Status::OK(); -} - -Status JdbcConnector::test_connection() { - RETURN_IF_ERROR(open(nullptr, true)); - - JNIEnv* env = nullptr; - RETURN_IF_ERROR(Jni::Env::Get(&env)); - - return _executor_obj - .call_nonvirtual_void_method(env, _executor_clazz, 
_executor_test_connection_id) - .call(); -} - -Status JdbcConnector::clean_datasource() { - if (!_is_open) { - return Status::OK(); - } - JNIEnv* env = nullptr; - RETURN_IF_ERROR(Jni::Env::Get(&env)); - - return _executor_obj - .call_nonvirtual_void_method(env, _executor_clazz, _executor_clean_datasource_id) - .call(); -} - -Status JdbcConnector::query() { - if (!_is_open) { - return Status::InternalError("Query before open of JdbcConnector."); - } - // check materialize num equal - auto materialize_num = _tuple_desc->slots().size(); - - JNIEnv* env = nullptr; - RETURN_IF_ERROR(Jni::Env::Get(&env)); - { - SCOPED_RAW_TIMER(&_jdbc_statistic._execte_read_timer); - - jint colunm_count = 0; - auto st = _executor_obj.call_nonvirtual_int_method(env, _executor_clazz, _executor_read_id) - .call(&colunm_count); - if (!st.ok()) { - return Status::InternalError("GetJniExceptionMsg meet error, query={}, msg={}", - _conn_param.query_string, st.to_string()); - } - if (colunm_count < materialize_num) { - return Status::InternalError( - "JDBC query returned fewer columns ({}) than required ({}).", colunm_count, - materialize_num); - } - } - - LOG(INFO) << "JdbcConnector::query has exec success: " << _sql_str; - return Status::OK(); -} - -Status JdbcConnector::get_next(bool* eos, Block* block, int batch_size) { - SCOPED_RAW_TIMER(&_jdbc_statistic._get_data_timer); // Timer for the entire method - - if (!_is_open) { - return Status::InternalError("get_next before open of jdbc connector."); - } - - JNIEnv* env = nullptr; - { - SCOPED_RAW_TIMER(&_jdbc_statistic._jni_setup_timer); // Timer for setting up JNI environment - RETURN_IF_ERROR(Jni::Env::Get(&env)); - } // _jni_setup_timer stops when going out of this scope - - jboolean has_next = JNI_FALSE; - { - SCOPED_RAW_TIMER(&_jdbc_statistic._has_next_timer); // Timer for hasNext check - - RETURN_IF_ERROR( - _executor_obj - .call_nonvirtual_boolean_method(env, _executor_clazz, _executor_has_next_id) - .call(&has_next)); - } // 
_has_next_timer stops here - - if (has_next != JNI_TRUE) { - *eos = true; - return Status::OK(); - } - - auto column_size = _tuple_desc->slots().size(); - auto slots = _tuple_desc->slots(); - - Jni::LocalObject map; - { - SCOPED_RAW_TIMER(&_jdbc_statistic._prepare_params_timer); // Timer for preparing params - RETURN_IF_ERROR(_get_reader_params(block, env, column_size, &map)); - } // _prepare_params_timer stops here - - long address = 0; - { - SCOPED_RAW_TIMER( - &_jdbc_statistic - ._read_and_fill_vector_table_timer); // Timer for getBlockAddress call - RETURN_IF_ERROR(_executor_obj.call_long_method(env, _executor_get_block_address_id) - .with_arg(batch_size) - .with_arg(map) - .call(&address)); - } // _get_block_address_timer stops here - - std::vector all_columns; - for (uint32_t i = 0; i < column_size; ++i) { - all_columns.push_back(i); - } - - Status fill_block_status; - { - SCOPED_RAW_TIMER(&_jdbc_statistic._fill_block_timer); // Timer for fill_block - fill_block_status = JniConnector::fill_block(block, all_columns, address); - } // _fill_block_timer stops here - - if (!fill_block_status) { - return fill_block_status; - } - - Status cast_status; - { - SCOPED_RAW_TIMER(&_jdbc_statistic._cast_timer); // Timer for casting process - cast_status = _cast_string_to_special(block, env, column_size); - } // _cast_timer stops here - - return Status::OK(); -} - -Status JdbcConnector::append(Block* block, const VExprContextSPtrs& output_vexpr_ctxs, - uint32_t start_send_row, uint32_t* num_rows_sent, - TOdbcTableType::type table_type) { - RETURN_IF_ERROR(exec_stmt_write(block, output_vexpr_ctxs, num_rows_sent)); - COUNTER_UPDATE(_sent_rows_counter, *num_rows_sent); - return Status::OK(); -} - -Status JdbcConnector::exec_stmt_write(Block* block, const VExprContextSPtrs& output_vexpr_ctxs, - uint32_t* num_rows_sent) { - SCOPED_TIMER(_result_send_timer); - JNIEnv* env = nullptr; - RETURN_IF_ERROR(Jni::Env::Get(&env)); - - // prepare table meta information - std::unique_ptr 
meta_data; - RETURN_IF_ERROR(JniConnector::to_java_table(block, meta_data)); - long meta_address = (long)meta_data.get(); - auto table_schema = JniConnector::parse_table_schema(block); - - // prepare constructor parameters - std::map write_params = {{"meta_address", std::to_string(meta_address)}, - {"required_fields", table_schema.first}, - {"columns_types", table_schema.second}}; - Jni::LocalObject hashmap_object; - RETURN_IF_ERROR(Jni::Util::convert_to_java_map(env, write_params, &hashmap_object)); - - RETURN_IF_ERROR( - _executor_obj.call_nonvirtual_int_method(env, _executor_clazz, _executor_stmt_write_id) - .with_arg(hashmap_object) - .call()); - - *num_rows_sent = static_cast(block->rows()); - return Status::OK(); -} - -Status JdbcConnector::begin_trans() { - if (_use_tranaction) { - JNIEnv* env = nullptr; - RETURN_IF_ERROR(Jni::Env::Get(&env)); - - RETURN_IF_ERROR( - _executor_obj - .call_nonvirtual_void_method(env, _executor_clazz, _executor_begin_trans_id) - .call()); - _is_in_transaction = true; - } - return Status::OK(); -} - -Status JdbcConnector::abort_trans() { - if (!_is_in_transaction) { - return Status::InternalError("Abort transaction before begin trans."); - } - JNIEnv* env = nullptr; - RETURN_IF_ERROR(Jni::Env::Get(&env)); - - RETURN_IF_ERROR( - _executor_obj - .call_nonvirtual_void_method(env, _executor_clazz, _executor_abort_trans_id) - .call()); - return Status::OK(); -} - -Status JdbcConnector::finish_trans() { - if (_use_tranaction && _is_in_transaction) { - JNIEnv* env = nullptr; - RETURN_IF_ERROR(Jni::Env::Get(&env)); - - RETURN_IF_ERROR(_executor_obj - .call_nonvirtual_void_method(env, _executor_clazz, - _executor_finish_trans_id) - .call()); - - _is_in_transaction = false; - } - return Status::OK(); -} - -Status JdbcConnector::_register_func_id(JNIEnv* env) { - RETURN_IF_ERROR(_executor_clazz.get_method(env, "", JDBC_EXECUTOR_CTOR_SIGNATURE, - &_executor_ctor_id)); - RETURN_IF_ERROR(_executor_clazz.get_method(env, "write", 
JDBC_EXECUTOR_STMT_WRITE_SIGNATURE, - &_executor_stmt_write_id)); - RETURN_IF_ERROR(_executor_clazz.get_method(env, "read", "()I", &_executor_read_id)); - RETURN_IF_ERROR(_executor_clazz.get_method(env, "close", JDBC_EXECUTOR_CLOSE_SIGNATURE, - &_executor_close_id)); - RETURN_IF_ERROR(_executor_clazz.get_method(env, "hasNext", JDBC_EXECUTOR_HAS_NEXT_SIGNATURE, - &_executor_has_next_id)); - RETURN_IF_ERROR(_executor_clazz.get_method(env, "getBlockAddress", "(ILjava/util/Map;)J", - &_executor_get_block_address_id)); - RETURN_IF_ERROR( - _executor_clazz.get_method(env, "getCurBlockRows", "()I", &_executor_block_rows_id)); - - RETURN_IF_ERROR(_executor_clazz.get_method( - env, "openTrans", JDBC_EXECUTOR_TRANSACTION_SIGNATURE, &_executor_begin_trans_id)); - RETURN_IF_ERROR(_executor_clazz.get_method( - env, "commitTrans", JDBC_EXECUTOR_TRANSACTION_SIGNATURE, &_executor_finish_trans_id)); - RETURN_IF_ERROR(_executor_clazz.get_method( - env, "rollbackTrans", JDBC_EXECUTOR_TRANSACTION_SIGNATURE, &_executor_abort_trans_id)); - RETURN_IF_ERROR(_executor_clazz.get_method(env, "testConnection", "()V", - &_executor_test_connection_id)); - RETURN_IF_ERROR(_executor_clazz.get_method(env, "cleanDataSource", "()V", - &_executor_clean_datasource_id)); - - return Status::OK(); -} - -Status JdbcConnector::_get_reader_params(Block* block, JNIEnv* env, size_t column_size, - Jni::LocalObject* ans) { - std::ostringstream columns_nullable; - std::ostringstream columns_replace_string; - std::ostringstream required_fields; - std::ostringstream columns_types; - - for (int i = 0; i < column_size; ++i) { - auto* slot = _tuple_desc->slots()[i]; - auto type = slot->type(); - // Record if column is nullable - columns_nullable << (slot->is_nullable() ? 
"true" : "false") << ","; - // Check column type and replace accordingly - std::string replace_type = "not_replace"; - if (type->get_primitive_type() == PrimitiveType::TYPE_BITMAP) { - replace_type = "bitmap"; - } else if (type->get_primitive_type() == PrimitiveType::TYPE_HLL) { - replace_type = "hll"; - } else if (type->get_primitive_type() == PrimitiveType::TYPE_JSONB) { - replace_type = "jsonb"; - } - columns_replace_string << replace_type << ","; - if (replace_type != "not_replace") { - block->get_by_position(i).column = std::make_shared() - ->create_column() - ->convert_to_full_column_if_const(); - block->get_by_position(i).type = std::make_shared(); - if (slot->is_nullable()) { - block->get_by_position(i).column = make_nullable(block->get_by_position(i).column); - block->get_by_position(i).type = make_nullable(block->get_by_position(i).type); - } - } - // Record required fields and column types - std::string field = slot->col_name(); - std::string jni_type; - if (slot->type()->get_primitive_type() == PrimitiveType::TYPE_BITMAP || - slot->type()->get_primitive_type() == PrimitiveType::TYPE_HLL || - slot->type()->get_primitive_type() == PrimitiveType::TYPE_JSONB) { - jni_type = "string"; - } else { - jni_type = JniConnector::get_jni_type_with_different_string(slot->type()); - } - required_fields << (i != 0 ? "," : "") << field; - columns_types << (i != 0 ? 
"#" : "") << jni_type; - } - - std::map reader_params = {{"is_nullable", columns_nullable.str()}, - {"replace_string", columns_replace_string.str()}, - {"required_fields", required_fields.str()}, - {"columns_types", columns_types.str()}}; - return Jni::Util::convert_to_java_map(env, reader_params, ans); -} - -Status JdbcConnector::_cast_string_to_special(Block* block, JNIEnv* env, size_t column_size) { - for (size_t column_index = 0; column_index < column_size; ++column_index) { - auto* slot_desc = _tuple_desc->slots()[column_index]; - jint num_rows = 0; - RETURN_IF_ERROR( - _executor_obj - .call_nonvirtual_int_method(env, _executor_clazz, _executor_block_rows_id) - .call(&num_rows)); - - if (slot_desc->type()->get_primitive_type() == PrimitiveType::TYPE_HLL) { - RETURN_IF_ERROR(_cast_string_to_hll(slot_desc, block, static_cast(column_index), - static_cast(num_rows))); - } else if (slot_desc->type()->get_primitive_type() == PrimitiveType::TYPE_JSONB) { - RETURN_IF_ERROR(_cast_string_to_json(slot_desc, block, static_cast(column_index), - static_cast(num_rows))); - } else if (slot_desc->type()->get_primitive_type() == PrimitiveType::TYPE_BITMAP) { - RETURN_IF_ERROR(_cast_string_to_bitmap(slot_desc, block, static_cast(column_index), - static_cast(num_rows))); - } - } - return Status::OK(); -} - -Status JdbcConnector::_cast_string_to_hll(const SlotDescriptor* slot_desc, Block* block, - int column_index, int rows) { - _map_column_idx_to_cast_idx_hll[column_index] = - static_cast(_input_hll_string_types.size()); - if (slot_desc->is_nullable()) { - _input_hll_string_types.push_back(make_nullable(std::make_shared())); - } else { - _input_hll_string_types.push_back(std::make_shared()); - } - - DataTypePtr _target_data_type = slot_desc->get_data_type_ptr(); - std::string _target_data_type_name = _target_data_type->get_name(); - DataTypePtr _cast_param_data_type = _target_data_type; - ColumnPtr _cast_param = _cast_param_data_type->create_column_const_with_default_value(1); - 
- auto& input_col = block->get_by_position(column_index).column; - - ColumnsWithTypeAndName argument_template; - argument_template.reserve(2); - argument_template.emplace_back( - std::move(input_col), - _input_hll_string_types[_map_column_idx_to_cast_idx_hll[column_index]], - "java.sql.String"); - argument_template.emplace_back(_cast_param, _cast_param_data_type, _target_data_type_name); - FunctionBasePtr func_cast = SimpleFunctionFactory::instance().get_function( - "CAST", argument_template, make_nullable(_target_data_type)); - - Block cast_block(argument_template); - int result_idx = cast_block.columns(); - cast_block.insert({nullptr, make_nullable(_target_data_type), "cast_result"}); - RETURN_IF_ERROR(func_cast->execute(nullptr, cast_block, {0}, result_idx, rows)); - - auto res_col = cast_block.get_by_position(result_idx).column; - block->get_by_position(column_index).type = _target_data_type; - if (_target_data_type->is_nullable()) { - block->replace_by_position(column_index, res_col); - } else { - auto nested_ptr = - reinterpret_cast(res_col.get())->get_nested_column_ptr(); - block->replace_by_position(column_index, nested_ptr); - } - - return Status::OK(); -} - -Status JdbcConnector::_cast_string_to_bitmap(const SlotDescriptor* slot_desc, Block* block, - int column_index, int rows) { - _map_column_idx_to_cast_idx_bitmap[column_index] = - static_cast(_input_bitmap_string_types.size()); - if (slot_desc->is_nullable()) { - _input_bitmap_string_types.push_back(make_nullable(std::make_shared())); - } else { - _input_bitmap_string_types.push_back(std::make_shared()); - } - - DataTypePtr _target_data_type = slot_desc->get_data_type_ptr(); - std::string _target_data_type_name = _target_data_type->get_name(); - DataTypePtr _cast_param_data_type = _target_data_type; - ColumnPtr _cast_param = _cast_param_data_type->create_column_const_with_default_value(1); - - auto& input_col = block->get_by_position(column_index).column; - - ColumnsWithTypeAndName argument_template; - 
argument_template.reserve(2); - argument_template.emplace_back( - std::move(input_col), - _input_bitmap_string_types[_map_column_idx_to_cast_idx_bitmap[column_index]], - "java.sql.String"); - argument_template.emplace_back(_cast_param, _cast_param_data_type, _target_data_type_name); - FunctionBasePtr func_cast = SimpleFunctionFactory::instance().get_function( - "CAST", argument_template, make_nullable(_target_data_type)); - - Block cast_block(argument_template); - int result_idx = cast_block.columns(); - cast_block.insert({nullptr, make_nullable(_target_data_type), "cast_result"}); - RETURN_IF_ERROR(func_cast->execute(nullptr, cast_block, {0}, result_idx, rows)); - - auto res_col = cast_block.get_by_position(result_idx).column; - block->get_by_position(column_index).type = _target_data_type; - if (_target_data_type->is_nullable()) { - block->replace_by_position(column_index, res_col); - } else { - auto nested_ptr = - reinterpret_cast(res_col.get())->get_nested_column_ptr(); - block->replace_by_position(column_index, nested_ptr); - } - - return Status::OK(); -} - -// Deprecated, this code is retained only for compatibility with query problems that may be encountered when upgrading the version that maps JSON to JSONB to this version, and will be deleted in subsequent versions. 
-Status JdbcConnector::_cast_string_to_json(const SlotDescriptor* slot_desc, Block* block, - int column_index, int rows) { - _map_column_idx_to_cast_idx_json[column_index] = - static_cast(_input_json_string_types.size()); - if (slot_desc->is_nullable()) { - _input_json_string_types.push_back(make_nullable(std::make_shared())); - } else { - _input_json_string_types.push_back(std::make_shared()); - } - DataTypePtr _target_data_type = slot_desc->get_data_type_ptr(); - std::string _target_data_type_name = _target_data_type->get_name(); - DataTypePtr _cast_param_data_type = _target_data_type; - ColumnPtr _cast_param = - _cast_param_data_type->create_column_const(1, Field::create_field("{}")); - - auto& input_col = block->get_by_position(column_index).column; - - ColumnsWithTypeAndName argument_template; - argument_template.reserve(2); - argument_template.emplace_back( - std::move(input_col), - _input_json_string_types[_map_column_idx_to_cast_idx_json[column_index]], - "java.sql.String"); - argument_template.emplace_back(_cast_param, _cast_param_data_type, _target_data_type_name); - FunctionBasePtr func_cast = SimpleFunctionFactory::instance().get_function( - "CAST", argument_template, make_nullable(_target_data_type)); - - Block cast_block(argument_template); - int result_idx = cast_block.columns(); - cast_block.insert({nullptr, make_nullable(_target_data_type), "cast_result"}); - RETURN_IF_ERROR(func_cast->execute(nullptr, cast_block, {0}, result_idx, rows)); - - auto res_col = cast_block.get_by_position(result_idx).column; - block->get_by_position(column_index).type = _target_data_type; - if (_target_data_type->is_nullable()) { - block->replace_by_position(column_index, res_col); - } else { - auto nested_ptr = - reinterpret_cast(res_col.get())->get_nested_column_ptr(); - block->replace_by_position(column_index, nested_ptr); - } - - return Status::OK(); -} - -Status JdbcConnector::_get_java_table_type(JNIEnv* env, TOdbcTableType::type table_type, - Jni::LocalObject* 
java_enum_obj) { - Jni::LocalClass enum_class; - RETURN_IF_ERROR( - Jni::Util::find_class(env, "org/apache/doris/thrift/TOdbcTableType", &enum_class)); - - Jni::MethodId find_by_value_method; - RETURN_IF_ERROR(enum_class.get_static_method(env, "findByValue", - "(I)Lorg/apache/doris/thrift/TOdbcTableType;", - &find_by_value_method)); - - return enum_class.call_static_object_method(env, find_by_value_method) - .with_arg(static_cast(table_type)) - .call(java_enum_obj); -} - -Status JdbcConnector::_get_real_url(const std::string& url, std::string* result_url) { - if (url.find(":/") == std::string::npos) { - return _check_and_return_default_driver_url(url, result_url); - } - *result_url = url; - return Status::OK(); -} - -Status JdbcConnector::_check_and_return_default_driver_url(const std::string& url, - std::string* result_url) { - const char* doris_home = std::getenv("DORIS_HOME"); - std::string default_url = std::string(doris_home) + "/plugins/jdbc_drivers"; - std::string default_old_url = std::string(doris_home) + "/jdbc_drivers"; - - if (config::jdbc_drivers_dir == default_url) { - // If true, which means user does not set `jdbc_drivers_dir` and use the default one. - // Because in 2.1.8, we change the default value of `jdbc_drivers_dir` - // from `DORIS_HOME/jdbc_drivers` to `DORIS_HOME/plugins/jdbc_drivers`, - // so we need to check the old default dir for compatibility. 
- std::string target_path = default_url + "/" + url; - std::string old_target_path = default_old_url + "/" + url; - if (std::filesystem::exists(target_path)) { - // File exists in new default directory - *result_url = "file://" + target_path; - return Status::OK(); - } else if (std::filesystem::exists(old_target_path)) { - // File exists in old default directory - *result_url = "file://" + old_target_path; - return Status::OK(); - } else if (config::is_cloud_mode()) { - // Cloud mode: try to download from cloud to new default directory - std::string downloaded_path; - Status status = CloudPluginDownloader::download_from_cloud( - CloudPluginDownloader::PluginType::JDBC_DRIVERS, url, target_path, - &downloaded_path); - if (status.ok() && !downloaded_path.empty()) { - *result_url = "file://" + downloaded_path; - return Status::OK(); - } - // Download failed, log warning but continue to fallback - LOG(WARNING) << "Failed to download JDBC driver from cloud: " << status.to_string() - << ", fallback to old directory"; - } else { - return Status::InternalError("JDBC driver file does not exist: " + url); - } - } else { - // User specified custom directory - use directly - *result_url = "file://" + config::jdbc_drivers_dir + "/" + url; - } - return Status::OK(); -} -#include "common/compile_check_end.h" -} // namespace doris diff --git a/be/src/exec/connector/vjdbc_connector.h b/be/src/exec/connector/vjdbc_connector.h deleted file mode 100644 index b51e153f761cef..00000000000000 --- a/be/src/exec/connector/vjdbc_connector.h +++ /dev/null @@ -1,172 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include -#include -#include -#include - -#include -#include -#include - -#include "common/status.h" -#include "core/data_type/data_type.h" -#include "exec/table_connector.h" -#include "exprs/aggregate/aggregate_function.h" -#include "util/jni-util.h" - -namespace doris { -class RuntimeState; -class SlotDescriptor; -class TupleDescriptor; - -class Block; -class IColumn; -class VExprContext; - -struct JdbcConnectorParam { - // use -1 as default value to find error earlier. - int64_t catalog_id = -1; - std::string driver_path; - std::string driver_class; - std::string resource_name; - std::string driver_checksum; - std::string jdbc_url; - std::string user; - std::string passwd; - std::string query_string; - std::string table_name; - bool use_transaction = false; - TOdbcTableType::type table_type; - bool is_tvf = false; - int32_t connection_pool_min_size = -1; - int32_t connection_pool_max_size = -1; - int32_t connection_pool_max_wait_time = -1; - int32_t connection_pool_max_life_time = -1; - bool connection_pool_keep_alive = false; - - const TupleDescriptor* tuple_desc = nullptr; -}; - -class JdbcConnector : public TableConnector { -public: - struct JdbcStatistic { - int64_t _load_jar_timer = 0; - int64_t _init_connector_timer = 0; - int64_t _get_data_timer = 0; - int64_t _read_and_fill_vector_table_timer = 0; - int64_t _jni_setup_timer = 0; - int64_t _has_next_timer = 0; - int64_t _prepare_params_timer = 0; - int64_t _fill_block_timer = 0; - int64_t _cast_timer = 0; - int64_t _check_type_timer = 0; - int64_t _execte_read_timer = 
0; - int64_t _connector_close_timer = 0; - }; - - JdbcConnector(const JdbcConnectorParam& param); - - ~JdbcConnector() override; - - Status open(RuntimeState* state, bool read = false); - - Status query() override; - - Status get_next(bool* eos, Block* block, int batch_size); - - Status append(Block* block, const VExprContextSPtrs& _output_vexpr_ctxs, - uint32_t start_send_row, uint32_t* num_rows_sent, - TOdbcTableType::type table_type = TOdbcTableType::MYSQL) override; - - Status exec_stmt_write(Block* block, const VExprContextSPtrs& output_vexpr_ctxs, - uint32_t* num_rows_sent) override; - - // use in JDBC transaction - Status begin_trans() override; // should be call after connect and before query or init_to_write - Status abort_trans() override; // should be call after transaction abort - Status finish_trans() override; // should be call after transaction commit - - Status init_to_write(doris::RuntimeProfile* operator_profile) override { - init_profile(operator_profile); - return Status::OK(); - } - - JdbcStatistic& get_jdbc_statistic() { return _jdbc_statistic; } - - Status close(Status s = Status::OK()) override; - - Status test_connection(); - Status clean_datasource(); - -protected: - JdbcConnectorParam _conn_param; - -private: - Status _register_func_id(JNIEnv* env); - - Status _get_reader_params(Block* block, JNIEnv* env, size_t column_size, Jni::LocalObject* ans); - - Status _cast_string_to_special(Block* block, JNIEnv* env, size_t column_size); - Status _cast_string_to_hll(const SlotDescriptor* slot_desc, Block* block, int column_index, - int rows); - Status _cast_string_to_bitmap(const SlotDescriptor* slot_desc, Block* block, int column_index, - int rows); - Status _cast_string_to_json(const SlotDescriptor* slot_desc, Block* block, int column_index, - int rows); - - Status _get_java_table_type(JNIEnv* env, TOdbcTableType::type table_type, - Jni::LocalObject* java_enum_obj); - - Status _get_real_url(const std::string& url, std::string* result_url); - 
Status _check_and_return_default_driver_url(const std::string& url, std::string* result_url); - - bool _closed = false; - - Jni::GlobalClass _executor_factory_clazz; - Jni::GlobalClass _executor_clazz; - Jni::GlobalObject _executor_obj; - Jni::MethodId _executor_factory_ctor_id; - Jni::MethodId _executor_ctor_id; - Jni::MethodId _executor_stmt_write_id; - Jni::MethodId _executor_read_id; - Jni::MethodId _executor_has_next_id; - Jni::MethodId _executor_get_block_address_id; - Jni::MethodId _executor_block_rows_id; - Jni::MethodId _executor_close_id; - Jni::MethodId _executor_begin_trans_id; - Jni::MethodId _executor_finish_trans_id; - Jni::MethodId _executor_abort_trans_id; - Jni::MethodId _executor_test_connection_id; - Jni::MethodId _executor_clean_datasource_id; - - std::map _map_column_idx_to_cast_idx_hll; - std::vector _input_hll_string_types; - - std::map _map_column_idx_to_cast_idx_bitmap; - std::vector _input_bitmap_string_types; - - std::map _map_column_idx_to_cast_idx_json; - std::vector _input_json_string_types; - - JdbcStatistic _jdbc_statistic; -}; - -} // namespace doris diff --git a/be/src/exec/scan/file_scanner.cpp b/be/src/exec/scan/file_scanner.cpp index cd8a57126baf94..e05affc86fe72e 100644 --- a/be/src/exec/scan/file_scanner.cpp +++ b/be/src/exec/scan/file_scanner.cpp @@ -47,7 +47,6 @@ #include "core/data_type/data_type_string.h" #include "core/string_ref.h" #include "exec/common/stringop_substring.h" -#include "exec/operator/file_scan_operator.h" #include "exec/rowid_fetcher.h" #include "exec/scan/scan_node.h" #include "exprs/aggregate/aggregate_function.h" @@ -58,7 +57,6 @@ #include "exprs/vexpr_fwd.h" #include "exprs/vslot_ref.h" #include "format/arrow/arrow_stream_reader.h" -#include "format/avro/avro_jni_reader.h" #include "format/csv/csv_reader.h" #include "format/json/new_json_reader.h" #include "format/native/native_reader.h" @@ -68,7 +66,7 @@ #include "format/table/hudi_jni_reader.h" #include "format/table/hudi_reader.h" #include 
"format/table/iceberg_reader.h" -#include "format/table/lakesoul_jni_reader.h" +#include "format/table/jdbc_jni_reader.h" #include "format/table/max_compute_jni_reader.h" #include "format/table/paimon_cpp_reader.h" #include "format/table/paimon_jni_reader.h" @@ -83,7 +81,6 @@ #include "runtime/descriptors.h" #include "runtime/runtime_profile.h" #include "runtime/runtime_state.h" -#include "storage/segment/column_reader.h" namespace cctz { class time_zone; @@ -1013,17 +1010,21 @@ Status FileScanner::_get_next_reader() { range.table_format_params.hudi_params, _file_slot_descs, _state, _profile); init_status = ((HudiJniReader*)_cur_reader.get())->init_reader(); - } else if (range.__isset.table_format_params && - range.table_format_params.table_format_type == "lakesoul") { - _cur_reader = - LakeSoulJniReader::create_unique(range.table_format_params.lakesoul_params, - _file_slot_descs, _state, _profile); - init_status = ((LakeSoulJniReader*)_cur_reader.get())->init_reader(); + } else if (range.__isset.table_format_params && range.table_format_params.table_format_type == "trino_connector") { _cur_reader = TrinoConnectorJniReader::create_unique(_file_slot_descs, _state, _profile, range); init_status = ((TrinoConnectorJniReader*)(_cur_reader.get()))->init_reader(); + } else if (range.__isset.table_format_params && + range.table_format_params.table_format_type == "jdbc") { + // Extract jdbc params from table_format_params + std::map jdbc_params( + range.table_format_params.jdbc_params.begin(), + range.table_format_params.jdbc_params.end()); + _cur_reader = JdbcJniReader::create_unique(_file_slot_descs, _state, _profile, + jdbc_params); + init_status = ((JdbcJniReader*)(_cur_reader.get()))->init_reader(); } // Set col_name_to_block_idx for JNI readers to avoid repeated map creation if (_cur_reader) { @@ -1111,17 +1112,7 @@ Status FileScanner::_get_next_reader() { ->init_reader(_col_default_value_ctx, _is_load); break; } - case TFileFormatType::FORMAT_AVRO: { - _cur_reader = 
AvroJNIReader::create_unique(_state, _profile, *_params, _file_slot_descs, - range); - init_status = ((AvroJNIReader*)(_cur_reader.get()))->init_reader(); - // Set col_name_to_block_idx for JNI readers to avoid repeated map creation - if (_cur_reader) { - static_cast(_cur_reader.get()) - ->set_col_name_to_block_idx(&_src_block_name_to_idx); - } - break; - } + case TFileFormatType::FORMAT_WAL: { _cur_reader = WalReader::create_unique(_state); init_status = ((WalReader*)(_cur_reader.get()))->init_reader(_output_tuple_desc); diff --git a/be/src/exec/scan/file_scanner.h b/be/src/exec/scan/file_scanner.h index e8c4adf3c1468c..cf9a94e7643541 100644 --- a/be/src/exec/scan/file_scanner.h +++ b/be/src/exec/scan/file_scanner.h @@ -38,7 +38,7 @@ #include "io/io_common.h" #include "runtime/descriptors.h" #include "runtime/runtime_profile.h" -#include "storage/olap_scan_common.h" +#include "storage/olap_common.h" namespace doris { class RuntimeState; diff --git a/be/src/exec/scan/jdbc_scanner.cpp b/be/src/exec/scan/jdbc_scanner.cpp index 5f451c6fe6a940..87e32c3807f77c 100644 --- a/be/src/exec/scan/jdbc_scanner.cpp +++ b/be/src/exec/scan/jdbc_scanner.cpp @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. 
-#include "exec/scan/jdbc_scanner.h" +#include "jdbc_scanner.h" #include #include @@ -24,14 +24,12 @@ #include "common/logging.h" #include "core/block/block.h" -#include "core/block/column_with_type_and_name.h" -#include "core/column/column.h" -#include "core/data_type/data_type.h" -#include "exec/connector/vjdbc_connector.h" #include "exprs/vexpr_context.h" +#include "format/table/jdbc_jni_reader.h" #include "runtime/descriptors.h" #include "runtime/runtime_profile.h" #include "runtime/runtime_state.h" +#include "util/jdbc_utils.h" namespace doris { @@ -45,16 +43,49 @@ JdbcScanner::JdbcScanner(RuntimeState* state, doris::JDBCScanLocalState* local_s _tuple_desc(nullptr), _table_type(table_type), _is_tvf(is_tvf) { - _init_profile(local_state->_scanner_profile); _has_prepared = false; } +std::map JdbcScanner::_build_jdbc_params( + const TupleDescriptor* tuple_desc) { + const JdbcTableDescriptor* jdbc_table = + static_cast(tuple_desc->table_desc()); + + std::map params; + params["jdbc_url"] = jdbc_table->jdbc_url(); + params["jdbc_user"] = jdbc_table->jdbc_user(); + params["jdbc_password"] = jdbc_table->jdbc_passwd(); + params["jdbc_driver_class"] = jdbc_table->jdbc_driver_class(); + // Resolve jdbc_driver_url to absolute file:// URL + // FE sends just the JAR filename; we need to resolve it to a full path. 
+ std::string driver_url; + auto resolve_st = JdbcUtils::resolve_driver_url(jdbc_table->jdbc_driver_url(), &driver_url); + if (!resolve_st.ok()) { + LOG(WARNING) << "Failed to resolve JDBC driver URL: " << resolve_st.to_string(); + driver_url = jdbc_table->jdbc_driver_url(); + } + params["jdbc_driver_url"] = driver_url; + params["jdbc_driver_checksum"] = jdbc_table->jdbc_driver_checksum(); + params["query_sql"] = _query_string; + params["catalog_id"] = std::to_string(jdbc_table->jdbc_catalog_id()); + params["table_type"] = _odbc_table_type_to_string(_table_type); + params["connection_pool_min_size"] = std::to_string(jdbc_table->connection_pool_min_size()); + params["connection_pool_max_size"] = std::to_string(jdbc_table->connection_pool_max_size()); + params["connection_pool_max_wait_time"] = + std::to_string(jdbc_table->connection_pool_max_wait_time()); + params["connection_pool_max_life_time"] = + std::to_string(jdbc_table->connection_pool_max_life_time()); + params["connection_pool_keep_alive"] = + jdbc_table->connection_pool_keep_alive() ? 
"true" : "false"; + return params; +} + Status JdbcScanner::init(RuntimeState* state, const VExprContextSPtrs& conjuncts) { VLOG_CRITICAL << "JdbcScanner::init"; RETURN_IF_ERROR(Scanner::init(state, conjuncts)); if (state == nullptr) { - return Status::InternalError("input pointer is NULL of VJdbcScanNode::init."); + return Status::InternalError("input pointer is NULL of JdbcScanner::init."); } // get tuple desc @@ -67,53 +98,39 @@ Status JdbcScanner::init(RuntimeState* state, const VExprContextSPtrs& conjuncts const JdbcTableDescriptor* jdbc_table = static_cast(_tuple_desc->table_desc()); if (jdbc_table == nullptr) { - return Status::InternalError("jdbc table pointer is NULL of VJdbcScanNode::init."); - } - _jdbc_param.catalog_id = jdbc_table->jdbc_catalog_id(); - _jdbc_param.driver_class = jdbc_table->jdbc_driver_class(); - _jdbc_param.driver_path = jdbc_table->jdbc_driver_url(); - _jdbc_param.resource_name = jdbc_table->jdbc_resource_name(); - _jdbc_param.driver_checksum = jdbc_table->jdbc_driver_checksum(); - _jdbc_param.jdbc_url = jdbc_table->jdbc_url(); - _jdbc_param.user = jdbc_table->jdbc_user(); - _jdbc_param.passwd = jdbc_table->jdbc_passwd(); - _jdbc_param.tuple_desc = _tuple_desc; - _jdbc_param.query_string = std::move(_query_string); - _jdbc_param.use_transaction = false; // not useful for scanner but only sink. 
- _jdbc_param.table_type = _table_type; - _jdbc_param.is_tvf = _is_tvf; - _jdbc_param.connection_pool_min_size = jdbc_table->connection_pool_min_size(); - _jdbc_param.connection_pool_max_size = jdbc_table->connection_pool_max_size(); - _jdbc_param.connection_pool_max_life_time = jdbc_table->connection_pool_max_life_time(); - _jdbc_param.connection_pool_max_wait_time = jdbc_table->connection_pool_max_wait_time(); - _jdbc_param.connection_pool_keep_alive = jdbc_table->connection_pool_keep_alive(); - - _local_state->scanner_profile()->add_info_string("JdbcDriverClass", _jdbc_param.driver_class); - _local_state->scanner_profile()->add_info_string("JdbcDriverUrl", _jdbc_param.driver_path); - _local_state->scanner_profile()->add_info_string("JdbcUrl", _jdbc_param.jdbc_url); - _local_state->scanner_profile()->add_info_string("QuerySql", _jdbc_param.query_string); - - _jdbc_connector.reset(new (std::nothrow) JdbcConnector(_jdbc_param)); - if (_jdbc_connector == nullptr) { - return Status::InternalError("new a jdbc scanner failed."); + return Status::InternalError("jdbc table pointer is NULL of JdbcScanner::init."); } + _local_state->scanner_profile()->add_info_string("JdbcDriverClass", + jdbc_table->jdbc_driver_class()); + _local_state->scanner_profile()->add_info_string("JdbcDriverUrl", + jdbc_table->jdbc_driver_url()); + _local_state->scanner_profile()->add_info_string("JdbcUrl", jdbc_table->jdbc_url()); + _local_state->scanner_profile()->add_info_string("QuerySql", _query_string); + + // Build reader params from tuple descriptor + auto jdbc_params = _build_jdbc_params(_tuple_desc); + + // Pass _tuple_desc->slots() directly. JniReader stores _file_slot_descs as a reference, + // so we must pass a vector whose lifetime outlives the reader (i.e., _tuple_desc->slots()). + // Previously a local vector was passed, causing a dangling reference after init() returned. 
+ _jni_reader = JdbcJniReader::create_unique(_tuple_desc->slots(), state, _profile, jdbc_params); + return Status::OK(); } Status JdbcScanner::_open_impl(RuntimeState* state) { VLOG_CRITICAL << "JdbcScanner::open"; if (state == nullptr) { - return Status::InternalError("input pointer is NULL of VJdbcScanNode::open."); + return Status::InternalError("input pointer is NULL of JdbcScanner::open."); } if (!_has_prepared) { - return Status::InternalError("used before initialize of VJdbcScanNode::open."); + return Status::InternalError("used before initialize of JdbcScanner::open."); } RETURN_IF_CANCELLED(state); RETURN_IF_ERROR(Scanner::_open_impl(state)); - RETURN_IF_ERROR(_jdbc_connector->open(state, true)); - RETURN_IF_ERROR(_jdbc_connector->query()); + RETURN_IF_ERROR(_jni_reader->init_reader()); return Status::OK(); } @@ -124,12 +141,11 @@ Status JdbcScanner::_get_block_impl(RuntimeState* state, Block* block, bool* eof } if (!_has_prepared) { - return Status::InternalError("used before initialize of VJdbcScanNode::get_next."); + return Status::InternalError("used before initialize of JdbcScanner::get_next."); } - if (_jdbc_eos == true) { + if (_jdbc_eos) { *eof = true; - _update_profile(); return Status::OK(); } @@ -139,57 +155,28 @@ Status JdbcScanner::_get_block_impl(RuntimeState* state, Block* block, bool* eof do { RETURN_IF_CANCELLED(state); - RETURN_IF_ERROR(_jdbc_connector->get_next(&_jdbc_eos, block, state->batch_size())); + size_t read_rows = 0; + bool reader_eof = false; + RETURN_IF_ERROR(_jni_reader->get_next_block(block, &read_rows, &reader_eof)); - if (_jdbc_eos == true) { + if (reader_eof) { + _jdbc_eos = true; if (block->rows() == 0) { - _update_profile(); *eof = true; } break; } - VLOG_ROW << "NewJdbcScanNode output rows: " << block->rows(); + VLOG_ROW << "JdbcScanner output rows: " << block->rows(); } while (block->rows() == 0 && !(*eof)); return Status::OK(); } -void JdbcScanner::_init_profile(const std::shared_ptr& profile) { - _load_jar_timer = 
ADD_TIMER(profile, "LoadJarTime"); - _init_connector_timer = ADD_TIMER(profile, "InitConnectorTime"); - _check_type_timer = ADD_TIMER(profile, "CheckTypeTime"); - _get_data_timer = ADD_TIMER(profile, "GetDataTime"); - _read_and_fill_vector_table_timer = - ADD_CHILD_TIMER(profile, "ReadAndFillVectorTableTime", "GetDataTime"); - _jni_setup_timer = ADD_CHILD_TIMER(profile, "JniSetupTime", "GetDataTime"); - _has_next_timer = ADD_CHILD_TIMER(profile, "HasNextTime", "GetDataTime"); - _prepare_params_timer = ADD_CHILD_TIMER(profile, "PrepareParamsTime", "GetDataTime"); - _fill_block_timer = ADD_CHILD_TIMER(profile, "FillBlockTime", "GetDataTime"); - _cast_timer = ADD_CHILD_TIMER(profile, "CastTime", "GetDataTime"); - _execte_read_timer = ADD_TIMER(profile, "ExecteReadTime"); - _connector_close_timer = ADD_TIMER(profile, "ConnectorCloseTime"); -} - -void JdbcScanner::_update_profile() { - JdbcConnector::JdbcStatistic& jdbc_statistic = _jdbc_connector->get_jdbc_statistic(); - COUNTER_UPDATE(_load_jar_timer, jdbc_statistic._load_jar_timer); - COUNTER_UPDATE(_init_connector_timer, jdbc_statistic._init_connector_timer); - COUNTER_UPDATE(_check_type_timer, jdbc_statistic._check_type_timer); - COUNTER_UPDATE(_get_data_timer, jdbc_statistic._get_data_timer); - COUNTER_UPDATE(_jni_setup_timer, jdbc_statistic._jni_setup_timer); - COUNTER_UPDATE(_has_next_timer, jdbc_statistic._has_next_timer); - COUNTER_UPDATE(_prepare_params_timer, jdbc_statistic._prepare_params_timer); - COUNTER_UPDATE(_read_and_fill_vector_table_timer, - jdbc_statistic._read_and_fill_vector_table_timer); - COUNTER_UPDATE(_fill_block_timer, jdbc_statistic._fill_block_timer); - COUNTER_UPDATE(_cast_timer, jdbc_statistic._cast_timer); - COUNTER_UPDATE(_execte_read_timer, jdbc_statistic._execte_read_timer); - COUNTER_UPDATE(_connector_close_timer, jdbc_statistic._connector_close_timer); -} - Status JdbcScanner::close(RuntimeState* state) { RETURN_IF_ERROR(Scanner::close(state)); - 
RETURN_IF_ERROR(_jdbc_connector->close()); + if (_jni_reader) { + RETURN_IF_ERROR(_jni_reader->close()); + } return Status::OK(); } } // namespace doris diff --git a/be/src/exec/scan/jdbc_scanner.h b/be/src/exec/scan/jdbc_scanner.h index 5b96855fa15a02..506cc6276f8efb 100644 --- a/be/src/exec/scan/jdbc_scanner.h +++ b/be/src/exec/scan/jdbc_scanner.h @@ -20,15 +20,16 @@ #include #include +#include #include #include #include "common/factory_creator.h" #include "common/global_types.h" #include "common/status.h" -#include "exec/connector/vjdbc_connector.h" #include "exec/operator/jdbc_scan_operator.h" #include "exec/scan/scanner.h" +#include "format/table/jdbc_jni_reader.h" #include "runtime/runtime_profile.h" namespace doris { @@ -38,11 +39,27 @@ class TupleDescriptor; class Block; class VExprContext; +/** + * DEPRECATED: This class is transitional and should be removed once JDBC scanning + * is fully integrated into the FileScanner path. + * + * JdbcScanner is the pipeline-level scanner for JDBC data sources. + * It delegates to JdbcJniReader internally, which uses the unified + * JniReader → JdbcJniScanner (Java) path for data reading. + * + * Prerequisites before deletion: + * 1. FE: Change JdbcScanNode to generate FileScanNode plan with TFileFormatType::FORMAT_JDBC, + * so JDBC scans flow through FileScanner instead of JDBCScanLocalState → JdbcScanner. + * 2. BE: Add FORMAT_JDBC case in FileScanner::_create_reader() to create JdbcJniReader + * (similar to Paimon/Hudi/MaxCompute/TrinoConnector). + * 3. BE: Remove JDBCScanLocalState / jdbc_scan_operator.h/cpp which depend on this class. + * 4. After the above, this file (jdbc_scanner.h/cpp) can be safely deleted. 
+ */ class JdbcScanner : public Scanner { ENABLE_FACTORY_CREATOR(JdbcScanner); public: - friend class JdbcConnector; + friend class JdbcJniReader; JdbcScanner(RuntimeState* state, doris::JDBCScanLocalState* parent, int64_t limit, const TupleId& tuple_id, const std::string& query_string, @@ -55,22 +72,41 @@ class JdbcScanner : public Scanner { protected: Status _get_block_impl(RuntimeState* state, Block* block, bool* eos) override; - RuntimeProfile::Counter* _load_jar_timer = nullptr; - RuntimeProfile::Counter* _init_connector_timer = nullptr; - RuntimeProfile::Counter* _get_data_timer = nullptr; - RuntimeProfile::Counter* _jni_setup_timer = nullptr; - RuntimeProfile::Counter* _has_next_timer = nullptr; - RuntimeProfile::Counter* _prepare_params_timer = nullptr; - RuntimeProfile::Counter* _cast_timer = nullptr; - RuntimeProfile::Counter* _read_and_fill_vector_table_timer = nullptr; - RuntimeProfile::Counter* _fill_block_timer = nullptr; - RuntimeProfile::Counter* _check_type_timer = nullptr; - RuntimeProfile::Counter* _execte_read_timer = nullptr; - RuntimeProfile::Counter* _connector_close_timer = nullptr; - private: - void _init_profile(const std::shared_ptr& profile); - void _update_profile(); + // Build JDBC params from TupleDescriptor for JdbcJniReader + std::map _build_jdbc_params(const TupleDescriptor* tuple_desc); + + // Convert TOdbcTableType enum to string for JdbcTypeHandlerFactory + static std::string _odbc_table_type_to_string(TOdbcTableType::type type) { + switch (type) { + case TOdbcTableType::MYSQL: + return "MYSQL"; + case TOdbcTableType::ORACLE: + return "ORACLE"; + case TOdbcTableType::POSTGRESQL: + return "POSTGRESQL"; + case TOdbcTableType::SQLSERVER: + return "SQLSERVER"; + case TOdbcTableType::CLICKHOUSE: + return "CLICKHOUSE"; + case TOdbcTableType::SAP_HANA: + return "SAP_HANA"; + case TOdbcTableType::TRINO: + return "TRINO"; + case TOdbcTableType::PRESTO: + return "PRESTO"; + case TOdbcTableType::OCEANBASE: + return "OCEANBASE"; + case 
TOdbcTableType::OCEANBASE_ORACLE: + return "OCEANBASE_ORACLE"; + case TOdbcTableType::DB2: + return "DB2"; + case TOdbcTableType::GBASE: + return "GBASE"; + default: + return "MYSQL"; + } + } bool _jdbc_eos; @@ -80,11 +116,10 @@ class JdbcScanner : public Scanner { std::string _query_string; // Descriptor of tuples read from JDBC table. const TupleDescriptor* _tuple_desc = nullptr; - // the sql query database type: like mysql, PG... + // the sql query database type: like mysql, PG, etc. TOdbcTableType::type _table_type; bool _is_tvf; - // Scanner of JDBC. - std::unique_ptr _jdbc_connector; - JdbcConnectorParam _jdbc_param; + // Unified JNI reader + std::unique_ptr _jni_reader; }; } // namespace doris diff --git a/be/src/exec/sink/writer/vjdbc_table_writer.cpp b/be/src/exec/sink/writer/vjdbc_table_writer.cpp index acb197ea1a3b3c..7f97a7c0745a55 100644 --- a/be/src/exec/sink/writer/vjdbc_table_writer.cpp +++ b/be/src/exec/sink/writer/vjdbc_table_writer.cpp @@ -15,44 +15,56 @@ // specific language governing permissions and limitations // under the License. 
-#include "exec/sink/writer/vjdbc_table_writer.h" +#include "vjdbc_table_writer.h" #include #include #include -#include "core/binary_cast.hpp" +#include "common/logging.h" #include "core/block/block.h" #include "exprs/vexpr.h" #include "exprs/vexpr_context.h" +#include "runtime/runtime_state.h" +#include "util/jdbc_utils.h" namespace doris { -JdbcConnectorParam VJdbcTableWriter::create_connect_param(const doris::TDataSink& t_sink) { +std::map VJdbcTableWriter::_build_writer_params(const TDataSink& t_sink) { const TJdbcTableSink& t_jdbc_sink = t_sink.jdbc_table_sink; + std::map params; - JdbcConnectorParam jdbc_param; - - jdbc_param.catalog_id = t_jdbc_sink.jdbc_table.catalog_id; - jdbc_param.jdbc_url = t_jdbc_sink.jdbc_table.jdbc_url; - jdbc_param.user = t_jdbc_sink.jdbc_table.jdbc_user; - jdbc_param.passwd = t_jdbc_sink.jdbc_table.jdbc_password; - jdbc_param.driver_class = t_jdbc_sink.jdbc_table.jdbc_driver_class; - jdbc_param.driver_path = t_jdbc_sink.jdbc_table.jdbc_driver_url; - jdbc_param.driver_checksum = t_jdbc_sink.jdbc_table.jdbc_driver_checksum; - jdbc_param.resource_name = t_jdbc_sink.jdbc_table.jdbc_resource_name; - jdbc_param.table_type = t_jdbc_sink.table_type; - jdbc_param.query_string = t_jdbc_sink.insert_sql; - jdbc_param.table_name = t_jdbc_sink.jdbc_table.jdbc_table_name; - jdbc_param.use_transaction = t_jdbc_sink.use_transaction; - jdbc_param.connection_pool_min_size = t_jdbc_sink.jdbc_table.connection_pool_min_size; - jdbc_param.connection_pool_max_size = t_jdbc_sink.jdbc_table.connection_pool_max_size; - jdbc_param.connection_pool_max_wait_time = t_jdbc_sink.jdbc_table.connection_pool_max_wait_time; - jdbc_param.connection_pool_max_life_time = t_jdbc_sink.jdbc_table.connection_pool_max_life_time; - jdbc_param.connection_pool_keep_alive = t_jdbc_sink.jdbc_table.connection_pool_keep_alive; - - return jdbc_param; + params["jdbc_url"] = t_jdbc_sink.jdbc_table.jdbc_url; + params["jdbc_user"] = t_jdbc_sink.jdbc_table.jdbc_user; + 
params["jdbc_password"] = t_jdbc_sink.jdbc_table.jdbc_password; + params["jdbc_driver_class"] = t_jdbc_sink.jdbc_table.jdbc_driver_class; + // Resolve jdbc_driver_url to absolute file:// URL + std::string driver_url; + auto resolve_st = + JdbcUtils::resolve_driver_url(t_jdbc_sink.jdbc_table.jdbc_driver_url, &driver_url); + if (!resolve_st.ok()) { + LOG(WARNING) << "Failed to resolve JDBC driver URL: " << resolve_st.to_string(); + driver_url = t_jdbc_sink.jdbc_table.jdbc_driver_url; + } + params["jdbc_driver_url"] = driver_url; + + params["jdbc_driver_checksum"] = t_jdbc_sink.jdbc_table.jdbc_driver_checksum; + params["insert_sql"] = t_jdbc_sink.insert_sql; + params["use_transaction"] = t_jdbc_sink.use_transaction ? "true" : "false"; + params["catalog_id"] = std::to_string(t_jdbc_sink.jdbc_table.catalog_id); + params["connection_pool_min_size"] = + std::to_string(t_jdbc_sink.jdbc_table.connection_pool_min_size); + params["connection_pool_max_size"] = + std::to_string(t_jdbc_sink.jdbc_table.connection_pool_max_size); + params["connection_pool_max_wait_time"] = + std::to_string(t_jdbc_sink.jdbc_table.connection_pool_max_wait_time); + params["connection_pool_max_life_time"] = + std::to_string(t_jdbc_sink.jdbc_table.connection_pool_max_life_time); + params["connection_pool_keep_alive"] = + t_jdbc_sink.jdbc_table.connection_pool_keep_alive ? 
"true" : "false"; + + return params; } VJdbcTableWriter::VJdbcTableWriter(const TDataSink& t_sink, @@ -60,22 +72,40 @@ VJdbcTableWriter::VJdbcTableWriter(const TDataSink& t_sink, std::shared_ptr dep, std::shared_ptr fin_dep) : AsyncResultWriter(output_expr_ctxs, dep, fin_dep), - JdbcConnector(create_connect_param(t_sink)) {} + _writer_params(_build_writer_params(t_sink)), + _use_transaction(t_sink.jdbc_table_sink.use_transaction) {} + +Status VJdbcTableWriter::open(RuntimeState* state, RuntimeProfile* operator_profile) { + _writer = std::make_unique( + state, _vec_output_expr_ctxs, "org/apache/doris/jdbc/JdbcJniWriter", _writer_params); + return _writer->open(); +} Status VJdbcTableWriter::write(RuntimeState* state, Block& block) { Block output_block; RETURN_IF_ERROR(_projection_block(block, &output_block)); - auto num_rows = output_block.rows(); - - uint32_t start_send_row = 0; - uint32_t num_row_sent = 0; - while (start_send_row < num_rows) { - RETURN_IF_ERROR(append(&output_block, _vec_output_expr_ctxs, start_send_row, &num_row_sent, - _conn_param.table_type)); - start_send_row += num_row_sent; - num_row_sent = 0; + + if (output_block.rows() == 0) { + return Status::OK(); + } + + return _writer->write(output_block); +} + +Status VJdbcTableWriter::finish(RuntimeState* state) { + if (!_use_transaction || !_writer) { + return Status::OK(); } + // Transaction commit is handled in JdbcJniWriter.close() on the Java side. + // When useTransaction=true, close() calls conn.commit() before closing the connection. 
+ return Status::OK(); +} + +Status VJdbcTableWriter::close(Status s) { + if (_writer) { + return _writer->close(); + } return Status::OK(); } diff --git a/be/src/exec/sink/writer/vjdbc_table_writer.h b/be/src/exec/sink/writer/vjdbc_table_writer.h index c3a54eb623d188..56fdadc85a58c5 100644 --- a/be/src/exec/sink/writer/vjdbc_table_writer.h +++ b/be/src/exec/sink/writer/vjdbc_table_writer.h @@ -20,37 +20,47 @@ #include #include +#include +#include #include #include #include "common/status.h" -#include "exec/connector/vjdbc_connector.h" #include "exec/sink/writer/async_result_writer.h" +#include "format/transformer/vjni_format_transformer.h" namespace doris { class Block; -class VJdbcTableWriter final : public AsyncResultWriter, public JdbcConnector { +/** + * VJdbcTableWriter writes data to external JDBC targets via JNI. + * + * Refactored to use VJniFormatTransformer (same pattern as VMCPartitionWriter for MaxCompute). + * The Java side writer is JdbcJniWriter which extends JniWriter. + * + * Transaction control (commit/rollback) is handled on the Java side: when use_transaction + * is enabled, JdbcJniWriter commits the transaction inside its close().
+ */ +class VJdbcTableWriter final : public AsyncResultWriter { public: - static JdbcConnectorParam create_connect_param(const TDataSink&); - VJdbcTableWriter(const TDataSink& t_sink, const VExprContextSPtrs& output_exprs, std::shared_ptr dep, std::shared_ptr fin_dep); - // connect to jdbc server - Status open(RuntimeState* state, RuntimeProfile* operator_profile) override { - RETURN_IF_ERROR(JdbcConnector::open(state, false)); - return init_to_write(operator_profile); - } + Status open(RuntimeState* state, RuntimeProfile* operator_profile) override; Status write(RuntimeState* state, Block& block) override; - Status finish(RuntimeState* state) override { return JdbcConnector::finish_trans(); } + Status finish(RuntimeState* state) override; - Status close(Status s) override { return JdbcConnector::close(s); } + Status close(Status s) override; private: - JdbcConnectorParam _param; + // Build the writer_params map from TDataSink + static std::map _build_writer_params(const TDataSink& t_sink); + + std::unique_ptr _writer; + std::map _writer_params; + bool _use_transaction = false; }; -} // namespace doris \ No newline at end of file +} // namespace doris diff --git a/be/src/exprs/aggregate/aggregate_function_java_udaf.h b/be/src/exprs/aggregate/aggregate_function_java_udaf.h index 2bb5df64206966..ad15932fac1bac 100644 --- a/be/src/exprs/aggregate/aggregate_function_java_udaf.h +++ b/be/src/exprs/aggregate/aggregate_function_java_udaf.h @@ -35,8 +35,8 @@ #include "core/field.h" #include "core/string_ref.h" #include "core/types.h" -#include "exec/connector/jni_connector.h" #include "exprs/aggregate/aggregate_function.h" +#include "format/jni/jni_data_bridge.h" #include "runtime/user_function_cache.h" #include "util/io_helper.h" #include "util/jni-util.h" @@ -110,8 +110,8 @@ struct AggregateJavaUdafData { std::to_string(i))); } std::unique_ptr input_table; - RETURN_IF_ERROR(JniConnector::to_java_table(&input_block, input_table)); - auto input_table_schema = 
JniConnector::parse_table_schema(&input_block); + RETURN_IF_ERROR(JniDataBridge::to_java_table(&input_block, input_table)); + auto input_table_schema = JniDataBridge::parse_table_schema(&input_block); std::map input_params = { {"meta_address", std::to_string((long)input_table.get())}, {"required_fields", input_table_schema.first}, @@ -190,7 +190,7 @@ struct AggregateJavaUdafData { Block output_block; output_block.insert(ColumnWithTypeAndName(to.get_ptr(), result_type, "_result_")); - auto output_table_schema = JniConnector::parse_table_schema(&output_block); + auto output_table_schema = JniDataBridge::parse_table_schema(&output_block); std::string output_nullable = result_type->is_nullable() ? "true" : "false"; std::map output_params = {{"is_nullable", output_nullable}, {"required_fields", output_table_schema.first}, @@ -205,7 +205,7 @@ struct AggregateJavaUdafData { .with_arg(output_map) .call(&output_address)); - return JniConnector::fill_block(&output_block, {0}, output_address); + return JniDataBridge::fill_block(&output_block, {0}, output_address); } private: diff --git a/be/src/exprs/function/cast/function_cast.cpp b/be/src/exprs/function/cast/function_cast.cpp index be867df9898dff..5ee75d2ed5ce30 100644 --- a/be/src/exprs/function/cast/function_cast.cpp +++ b/be/src/exprs/function/cast/function_cast.cpp @@ -20,6 +20,7 @@ #include "core/data_type/data_type_agg_state.h" #include "core/data_type/data_type_decimal.h" #include "core/data_type/data_type_number.h" // IWYU pragma: keep +#include "core/data_type/data_type_quantilestate.h" #include "core/data_type/primitive_type.h" #include "exprs/function/cast/cast_to_array.h" #include "exprs/function/cast/cast_to_boolean.h" @@ -60,6 +61,18 @@ WrapperType create_bitmap_wrapper(FunctionContext* context, const DataTypePtr& f return CastWrapper::create_unsupport_wrapper("Cast to BitMap only support from String type"); } +WrapperType create_quantile_state_wrapper(FunctionContext* context, + const DataTypePtr& 
from_type_untyped, + const DataTypeQuantileState& to_type) { + /// Conversion from String through parsing. + if (check_and_get_data_type(from_type_untyped.get())) { + return cast_from_string_to_generic; + } + + return CastWrapper::create_unsupport_wrapper( + "Cast to QuantileState only support from String type"); +} + WrapperType create_varbinary_wrapper(const DataTypePtr& from_type_untyped) { /// Conversion from String through parsing. if (check_and_get_data_type(from_type_untyped.get())) { @@ -300,6 +313,9 @@ WrapperType prepare_impl(FunctionContext* context, const DataTypePtr& origin_fro case PrimitiveType::TYPE_BITMAP: return create_bitmap_wrapper(context, from_type, static_cast(*to_type)); + case PrimitiveType::TYPE_QUANTILE_STATE: + return create_quantile_state_wrapper(context, from_type, + static_cast(*to_type)); case PrimitiveType::TYPE_JSONB: return create_cast_to_jsonb_wrapper(from_type, static_cast(*to_type), context ? context->string_as_jsonb_string() : false); diff --git a/be/src/exprs/function/function_java_udf.cpp b/be/src/exprs/function/function_java_udf.cpp index 39cfa7bd45313d..4a0aecff0862df 100644 --- a/be/src/exprs/function/function_java_udf.cpp +++ b/be/src/exprs/function/function_java_udf.cpp @@ -24,7 +24,7 @@ #include "common/exception.h" #include "core/block/block.h" -#include "exec/connector/jni_connector.h" +#include "format/jni/jni_data_bridge.h" #include "jni.h" #include "runtime/exec_env.h" #include "runtime/user_function_cache.h" @@ -90,8 +90,8 @@ Status JavaFunctionCall::execute_impl(FunctionContext* context, Block& block, context->get_function_state(FunctionContext::THREAD_LOCAL)); SCOPED_TIMER(context->get_udf_execute_timer()); std::unique_ptr input_table; - RETURN_IF_ERROR(JniConnector::to_java_table(&block, num_rows, arguments, input_table)); - auto input_table_schema = JniConnector::parse_table_schema(&block, arguments, true); + RETURN_IF_ERROR(JniDataBridge::to_java_table(&block, num_rows, arguments, input_table)); + auto 
input_table_schema = JniDataBridge::parse_table_schema(&block, arguments, true); std::map input_params = { {"meta_address", std::to_string((long)input_table.get())}, {"required_fields", input_table_schema.first}, @@ -99,7 +99,7 @@ Status JavaFunctionCall::execute_impl(FunctionContext* context, Block& block, Jni::LocalObject input_map; RETURN_IF_ERROR(Jni::Util::convert_to_java_map(env, input_params, &input_map)); - auto output_table_schema = JniConnector::parse_table_schema(&block, {result}, true); + auto output_table_schema = JniDataBridge::parse_table_schema(&block, {result}, true); std::string output_nullable = block.get_by_position(result).type->is_nullable() ? "true" : "false"; std::map output_params = {{"is_nullable", output_nullable}, @@ -113,7 +113,7 @@ Status JavaFunctionCall::execute_impl(FunctionContext* context, Block& block, .with_arg(output_map) .call(&output_address)); - return JniConnector::fill_block(&block, {result}, output_address); + return JniDataBridge::fill_block(&block, {result}, output_address); } Status JavaFunctionCall::close(FunctionContext* context, diff --git a/be/src/exprs/table_function/udf_table_function.cpp b/be/src/exprs/table_function/udf_table_function.cpp index f2f49a0d2f7d96..17763ec3ba4109 100644 --- a/be/src/exprs/table_function/udf_table_function.cpp +++ b/be/src/exprs/table_function/udf_table_function.cpp @@ -27,9 +27,9 @@ #include "core/data_type/data_type_array.h" #include "core/data_type/data_type_factory.hpp" #include "core/types.h" -#include "exec/connector/jni_connector.h" #include "exprs/vexpr.h" #include "exprs/vexpr_context.h" +#include "format/jni/jni_data_bridge.h" #include "runtime/user_function_cache.h" namespace doris { @@ -98,8 +98,8 @@ Status UDFTableFunction::process_init(Block* block, RuntimeState* state) { RETURN_IF_ERROR(Jni::Env::Get(&env)); std::unique_ptr input_table; RETURN_IF_ERROR( - JniConnector::to_java_table(block, block->rows(), child_column_idxs, input_table)); - auto input_table_schema = 
JniConnector::parse_table_schema(block, child_column_idxs, true); + JniDataBridge::to_java_table(block, block->rows(), child_column_idxs, input_table)); + auto input_table_schema = JniDataBridge::parse_table_schema(block, child_column_idxs, true); std::map input_params = { {"meta_address", std::to_string((long)input_table.get())}, {"required_fields", input_table_schema.first}, @@ -110,7 +110,7 @@ Status UDFTableFunction::process_init(Block* block, RuntimeState* state) { _array_result_column = _return_type->create_column(); _result_column_idx = block->columns(); block->insert({_array_result_column, _return_type, "res"}); - auto output_table_schema = JniConnector::parse_table_schema(block, {_result_column_idx}, true); + auto output_table_schema = JniDataBridge::parse_table_schema(block, {_result_column_idx}, true); std::string output_nullable = _return_type->is_nullable() ? "true" : "false"; std::map output_params = {{"is_nullable", output_nullable}, {"required_fields", output_table_schema.first}, @@ -123,7 +123,7 @@ Status UDFTableFunction::process_init(Block* block, RuntimeState* state) { .with_arg(input_map) .with_arg(output_map) .call(&output_address)); - RETURN_IF_ERROR(JniConnector::fill_block(block, {_result_column_idx}, output_address)); + RETURN_IF_ERROR(JniDataBridge::fill_block(block, {_result_column_idx}, output_address)); block->erase(_result_column_idx); if (!extract_column_array_info(*_array_result_column, _array_column_detail)) { return Status::NotSupported("column type {} not supported now", diff --git a/be/src/format/avro/avro_jni_reader.cpp b/be/src/format/avro/avro_jni_reader.cpp deleted file mode 100644 index 74c58a14765c4e..00000000000000 --- a/be/src/format/avro/avro_jni_reader.cpp +++ /dev/null @@ -1,181 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "format/avro/avro_jni_reader.h" - -#include -#include - -#include "core/data_type/data_type_array.h" -#include "core/data_type/data_type_factory.hpp" -#include "core/data_type/data_type_map.h" -#include "core/data_type/data_type_struct.h" -#include "runtime/descriptors.h" - -namespace doris { -#include "common/compile_check_begin.h" - -AvroJNIReader::AvroJNIReader(RuntimeState* state, RuntimeProfile* profile, - const TFileScanRangeParams& params, - const std::vector& file_slot_descs, - const TFileRangeDesc& range) - : JniReader(file_slot_descs, state, profile), _params(params), _range(range) {} - -AvroJNIReader::AvroJNIReader(RuntimeProfile* profile, const TFileScanRangeParams& params, - const TFileRangeDesc& range, - const std::vector& file_slot_descs) - : JniReader(file_slot_descs, nullptr, profile), _params(params), _range(range) {} - -AvroJNIReader::~AvroJNIReader() = default; - -Status AvroJNIReader::get_next_block(Block* block, size_t* read_rows, bool* eof) { - RETURN_IF_ERROR(_jni_connector->get_next_block(block, read_rows, eof)); - if (*eof) { - RETURN_IF_ERROR(_jni_connector->close()); - } - return Status::OK(); -} - -Status AvroJNIReader::get_columns(std::unordered_map* name_to_type, - std::unordered_set* missing_cols) { - for (const auto& desc : _file_slot_descs) { - name_to_type->emplace(desc->col_name(), desc->type()); - } - return Status::OK(); -} - -Status 
AvroJNIReader::init_reader() { - std::ostringstream required_fields; - std::ostringstream columns_types; - std::vector column_names; - int index = 0; - for (const auto& desc : _file_slot_descs) { - std::string field = desc->col_name(); - column_names.emplace_back(field); - std::string type = JniConnector::get_jni_type_with_different_string(desc->type()); - if (index == 0) { - required_fields << field; - columns_types << type; - } else { - required_fields << "," << field; - columns_types << "#" << type; - } - index++; - } - - TFileType::type type = get_file_type(); - std::map required_param = { - {"required_fields", required_fields.str()}, - {"columns_types", columns_types.str()}, - {"file_type", std::to_string(type)}, - {"is_get_table_schema", "false"}, - {"hive.serde", "org.apache.hadoop.hive.serde2.avro.AvroSerDe"}}; - if (type == TFileType::FILE_S3) { - required_param.insert(_params.properties.begin(), _params.properties.end()); - } - required_param.insert( - std::make_pair("split_start_offset", std::to_string(_range.start_offset))); - required_param.insert(std::make_pair("split_size", std::to_string(_range.size))); - required_param.insert(std::make_pair("split_file_size", std::to_string(_range.file_size))); - required_param.insert(std::make_pair("uri", _range.path)); - _jni_connector = std::make_unique("org/apache/doris/avro/AvroJNIScanner", - required_param, column_names); - RETURN_IF_ERROR(_jni_connector->init()); - return _jni_connector->open(_state, _profile); -} - -TFileType::type AvroJNIReader::get_file_type() const { - TFileType::type type; - if (_range.__isset.file_type) { - // for compatibility - type = _range.file_type; - } else { - type = _params.file_type; - } - return type; -} - -// open the jni connector for parsing schema -Status AvroJNIReader::init_schema_reader() { - std::map required_param = {{"uri", _range.path}, - {"file_type", std::to_string(get_file_type())}, - {"is_get_table_schema", "true"}}; - - 
required_param.insert(_params.properties.begin(), _params.properties.end()); - _jni_connector = - std::make_unique("org/apache/doris/avro/AvroJNIScanner", required_param); - return _jni_connector->open(nullptr, _profile); -} - -Status AvroJNIReader::get_parsed_schema(std::vector* col_names, - std::vector* col_types) { - std::string table_schema_str; - RETURN_IF_ERROR(_jni_connector->get_table_schema(table_schema_str)); - - rapidjson::Document document; - document.Parse(table_schema_str.c_str()); - if (document.IsArray()) { - for (int i = 0; i < document.Size(); ++i) { - rapidjson::Value& column_schema = document[i]; - col_names->emplace_back(column_schema["name"].GetString()); - col_types->push_back(convert_to_doris_type(column_schema)); - } - } - return _jni_connector->close(); -} - -DataTypePtr AvroJNIReader::convert_to_doris_type(const rapidjson::Value& column_schema) { - auto schema_type = static_cast< ::doris::TPrimitiveType::type>(column_schema["type"].GetInt()); - switch (schema_type) { - case TPrimitiveType::INT: - case TPrimitiveType::STRING: - case TPrimitiveType::BIGINT: - case TPrimitiveType::BOOLEAN: - case TPrimitiveType::DOUBLE: - case TPrimitiveType::FLOAT: - case TPrimitiveType::BINARY: - return DataTypeFactory::instance().create_data_type(thrift_to_type(schema_type), true); - case TPrimitiveType::ARRAY: { - const rapidjson::Value& childColumns = column_schema["childColumns"]; - return make_nullable(std::make_shared( - make_nullable(convert_to_doris_type(childColumns[0])))); - } - case TPrimitiveType::MAP: { - const rapidjson::Value& childColumns = column_schema["childColumns"]; - return make_nullable(std::make_shared( - DataTypeFactory::instance().create_data_type(PrimitiveType::TYPE_STRING, true), - make_nullable(convert_to_doris_type(childColumns[1])))); - } - case TPrimitiveType::STRUCT: { - DataTypes res_data_types; - std::vector names; - const rapidjson::Value& childColumns = column_schema["childColumns"]; - for (auto i = 0; i < 
childColumns.Size(); i++) { - const rapidjson::Value& child = childColumns[i]; - res_data_types.push_back(make_nullable(convert_to_doris_type(child))); - names.push_back(std::string(child["name"].GetString())); - } - return make_nullable(std::make_shared(res_data_types, names)); - } - default: - throw Exception(Status::InternalError("Orc type is not supported!")); - return nullptr; - } -} - -#include "common/compile_check_end.h" -} // namespace doris diff --git a/be/src/format/avro/avro_jni_reader.h b/be/src/format/avro/avro_jni_reader.h deleted file mode 100644 index 861dce16126517..00000000000000 --- a/be/src/format/avro/avro_jni_reader.h +++ /dev/null @@ -1,84 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -#pragma once - -#include - -#include -#include -#include -#include -#include - -#include "common/status.h" -#include "format/jni_reader.h" -#include "storage/olap_scan_common.h" - -namespace doris { -class RuntimeProfile; -class RuntimeState; -class SlotDescriptor; -class Block; -} // namespace doris - -namespace doris { -#include "common/compile_check_begin.h" -/** - * Read avro-format file - */ -class AvroJNIReader : public JniReader { - ENABLE_FACTORY_CREATOR(AvroJNIReader); - -public: - /** - * Call java side by jni to get table data. - */ - AvroJNIReader(RuntimeState* state, RuntimeProfile* profile, const TFileScanRangeParams& params, - const std::vector& file_slot_descs, const TFileRangeDesc& range); - - /** - * Call java side by jni to get table schema. - */ - AvroJNIReader(RuntimeProfile* profile, const TFileScanRangeParams& params, - const TFileRangeDesc& range, const std::vector& file_slot_descs); - - ~AvroJNIReader() override; - - Status get_next_block(Block* block, size_t* read_rows, bool* eof) override; - - Status get_columns(std::unordered_map* name_to_type, - std::unordered_set* missing_cols) override; - - Status init_reader(); - - TFileType::type get_file_type() const; - - Status init_schema_reader() override; - - Status get_parsed_schema(std::vector* col_names, - std::vector* col_types) override; - - DataTypePtr convert_to_doris_type(const rapidjson::Value& column_schema); - -private: - const TFileScanRangeParams _params; - const TFileRangeDesc _range; -}; - -#include "common/compile_check_end.h" -} // namespace doris diff --git a/be/src/exec/connector/jni_connector.cpp b/be/src/format/jni/jni_data_bridge.cpp similarity index 63% rename from be/src/exec/connector/jni_connector.cpp rename to be/src/format/jni/jni_data_bridge.cpp index 6216500a29343f..d411922f464d41 100644 --- a/be/src/exec/connector/jni_connector.cpp +++ b/be/src/format/jni/jni_data_bridge.cpp @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under 
the License. -#include "exec/connector/jni_connector.h" +#include "jni_data_bridge.h" #include @@ -38,16 +38,9 @@ #include "core/data_type/primitive_type.h" #include "core/types.h" #include "core/value/decimalv2_value.h" -#include "jni.h" -#include "runtime/runtime_state.h" -#include "util/jni-util.h" namespace doris { #include "common/compile_check_begin.h" -class RuntimeProfile; -} // namespace doris - -namespace doris { #define FOR_FIXED_LENGTH_TYPES(M) \ M(PrimitiveType::TYPE_TINYINT, ColumnInt8, Int8) \ @@ -70,175 +63,7 @@ namespace doris { M(PrimitiveType::TYPE_IPV4, ColumnIPv4, IPv4) \ M(PrimitiveType::TYPE_IPV6, ColumnIPv6, IPv6) -Status JniConnector::open(RuntimeState* state, RuntimeProfile* profile) { - _state = state; - _profile = profile; - ADD_TIMER(_profile, _connector_name.c_str()); - _open_scanner_time = ADD_CHILD_TIMER(_profile, "OpenScannerTime", _connector_name.c_str()); - _java_scan_time = ADD_CHILD_TIMER(_profile, "JavaScanTime", _connector_name.c_str()); - _java_append_data_time = - ADD_CHILD_TIMER(_profile, "JavaAppendDataTime", _connector_name.c_str()); - _java_create_vector_table_time = - ADD_CHILD_TIMER(_profile, "JavaCreateVectorTableTime", _connector_name.c_str()); - _fill_block_time = ADD_CHILD_TIMER(_profile, "FillBlockTime", _connector_name.c_str()); - _max_time_split_weight_counter = _profile->add_conditition_counter( - "MaxTimeSplitWeight", TUnit::UNIT, [](int64_t _c, int64_t c) { return c > _c; }, - _connector_name.c_str()); - _java_scan_watcher = 0; - // cannot put the env into fields, because frames in an env object is limited - // to avoid limited frames in a thread, we should get local env in a method instead of in whole object. 
- JNIEnv* env = nullptr; - int batch_size = 0; - if (!_is_table_schema) { - batch_size = _state->batch_size(); - } - RETURN_IF_ERROR(Jni::Env::Get(&env)); - SCOPED_RAW_TIMER(&_jni_scanner_open_watcher); - _scanner_params.emplace("time_zone", _state->timezone()); - RETURN_IF_ERROR(_init_jni_scanner(env, batch_size)); - // Call org.apache.doris.common.jni.JniScanner#open - RETURN_IF_ERROR(_jni_scanner_obj.call_void_method(env, _jni_scanner_open).call()); - - RETURN_ERROR_IF_EXC(env); - _scanner_opened = true; - return Status::OK(); -} - -Status JniConnector::init() { - return Status::OK(); -} - -Status JniConnector::get_next_block(Block* block, size_t* read_rows, bool* eof) { - // Call org.apache.doris.common.jni.JniScanner#getNextBatchMeta - // return the address of meta information - JNIEnv* env = nullptr; - RETURN_IF_ERROR(Jni::Env::Get(&env)); - long meta_address = 0; - { - SCOPED_RAW_TIMER(&_java_scan_watcher); - RETURN_IF_ERROR(_jni_scanner_obj.call_long_method(env, _jni_scanner_get_next_batch) - .call(&meta_address)); - } - if (meta_address == 0) { - // Address == 0 when there's no data in scanner - *read_rows = 0; - *eof = true; - return Status::OK(); - } - _set_meta(meta_address); - long num_rows = _table_meta.next_meta_as_long(); - if (num_rows == 0) { - *read_rows = 0; - *eof = true; - return Status::OK(); - } - RETURN_IF_ERROR(_fill_block(block, num_rows)); - *read_rows = num_rows; - *eof = false; - RETURN_IF_ERROR(_jni_scanner_obj.call_void_method(env, _jni_scanner_release_table).call()); - _has_read += num_rows; - return Status::OK(); -} - -Status JniConnector::get_table_schema(std::string& table_schema_str) { - JNIEnv* env = nullptr; - RETURN_IF_ERROR(Jni::Env::Get(&env)); - - Jni::LocalString jstr; - RETURN_IF_ERROR( - _jni_scanner_obj.call_object_method(env, _jni_scanner_get_table_schema).call(&jstr)); - Jni::LocalStringBufferGuard cstr; - RETURN_IF_ERROR(jstr.get_string_chars(env, &cstr)); - table_schema_str = std::string {cstr.get()}; // copy to 
std::string - return Status::OK(); -} - -Status JniConnector::get_statistics(JNIEnv* env, std::map* result) { - result->clear(); - Jni::LocalObject metrics; - RETURN_IF_ERROR( - _jni_scanner_obj.call_object_method(env, _jni_scanner_get_statistics).call(&metrics)); - - RETURN_IF_ERROR(Jni::Util::convert_to_cpp_map(env, metrics, result)); - return Status::OK(); -} - -Status JniConnector::close() { - if (!_closed) { - JNIEnv* env = nullptr; - RETURN_IF_ERROR(Jni::Env::Get(&env)); - if (_scanner_opened) { - COUNTER_UPDATE(_open_scanner_time, _jni_scanner_open_watcher); - COUNTER_UPDATE(_fill_block_time, _fill_block_watcher); - - RETURN_ERROR_IF_EXC(env); - int64_t _append = 0; - RETURN_IF_ERROR( - _jni_scanner_obj.call_long_method(env, _jni_scanner_get_append_data_time) - .call(&_append)); - - COUNTER_UPDATE(_java_append_data_time, _append); - - int64_t _create = 0; - RETURN_IF_ERROR( - _jni_scanner_obj - .call_long_method(env, _jni_scanner_get_create_vector_table_time) - .call(&_create)); - - COUNTER_UPDATE(_java_create_vector_table_time, _create); - - COUNTER_UPDATE(_java_scan_time, _java_scan_watcher - _append - _create); - - _max_time_split_weight_counter->conditional_update( - _jni_scanner_open_watcher + _fill_block_watcher + _java_scan_watcher, - _self_split_weight); - - // _fill_block may be failed and returned, we should release table in close. 
- // org.apache.doris.common.jni.JniScanner#releaseTable is idempotent - RETURN_IF_ERROR( - _jni_scanner_obj.call_void_method(env, _jni_scanner_release_table).call()); - RETURN_IF_ERROR(_jni_scanner_obj.call_void_method(env, _jni_scanner_close).call()); - } - } - return Status::OK(); -} - -Status JniConnector::_init_jni_scanner(JNIEnv* env, int batch_size) { - RETURN_IF_ERROR( - Jni::Util::get_jni_scanner_class(env, _connector_class.c_str(), &_jni_scanner_cls)); - - Jni::MethodId scanner_constructor; - RETURN_IF_ERROR(_jni_scanner_cls.get_method(env, "", "(ILjava/util/Map;)V", - &scanner_constructor)); - - // prepare constructor parameters - Jni::LocalObject hashmap_object; - RETURN_IF_ERROR(Jni::Util::convert_to_java_map(env, _scanner_params, &hashmap_object)); - RETURN_IF_ERROR(_jni_scanner_cls.new_object(env, scanner_constructor) - .with_arg(batch_size) - .with_arg(hashmap_object) - .call(&_jni_scanner_obj)); - - RETURN_IF_ERROR(_jni_scanner_cls.get_method(env, "open", "()V", &_jni_scanner_open)); - RETURN_IF_ERROR(_jni_scanner_cls.get_method(env, "getNextBatchMeta", "()J", - &_jni_scanner_get_next_batch)); - RETURN_IF_ERROR(_jni_scanner_cls.get_method(env, "getAppendDataTime", "()J", - &_jni_scanner_get_append_data_time)); - RETURN_IF_ERROR(_jni_scanner_cls.get_method(env, "getCreateVectorTableTime", "()J", - &_jni_scanner_get_create_vector_table_time)); - RETURN_IF_ERROR(_jni_scanner_cls.get_method(env, "getTableSchema", "()Ljava/lang/String;", - &_jni_scanner_get_table_schema)); - RETURN_IF_ERROR(_jni_scanner_cls.get_method(env, "close", "()V", &_jni_scanner_close)); - RETURN_IF_ERROR(_jni_scanner_cls.get_method(env, "releaseColumn", "(I)V", - &_jni_scanner_release_column)); - RETURN_IF_ERROR( - _jni_scanner_cls.get_method(env, "releaseTable", "()V", &_jni_scanner_release_table)); - RETURN_IF_ERROR(_jni_scanner_cls.get_method(env, "getStatistics", "()Ljava/util/Map;", - &_jni_scanner_get_statistics)); - return Status::OK(); -} - -Status 
JniConnector::fill_block(Block* block, const ColumnNumbers& arguments, long table_address) { +Status JniDataBridge::fill_block(Block* block, const ColumnNumbers& arguments, long table_address) { if (table_address == 0) { return Status::InternalError("table_address is 0"); } @@ -268,31 +93,12 @@ Status JniConnector::fill_block(Block* block, const ColumnNumbers& arguments, lo auto& column_with_type_and_name = block->get_by_position(i); auto& column_ptr = column_with_type_and_name.column; auto& column_type = column_with_type_and_name.type; - RETURN_IF_ERROR(_fill_column(table_meta, column_ptr, column_type, num_rows)); + RETURN_IF_ERROR(fill_column(table_meta, column_ptr, column_type, num_rows)); } return Status::OK(); } -Status JniConnector::_fill_block(Block* block, size_t num_rows) { - SCOPED_RAW_TIMER(&_fill_block_watcher); - JNIEnv* env = nullptr; - RETURN_IF_ERROR(Jni::Env::Get(&env)); - for (int i = 0; i < _column_names.size(); ++i) { - auto& column_with_type_and_name = - block->get_by_position(_col_name_to_block_idx->at(_column_names[i])); - auto& column_ptr = column_with_type_and_name.column; - auto& column_type = column_with_type_and_name.type; - RETURN_IF_ERROR(_fill_column(_table_meta, column_ptr, column_type, num_rows)); - // Column is not released when _fill_column failed. It will be released when releasing table. - RETURN_IF_ERROR(_jni_scanner_obj.call_void_method(env, _jni_scanner_release_column) - .with_arg(i) - .call()); - RETURN_ERROR_IF_EXC(env); - } - return Status::OK(); -} - -Status JniConnector::_fill_column(TableMetaAddress& address, ColumnPtr& doris_column, +Status JniDataBridge::fill_column(TableMetaAddress& address, ColumnPtr& doris_column, DataTypePtr& data_type, size_t num_rows) { auto logical_type = data_type->get_primitive_type(); void* null_map_ptr = address.next_meta_as_ptr(); @@ -314,8 +120,6 @@ Status JniConnector::_fill_column(TableMetaAddress& address, ColumnPtr& doris_co } // Date and DateTime are deprecated and not supported. 
switch (logical_type) { - //FIXME: in Doris we check data then insert. jdbc external table may have some data invalid for doris. - // should add check otherwise it may break some of our assumption now. #define DISPATCH(TYPE_INDEX, COLUMN_TYPE, CPP_TYPE) \ case TYPE_INDEX: \ return _fill_fixed_length_column( \ @@ -342,8 +146,8 @@ Status JniConnector::_fill_column(TableMetaAddress& address, ColumnPtr& doris_co return Status::OK(); } -Status JniConnector::_fill_varbinary_column(TableMetaAddress& address, - MutableColumnPtr& doris_column, size_t num_rows) { +Status JniDataBridge::_fill_varbinary_column(TableMetaAddress& address, + MutableColumnPtr& doris_column, size_t num_rows) { auto* meta_base = reinterpret_cast(address.next_meta_as_ptr()); auto& varbinary_col = assert_cast(*doris_column); // Java side writes per-row metadata as 16 bytes: [len: long][addr: long] @@ -364,8 +168,8 @@ Status JniConnector::_fill_varbinary_column(TableMetaAddress& address, return Status::OK(); } -Status JniConnector::_fill_string_column(TableMetaAddress& address, MutableColumnPtr& doris_column, - size_t num_rows) { +Status JniDataBridge::_fill_string_column(TableMetaAddress& address, MutableColumnPtr& doris_column, + size_t num_rows) { const auto& string_col = static_cast(*doris_column); auto& string_chars = const_cast(string_col.get_chars()); auto& string_offsets = const_cast(string_col.get_offsets()); @@ -393,8 +197,8 @@ Status JniConnector::_fill_string_column(TableMetaAddress& address, MutableColum return Status::OK(); } -Status JniConnector::_fill_array_column(TableMetaAddress& address, MutableColumnPtr& doris_column, - DataTypePtr& data_type, size_t num_rows) { +Status JniDataBridge::_fill_array_column(TableMetaAddress& address, MutableColumnPtr& doris_column, + DataTypePtr& data_type, size_t num_rows) { ColumnPtr& element_column = static_cast(*doris_column).get_data_ptr(); DataTypePtr& element_type = const_cast( (assert_cast(remove_nullable(data_type).get())) @@ -409,14 +213,12 @@ 
Status JniConnector::_fill_array_column(TableMetaAddress& address, MutableColumn offsets_data[origin_size + i] = offsets[i] + start_offset; } - // offsets[num_rows - 1] == offsets_data[origin_size + num_rows - 1] - start_offset - // but num_row equals 0 when there are all empty arrays - return _fill_column(address, element_column, element_type, - offsets_data[origin_size + num_rows - 1] - start_offset); + return fill_column(address, element_column, element_type, + offsets_data[origin_size + num_rows - 1] - start_offset); } -Status JniConnector::_fill_map_column(TableMetaAddress& address, MutableColumnPtr& doris_column, - DataTypePtr& data_type, size_t num_rows) { +Status JniDataBridge::_fill_map_column(TableMetaAddress& address, MutableColumnPtr& doris_column, + DataTypePtr& data_type, size_t num_rows) { auto& map = static_cast(*doris_column); DataTypePtr& key_type = const_cast( reinterpret_cast(remove_nullable(data_type).get())->get_key_type()); @@ -435,27 +237,27 @@ Status JniConnector::_fill_map_column(TableMetaAddress& address, MutableColumnPt map_offsets[origin_size + i] = offsets[i] + start_offset; } - RETURN_IF_ERROR(_fill_column(address, key_column, key_type, - map_offsets[origin_size + num_rows - 1] - start_offset)); - RETURN_IF_ERROR(_fill_column(address, value_column, value_type, - map_offsets[origin_size + num_rows - 1] - start_offset)); + RETURN_IF_ERROR(fill_column(address, key_column, key_type, + map_offsets[origin_size + num_rows - 1] - start_offset)); + RETURN_IF_ERROR(fill_column(address, value_column, value_type, + map_offsets[origin_size + num_rows - 1] - start_offset)); return Status::OK(); } -Status JniConnector::_fill_struct_column(TableMetaAddress& address, MutableColumnPtr& doris_column, - DataTypePtr& data_type, size_t num_rows) { +Status JniDataBridge::_fill_struct_column(TableMetaAddress& address, MutableColumnPtr& doris_column, + DataTypePtr& data_type, size_t num_rows) { auto& doris_struct = static_cast(*doris_column); const 
DataTypeStruct* doris_struct_type = reinterpret_cast(remove_nullable(data_type).get()); for (int i = 0; i < doris_struct.tuple_size(); ++i) { ColumnPtr& struct_field = doris_struct.get_column_ptr(i); DataTypePtr& field_type = const_cast(doris_struct_type->get_element(i)); - RETURN_IF_ERROR(_fill_column(address, struct_field, field_type, num_rows)); + RETURN_IF_ERROR(fill_column(address, struct_field, field_type, num_rows)); } return Status::OK(); } -std::string JniConnector::get_jni_type(const DataTypePtr& data_type) { +std::string JniDataBridge::get_jni_type(const DataTypePtr& data_type) { DataTypePtr type = remove_nullable(data_type); std::ostringstream buffer; switch (type->get_primitive_type()) { @@ -545,12 +347,21 @@ std::string JniConnector::get_jni_type(const DataTypePtr& data_type) { } case TYPE_VARBINARY: return "varbinary"; + // bitmap, hll, quantile_state, jsonb are transferred as strings via JNI + case TYPE_BITMAP: + [[fallthrough]]; + case TYPE_HLL: + [[fallthrough]]; + case TYPE_QUANTILE_STATE: + [[fallthrough]]; + case TYPE_JSONB: + return "string"; default: return "unsupported"; } } -std::string JniConnector::get_jni_type_with_different_string(const DataTypePtr& data_type) { +std::string JniDataBridge::get_jni_type_with_different_string(const DataTypePtr& data_type) { DataTypePtr type = remove_nullable(data_type); std::ostringstream buffer; switch (data_type->get_primitive_type()) { @@ -656,13 +467,22 @@ std::string JniConnector::get_jni_type_with_different_string(const DataTypePtr& << get_jni_type_with_different_string(type_map->get_value_type()) << ">"; return buffer.str(); } + // bitmap, hll, quantile_state, jsonb are transferred as strings via JNI + case TYPE_BITMAP: + [[fallthrough]]; + case TYPE_HLL: + [[fallthrough]]; + case TYPE_QUANTILE_STATE: + [[fallthrough]]; + case TYPE_JSONB: + return "string"; default: return "unsupported"; } } -Status JniConnector::_fill_column_meta(const ColumnPtr& doris_column, const DataTypePtr& data_type, - 
std::vector& meta_data) { +Status JniDataBridge::_fill_column_meta(const ColumnPtr& doris_column, const DataTypePtr& data_type, + std::vector& meta_data) { auto logical_type = data_type->get_primitive_type(); const IColumn* column = nullptr; // insert const flag @@ -700,7 +520,7 @@ Status JniConnector::_fill_column_meta(const ColumnPtr& doris_column, const Data [[fallthrough]]; case PrimitiveType::TYPE_VARCHAR: { const auto& string_column = assert_cast(*data_column); - // inert offsets + // insert offsets meta_data.emplace_back((long)string_column.get_offsets().data()); meta_data.emplace_back((long)string_column.get_chars().data()); break; @@ -753,7 +573,7 @@ Status JniConnector::_fill_column_meta(const ColumnPtr& doris_column, const Data return Status::OK(); } -Status JniConnector::to_java_table(Block* block, std::unique_ptr& meta) { +Status JniDataBridge::to_java_table(Block* block, std::unique_ptr& meta) { ColumnNumbers arguments; for (size_t i = 0; i < block->columns(); ++i) { arguments.emplace_back(i); @@ -761,8 +581,8 @@ Status JniConnector::to_java_table(Block* block, std::unique_ptr& meta) return to_java_table(block, block->rows(), arguments, meta); } -Status JniConnector::to_java_table(Block* block, size_t num_rows, const ColumnNumbers& arguments, - std::unique_ptr& meta) { +Status JniDataBridge::to_java_table(Block* block, size_t num_rows, const ColumnNumbers& arguments, + std::unique_ptr& meta) { std::vector meta_data; // insert number of rows meta_data.emplace_back(num_rows); @@ -777,16 +597,13 @@ Status JniConnector::to_java_table(Block* block, size_t num_rows, const ColumnNu return Status::OK(); } -std::pair JniConnector::parse_table_schema(Block* block, - const ColumnNumbers& arguments, - bool ignore_column_name) { +std::pair JniDataBridge::parse_table_schema( + Block* block, const ColumnNumbers& arguments, bool ignore_column_name) { // prepare table schema std::ostringstream required_fields; std::ostringstream columns_types; for (int i = 0; i < 
arguments.size(); ++i) { - // column name maybe empty or has special characters - // std::string field = block->get_by_position(i).name; - std::string type = JniConnector::get_jni_type(block->get_by_position(arguments[i]).type); + std::string type = JniDataBridge::get_jni_type(block->get_by_position(arguments[i]).type); if (i == 0) { if (ignore_column_name) { required_fields << "_col_" << arguments[i]; @@ -807,7 +624,7 @@ std::pair JniConnector::parse_table_schema(Block* bloc return std::make_pair(required_fields.str(), columns_types.str()); } -std::pair JniConnector::parse_table_schema(Block* block) { +std::pair JniDataBridge::parse_table_schema(Block* block) { ColumnNumbers arguments; for (size_t i = 0; i < block->columns(); ++i) { arguments.emplace_back(i); @@ -815,47 +632,5 @@ std::pair JniConnector::parse_table_schema(Block* bloc return parse_table_schema(block, arguments, true); } -void JniConnector::_collect_profile_before_close() { - if (_scanner_opened && _profile != nullptr) { - JNIEnv* env = nullptr; - Status st = Jni::Env::Get(&env); - if (!st) { - LOG(WARNING) << "failed to get jni env when collect profile: " << st; - return; - } - // update scanner metrics - std::map statistics_result; - st = get_statistics(env, &statistics_result); - if (!st) { - LOG(WARNING) << "failed to get_statistics when collect profile: " << st; - return; - } - - for (const auto& metric : statistics_result) { - std::vector type_and_name = split(metric.first, ":"); - if (type_and_name.size() != 2) { - LOG(WARNING) << "Name of JNI Scanner metric should be pattern like " - << "'metricType:metricName'"; - continue; - } - long metric_value = std::stol(metric.second); - RuntimeProfile::Counter* scanner_counter; - if (type_and_name[0] == "timer") { - scanner_counter = - ADD_CHILD_TIMER(_profile, type_and_name[1], _connector_name.c_str()); - } else if (type_and_name[0] == "counter") { - scanner_counter = ADD_CHILD_COUNTER(_profile, type_and_name[1], TUnit::UNIT, - 
_connector_name.c_str()); - } else if (type_and_name[0] == "bytes") { - scanner_counter = ADD_CHILD_COUNTER(_profile, type_and_name[1], TUnit::BYTES, - _connector_name.c_str()); - } else { - LOG(WARNING) << "Type of JNI Scanner metric should be timer, counter or bytes"; - continue; - } - COUNTER_UPDATE(scanner_counter, metric_value); - } - } -} #include "common/compile_check_end.h" } // namespace doris diff --git a/be/src/format/jni/jni_data_bridge.h b/be/src/format/jni/jni_data_bridge.h new file mode 100644 index 00000000000000..e5999947c7fc47 --- /dev/null +++ b/be/src/format/jni/jni_data_bridge.h @@ -0,0 +1,239 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#pragma once + +#include +#include +#include +#include + +#include "common/status.h" +#include "core/column/column_decimal.h" +#include "core/data_type/data_type.h" +#include "core/data_type/define_primitive_type.h" +#include "core/data_type/primitive_type.h" +#include "core/string_ref.h" +#include "core/types.h" +#include "exprs/aggregate/aggregate_function.h" + +namespace doris { +#include "common/compile_check_begin.h" + +class Block; +template +class ColumnDecimal; +template +class ColumnVector; + +/** + * JniDataBridge is a stateless utility class that handles data exchange + * between C++ Blocks and Java-side shared memory via JNI. + * + * It is data-source agnostic — it only cares about data types and Block structure. + * All methods are static. + * + * This class was extracted from JniConnector to separate the data exchange + * concerns from the JNI scanner lifecycle management. + */ +class JniDataBridge { +public: + /** + * Helper class to read metadata from the address returned by Java side. + * The metadata is stored as a long array in shared memory. + */ + class TableMetaAddress { + private: + long* _meta_ptr; + int _meta_index; + + public: + TableMetaAddress() { + _meta_ptr = nullptr; + _meta_index = 0; + } + + TableMetaAddress(long meta_addr) { + _meta_ptr = static_cast(reinterpret_cast(meta_addr)); + _meta_index = 0; + } + + void set_meta(long meta_addr) { + _meta_ptr = static_cast(reinterpret_cast(meta_addr)); + _meta_index = 0; + } + + long next_meta_as_long() { return _meta_ptr[_meta_index++]; } + + void* next_meta_as_ptr() { return reinterpret_cast(_meta_ptr[_meta_index++]); } + }; + + // ========================================================================= + // Read direction: Java shared memory → C++ Block + // ========================================================================= + + /** + * Fill specified columns in a Block from a Java-side table address. 
+ * The table_address points to metadata returned by Java JniScanner/JdbcExecutor. + */ + static Status fill_block(Block* block, const ColumnNumbers& arguments, long table_address); + + /** + * Fill a single column from a TableMetaAddress. Supports all Doris types + * including nested types (Array, Map, Struct). + */ + static Status fill_column(TableMetaAddress& address, ColumnPtr& doris_column, + const DataTypePtr& data_type, size_t num_rows); + + // ========================================================================= + // Write direction: C++ Block → Java shared memory + // ========================================================================= + + /** + * Serialize all columns of a Block into a long[] metadata array + * that Java side can read via VectorTable.createReadableTable(). + */ + static Status to_java_table(Block* block, std::unique_ptr& meta); + + /** + * Serialize specified columns of a Block into a long[] metadata array. + */ + static Status to_java_table(Block* block, size_t num_rows, const ColumnNumbers& arguments, + std::unique_ptr& meta); + + /** + * Parse Block schema into JNI format strings. + * Returns (required_fields, columns_types) pair. + */ + static std::pair parse_table_schema(Block* block); + + static std::pair parse_table_schema(Block* block, + const ColumnNumbers& arguments, + bool ignore_column_name = true); + + // ========================================================================= + // Type mapping + // ========================================================================= + + /** + * Convert a Doris DataType to its JNI type string representation. + * e.g., TYPE_INT -> "int", TYPE_DECIMAL128I -> "decimal128(p,s)" + */ + static std::string get_jni_type(const DataTypePtr& data_type); + + /** + * Like get_jni_type but preserves varchar/char length info in the type string. 
+ * e.g., TYPE_VARCHAR -> "varchar(len)" instead of just "string" + */ + static std::string get_jni_type_with_different_string(const DataTypePtr& data_type); + +private: + // Column fill helpers for various types + static Status _fill_string_column(TableMetaAddress& address, MutableColumnPtr& doris_column, + size_t num_rows); + + static Status _fill_varbinary_column(TableMetaAddress& address, MutableColumnPtr& doris_column, + size_t num_rows); + + static Status _fill_array_column(TableMetaAddress& address, MutableColumnPtr& doris_column, + const DataTypePtr& data_type, size_t num_rows); + + static Status _fill_map_column(TableMetaAddress& address, MutableColumnPtr& doris_column, + const DataTypePtr& data_type, size_t num_rows); + + static Status _fill_struct_column(TableMetaAddress& address, MutableColumnPtr& doris_column, + const DataTypePtr& data_type, size_t num_rows); + + /** + * Fill column metadata (addresses) for a single column, used by to_java_table. + */ + static Status _fill_column_meta(const ColumnPtr& doris_column, const DataTypePtr& data_type, + std::vector& meta_data); + + // Fixed-length column fill specializations + template + requires(!std::is_same_v && + !std::is_same_v && + !std::is_same_v && + !std::is_same_v && + !std::is_same_v && + !std::is_same_v) + static Status _fill_fixed_length_column(MutableColumnPtr& doris_column, CPP_TYPE* ptr, + size_t num_rows) { + auto& column_data = assert_cast(*doris_column).get_data(); + size_t origin_size = column_data.size(); + column_data.resize(origin_size + num_rows); + memcpy(column_data.data() + origin_size, ptr, sizeof(CPP_TYPE) * num_rows); + return Status::OK(); + } + + template + requires(std::is_same_v || + std::is_same_v) + static Status _fill_fixed_length_column(MutableColumnPtr& doris_column, CPP_TYPE* ptr, + size_t num_rows) { + auto& column_data = assert_cast(*doris_column).get_data(); + size_t origin_size = column_data.size(); + column_data.resize(origin_size + num_rows); + 
memcpy((int64_t*)column_data.data() + origin_size, ptr, sizeof(CPP_TYPE) * num_rows); + return Status::OK(); + } + + template + requires(std::is_same_v) + static Status _fill_fixed_length_column(MutableColumnPtr& doris_column, CPP_TYPE* ptr, + size_t num_rows) { + auto& column_data = assert_cast(*doris_column).get_data(); + size_t origin_size = column_data.size(); + column_data.resize(origin_size + num_rows); + memcpy((uint32_t*)column_data.data() + origin_size, ptr, sizeof(CPP_TYPE) * num_rows); + return Status::OK(); + } + + template + requires(std::is_same_v || + std::is_same_v) + static Status _fill_fixed_length_column(MutableColumnPtr& doris_column, CPP_TYPE* ptr, + size_t num_rows) { + auto& column_data = assert_cast(*doris_column).get_data(); + size_t origin_size = column_data.size(); + column_data.resize(origin_size + num_rows); + memcpy((uint64_t*)column_data.data() + origin_size, ptr, sizeof(CPP_TYPE) * num_rows); + return Status::OK(); + } + + template + requires(std::is_same_v) + static Status _fill_fixed_length_column(MutableColumnPtr& doris_column, CPP_TYPE* ptr, + size_t num_rows) { + auto& column_data = assert_cast(*doris_column).get_data(); + size_t origin_size = column_data.size(); + column_data.resize(origin_size + num_rows); + for (size_t i = 0; i < num_rows; i++) { + column_data[origin_size + i] = DecimalV2Value(ptr[i]); + } + return Status::OK(); + } + + template + static long _get_fixed_length_column_address(const IColumn& doris_column) { + return (long)assert_cast(doris_column).get_data().data(); + } +}; + +#include "common/compile_check_end.h" +} // namespace doris diff --git a/be/src/format/jni/jni_reader.cpp b/be/src/format/jni/jni_reader.cpp new file mode 100644 index 00000000000000..22e26d829c2010 --- /dev/null +++ b/be/src/format/jni/jni_reader.cpp @@ -0,0 +1,384 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "jni_reader.h" + +#include + +#include +#include +#include + +#include "core/block/block.h" +#include "core/types.h" +#include "format/jni/jni_data_bridge.h" +#include "runtime/descriptors.h" +#include "runtime/runtime_state.h" +#include "util/jni-util.h" + +namespace doris { +#include "common/compile_check_begin.h" +class RuntimeProfile; +class RuntimeState; + +class Block; +} // namespace doris + +namespace doris { + +const std::vector JniReader::_s_empty_slot_descs; + +// ========================================================================= +// JniReader constructors +// ========================================================================= + +JniReader::JniReader(const std::vector& file_slot_descs, RuntimeState* state, + RuntimeProfile* profile, std::string connector_class, + std::map scanner_params, + std::vector column_names, int64_t self_split_weight) + : _file_slot_descs(file_slot_descs), + _state(state), + _profile(profile), + _connector_class(std::move(connector_class)), + _scanner_params(std::move(scanner_params)), + _column_names(std::move(column_names)), + _self_split_weight(static_cast(self_split_weight)) { + _connector_name = split(_connector_class, "/").back(); +} + +JniReader::JniReader(std::string connector_class, std::map scanner_params) 
+ : _file_slot_descs(_s_empty_slot_descs), + _connector_class(std::move(connector_class)), + _scanner_params(std::move(scanner_params)) { + _is_table_schema = true; + _connector_name = split(_connector_class, "/").back(); +} + +// ========================================================================= +// JniReader::open (merged from JniConnector::open) +// ========================================================================= + +Status JniReader::open(RuntimeState* state, RuntimeProfile* profile) { + _state = state; + _profile = profile; + if (_profile) { + ADD_TIMER(_profile, _connector_name.c_str()); + _open_scanner_time = ADD_CHILD_TIMER(_profile, "OpenScannerTime", _connector_name.c_str()); + _java_scan_time = ADD_CHILD_TIMER(_profile, "JavaScanTime", _connector_name.c_str()); + _java_append_data_time = + ADD_CHILD_TIMER(_profile, "JavaAppendDataTime", _connector_name.c_str()); + _java_create_vector_table_time = + ADD_CHILD_TIMER(_profile, "JavaCreateVectorTableTime", _connector_name.c_str()); + _fill_block_time = ADD_CHILD_TIMER(_profile, "FillBlockTime", _connector_name.c_str()); + _max_time_split_weight_counter = _profile->add_conditition_counter( + "MaxTimeSplitWeight", TUnit::UNIT, [](int64_t _c, int64_t c) { return c > _c; }, + _connector_name.c_str()); + } + _java_scan_watcher = 0; + + JNIEnv* env = nullptr; + int batch_size = 0; + if (!_is_table_schema && _state) { + batch_size = _state->batch_size(); + } + RETURN_IF_ERROR(Jni::Env::Get(&env)); + SCOPED_RAW_TIMER(&_jni_scanner_open_watcher); + if (_state) { + _scanner_params.emplace("time_zone", _state->timezone()); + } + RETURN_IF_ERROR(_init_jni_scanner(env, batch_size)); + // Call org.apache.doris.common.jni.JniScanner#open + RETURN_IF_ERROR(_jni_scanner_obj.call_void_method(env, _jni_scanner_open).call()); + + RETURN_ERROR_IF_EXC(env); + _scanner_opened = true; + return Status::OK(); +} + +// ========================================================================= +// 
JniReader::get_next_block (merged from JniConnector::get_next_block) +// ========================================================================= + +Status JniReader::get_next_block(Block* block, size_t* read_rows, bool* eof) { + JNIEnv* env = nullptr; + RETURN_IF_ERROR(Jni::Env::Get(&env)); + long meta_address = 0; + { + SCOPED_RAW_TIMER(&_java_scan_watcher); + RETURN_IF_ERROR(_jni_scanner_obj.call_long_method(env, _jni_scanner_get_next_batch) + .call(&meta_address)); + } + if (meta_address == 0) { + *read_rows = 0; + *eof = true; + return Status::OK(); + } + _set_meta(meta_address); + long num_rows = _table_meta.next_meta_as_long(); + if (num_rows == 0) { + *read_rows = 0; + *eof = true; + return Status::OK(); + } + RETURN_IF_ERROR(_fill_block(block, num_rows)); + *read_rows = num_rows; + *eof = false; + RETURN_IF_ERROR(_jni_scanner_obj.call_void_method(env, _jni_scanner_release_table).call()); + _has_read += num_rows; + return Status::OK(); +} + +// ========================================================================= +// JniReader::get_table_schema (merged from JniConnector::get_table_schema) +// ========================================================================= + +Status JniReader::get_table_schema(std::string& table_schema_str) { + JNIEnv* env = nullptr; + RETURN_IF_ERROR(Jni::Env::Get(&env)); + + Jni::LocalString jstr; + RETURN_IF_ERROR( + _jni_scanner_obj.call_object_method(env, _jni_scanner_get_table_schema).call(&jstr)); + Jni::LocalStringBufferGuard cstr; + RETURN_IF_ERROR(jstr.get_string_chars(env, &cstr)); + table_schema_str = std::string {cstr.get()}; + return Status::OK(); +} + +// ========================================================================= +// JniReader::close (merged from JniConnector::close) +// ========================================================================= + +Status JniReader::close() { + if (!_closed) { + _closed = true; + JNIEnv* env = nullptr; + RETURN_IF_ERROR(Jni::Env::Get(&env)); + if (_scanner_opened) { 
+ if (_profile) { + COUNTER_UPDATE(_open_scanner_time, _jni_scanner_open_watcher); + COUNTER_UPDATE(_fill_block_time, _fill_block_watcher); + } + + RETURN_ERROR_IF_EXC(env); + jlong _append = 0; + RETURN_IF_ERROR( + _jni_scanner_obj.call_long_method(env, _jni_scanner_get_append_data_time) + .call(&_append)); + + if (_profile) { + COUNTER_UPDATE(_java_append_data_time, _append); + } + + jlong _create = 0; + RETURN_IF_ERROR( + _jni_scanner_obj + .call_long_method(env, _jni_scanner_get_create_vector_table_time) + .call(&_create)); + + if (_profile) { + COUNTER_UPDATE(_java_create_vector_table_time, _create); + COUNTER_UPDATE(_java_scan_time, _java_scan_watcher - _append - _create); + _max_time_split_weight_counter->conditional_update( + _jni_scanner_open_watcher + _fill_block_watcher + _java_scan_watcher, + _self_split_weight); + } + + // _fill_block may be failed and returned, we should release table in close. + // org.apache.doris.common.jni.JniScanner#releaseTable is idempotent + RETURN_IF_ERROR( + _jni_scanner_obj.call_void_method(env, _jni_scanner_release_table).call()); + RETURN_IF_ERROR(_jni_scanner_obj.call_void_method(env, _jni_scanner_close).call()); + } + } + return Status::OK(); +} + +// ========================================================================= +// JniReader::_init_jni_scanner (merged from JniConnector::_init_jni_scanner) +// ========================================================================= + +Status JniReader::_init_jni_scanner(JNIEnv* env, int batch_size) { + RETURN_IF_ERROR( + Jni::Util::get_jni_scanner_class(env, _connector_class.c_str(), &_jni_scanner_cls)); + + Jni::MethodId scanner_constructor; + RETURN_IF_ERROR(_jni_scanner_cls.get_method(env, "<init>", "(ILjava/util/Map;)V", + &scanner_constructor)); + + // prepare constructor parameters + Jni::LocalObject hashmap_object; + RETURN_IF_ERROR(Jni::Util::convert_to_java_map(env, _scanner_params, &hashmap_object)); + RETURN_IF_ERROR(_jni_scanner_cls.new_object(env, 
scanner_constructor) + .with_arg(batch_size) + .with_arg(hashmap_object) + .call(&_jni_scanner_obj)); + + RETURN_IF_ERROR(_jni_scanner_cls.get_method(env, "open", "()V", &_jni_scanner_open)); + RETURN_IF_ERROR(_jni_scanner_cls.get_method(env, "getNextBatchMeta", "()J", + &_jni_scanner_get_next_batch)); + RETURN_IF_ERROR(_jni_scanner_cls.get_method(env, "getAppendDataTime", "()J", + &_jni_scanner_get_append_data_time)); + RETURN_IF_ERROR(_jni_scanner_cls.get_method(env, "getCreateVectorTableTime", "()J", + &_jni_scanner_get_create_vector_table_time)); + RETURN_IF_ERROR(_jni_scanner_cls.get_method(env, "getTableSchema", "()Ljava/lang/String;", + &_jni_scanner_get_table_schema)); + RETURN_IF_ERROR(_jni_scanner_cls.get_method(env, "close", "()V", &_jni_scanner_close)); + RETURN_IF_ERROR(_jni_scanner_cls.get_method(env, "releaseColumn", "(I)V", + &_jni_scanner_release_column)); + RETURN_IF_ERROR( + _jni_scanner_cls.get_method(env, "releaseTable", "()V", &_jni_scanner_release_table)); + RETURN_IF_ERROR(_jni_scanner_cls.get_method(env, "getStatistics", "()Ljava/util/Map;", + &_jni_scanner_get_statistics)); + return Status::OK(); +} + +// ========================================================================= +// JniReader::_fill_block (merged from JniConnector::_fill_block) +// ========================================================================= + +Status JniReader::_fill_block(Block* block, size_t num_rows) { + SCOPED_RAW_TIMER(&_fill_block_watcher); + JNIEnv* env = nullptr; + RETURN_IF_ERROR(Jni::Env::Get(&env)); + // Fallback: if _col_name_to_block_idx was not set by the caller (e.g. JdbcScanner), + // build the name-to-position map from the block itself. 
+ std::unordered_map local_name_to_idx; + const std::unordered_map* col_map = _col_name_to_block_idx; + if (col_map == nullptr) { + local_name_to_idx = block->get_name_to_pos_map(); + col_map = &local_name_to_idx; + } + for (int i = 0; i < _column_names.size(); ++i) { + auto& column_with_type_and_name = block->get_by_position(col_map->at(_column_names[i])); + auto& column_ptr = column_with_type_and_name.column; + auto& column_type = column_with_type_and_name.type; + RETURN_IF_ERROR(JniDataBridge::fill_column(_table_meta, column_ptr, column_type, num_rows)); + // Column is not released when fill_column failed. It will be released when releasing table. + RETURN_IF_ERROR(_jni_scanner_obj.call_void_method(env, _jni_scanner_release_column) + .with_arg(i) + .call()); + RETURN_ERROR_IF_EXC(env); + } + return Status::OK(); +} + +// ========================================================================= +// JniReader::_get_statistics (merged from JniConnector::get_statistics) +// ========================================================================= + +Status JniReader::_get_statistics(JNIEnv* env, std::map* result) { + result->clear(); + Jni::LocalObject metrics; + RETURN_IF_ERROR( + _jni_scanner_obj.call_object_method(env, _jni_scanner_get_statistics).call(&metrics)); + + RETURN_IF_ERROR(Jni::Util::convert_to_cpp_map(env, metrics, result)); + return Status::OK(); +} + +// ========================================================================= +// JniReader::_collect_profile_before_close +// (merged from JniConnector::_collect_profile_before_close) +// ========================================================================= + +void JniReader::_collect_profile_before_close() { + if (_scanner_opened && _profile != nullptr) { + JNIEnv* env = nullptr; + Status st = Jni::Env::Get(&env); + if (!st) { + LOG(WARNING) << "failed to get jni env when collect profile: " << st; + return; + } + // update scanner metrics + std::map statistics_result; + st = _get_statistics(env, 
&statistics_result); + if (!st) { + LOG(WARNING) << "failed to get_statistics when collect profile: " << st; + return; + } + + for (const auto& metric : statistics_result) { + std::vector type_and_name = split(metric.first, ":"); + if (type_and_name.size() != 2) { + LOG(WARNING) << "Name of JNI Scanner metric should be pattern like " + << "'metricType:metricName'"; + continue; + } + long metric_value = std::stol(metric.second); + RuntimeProfile::Counter* scanner_counter; + if (type_and_name[0] == "timer") { + scanner_counter = + ADD_CHILD_TIMER(_profile, type_and_name[1], _connector_name.c_str()); + } else if (type_and_name[0] == "counter") { + scanner_counter = ADD_CHILD_COUNTER(_profile, type_and_name[1], TUnit::UNIT, + _connector_name.c_str()); + } else if (type_and_name[0] == "bytes") { + scanner_counter = ADD_CHILD_COUNTER(_profile, type_and_name[1], TUnit::BYTES, + _connector_name.c_str()); + } else { + LOG(WARNING) << "Type of JNI Scanner metric should be timer, counter or bytes"; + continue; + } + COUNTER_UPDATE(scanner_counter, metric_value); + } + } +} + +// ========================================================================= +// MockJniReader +// ========================================================================= + +MockJniReader::MockJniReader(const std::vector& file_slot_descs, + RuntimeState* state, RuntimeProfile* profile) + : JniReader( + file_slot_descs, state, profile, "org/apache/doris/common/jni/MockJniScanner", + [&]() { + std::ostringstream required_fields; + std::ostringstream columns_types; + int index = 0; + for (const auto& desc : file_slot_descs) { + std::string field = desc->col_name(); + std::string type = + JniDataBridge::get_jni_type_with_different_string(desc->type()); + if (index == 0) { + required_fields << field; + columns_types << type; + } else { + required_fields << "," << field; + columns_types << "#" << type; + } + index++; + } + return std::map {{"mock_rows", "10240"}, + {"required_fields", required_fields.str()}, 
+ {"columns_types", columns_types.str()}}; + }(), + [&]() { + std::vector names; + for (const auto& desc : file_slot_descs) { + names.emplace_back(desc->col_name()); + } + return names; + }()) {} + +Status MockJniReader::init_reader() { + return open(_state, _profile); +} + +#include "common/compile_check_end.h" +} // namespace doris diff --git a/be/src/format/jni/jni_reader.h b/be/src/format/jni/jni_reader.h new file mode 100644 index 00000000000000..87c0c9c0d828e1 --- /dev/null +++ b/be/src/format/jni/jni_reader.h @@ -0,0 +1,206 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include "common/status.h" +#include "format/generic_reader.h" +#include "format/jni/jni_data_bridge.h" +#include "runtime/runtime_profile.h" +#include "util/jni-util.h" +#include "util/profile_collector.h" +#include "util/string_util.h" + +namespace doris { +#include "common/compile_check_begin.h" +class RuntimeProfile; +class RuntimeState; +class SlotDescriptor; +class Block; +} // namespace doris + +namespace doris { + +/** + * JniReader is the base class for all JNI-based readers. 
It directly manages + * the JNI lifecycle (open/read/close) for Java scanners that extend + * org.apache.doris.common.jni.JniScanner. + * + * Subclasses only need to: + * 1. Build scanner_params/column_names in their constructor + * 2. Pass them to JniReader's constructor + * 3. Call open() in their init_reader() + * + * This class replaces the old JniConnector intermediary. + */ +class JniReader : public GenericReader { +public: + /** + * Constructor for scan mode. + * @param file_slot_descs Slot descriptors for the output columns + * @param state Runtime state + * @param profile Runtime profile for metrics + * @param connector_class Java scanner class path (e.g. "org/apache/doris/paimon/PaimonJniScanner") + * @param scanner_params Configuration map passed to Java scanner constructor + * @param column_names Fields to read (also the required_fields in scanner_params) + * @param self_split_weight Weight for this split (for profile conditition counter) + */ + JniReader(const std::vector& file_slot_descs, RuntimeState* state, + RuntimeProfile* profile, std::string connector_class, + std::map scanner_params, + std::vector column_names, int64_t self_split_weight = -1); + + /** + * Constructor for table-schema-only mode (no data reading). + * @param connector_class Java scanner class path + * @param scanner_params Configuration map passed to Java scanner constructor + */ + JniReader(std::string connector_class, std::map scanner_params); + + ~JniReader() override = default; + + /** + * Open the java scanner: set up profile counters, create Java object, + * get method IDs, and call JniScanner#open. + */ + Status open(RuntimeState* state, RuntimeProfile* profile); + + Status get_columns(std::unordered_map* name_to_type, + std::unordered_set* missing_cols) override { + for (const auto& desc : _file_slot_descs) { + name_to_type->emplace(desc->col_name(), desc->type()); + } + return Status::OK(); + } + + /** + * Read next batch from Java scanner and fill the block. 
+ */ + virtual Status get_next_block(Block* block, size_t* read_rows, bool* eof) override; + + /** + * Get table schema from Java scanner (used by Avro schema discovery). + */ + Status get_table_schema(std::string& table_schema_str); + + /** + * Close the scanner and release JNI resources. + */ + Status close() override; + + /** + * Set column name to block index map from FileScanner to avoid repeated map creation. + */ + void set_col_name_to_block_idx( + const std::unordered_map* col_name_to_block_idx) { + _col_name_to_block_idx = col_name_to_block_idx; + } + +protected: + void _collect_profile_before_close() override; + + /** + * Update scanner params and column names after construction. + * Used by Avro which builds params in init_reader/init_schema_reader + * rather than in the constructor. + */ + void _update_scanner_params(std::map params, + std::vector column_names) { + _scanner_params = std::move(params); + _column_names = std::move(column_names); + } + + const std::vector& _file_slot_descs; + RuntimeState* _state = nullptr; + RuntimeProfile* _profile = nullptr; + +private: + static const std::vector _s_empty_slot_descs; + + Status _init_jni_scanner(JNIEnv* env, int batch_size); + Status _fill_block(Block* block, size_t num_rows); + Status _get_statistics(JNIEnv* env, std::map* result); + + std::string _connector_name; + std::string _connector_class; + std::map _scanner_params; + std::vector _column_names; + int32_t _self_split_weight = -1; + bool _is_table_schema = false; + + RuntimeProfile::Counter* _open_scanner_time = nullptr; + RuntimeProfile::Counter* _java_scan_time = nullptr; + RuntimeProfile::Counter* _java_append_data_time = nullptr; + RuntimeProfile::Counter* _java_create_vector_table_time = nullptr; + RuntimeProfile::Counter* _fill_block_time = nullptr; + RuntimeProfile::ConditionCounter* _max_time_split_weight_counter = nullptr; + + int64_t _jni_scanner_open_watcher = 0; + int64_t _java_scan_watcher = 0; + int64_t _fill_block_watcher = 0; + + 
size_t _has_read = 0; + + bool _closed = false; + bool _scanner_opened = false; + + Jni::GlobalClass _jni_scanner_cls; + Jni::GlobalObject _jni_scanner_obj; + Jni::MethodId _jni_scanner_open; + Jni::MethodId _jni_scanner_get_append_data_time; + Jni::MethodId _jni_scanner_get_create_vector_table_time; + Jni::MethodId _jni_scanner_get_next_batch; + Jni::MethodId _jni_scanner_get_table_schema; + Jni::MethodId _jni_scanner_close; + Jni::MethodId _jni_scanner_release_column; + Jni::MethodId _jni_scanner_release_table; + Jni::MethodId _jni_scanner_get_statistics; + + JniDataBridge::TableMetaAddress _table_meta; + + // Column name to block index map, passed from FileScanner to avoid repeated map creation + const std::unordered_map* _col_name_to_block_idx = nullptr; + + void _set_meta(long meta_addr) { _table_meta.set_meta(meta_addr); } +}; + +/** + * The demo usage of JniReader, showing how to read data from java scanner. + * The java side is also a mock reader that provide values for each type. + * This class will only be retained during the functional testing phase to verify that + * the communication and data exchange with the jvm are correct. + */ +class MockJniReader : public JniReader { +public: + MockJniReader(const std::vector& file_slot_descs, RuntimeState* state, + RuntimeProfile* profile); + + ~MockJniReader() override = default; + + Status init_reader(); +}; + +#include "common/compile_check_end.h" +} // namespace doris diff --git a/be/src/format/jni_reader.cpp b/be/src/format/jni_reader.cpp deleted file mode 100644 index 79efe93f5a4db2..00000000000000 --- a/be/src/format/jni_reader.cpp +++ /dev/null @@ -1,68 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "format/jni_reader.h" - -#include -#include - -#include "core/types.h" -#include "runtime/descriptors.h" - -namespace doris { -#include "common/compile_check_begin.h" -class RuntimeProfile; -class RuntimeState; - -class Block; -} // namespace doris - -namespace doris { - -MockJniReader::MockJniReader(const std::vector& file_slot_descs, - RuntimeState* state, RuntimeProfile* profile) - : JniReader(file_slot_descs, state, profile) { - std::ostringstream required_fields; - std::ostringstream columns_types; - std::vector column_names; - int index = 0; - for (const auto& desc : _file_slot_descs) { - std::string field = desc->col_name(); - std::string type = JniConnector::get_jni_type_with_different_string(desc->type()); - column_names.emplace_back(field); - if (index == 0) { - required_fields << field; - columns_types << type; - } else { - required_fields << "," << field; - columns_types << "#" << type; - } - index++; - } - std::map params = {{"mock_rows", "10240"}, - {"required_fields", required_fields.str()}, - {"columns_types", columns_types.str()}}; - _jni_connector = std::make_unique("org/apache/doris/common/jni/MockJniScanner", - params, column_names); -} - -Status MockJniReader::init_reader() { - RETURN_IF_ERROR(_jni_connector->init()); - return _jni_connector->open(_state, _profile); -} -#include "common/compile_check_end.h" -} // namespace doris diff --git 
a/be/src/format/jni_reader.h b/be/src/format/jni_reader.h deleted file mode 100644 index 18506344c1d9c8..00000000000000 --- a/be/src/format/jni_reader.h +++ /dev/null @@ -1,120 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include -#include -#include -#include -#include -#include - -#include "common/status.h" -#include "exec/connector/jni_connector.h" -#include "format/generic_reader.h" -#include "storage/olap_scan_common.h" - -namespace doris { -#include "common/compile_check_begin.h" -class RuntimeProfile; -class RuntimeState; -class SlotDescriptor; -class Block; -} // namespace doris - -namespace doris { - -class JniReader : public GenericReader { -public: - JniReader(const std::vector& file_slot_descs, RuntimeState* state, - RuntimeProfile* profile) - : _file_slot_descs(file_slot_descs), _state(state), _profile(profile) {}; - - ~JniReader() override = default; - - Status get_columns(std::unordered_map* name_to_type, - std::unordered_set* missing_cols) override { - for (const auto& desc : _file_slot_descs) { - name_to_type->emplace(desc->col_name(), desc->type()); - } - return Status::OK(); - } - - Status get_next_block(Block* block, size_t* read_rows, bool* eof) override { - return 
_jni_connector->get_next_block(block, read_rows, eof); - } - - Status close() override { - if (_jni_connector) { - return _jni_connector->close(); - } - return Status::OK(); - } - - void set_col_name_to_block_idx( - const std::unordered_map* col_name_to_block_idx) { - if (_jni_connector) { - _jni_connector->set_col_name_to_block_idx(col_name_to_block_idx); - } - } - -protected: - void _collect_profile_before_close() override { - if (_jni_connector) { - _jni_connector->collect_profile_before_close(); - } - } - - const std::vector& _file_slot_descs; - RuntimeState* _state = nullptr; - RuntimeProfile* _profile = nullptr; - std::unique_ptr _jni_connector; -}; - -/** - * The demo usage of JniReader, showing how to read data from java scanner. - * The java side is also a mock reader that provide values for each type. - * This class will only be retained during the functional testing phase to verify that - * the communication and data exchange with the jvm are correct. - */ -class MockJniReader : public JniReader { -public: - MockJniReader(const std::vector& file_slot_descs, RuntimeState* state, - RuntimeProfile* profile); - - ~MockJniReader() override = default; - - Status init_reader(); - - Status close() override { - if (_jni_connector) { - return _jni_connector->close(); - } - return Status::OK(); - } - -protected: - void _collect_profile_before_close() override { - if (_jni_connector != nullptr) { - _jni_connector->collect_profile_before_close(); - } - } -}; - -#include "common/compile_check_end.h" -} // namespace doris diff --git a/be/src/format/table/hudi_jni_reader.cpp b/be/src/format/table/hudi_jni_reader.cpp index 5c270524886f24..2df5c17bbbd210 100644 --- a/be/src/format/table/hudi_jni_reader.cpp +++ b/be/src/format/table/hudi_jni_reader.cpp @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. 
-#include "format/table/hudi_jni_reader.h" +#include "hudi_jni_reader.h" #include @@ -38,44 +38,45 @@ HudiJniReader::HudiJniReader(const TFileScanRangeParams& scan_params, const THudiFileDesc& hudi_params, const std::vector& file_slot_descs, RuntimeState* state, RuntimeProfile* profile) - : JniReader(file_slot_descs, state, profile), - _scan_params(scan_params), - _hudi_params(hudi_params) { - std::vector required_fields; - for (const auto& desc : _file_slot_descs) { - required_fields.emplace_back(desc->col_name()); - } - - std::map params = { - {"query_id", print_id(_state->query_id())}, - {"base_path", _hudi_params.base_path}, - {"data_file_path", _hudi_params.data_file_path}, - {"data_file_length", std::to_string(_hudi_params.data_file_length)}, - {"delta_file_paths", join(_hudi_params.delta_logs, ",")}, - {"hudi_column_names", join(_hudi_params.column_names, ",")}, - {"hudi_column_types", join(_hudi_params.column_types, "#")}, - {"required_fields", join(required_fields, ",")}, - {"instant_time", _hudi_params.instant_time}, - {"serde", _hudi_params.serde}, - {"input_format", _hudi_params.input_format}, - {"time_zone", state->timezone_obj().name()}}; - - // Use compatible hadoop client to read data - for (const auto& kv : _scan_params.properties) { - if (kv.first.starts_with(HOODIE_CONF_PREFIX)) { - params[kv.first] = kv.second; - } else { - params[HADOOP_CONF_PREFIX + kv.first] = kv.second; - } - } - - _jni_connector = std::make_unique("org/apache/doris/hudi/HadoopHudiJniScanner", - params, required_fields); -} + : JniReader( + file_slot_descs, state, profile, "org/apache/doris/hudi/HadoopHudiJniScanner", + [&]() { + std::vector required_fields; + for (const auto& desc : file_slot_descs) { + required_fields.emplace_back(desc->col_name()); + } + std::map params = { + {"query_id", print_id(state->query_id())}, + {"base_path", hudi_params.base_path}, + {"data_file_path", hudi_params.data_file_path}, + {"data_file_length", 
std::to_string(hudi_params.data_file_length)}, + {"delta_file_paths", join(hudi_params.delta_logs, ",")}, + {"hudi_column_names", join(hudi_params.column_names, ",")}, + {"hudi_column_types", join(hudi_params.column_types, "#")}, + {"required_fields", join(required_fields, ",")}, + {"instant_time", hudi_params.instant_time}, + {"serde", hudi_params.serde}, + {"input_format", hudi_params.input_format}, + {"time_zone", state->timezone_obj().name()}}; + for (const auto& kv : scan_params.properties) { + if (kv.first.starts_with(HOODIE_CONF_PREFIX)) { + params[kv.first] = kv.second; + } else { + params[HADOOP_CONF_PREFIX + kv.first] = kv.second; + } + } + return params; + }(), + [&]() { + std::vector names; + for (const auto& desc : file_slot_descs) { + names.emplace_back(desc->col_name()); + } + return names; + }()) {} Status HudiJniReader::init_reader() { - RETURN_IF_ERROR(_jni_connector->init()); - return _jni_connector->open(_state, _profile); + return open(_state, _profile); } #include "common/compile_check_end.h" } // namespace doris diff --git a/be/src/format/table/hudi_jni_reader.h b/be/src/format/table/hudi_jni_reader.h index 505dba0b82dfea..47bc6bc8de2df0 100644 --- a/be/src/format/table/hudi_jni_reader.h +++ b/be/src/format/table/hudi_jni_reader.h @@ -24,8 +24,8 @@ #include #include "common/status.h" -#include "format/jni_reader.h" -#include "storage/olap_scan_common.h" +#include "format/jni/jni_reader.h" +#include "storage/olap_common.h" namespace doris { class RuntimeProfile; @@ -50,11 +50,6 @@ class HudiJniReader : public JniReader { ~HudiJniReader() override = default; Status init_reader(); - -private: - const TFileScanRangeParams& _scan_params; - const THudiFileDesc& _hudi_params; }; - #include "common/compile_check_end.h" } // namespace doris diff --git a/be/src/format/table/iceberg_sys_table_jni_reader.cpp b/be/src/format/table/iceberg_sys_table_jni_reader.cpp index 7a6817a8ccfe89..35bf92db3c98b0 100644 --- 
a/be/src/format/table/iceberg_sys_table_jni_reader.cpp +++ b/be/src/format/table/iceberg_sys_table_jni_reader.cpp @@ -15,8 +15,9 @@ // specific language governing permissions and limitations // under the License. -#include "format/table/iceberg_sys_table_jni_reader.h" +#include "iceberg_sys_table_jni_reader.h" +#include "format/jni/jni_data_bridge.h" #include "runtime/runtime_state.h" #include "util/string_util.h" @@ -28,32 +29,37 @@ static const std::string HADOOP_OPTION_PREFIX = "hadoop."; IcebergSysTableJniReader::IcebergSysTableJniReader( const std::vector& file_slot_descs, RuntimeState* state, RuntimeProfile* profile, const TMetaScanRange& meta_scan_range) - : JniReader(file_slot_descs, state, profile), _meta_scan_range(meta_scan_range) {} + : JniReader( + file_slot_descs, state, profile, + "org/apache/doris/iceberg/IcebergSysTableJniScanner", + [&]() { + std::vector required_fields; + std::vector required_types; + for (const auto& desc : file_slot_descs) { + required_fields.emplace_back(desc->col_name()); + required_types.emplace_back( + JniDataBridge::get_jni_type_with_different_string(desc->type())); + } + std::map params; + params["serialized_splits"] = join(meta_scan_range.serialized_splits, ","); + params["required_fields"] = join(required_fields, ","); + params["required_types"] = join(required_types, "#"); + params["time_zone"] = state->timezone(); + for (const auto& kv : meta_scan_range.hadoop_props) { + params[HADOOP_OPTION_PREFIX + kv.first] = kv.second; + } + return params; + }(), + [&]() { + std::vector names; + for (const auto& desc : file_slot_descs) { + names.emplace_back(desc->col_name()); + } + return names; + }()) {} Status IcebergSysTableJniReader::init_reader() { - std::vector required_fields; - std::vector required_types; - for (const auto& desc : _file_slot_descs) { - required_fields.emplace_back(desc->col_name()); - required_types.emplace_back(JniConnector::get_jni_type_with_different_string(desc->type())); - } - std::map params; - // 
"," is not in base64 - params["serialized_splits"] = join(_meta_scan_range.serialized_splits, ","); - params["required_fields"] = join(required_fields, ","); - params["required_types"] = join(required_types, "#"); - params["time_zone"] = _state->timezone(); - for (const auto& kv : _meta_scan_range.hadoop_props) { - params[HADOOP_OPTION_PREFIX + kv.first] = kv.second; - } - _jni_connector = - std::make_unique("org/apache/doris/iceberg/IcebergSysTableJniScanner", - std::move(params), required_fields); - if (_jni_connector == nullptr) { - return Status::InternalError("JniConnector failed to initialize"); - } - RETURN_IF_ERROR(_jni_connector->init()); - return _jni_connector->open(_state, _profile); + return open(_state, _profile); } #include "common/compile_check_end.h" diff --git a/be/src/format/table/iceberg_sys_table_jni_reader.h b/be/src/format/table/iceberg_sys_table_jni_reader.h index cb4ce044780772..1d52bdc1cf6e51 100644 --- a/be/src/format/table/iceberg_sys_table_jni_reader.h +++ b/be/src/format/table/iceberg_sys_table_jni_reader.h @@ -26,8 +26,8 @@ #include #include "common/status.h" -#include "format/jni_reader.h" -#include "storage/olap_scan_common.h" +#include "format/jni/jni_reader.h" +#include "storage/olap_common.h" namespace doris { class RuntimeProfile; @@ -50,9 +50,6 @@ class IcebergSysTableJniReader : public JniReader { ~IcebergSysTableJniReader() override = default; Status init_reader(); - -private: - const TMetaScanRange& _meta_scan_range; }; #include "common/compile_check_end.h" diff --git a/be/src/format/table/jdbc_jni_reader.cpp b/be/src/format/table/jdbc_jni_reader.cpp new file mode 100644 index 00000000000000..89071563c653ca --- /dev/null +++ b/be/src/format/table/jdbc_jni_reader.cpp @@ -0,0 +1,224 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "jdbc_jni_reader.h" + +#include + +#include "core/block/block.h" +#include "core/column/column_nullable.h" +#include "core/data_type/data_type_nullable.h" +#include "core/data_type/data_type_string.h" +#include "core/types.h" +#include "exprs/function/simple_function_factory.h" +#include "format/jni/jni_data_bridge.h" +#include "runtime/descriptors.h" +#include "util/jdbc_utils.h" + +namespace doris { +#include "common/compile_check_begin.h" + +JdbcJniReader::JdbcJniReader(const std::vector& file_slot_descs, + RuntimeState* state, RuntimeProfile* profile, + const std::map& jdbc_params) + : JniReader( + file_slot_descs, state, profile, "org/apache/doris/jdbc/JdbcJniScanner", + [&]() { + std::ostringstream required_fields; + std::ostringstream columns_types; + std::ostringstream replace_string; + int index = 0; + for (const auto& desc : file_slot_descs) { + std::string field = desc->col_name(); + std::string type = + JniDataBridge::get_jni_type_with_different_string(desc->type()); + + // Determine replace_string for special types + // (bitmap, hll, quantile_state, jsonb) + std::string replace_type = "not_replace"; + auto ptype = desc->type()->get_primitive_type(); + if (ptype == PrimitiveType::TYPE_BITMAP) { + replace_type = "bitmap"; + } else if (ptype == PrimitiveType::TYPE_HLL) { + replace_type = "hll"; + } else if (ptype == PrimitiveType::TYPE_JSONB) { + replace_type = 
"jsonb"; + } else if (ptype == PrimitiveType::TYPE_QUANTILE_STATE) { + replace_type = "quantile_state"; + } + + if (index == 0) { + required_fields << field; + columns_types << type; + replace_string << replace_type; + } else { + required_fields << "," << field; + columns_types << "#" << type; + replace_string << "," << replace_type; + } + index++; + } + // Merge JDBC-specific params with schema params + std::map params = jdbc_params; + params["required_fields"] = required_fields.str(); + params["columns_types"] = columns_types.str(); + params["replace_string"] = replace_string.str(); + // Resolve jdbc_driver_url to absolute file:// URL + if (params.count("jdbc_driver_url")) { + std::string resolved; + if (JdbcUtils::resolve_driver_url(params["jdbc_driver_url"], &resolved) + .ok()) { + params["jdbc_driver_url"] = resolved; + } + } + return params; + }(), + [&]() { + std::vector names; + for (const auto& desc : file_slot_descs) { + names.emplace_back(desc->col_name()); + } + return names; + }()), + _jdbc_params(jdbc_params) {} + +Status JdbcJniReader::init_reader() { + return open(_state, _profile); +} + +bool JdbcJniReader::_is_special_type(PrimitiveType type) { + return type == PrimitiveType::TYPE_BITMAP || type == PrimitiveType::TYPE_HLL || + type == PrimitiveType::TYPE_QUANTILE_STATE || type == PrimitiveType::TYPE_JSONB; +} + +Status JdbcJniReader::get_next_block(Block* block, size_t* read_rows, bool* eof) { + // Identify columns with special types (bitmap, HLL, quantile_state, JSONB) + // and temporarily replace them with string columns for JNI data transfer. + // This follows the same pattern as the old vjdbc_connector.cpp _get_reader_params. 
+ struct SpecialColumnInfo { + int block_idx; + DataTypePtr original_type; + ColumnPtr original_column; + }; + std::vector special_columns; + + auto name_to_pos_map = block->get_name_to_pos_map(); + const auto& slots = _file_slot_descs; + for (size_t i = 0; i < slots.size(); ++i) { + auto* slot = slots[i]; + auto ptype = slot->type()->get_primitive_type(); + if (_is_special_type(ptype)) { + // Find the block index for this column + int block_idx = name_to_pos_map[slot->col_name()]; + auto& col_with_type = block->get_by_position(block_idx); + + SpecialColumnInfo info; + info.block_idx = block_idx; + info.original_type = col_with_type.type; + info.original_column = col_with_type.column; + special_columns.push_back(info); + + // Replace block column with string type + DataTypePtr string_type = std::make_shared(); + if (slot->is_nullable()) { + string_type = make_nullable(string_type); + } + block->get_by_position(block_idx).column = + string_type->create_column()->convert_to_full_column_if_const(); + block->get_by_position(block_idx).type = string_type; + } + } + + // Call parent to do the actual JNI read with string columns + RETURN_IF_ERROR(JniReader::get_next_block(block, read_rows, eof)); + + // Cast string columns back to their target types + if (*read_rows > 0 && !special_columns.empty()) { + for (size_t i = 0; i < slots.size(); ++i) { + auto* slot = slots[i]; + auto ptype = slot->type()->get_primitive_type(); + if (_is_special_type(ptype)) { + int block_idx = name_to_pos_map[slot->col_name()]; + RETURN_IF_ERROR(_cast_string_to_special_type(slot, block, block_idx, + static_cast(*read_rows))); + } + } + } else if (special_columns.empty()) { + // No special columns, nothing to do + } else { + // No rows read but we replaced columns, restore original types for next call + for (auto& info : special_columns) { + block->get_by_position(info.block_idx).type = info.original_type; + block->get_by_position(info.block_idx).column = info.original_column; + } + } + + return 
Status::OK(); +} + +Status JdbcJniReader::_cast_string_to_special_type(const SlotDescriptor* slot_desc, Block* block, + int column_index, int num_rows) { + DataTypePtr target_data_type = slot_desc->get_data_type_ptr(); + std::string target_data_type_name = target_data_type->get_name(); + + // Build input string type (nullable if slot is nullable) + DataTypePtr input_string_type; + if (slot_desc->is_nullable()) { + input_string_type = make_nullable(std::make_shared()); + } else { + input_string_type = std::make_shared(); + } + + auto& input_col = block->get_by_position(column_index).column; + + // Build CAST function arguments + DataTypePtr cast_param_data_type = target_data_type; + ColumnPtr cast_param = cast_param_data_type->create_column_const_with_default_value(1); + + ColumnsWithTypeAndName argument_template; + argument_template.reserve(2); + argument_template.emplace_back(std::move(input_col), input_string_type, "java.sql.String"); + argument_template.emplace_back(cast_param, cast_param_data_type, target_data_type_name); + + FunctionBasePtr func_cast = SimpleFunctionFactory::instance().get_function( + "CAST", argument_template, make_nullable(target_data_type)); + + if (func_cast == nullptr) { + return Status::InternalError("Failed to find CAST function for type {}", + target_data_type_name); + } + + Block cast_block(argument_template); + int result_idx = cast_block.columns(); + cast_block.insert({nullptr, make_nullable(target_data_type), "cast_result"}); + RETURN_IF_ERROR(func_cast->execute(nullptr, cast_block, {0}, result_idx, num_rows)); + + auto res_col = cast_block.get_by_position(result_idx).column; + block->get_by_position(column_index).type = target_data_type; + if (target_data_type->is_nullable()) { + block->replace_by_position(column_index, res_col); + } else { + auto nested_ptr = + reinterpret_cast(res_col.get())->get_nested_column_ptr(); + block->replace_by_position(column_index, nested_ptr); + } + + return Status::OK(); +} + +#include 
"common/compile_check_end.h" +} // namespace doris diff --git a/be/src/format/table/jdbc_jni_reader.h b/be/src/format/table/jdbc_jni_reader.h new file mode 100644 index 00000000000000..c0ef978682c66e --- /dev/null +++ b/be/src/format/table/jdbc_jni_reader.h @@ -0,0 +1,101 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include "common/factory_creator.h" +#include "common/status.h" +#include "format/jni/jni_reader.h" + +namespace doris { +class RuntimeProfile; +class RuntimeState; +class SlotDescriptor; +class TupleDescriptor; +class Block; + +#include "common/compile_check_begin.h" + +/** + * JdbcJniReader reads data from JDBC data sources via the unified JniReader + * framework. It delegates scanning to Java-side JdbcJniScanner (extends JniScanner). 
+ * + * This reader follows the same pattern as PaimonJniReader, HudiJniReader, etc: + * - Passes Java scanner class path and parameters to JniReader base constructor + * - init_reader() calls open() to start the Java scanner + * - get_next_block() reads data batch by batch (inherited from JniReader) + * - close() releases Java resources (inherited from JniReader) + * + * Special types like bitmap, HLL and quantile_state are handled by: + * 1. Temporarily replacing block columns to string type before reading + * 2. Reading data as strings via JNI + * 3. Casting string columns back to the target special types after reading + */ +class JdbcJniReader : public JniReader { + ENABLE_FACTORY_CREATOR(JdbcJniReader); + +public: + /** + * Construct a JdbcJniReader. + * + * @param file_slot_descs Slot descriptors for the output columns + * @param state Runtime state + * @param profile Runtime profile for metrics + * @param jdbc_params JDBC connection parameters (jdbc_url, query_sql, etc.) + */ + JdbcJniReader(const std::vector& file_slot_descs, RuntimeState* state, + RuntimeProfile* profile, const std::map& jdbc_params); + + ~JdbcJniReader() override = default; + + Status init_reader(); + + /** + * Override get_next_block to handle special types (bitmap, HLL, quantile_state, JSONB). + * Before reading, replaces block columns of special types with string columns. + * After reading, casts the string data back to the target types. + */ + Status get_next_block(Block* block, size_t* read_rows, bool* eof) override; + +private: + std::map _jdbc_params; + + /** + * Check if a primitive type needs special string-based handling. + * These types (bitmap, HLL, quantile_state, JSONB) are read as strings via JDBC + * and need post-read casting back to their target types. + */ + static bool _is_special_type(PrimitiveType type); + + /** + * Cast a string column back to the target special type using the CAST function. 
+ * Follows the same pattern as the old vjdbc_connector.cpp _cast_string_to_hll/bitmap/json. + */ + Status _cast_string_to_special_type(const SlotDescriptor* slot_desc, Block* block, + int column_index, int num_rows); +}; + +#include "common/compile_check_end.h" +} // namespace doris diff --git a/be/src/format/table/lakesoul_jni_reader.cpp b/be/src/format/table/lakesoul_jni_reader.cpp deleted file mode 100644 index 3b882a77b1ce07..00000000000000 --- a/be/src/format/table/lakesoul_jni_reader.cpp +++ /dev/null @@ -1,66 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -// DEPRECATED: LakeSoul catalog support has been deprecated and will be removed in a future version. -// This file is kept for backward compatibility but should not be used in new code. 
- -#include "format/table/lakesoul_jni_reader.h" - -#include - -#include "core/types.h" -#include "format/jni_reader.h" -#include "runtime/descriptors.h" -#include "runtime/runtime_state.h" - -namespace doris { -#include "common/compile_check_begin.h" -class RuntimeProfile; -class RuntimeState; - -class Block; -} // namespace doris - -namespace doris { -LakeSoulJniReader::LakeSoulJniReader(const TLakeSoulFileDesc& lakesoul_params, - const std::vector& file_slot_descs, - RuntimeState* state, RuntimeProfile* profile) - : JniReader(file_slot_descs, state, profile), _lakesoul_params(lakesoul_params) { - std::vector required_fields; - for (const auto& desc : _file_slot_descs) { - required_fields.emplace_back(desc->col_name()); - } - - std::map params = { - {"query_id", print_id(_state->query_id())}, - {"file_paths", join(_lakesoul_params.file_paths, ";")}, - {"primary_keys", join(_lakesoul_params.primary_keys, ";")}, - {"partition_descs", join(_lakesoul_params.partition_descs, ";")}, - {"required_fields", join(required_fields, ";")}, - {"options", _lakesoul_params.options}, - {"table_schema", _lakesoul_params.table_schema}, - }; - _jni_connector = std::make_unique("org/apache/doris/lakesoul/LakeSoulJniScanner", - params, required_fields); -} - -Status LakeSoulJniReader::init_reader() { - RETURN_IF_ERROR(_jni_connector->init()); - return _jni_connector->open(_state, _profile); -} -#include "common/compile_check_end.h" -} // namespace doris diff --git a/be/src/format/table/lakesoul_jni_reader.h b/be/src/format/table/lakesoul_jni_reader.h deleted file mode 100644 index fe639f6e6bbd5d..00000000000000 --- a/be/src/format/table/lakesoul_jni_reader.h +++ /dev/null @@ -1,58 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -// DEPRECATED: LakeSoul catalog support has been deprecated and will be removed in a future version. -// This file is kept for backward compatibility but should not be used in new code. - -#pragma once - -#include -#include -#include - -#include "common/status.h" -#include "exec/connector/jni_connector.h" -#include "format/jni_reader.h" -#include "storage/olap_scan_common.h" - -namespace doris { -#include "common/compile_check_begin.h" -class RuntimeProfile; -class RuntimeState; -class SlotDescriptor; - -class Block; -} // namespace doris - -namespace doris { -class LakeSoulJniReader : public JniReader { - ENABLE_FACTORY_CREATOR(LakeSoulJniReader); - -public: - LakeSoulJniReader(const TLakeSoulFileDesc& lakesoul_params, - const std::vector& file_slot_descs, RuntimeState* state, - RuntimeProfile* profile); - - ~LakeSoulJniReader() override = default; - - Status init_reader(); - -private: - const TLakeSoulFileDesc& _lakesoul_params; -}; -#include "common/compile_check_end.h" -} // namespace doris diff --git a/be/src/format/table/max_compute_jni_reader.cpp b/be/src/format/table/max_compute_jni_reader.cpp index 37d9c0adbd86d6..1408464e74991f 100644 --- a/be/src/format/table/max_compute_jni_reader.cpp +++ b/be/src/format/table/max_compute_jni_reader.cpp @@ -15,14 +15,16 @@ // specific language governing permissions and limitations // under the License. 
-#include "format/table/max_compute_jni_reader.h" +#include "max_compute_jni_reader.h" #include #include #include +#include #include "core/types.h" +#include "format/jni/jni_data_bridge.h" #include "runtime/descriptors.h" namespace doris { @@ -38,53 +40,53 @@ MaxComputeJniReader::MaxComputeJniReader(const MaxComputeTableDescriptor* mc_des const std::vector& file_slot_descs, const TFileRangeDesc& range, RuntimeState* state, RuntimeProfile* profile) - : JniReader(file_slot_descs, state, profile), - _max_compute_params(max_compute_params), - _range(range) { - _table_desc = mc_desc; - std::ostringstream required_fields; - std::ostringstream columns_types; - std::vector column_names; - int index = 0; - for (const auto& desc : _file_slot_descs) { - std::string field = desc->col_name(); - std::string type = JniConnector::get_jni_type_with_different_string(desc->type()); - column_names.emplace_back(field); - if (index == 0) { - required_fields << field; - columns_types << type; - } else { - required_fields << "," << field; - columns_types << "#" << type; - } - index++; - } - - auto properties = _table_desc->properties(); - properties["endpoint"] = _table_desc->endpoint(); - properties["quota"] = _table_desc->quota(); - properties["project"] = _table_desc->project(); - properties["table"] = _table_desc->table(); - - properties["session_id"] = _max_compute_params.session_id; - properties["scan_serializer"] = _max_compute_params.table_batch_read_session; - - properties["start_offset"] = std::to_string(_range.start_offset); - properties["split_size"] = std::to_string(_range.size); - properties["required_fields"] = required_fields.str(); - properties["columns_types"] = columns_types.str(); - - properties["connect_timeout"] = std::to_string(_max_compute_params.connect_timeout); - properties["read_timeout"] = std::to_string(_max_compute_params.read_timeout); - properties["retry_count"] = std::to_string(_max_compute_params.retry_times); - - _jni_connector = std::make_unique( - 
"org/apache/doris/maxcompute/MaxComputeJniScanner", properties, column_names); -} + : JniReader( + file_slot_descs, state, profile, + "org/apache/doris/maxcompute/MaxComputeJniScanner", + [&]() { + std::ostringstream required_fields; + std::ostringstream columns_types; + int index = 0; + for (const auto& desc : file_slot_descs) { + std::string field = desc->col_name(); + std::string type = + JniDataBridge::get_jni_type_with_different_string(desc->type()); + if (index == 0) { + required_fields << field; + columns_types << type; + } else { + required_fields << "," << field; + columns_types << "#" << type; + } + index++; + } + auto properties = mc_desc->properties(); + properties["endpoint"] = mc_desc->endpoint(); + properties["quota"] = mc_desc->quota(); + properties["project"] = mc_desc->project(); + properties["table"] = mc_desc->table(); + properties["session_id"] = max_compute_params.session_id; + properties["scan_serializer"] = max_compute_params.table_batch_read_session; + properties["start_offset"] = std::to_string(range.start_offset); + properties["split_size"] = std::to_string(range.size); + properties["required_fields"] = required_fields.str(); + properties["columns_types"] = columns_types.str(); + properties["connect_timeout"] = + std::to_string(max_compute_params.connect_timeout); + properties["read_timeout"] = std::to_string(max_compute_params.read_timeout); + properties["retry_count"] = std::to_string(max_compute_params.retry_times); + return properties; + }(), + [&]() { + std::vector names; + for (const auto& desc : file_slot_descs) { + names.emplace_back(desc->col_name()); + } + return names; + }()) {} Status MaxComputeJniReader::init_reader() { - RETURN_IF_ERROR(_jni_connector->init()); - return _jni_connector->open(_state, _profile); + return open(_state, _profile); } #include "common/compile_check_end.h" } // namespace doris diff --git a/be/src/format/table/max_compute_jni_reader.h b/be/src/format/table/max_compute_jni_reader.h index 
ad79bf35b0120d..71a1e74f4f85a2 100644 --- a/be/src/format/table/max_compute_jni_reader.h +++ b/be/src/format/table/max_compute_jni_reader.h @@ -24,9 +24,9 @@ #include #include "common/status.h" -#include "format/jni_reader.h" +#include "format/jni/jni_reader.h" #include "runtime/descriptors.h" -#include "storage/olap_scan_common.h" +#include "storage/olap_common.h" namespace doris { class RuntimeProfile; @@ -55,11 +55,6 @@ class MaxComputeJniReader : public JniReader { ~MaxComputeJniReader() override = default; Status init_reader(); - -private: - const MaxComputeTableDescriptor* _table_desc = nullptr; - const TMaxComputeFileDesc& _max_compute_params; - const TFileRangeDesc& _range; }; #include "common/compile_check_end.h" diff --git a/be/src/format/table/paimon_jni_reader.cpp b/be/src/format/table/paimon_jni_reader.cpp index ec68ad145438fd..4fc9c76e4c47f5 100644 --- a/be/src/format/table/paimon_jni_reader.cpp +++ b/be/src/format/table/paimon_jni_reader.cpp @@ -15,13 +15,15 @@ // specific language governing permissions and limitations // under the License. 
-#include "format/table/paimon_jni_reader.h" +#include "paimon_jni_reader.h" #include #include "core/types.h" +#include "format/jni/jni_data_bridge.h" #include "runtime/descriptors.h" #include "runtime/runtime_state.h" + namespace doris { class RuntimeProfile; class RuntimeState; @@ -38,53 +40,58 @@ PaimonJniReader::PaimonJniReader(const std::vector& file_slot_d RuntimeState* state, RuntimeProfile* profile, const TFileRangeDesc& range, const TFileScanRangeParams* range_params) - : JniReader(file_slot_descs, state, profile) { - std::vector column_names; - std::vector column_types; - for (const auto& desc : _file_slot_descs) { - column_names.emplace_back(desc->col_name()); - column_types.emplace_back(JniConnector::get_jni_type_with_different_string(desc->type())); - } - const auto& paimon_params = range.table_format_params.paimon_params; - std::map params; - params["paimon_split"] = paimon_params.paimon_split; - if (range_params->__isset.paimon_predicate && !range_params->paimon_predicate.empty()) { - params["paimon_predicate"] = range_params->paimon_predicate; - } else if (paimon_params.__isset.paimon_predicate) { - // Fallback to split level paimon_predicate for backward compatibility - params["paimon_predicate"] = paimon_params.paimon_predicate; - } - params["required_fields"] = join(column_names, ","); - params["columns_types"] = join(column_types, "#"); - params["time_zone"] = _state->timezone(); - if (range_params->__isset.serialized_table) { - params["serialized_table"] = range_params->serialized_table; - } + : JniReader( + file_slot_descs, state, profile, "org/apache/doris/paimon/PaimonJniScanner", + [&]() { + std::vector column_names; + std::vector column_types; + for (const auto& desc : file_slot_descs) { + column_names.emplace_back(desc->col_name()); + column_types.emplace_back( + JniDataBridge::get_jni_type_with_different_string(desc->type())); + } + const auto& paimon_params = range.table_format_params.paimon_params; + std::map params; + 
params["paimon_split"] = paimon_params.paimon_split; + if (range_params->__isset.paimon_predicate && + !range_params->paimon_predicate.empty()) { + params["paimon_predicate"] = range_params->paimon_predicate; + } else if (paimon_params.__isset.paimon_predicate) { + params["paimon_predicate"] = paimon_params.paimon_predicate; + } + params["required_fields"] = join(column_names, ","); + params["columns_types"] = join(column_types, "#"); + params["time_zone"] = state->timezone(); + if (range_params->__isset.serialized_table) { + params["serialized_table"] = range_params->serialized_table; + } + for (const auto& kv : paimon_params.paimon_options) { + params[PAIMON_OPTION_PREFIX + kv.first] = kv.second; + } + if (range_params->__isset.properties && !range_params->properties.empty()) { + for (const auto& kv : range_params->properties) { + params[HADOOP_OPTION_PREFIX + kv.first] = kv.second; + } + } else if (paimon_params.__isset.hadoop_conf) { + for (const auto& kv : paimon_params.hadoop_conf) { + params[HADOOP_OPTION_PREFIX + kv.first] = kv.second; + } + } + return params; + }(), + [&]() { + std::vector names; + for (const auto& desc : file_slot_descs) { + names.emplace_back(desc->col_name()); + } + return names; + }(), + range.__isset.self_split_weight ? 
range.self_split_weight : -1) { if (range.table_format_params.__isset.table_level_row_count) { _remaining_table_level_row_count = range.table_format_params.table_level_row_count; } else { _remaining_table_level_row_count = -1; } - - // Used to create paimon option - for (const auto& kv : paimon_params.paimon_options) { - params[PAIMON_OPTION_PREFIX + kv.first] = kv.second; - } - // Prefer hadoop conf from scan node level (range_params->properties) over split level - // to avoid redundant configuration in each split - if (range_params->__isset.properties && !range_params->properties.empty()) { - for (const auto& kv : range_params->properties) { - params[HADOOP_OPTION_PREFIX + kv.first] = kv.second; - } - } else if (paimon_params.__isset.hadoop_conf) { - // Fallback to split level hadoop conf for backward compatibility - for (const auto& kv : paimon_params.hadoop_conf) { - params[HADOOP_OPTION_PREFIX + kv.first] = kv.second; - } - } - int64_t self_split_weight = range.__isset.self_split_weight ? 
range.self_split_weight : -1; - _jni_connector = std::make_unique("org/apache/doris/paimon/PaimonJniScanner", - params, column_names, self_split_weight); } Status PaimonJniReader::get_next_block(Block* block, size_t* read_rows, bool* eof) { @@ -104,12 +111,11 @@ Status PaimonJniReader::get_next_block(Block* block, size_t* read_rows, bool* eo return Status::OK(); } - return _jni_connector->get_next_block(block, read_rows, eof); + return JniReader::get_next_block(block, read_rows, eof); } Status PaimonJniReader::init_reader() { - RETURN_IF_ERROR(_jni_connector->init()); - return _jni_connector->open(_state, _profile); + return open(_state, _profile); } #include "common/compile_check_end.h" } // namespace doris diff --git a/be/src/format/table/paimon_jni_reader.h b/be/src/format/table/paimon_jni_reader.h index 0026ab4118103d..548f1c6485a2f7 100644 --- a/be/src/format/table/paimon_jni_reader.h +++ b/be/src/format/table/paimon_jni_reader.h @@ -24,7 +24,7 @@ #include #include "common/status.h" -#include "format/jni_reader.h" +#include "format/jni/jni_reader.h" #include "storage/olap_scan_common.h" namespace doris { diff --git a/be/src/format/table/remote_doris_reader.h b/be/src/format/table/remote_doris_reader.h index ab5a89df6a5d01..d8ea431fda288f 100644 --- a/be/src/format/table/remote_doris_reader.h +++ b/be/src/format/table/remote_doris_reader.h @@ -26,7 +26,7 @@ #include #include "common/status.h" -#include "format/jni_reader.h" +#include "format/jni/jni_reader.h" #include "storage/olap_scan_common.h" namespace doris { diff --git a/be/src/format/table/trino_connector_jni_reader.cpp b/be/src/format/table/trino_connector_jni_reader.cpp index d37afdabdd9e83..729a0b8f1175f5 100644 --- a/be/src/format/table/trino_connector_jni_reader.cpp +++ b/be/src/format/table/trino_connector_jni_reader.cpp @@ -15,11 +15,12 @@ // specific language governing permissions and limitations // under the License. 
-#include "format/table/trino_connector_jni_reader.h" +#include "trino_connector_jni_reader.h" #include #include "core/types.h" +#include "format/jni/jni_data_bridge.h" #include "runtime/descriptors.h" #include "util/jni-util.h" @@ -37,46 +38,60 @@ const std::string TrinoConnectorJniReader::TRINO_CONNECTOR_OPTION_PREFIX = "trin TrinoConnectorJniReader::TrinoConnectorJniReader( const std::vector& file_slot_descs, RuntimeState* state, RuntimeProfile* profile, const TFileRangeDesc& range) - : JniReader(file_slot_descs, state, profile) { - std::vector column_names; - std::vector column_types; - for (const auto& desc : _file_slot_descs) { - std::string field = desc->col_name(); - column_names.emplace_back(field); - column_types.emplace_back(JniConnector::get_jni_type_with_different_string(desc->type())); - } - std::map params = { - {"catalog_name", range.table_format_params.trino_connector_params.catalog_name}, - {"db_name", range.table_format_params.trino_connector_params.db_name}, - {"table_name", range.table_format_params.trino_connector_params.table_name}, - {"trino_connector_split", - range.table_format_params.trino_connector_params.trino_connector_split}, - {"trino_connector_table_handle", - range.table_format_params.trino_connector_params.trino_connector_table_handle}, - {"trino_connector_column_handles", - range.table_format_params.trino_connector_params.trino_connector_column_handles}, - {"trino_connector_column_metadata", - range.table_format_params.trino_connector_params.trino_connector_column_metadata}, - {"trino_connector_predicate", - range.table_format_params.trino_connector_params.trino_connector_predicate}, - {"trino_connector_trascation_handle", - range.table_format_params.trino_connector_params.trino_connector_trascation_handle}, - {"required_fields", join(column_names, ",")}, - {"columns_types", join(column_types, "#")}}; - - // Used to create trino connector options - for (const auto& kv : - 
range.table_format_params.trino_connector_params.trino_connector_options) { - params[TRINO_CONNECTOR_OPTION_PREFIX + kv.first] = kv.second; - } - _jni_connector = std::make_unique( - "org/apache/doris/trinoconnector/TrinoConnectorJniScanner", params, column_names); -} + : JniReader( + file_slot_descs, state, profile, + "org/apache/doris/trinoconnector/TrinoConnectorJniScanner", + [&]() { + std::vector column_names; + std::vector column_types; + for (const auto& desc : file_slot_descs) { + column_names.emplace_back(desc->col_name()); + column_types.emplace_back( + JniDataBridge::get_jni_type_with_different_string(desc->type())); + } + std::map params = { + {"catalog_name", + range.table_format_params.trino_connector_params.catalog_name}, + {"db_name", range.table_format_params.trino_connector_params.db_name}, + {"table_name", + range.table_format_params.trino_connector_params.table_name}, + {"trino_connector_split", + range.table_format_params.trino_connector_params + .trino_connector_split}, + {"trino_connector_table_handle", + range.table_format_params.trino_connector_params + .trino_connector_table_handle}, + {"trino_connector_column_handles", + range.table_format_params.trino_connector_params + .trino_connector_column_handles}, + {"trino_connector_column_metadata", + range.table_format_params.trino_connector_params + .trino_connector_column_metadata}, + {"trino_connector_predicate", + range.table_format_params.trino_connector_params + .trino_connector_predicate}, + {"trino_connector_trascation_handle", + range.table_format_params.trino_connector_params + .trino_connector_trascation_handle}, + {"required_fields", join(column_names, ",")}, + {"columns_types", join(column_types, "#")}}; + for (const auto& kv : range.table_format_params.trino_connector_params + .trino_connector_options) { + params[TRINO_CONNECTOR_OPTION_PREFIX + kv.first] = kv.second; + } + return params; + }(), + [&]() { + std::vector names; + for (const auto& desc : file_slot_descs) { + 
names.emplace_back(desc->col_name()); + } + return names; + }()) {} Status TrinoConnectorJniReader::init_reader() { - RETURN_IF_ERROR(_jni_connector->init()); RETURN_IF_ERROR(_set_spi_plugins_dir()); - return _jni_connector->open(_state, _profile); + return open(_state, _profile); } Status TrinoConnectorJniReader::_set_spi_plugins_dir() { diff --git a/be/src/format/table/trino_connector_jni_reader.h b/be/src/format/table/trino_connector_jni_reader.h index a0412aaab4d50a..d571c5cd5866ed 100644 --- a/be/src/format/table/trino_connector_jni_reader.h +++ b/be/src/format/table/trino_connector_jni_reader.h @@ -24,7 +24,7 @@ #include #include "common/status.h" -#include "format/jni_reader.h" +#include "format/jni/jni_reader.h" #include "storage/olap_scan_common.h" namespace doris { diff --git a/be/src/format/transformer/vjni_format_transformer.cpp b/be/src/format/transformer/vjni_format_transformer.cpp index 782818f05aa05c..308577fd1f99b6 100644 --- a/be/src/format/transformer/vjni_format_transformer.cpp +++ b/be/src/format/transformer/vjni_format_transformer.cpp @@ -17,7 +17,7 @@ #include "format/transformer/vjni_format_transformer.h" -#include "exec/connector/jni_connector.h" +#include "format/jni/jni_data_bridge.h" #include "runtime/runtime_state.h" namespace doris { @@ -85,11 +85,11 @@ Status VJniFormatTransformer::write(const Block& block) { // 1. Convert Block to Java table metadata (column addresses) Block* mutable_block = const_cast(&block); std::unique_ptr input_table; - RETURN_IF_ERROR(JniConnector::to_java_table(mutable_block, input_table)); + RETURN_IF_ERROR(JniDataBridge::to_java_table(mutable_block, input_table)); // 2. 
Cache schema on first call if (!_schema_cached) { - auto schema = JniConnector::parse_table_schema(mutable_block); + auto schema = JniDataBridge::parse_table_schema(mutable_block); _cached_required_fields = schema.first; _cached_columns_types = schema.second; _schema_cached = true; diff --git a/be/src/service/internal_service.cpp b/be/src/service/internal_service.cpp index 58589c31aa5839..1bf83704b72ed0 100644 --- a/be/src/service/internal_service.cpp +++ b/be/src/service/internal_service.cpp @@ -64,16 +64,15 @@ #include "core/block/block.h" #include "core/data_type/data_type.h" #include "exec/common/variant_util.h" -#include "exec/connector/vjdbc_connector.h" #include "exec/exchange/vdata_stream_mgr.h" #include "exec/rowid_fetcher.h" #include "exec/sink/writer/varrow_flight_result_writer.h" #include "exec/sink/writer/vmysql_result_writer.h" #include "exprs/function/dictionary_factory.h" #include "format/arrow/arrow_row_batch.h" -#include "format/avro/avro_jni_reader.h" #include "format/csv/csv_reader.h" #include "format/generic_reader.h" +#include "format/jni/jni_reader.h" #include "format/json/new_json_reader.h" #include "format/native/native_reader.h" #include "format/orc/vorc_reader.h" @@ -121,6 +120,7 @@ #include "util/async_io.h" #include "util/brpc_client_cache.h" #include "util/brpc_closure.h" +#include "util/jdbc_utils.h" #include "util/jsonb/serialize.h" #include "util/md5.h" #include "util/network_util.h" @@ -865,10 +865,6 @@ void PInternalService::fetch_table_schema(google::protobuf::RpcController* contr NewJsonReader::create_unique(profile.get(), params, range, file_slots, &io_ctx); break; } - case TFileFormatType::FORMAT_AVRO: { - reader = AvroJNIReader::create_unique(profile.get(), params, range, file_slots); - break; - } default: st = Status::InternalError("Not supported file format in fetch table schema: {}", params.format_type); @@ -988,7 +984,6 @@ void PInternalService::test_jdbc_connection(google::protobuf::RpcController* con 
fmt::format("InternalService::test_jdbc_connection")); SCOPED_ATTACH_TASK(mem_tracker); TTableDescriptor table_desc; - JdbcConnectorParam jdbc_param; Status st = Status::OK(); { const uint8_t* buf = (const uint8_t*)request->jdbc_table().data(); @@ -1001,35 +996,96 @@ void PInternalService::test_jdbc_connection(google::protobuf::RpcController* con } } TJdbcTable jdbc_table = (table_desc.jdbcTable); - jdbc_param.catalog_id = jdbc_table.catalog_id; - jdbc_param.driver_class = jdbc_table.jdbc_driver_class; - jdbc_param.driver_path = jdbc_table.jdbc_driver_url; - jdbc_param.driver_checksum = jdbc_table.jdbc_driver_checksum; - jdbc_param.jdbc_url = jdbc_table.jdbc_url; - jdbc_param.user = jdbc_table.jdbc_user; - jdbc_param.passwd = jdbc_table.jdbc_password; - jdbc_param.query_string = request->query_str(); - jdbc_param.table_type = static_cast(request->jdbc_table_type()); - jdbc_param.use_transaction = false; - jdbc_param.connection_pool_min_size = jdbc_table.connection_pool_min_size; - jdbc_param.connection_pool_max_size = jdbc_table.connection_pool_max_size; - jdbc_param.connection_pool_max_life_time = jdbc_table.connection_pool_max_life_time; - jdbc_param.connection_pool_max_wait_time = jdbc_table.connection_pool_max_wait_time; - jdbc_param.connection_pool_keep_alive = jdbc_table.connection_pool_keep_alive; - - std::unique_ptr jdbc_connector; - jdbc_connector.reset(new (std::nothrow) JdbcConnector(jdbc_param)); - - st = jdbc_connector->test_connection(); - st.to_protobuf(result->mutable_status()); - Status clean_st = jdbc_connector->clean_datasource(); - if (!clean_st.ok()) { - LOG(WARNING) << "Failed to clean JDBC datasource: " << clean_st.msg(); + // Resolve driver URL to absolute file:// path + std::string driver_url; + st = JdbcUtils::resolve_driver_url(jdbc_table.jdbc_driver_url, &driver_url); + if (!st.ok()) { + st.to_protobuf(result->mutable_status()); + return; } - Status close_st = jdbc_connector->close(); + + // Build params for JdbcConnectionTester + 
std::map params; + params["jdbc_url"] = jdbc_table.jdbc_url; + params["jdbc_user"] = jdbc_table.jdbc_user; + params["jdbc_password"] = jdbc_table.jdbc_password; + params["jdbc_driver_class"] = jdbc_table.jdbc_driver_class; + params["jdbc_driver_url"] = driver_url; + params["query_sql"] = request->query_str(); + params["catalog_id"] = std::to_string(jdbc_table.catalog_id); + params["connection_pool_min_size"] = std::to_string(jdbc_table.connection_pool_min_size); + params["connection_pool_max_size"] = std::to_string(jdbc_table.connection_pool_max_size); + params["connection_pool_max_wait_time"] = + std::to_string(jdbc_table.connection_pool_max_wait_time); + params["connection_pool_max_life_time"] = + std::to_string(jdbc_table.connection_pool_max_life_time); + params["connection_pool_keep_alive"] = + jdbc_table.connection_pool_keep_alive ? "true" : "false"; + params["clean_datasource"] = "true"; + // Map jdbc_table_type (TOdbcTableType enum value) to string name + // for JdbcTypeHandlerFactory to select the correct type handler. + // This ensures the right validation query is used (e.g. Oracle: "SELECT 1 FROM dual"). 
+ if (request->has_jdbc_table_type()) { + std::string type_name; + switch (request->jdbc_table_type()) { + case 0: + type_name = "MYSQL"; + break; + case 1: + type_name = "ORACLE"; + break; + case 2: + type_name = "POSTGRESQL"; + break; + case 3: + type_name = "SQLSERVER"; + break; + case 6: + type_name = "CLICKHOUSE"; + break; + case 7: + type_name = "SAP_HANA"; + break; + case 8: + type_name = "TRINO"; + break; + case 9: + type_name = "PRESTO"; + break; + case 10: + type_name = "OCEANBASE"; + break; + case 11: + type_name = "OCEANBASE_ORACLE"; + break; + case 13: + type_name = "DB2"; + break; + case 14: + type_name = "GBASE"; + break; + default: + break; + } + if (!type_name.empty()) { + params["table_type"] = type_name; + } + } + // required_fields and columns_types are required by JniReader + params["required_fields"] = "result"; + params["columns_types"] = "int"; + + // Use JniReader to create JdbcConnectionTester, which tests + // the connection in its open() method. + auto jni_reader = + std::make_unique("org/apache/doris/jdbc/JdbcConnectionTester", params); + st = jni_reader->open(nullptr, nullptr); + st.to_protobuf(result->mutable_status()); + + Status close_st = jni_reader->close(); if (!close_st.ok()) { - LOG(WARNING) << "Failed to close JDBC connector: " << close_st.msg(); + LOG(WARNING) << "Failed to close JDBC connection tester: " << close_st.msg(); } }); diff --git a/be/src/util/jdbc_utils.cpp b/be/src/util/jdbc_utils.cpp new file mode 100644 index 00000000000000..cf091f6c9ff425 --- /dev/null +++ b/be/src/util/jdbc_utils.cpp @@ -0,0 +1,67 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "util/jdbc_utils.h" + +#include + +#include "cloud/config.h" +#include "common/config.h" +#include "runtime/plugin/cloud_plugin_downloader.h" + +namespace doris { + +Status JdbcUtils::resolve_driver_url(const std::string& url, std::string* result_url) { + // Already a full URL (e.g. "file:///path/to/driver.jar" or "hdfs://...") + if (url.find(":/") != std::string::npos) { + *result_url = url; + return Status::OK(); + } + + const char* doris_home = std::getenv("DORIS_HOME"); + if (doris_home == nullptr) { + return Status::InternalError("DORIS_HOME environment variable is not set"); + } + + std::string default_url = std::string(doris_home) + "/plugins/jdbc_drivers"; + std::string default_old_url = std::string(doris_home) + "/jdbc_drivers"; + + if (config::jdbc_drivers_dir == default_url) { + std::string target_path = default_url + "/" + url; + std::string old_target_path = default_old_url + "/" + url; + if (std::filesystem::exists(target_path)) { + *result_url = "file://" + target_path; + } else if (std::filesystem::exists(old_target_path)) { + *result_url = "file://" + old_target_path; + } else if (config::is_cloud_mode()) { + // In cloud/elastic deployments, BEs are ephemeral and driver JARs + // may not exist locally. Try downloading from cloud storage. 
+ std::string downloaded_path; + RETURN_IF_ERROR(CloudPluginDownloader::download_from_cloud( + CloudPluginDownloader::PluginType::JDBC_DRIVERS, url, target_path, + &downloaded_path)); + *result_url = "file://" + downloaded_path; + } else { + return Status::InternalError("JDBC driver file does not exist: " + url); + } + } else { + *result_url = "file://" + config::jdbc_drivers_dir + "/" + url; + } + return Status::OK(); +} + +} // namespace doris diff --git a/be/src/util/jdbc_utils.h b/be/src/util/jdbc_utils.h new file mode 100644 index 00000000000000..8c6004c4c24882 --- /dev/null +++ b/be/src/util/jdbc_utils.h @@ -0,0 +1,48 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include + +#include "common/status.h" + +namespace doris { + +/** + * Utility functions for JDBC driver management. + */ +class JdbcUtils { +public: + /** + * Resolve a JDBC driver URL to an absolute file:// URL. + * + * FE sends just the JAR filename (e.g. "mysql-connector-java-8.0.25.jar"). + * This method resolves it to a full file:// URL by searching in the + * configured jdbc_drivers_dir (or the default DORIS_HOME/plugins/jdbc_drivers). 
+ * + * If the URL already contains ":/", it is assumed to be a full URL and + * returned as-is. + * + * @param url The driver URL from FE (may be just a filename) + * @param result_url Output: the resolved file:// URL + * @return Status::OK on success, or InternalError if the file is not found + */ + static Status resolve_driver_url(const std::string& url, std::string* result_url); +}; + +} // namespace doris diff --git a/be/test/exec/connector/vjdbc_connector_test.cpp b/be/test/exec/connector/vjdbc_connector_test.cpp index b1256d876eb8b3..16ff8689aafaf2 100644 --- a/be/test/exec/connector/vjdbc_connector_test.cpp +++ b/be/test/exec/connector/vjdbc_connector_test.cpp @@ -15,8 +15,6 @@ // specific language governing permissions and limitations // under the License. -#include "exec/connector/vjdbc_connector.h" - #include #include @@ -26,10 +24,11 @@ #include "common/config.h" #include "common/status.h" +#include "util/jdbc_utils.h" namespace doris { -class JdbcConnectorTest : public ::testing::Test { +class JdbcUtilsTest : public ::testing::Test { protected: void SetUp() override { // Save original config and environment @@ -38,17 +37,6 @@ class JdbcConnectorTest : public ::testing::Test { // Set DORIS_HOME for testing setenv("DORIS_HOME", "/tmp/test_doris", 1); - - // Initialize test JDBC parameters - param_.catalog_id = 1; - param_.driver_path = "test-driver.jar"; - param_.driver_class = "com.test.Driver"; - param_.resource_name = "test_resource"; - param_.driver_checksum = "test_checksum"; - param_.jdbc_url = "jdbc:test://localhost:3306/test"; - param_.user = "test_user"; - param_.passwd = "test_passwd"; - param_.query_string = "SELECT * FROM test"; } void TearDown() override { @@ -62,51 +50,34 @@ class JdbcConnectorTest : public ::testing::Test { } } - JdbcConnector createConnector() { return JdbcConnector(param_); } - private: std::string original_jdbc_drivers_dir_; const char* original_doris_home_ = nullptr; - JdbcConnectorParam param_; }; -// Test 
_get_real_url method -TEST_F(JdbcConnectorTest, TestGetRealUrlWithAbsoluteUrl) { - auto connector = createConnector(); +// Test resolve_driver_url with absolute URLs +TEST_F(JdbcUtilsTest, TestResolveDriverUrlWithAbsoluteUrl) { std::string result_url; - // Test with absolute URL (contains ":/ ") - Status status = connector._get_real_url("http://example.com/driver.jar", &result_url); + // Test with HTTP URL (contains ":/") + Status status = JdbcUtils::resolve_driver_url("http://example.com/driver.jar", &result_url); EXPECT_TRUE(status.ok()); EXPECT_EQ(result_url, "http://example.com/driver.jar"); // Test with S3 URL - status = connector._get_real_url("s3://bucket/path/driver.jar", &result_url); + status = JdbcUtils::resolve_driver_url("s3://bucket/path/driver.jar", &result_url); EXPECT_TRUE(status.ok()); EXPECT_EQ(result_url, "s3://bucket/path/driver.jar"); // Test with file URL - status = connector._get_real_url("file:///path/to/driver.jar", &result_url); + status = JdbcUtils::resolve_driver_url("file:///path/to/driver.jar", &result_url); EXPECT_TRUE(status.ok()); EXPECT_EQ(result_url, "file:///path/to/driver.jar"); } -TEST_F(JdbcConnectorTest, TestGetRealUrlWithRelativeUrl) { - auto connector = createConnector(); +TEST_F(JdbcUtilsTest, TestResolveDriverUrlWithRelativeUrl) { std::string result_url; - // Test with relative URL (no ":/" found) - should call _check_and_return_default_driver_url - Status status = connector._get_real_url("mysql-connector.jar", &result_url); - - // This should process successfully - EXPECT_TRUE(status.ok()); - EXPECT_FALSE(result_url.empty()); -} - -// Test _check_and_return_default_driver_url method with default directory -TEST_F(JdbcConnectorTest, TestCheckAndReturnDefaultDriverUrlWithDefaultConfig) { - auto connector = createConnector(); - // Set config to default value to trigger the default directory logic config::jdbc_drivers_dir = "/tmp/test_doris/plugins/jdbc_drivers"; @@ -114,19 +85,16 @@ TEST_F(JdbcConnectorTest, 
TestCheckAndReturnDefaultDriverUrlWithDefaultConfig) { std::string dir = "/tmp/test_doris/plugins/jdbc_drivers"; std::string file_path = dir + "/mysql-connector.jar"; - // Create directory + // Create directory and file std::filesystem::create_directories(dir); - - // Create test file std::ofstream file(file_path); file << "test content"; file.close(); - std::string result_url; - Status status = - connector._check_and_return_default_driver_url("mysql-connector.jar", &result_url); - + // Test with relative URL (no ":/") - should resolve to file:// + Status status = JdbcUtils::resolve_driver_url("mysql-connector.jar", &result_url); EXPECT_TRUE(status.ok()); + EXPECT_FALSE(result_url.empty()); EXPECT_EQ(result_url, "file://" + file_path); // Cleanup @@ -134,41 +102,21 @@ TEST_F(JdbcConnectorTest, TestCheckAndReturnDefaultDriverUrlWithDefaultConfig) { std::filesystem::remove_all(dir); } -TEST_F(JdbcConnectorTest, TestCheckAndReturnDefaultDriverUrlWithCustomConfig) { - auto connector = createConnector(); - - // Set custom JDBC drivers directory - config::jdbc_drivers_dir = "/custom/jdbc/path"; - - std::string result_url; - Status status = - connector._check_and_return_default_driver_url("postgres-driver.jar", &result_url); - - EXPECT_TRUE(status.ok()); - EXPECT_EQ(result_url, "file:///custom/jdbc/path/postgres-driver.jar"); -} - -TEST_F(JdbcConnectorTest, TestDefaultDirectoryFileExistsPath) { - auto connector = createConnector(); - - // Set config to default value +// Test resolve_driver_url with default directory +TEST_F(JdbcUtilsTest, TestResolveWithDefaultConfig) { config::jdbc_drivers_dir = "/tmp/test_doris/plugins/jdbc_drivers"; // Create the target directory and file for testing std::string dir = "/tmp/test_doris/plugins/jdbc_drivers"; - std::string file_path = dir + "/existing-driver.jar"; + std::string file_path = dir + "/mysql-connector.jar"; - // Create directory std::filesystem::create_directories(dir); - - // Create test file std::ofstream file(file_path); 
file << "test content"; file.close(); std::string result_url; - Status status = - connector._check_and_return_default_driver_url("existing-driver.jar", &result_url); + Status status = JdbcUtils::resolve_driver_url("mysql-connector.jar", &result_url); EXPECT_TRUE(status.ok()); EXPECT_EQ(result_url, "file://" + file_path); @@ -178,61 +126,55 @@ TEST_F(JdbcConnectorTest, TestDefaultDirectoryFileExistsPath) { std::filesystem::remove_all(dir); } -// Simplified test without cloud mode dependency -TEST_F(JdbcConnectorTest, TestCloudModeSimulation) { - auto connector = createConnector(); +TEST_F(JdbcUtilsTest, TestResolveWithCustomConfig) { + // Set custom JDBC drivers directory + config::jdbc_drivers_dir = "/custom/jdbc/path"; - // Set config to default value - config::jdbc_drivers_dir = "/tmp/test_doris/plugins/jdbc_drivers"; + std::string result_url; + Status status = JdbcUtils::resolve_driver_url("postgres-driver.jar", &result_url); - // Create the old directory and file for testing (fallback path) - std::string old_dir = "/tmp/test_doris/jdbc_drivers"; - std::string file_path = old_dir + "/cloud-driver.jar"; + EXPECT_TRUE(status.ok()); + EXPECT_EQ(result_url, "file:///custom/jdbc/path/postgres-driver.jar"); +} - // Create directory - std::filesystem::create_directories(old_dir); +TEST_F(JdbcUtilsTest, TestDefaultDirectoryFileExistsPath) { + config::jdbc_drivers_dir = "/tmp/test_doris/plugins/jdbc_drivers"; - // Create test file + std::string dir = "/tmp/test_doris/plugins/jdbc_drivers"; + std::string file_path = dir + "/existing-driver.jar"; + + std::filesystem::create_directories(dir); std::ofstream file(file_path); file << "test content"; file.close(); std::string result_url; - Status status = connector._check_and_return_default_driver_url("cloud-driver.jar", &result_url); + Status status = JdbcUtils::resolve_driver_url("existing-driver.jar", &result_url); - // Should process successfully and return fallback path EXPECT_TRUE(status.ok()); EXPECT_EQ(result_url, 
"file://" + file_path); // Cleanup std::filesystem::remove(file_path); - std::filesystem::remove_all(old_dir); + std::filesystem::remove_all(dir); } -TEST_F(JdbcConnectorTest, TestFallbackToOldDirectory) { - auto connector = createConnector(); - - // Set config to default value but file doesn't exist in new directory +TEST_F(JdbcUtilsTest, TestFallbackToOldDirectory) { config::jdbc_drivers_dir = "/tmp/test_doris/plugins/jdbc_drivers"; - // Create the old directory and file for testing (fallback path) + // Create only the old directory and file (not the new one) std::string old_dir = "/tmp/test_doris/jdbc_drivers"; std::string file_path = old_dir + "/fallback-driver.jar"; - // Create directory std::filesystem::create_directories(old_dir); - - // Create test file std::ofstream file(file_path); file << "test content"; file.close(); std::string result_url; - Status status = - connector._check_and_return_default_driver_url("fallback-driver.jar", &result_url); + Status status = JdbcUtils::resolve_driver_url("fallback-driver.jar", &result_url); EXPECT_TRUE(status.ok()); - // Should fallback to old directory when file not found and not in cloud mode EXPECT_EQ(result_url, "file://" + file_path); // Cleanup @@ -240,81 +182,64 @@ TEST_F(JdbcConnectorTest, TestFallbackToOldDirectory) { std::filesystem::remove_all(old_dir); } -TEST_F(JdbcConnectorTest, TestPathConstruction) { - auto connector = createConnector(); - - // Test different DORIS_HOME values (use /tmp to avoid permission issues) +TEST_F(JdbcUtilsTest, TestPathConstruction) { setenv("DORIS_HOME", "/tmp/test_doris2", 1); - - // Set to default config config::jdbc_drivers_dir = "/tmp/test_doris2/plugins/jdbc_drivers"; - // Create the old directory and file for testing (fallback path) std::string old_dir = "/tmp/test_doris2/jdbc_drivers"; std::string file_path = old_dir + "/test.jar"; - // Create directory std::filesystem::create_directories(old_dir); - - // Create test file std::ofstream file(file_path); file << "test 
content"; file.close(); std::string result_url; - Status status = connector._check_and_return_default_driver_url("test.jar", &result_url); + Status status = JdbcUtils::resolve_driver_url("test.jar", &result_url); EXPECT_TRUE(status.ok()); - EXPECT_EQ(result_url, "file://" + file_path); // Fallback path + EXPECT_EQ(result_url, "file://" + file_path); // Cleanup std::filesystem::remove(file_path); std::filesystem::remove_all(old_dir); } -TEST_F(JdbcConnectorTest, TestEdgeCases) { - auto connector = createConnector(); +TEST_F(JdbcUtilsTest, TestEdgeCases) { std::string result_url; - // Test empty URL - Status status = connector._get_real_url("", &result_url); - EXPECT_TRUE(status.ok()); // Should be treated as relative URL + // Test empty URL - treated as relative, should go through resolve logic + config::jdbc_drivers_dir = "/custom/path"; + Status status = JdbcUtils::resolve_driver_url("", &result_url); + EXPECT_TRUE(status.ok()); - // Test URL with just colon (no slash after) - status = connector._get_real_url("invalid:url", &result_url); - EXPECT_TRUE(status.ok()); // Should be treated as relative URL + // Test URL with just colon (no slash after) - treated as relative + status = JdbcUtils::resolve_driver_url("invalid:url", &result_url); + EXPECT_TRUE(status.ok()); - // Test URL with spaces - status = connector._get_real_url("my driver.jar", &result_url); - EXPECT_TRUE(status.ok()); // Should be treated as relative URL + // Test URL with spaces - treated as relative + status = JdbcUtils::resolve_driver_url("my driver.jar", &result_url); + EXPECT_TRUE(status.ok()); } -TEST_F(JdbcConnectorTest, TestMultipleCallsConsistency) { - auto connector = createConnector(); - +TEST_F(JdbcUtilsTest, TestMultipleCallsConsistency) { config::jdbc_drivers_dir = "/tmp/test_doris/plugins/jdbc_drivers"; - // Create the target directory and file for testing std::string dir = "/tmp/test_doris/plugins/jdbc_drivers"; std::string file_path = dir + "/same-driver.jar"; - // Create directory 
std::filesystem::create_directories(dir); - - // Create test file std::ofstream file(file_path); file << "test content"; file.close(); std::string result_url1, result_url2; - Status status1 = - connector._check_and_return_default_driver_url("same-driver.jar", &result_url1); - Status status2 = - connector._check_and_return_default_driver_url("same-driver.jar", &result_url2); + Status status1 = JdbcUtils::resolve_driver_url("same-driver.jar", &result_url1); + Status status2 = JdbcUtils::resolve_driver_url("same-driver.jar", &result_url2); EXPECT_TRUE(status1.ok()); EXPECT_TRUE(status2.ok()); - EXPECT_EQ(result_url1, result_url2); // Should be consistent + EXPECT_EQ(result_url1, result_url2); EXPECT_EQ(result_url1, "file://" + file_path); // Cleanup @@ -322,8 +247,7 @@ TEST_F(JdbcConnectorTest, TestMultipleCallsConsistency) { std::filesystem::remove_all(dir); } -TEST_F(JdbcConnectorTest, TestUrlDetectionLogic) { - auto connector = createConnector(); +TEST_F(JdbcUtilsTest, TestUrlDetectionLogic) { std::string result_url; // Test various URL patterns that should be detected as absolute @@ -332,17 +256,17 @@ TEST_F(JdbcConnectorTest, TestUrlDetectionLogic) { "s3://bucket/driver.jar", "file:///local/driver.jar", "ftp://server/driver.jar"}; for (const auto& url : absolute_urls) { - Status status = connector._get_real_url(url, &result_url); + Status status = JdbcUtils::resolve_driver_url(url, &result_url); EXPECT_TRUE(status.ok()); EXPECT_EQ(result_url, url); } // Test patterns that should be treated as relative + config::jdbc_drivers_dir = "/custom/path"; std::vector relative_urls = {"driver.jar", "path/driver.jar", "invalid:no-slash"}; for (const auto& url : relative_urls) { - Status status = connector._get_real_url(url, &result_url); - // Should process through _check_and_return_default_driver_url + Status status = JdbcUtils::resolve_driver_url(url, &result_url); EXPECT_TRUE(status.ok()); EXPECT_TRUE(result_url.find(url) != std::string::npos); } diff --git 
a/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/BaseJdbcExecutor.java b/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/BaseJdbcExecutor.java index 0cf4136a461060..e5cd39fc5a7e73 100644 --- a/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/BaseJdbcExecutor.java +++ b/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/BaseJdbcExecutor.java @@ -62,6 +62,15 @@ import java.util.Map; import java.util.function.Function; +/** + * @deprecated This class and its subclasses are deprecated. + * JDBC read operations should use {@link JdbcJniScanner} (extends JniScanner). + * JDBC write operations should use {@link JdbcJniWriter} (extends JniWriter). + * This class is retained temporarily because JdbcJniScanner does not yet implement + * all database-specific type conversions that exist in the executor subclasses. + * Once those are ported, this class hierarchy can be removed. + */ +@Deprecated public abstract class BaseJdbcExecutor implements JdbcExecutor { private static final Logger LOG = Logger.getLogger(BaseJdbcExecutor.class); private static final TBinaryProtocol.Factory PROTOCOL_FACTORY = new TBinaryProtocol.Factory(); diff --git a/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/ClickHouseJdbcExecutor.java b/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/ClickHouseJdbcExecutor.java index a3bb9b7b261b67..4d4ee93572706c 100644 --- a/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/ClickHouseJdbcExecutor.java +++ b/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/ClickHouseJdbcExecutor.java @@ -34,6 +34,10 @@ import java.util.ArrayList; import java.util.List; +/** + * @deprecated Use {@link ClickHouseTypeHandler} instead. 
+ */ +@Deprecated public class ClickHouseJdbcExecutor extends BaseJdbcExecutor { public ClickHouseJdbcExecutor(byte[] thriftParams) throws Exception { diff --git a/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/ClickHouseTypeHandler.java b/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/ClickHouseTypeHandler.java new file mode 100644 index 00000000000000..0ce1d9973f2f4b --- /dev/null +++ b/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/ClickHouseTypeHandler.java @@ -0,0 +1,211 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.jdbc; + +import org.apache.doris.common.jni.vec.ColumnType; +import org.apache.doris.common.jni.vec.ColumnValueConverter; + +import com.google.common.collect.Lists; + +import java.math.BigDecimal; +import java.math.BigInteger; +import java.net.InetAddress; +import java.sql.Array; +import java.sql.ResultSet; +import java.sql.ResultSetMetaData; +import java.sql.SQLException; +import java.time.LocalDate; +import java.time.LocalDateTime; +import java.util.ArrayList; +import java.util.List; + +/** + * ClickHouse-specific type handler. 
+ * Key specializations: + * - ARRAY: direct getArray() call with element type conversion + * - All numeric types use getObject(Class) for proper null handling + */ +public class ClickHouseTypeHandler extends DefaultTypeHandler { + + @Override + public Object getColumnValue(ResultSet rs, int columnIndex, ColumnType type, + ResultSetMetaData metadata) throws SQLException { + switch (type.getType()) { + case BOOLEAN: + return rs.getObject(columnIndex, Boolean.class); + case TINYINT: + return rs.getObject(columnIndex, Byte.class); + case SMALLINT: + return rs.getObject(columnIndex, Short.class); + case INT: + return rs.getObject(columnIndex, Integer.class); + case BIGINT: + return rs.getObject(columnIndex, Long.class); + case LARGEINT: + return rs.getObject(columnIndex, BigInteger.class); + case FLOAT: + return rs.getObject(columnIndex, Float.class); + case DOUBLE: + return rs.getObject(columnIndex, Double.class); + case DECIMALV2: + case DECIMAL32: + case DECIMAL64: + case DECIMAL128: + return rs.getObject(columnIndex, BigDecimal.class); + case DATE: + case DATEV2: + return rs.getObject(columnIndex, LocalDate.class); + case DATETIME: + case DATETIMEV2: + return rs.getObject(columnIndex, LocalDateTime.class); + case CHAR: + case VARCHAR: + case STRING: + return rs.getObject(columnIndex, String.class); + case ARRAY: { + Array array = rs.getArray(columnIndex); + if (array == null) { + return null; + } + return convertArrayToList(array.getArray()); + } + default: + throw new IllegalArgumentException("Unsupported column type: " + type.getType()); + } + } + + @Override + public ColumnValueConverter getOutputConverter(ColumnType columnType, String replaceString) { + if (columnType.getType() == ColumnType.Type.ARRAY) { + return createConverter( + input -> convertArray((List) input, columnType.getChildTypes().get(0)), + List.class); + } + return null; + } + + private static List convertArrayToList(Object array) { + if (array == null) { + return null; + } + int length = 
java.lang.reflect.Array.getLength(array); + List list = new ArrayList<>(length); + for (int i = 0; i < length; i++) { + Object element = java.lang.reflect.Array.get(array, i); + list.add(element); + } + return list; + } + + /** + * Convert array elements to the expected Doris column type. + * ClickHouse JDBC driver may return unsigned integer array elements as wider + * Java types (e.g., UInt32 elements as BigInteger instead of Long). + */ + private List convertArray(List array, ColumnType type) { + if (array == null) { + return null; + } + switch (type.getType()) { + case SMALLINT: { + List result = Lists.newArrayList(); + for (Object element : array) { + if (element == null) { + result.add(null); + } else if (element instanceof Number) { + result.add(((Number) element).shortValue()); + } else { + throw new IllegalArgumentException("Unsupported element type: " + element.getClass()); + } + } + return result; + } + case INT: { + List result = Lists.newArrayList(); + for (Object element : array) { + if (element == null) { + result.add(null); + } else if (element instanceof Number) { + result.add(((Number) element).intValue()); + } else { + throw new IllegalArgumentException("Unsupported element type: " + element.getClass()); + } + } + return result; + } + case BIGINT: { + List result = Lists.newArrayList(); + for (Object element : array) { + if (element == null) { + result.add(null); + } else if (element instanceof Number) { + result.add(((Number) element).longValue()); + } else { + throw new IllegalArgumentException("Unsupported element type: " + element.getClass()); + } + } + return result; + } + case LARGEINT: { + List result = Lists.newArrayList(); + for (Object element : array) { + if (element == null) { + result.add(null); + } else if (element instanceof BigInteger) { + result.add((BigInteger) element); + } else if (element instanceof BigDecimal) { + result.add(((BigDecimal) element).toBigInteger()); + } else if (element instanceof Number) { + 
result.add(BigInteger.valueOf(((Number) element).longValue())); + } else { + throw new IllegalArgumentException("Unsupported element type: " + element.getClass()); + } + } + return result; + } + case STRING: { + List result = Lists.newArrayList(); + for (Object element : array) { + if (element == null) { + result.add(null); + } else if (element instanceof InetAddress) { + result.add(((InetAddress) element).getHostAddress()); + } else { + result.add(element.toString()); + } + } + return result; + } + case ARRAY: { + List> resultArray = Lists.newArrayList(); + for (Object element : array) { + if (element == null) { + resultArray.add(null); + } else { + resultArray.add( + Lists.newArrayList(convertArray((List) element, type.getChildTypes().get(0)))); + } + } + return resultArray; + } + default: + return array; + } + } +} + diff --git a/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/DB2JdbcExecutor.java b/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/DB2JdbcExecutor.java index b08797d21acfe7..ea570342344f2e 100644 --- a/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/DB2JdbcExecutor.java +++ b/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/DB2JdbcExecutor.java @@ -31,6 +31,10 @@ import java.time.LocalDate; import java.time.LocalDateTime; +/** + * @deprecated Use {@link DB2TypeHandler} instead. 
+ */ +@Deprecated public class DB2JdbcExecutor extends BaseJdbcExecutor { public DB2JdbcExecutor(byte[] thriftParams) throws Exception { super(thriftParams); diff --git a/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/DB2TypeHandler.java b/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/DB2TypeHandler.java new file mode 100644 index 00000000000000..2a1933bccc0a11 --- /dev/null +++ b/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/DB2TypeHandler.java @@ -0,0 +1,79 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.jdbc; + +import org.apache.doris.common.jni.vec.ColumnType; + +import com.zaxxer.hikari.HikariDataSource; + +import java.math.BigDecimal; +import java.sql.Date; +import java.sql.ResultSet; +import java.sql.ResultSetMetaData; +import java.sql.SQLException; +import java.sql.Timestamp; + +/** + * DB2-specific type handler. 
+ * Key specializations: + * - Validation query: "select 1 from sysibm.sysdummy1" + * - Uses getObject(Class) for DECIMAL types + */ +public class DB2TypeHandler extends DefaultTypeHandler { + + @Override + public Object getColumnValue(ResultSet rs, int columnIndex, ColumnType type, + ResultSetMetaData metadata) throws SQLException { + switch (type.getType()) { + case DECIMALV2: + case DECIMAL32: + case DECIMAL64: + case DECIMAL128: + return rs.getObject(columnIndex, BigDecimal.class); + case SMALLINT: + case INT: + case BIGINT: + case FLOAT: + case DOUBLE: + return rs.getObject(columnIndex); + case DATE: + case DATEV2: { + Date sqlDate = rs.getDate(columnIndex); + return rs.wasNull() ? null : sqlDate.toLocalDate(); + } + case DATETIME: + case DATETIMEV2: { + Timestamp ts = rs.getTimestamp(columnIndex); + return rs.wasNull() ? null : ts.toLocalDateTime(); + } + case CHAR: + case VARCHAR: + case STRING: + return rs.getObject(columnIndex, String.class); + case VARBINARY: + return rs.getObject(columnIndex, byte[].class); + default: + throw new IllegalArgumentException("Unsupported column type: " + type.getType()); + } + } + + @Override + public void setValidationQuery(HikariDataSource ds) { + ds.setConnectionTestQuery("select 1 from sysibm.sysdummy1"); + } +} diff --git a/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/DefaultTypeHandler.java b/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/DefaultTypeHandler.java new file mode 100644 index 00000000000000..ddec833562d166 --- /dev/null +++ b/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/DefaultTypeHandler.java @@ -0,0 +1,179 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.jdbc; + +import org.apache.doris.common.jni.vec.ColumnType; +import org.apache.doris.common.jni.vec.ColumnValueConverter; + +import java.math.BigDecimal; +import java.math.BigInteger; +import java.sql.Date; +import java.sql.ResultSet; +import java.sql.ResultSetMetaData; +import java.sql.SQLException; +import java.sql.Timestamp; + +/** + * Default type handler for databases without specific specializations. + * Provides basic JDBC type reading that works for most databases. + * Database-specific handlers extend this and override methods as needed. 
+ */ +public class DefaultTypeHandler implements JdbcTypeHandler { + + @Override + public Object getColumnValue(ResultSet rs, int columnIndex, ColumnType type, + ResultSetMetaData metadata) throws SQLException { + Object value; + switch (type.getType()) { + case BOOLEAN: + value = rs.getBoolean(columnIndex); + break; + case TINYINT: + value = rs.getByte(columnIndex); + break; + case SMALLINT: + value = rs.getShort(columnIndex); + break; + case INT: + value = rs.getInt(columnIndex); + break; + case BIGINT: + value = rs.getLong(columnIndex); + break; + case LARGEINT: + value = rs.getObject(columnIndex); + if (value instanceof BigDecimal) { + value = ((BigDecimal) value).toBigInteger(); + } else if (value != null && !(value instanceof BigInteger)) { + value = new BigInteger(value.toString()); + } + break; + case FLOAT: + value = rs.getFloat(columnIndex); + break; + case DOUBLE: + value = rs.getDouble(columnIndex); + break; + case DECIMALV2: + case DECIMAL32: + case DECIMAL64: + case DECIMAL128: + value = rs.getBigDecimal(columnIndex); + break; + case DATE: + case DATEV2: { + Date sqlDate = rs.getDate(columnIndex); + value = sqlDate != null ? sqlDate.toLocalDate() : null; + break; + } + case DATETIME: + case DATETIMEV2: { + Timestamp ts = rs.getTimestamp(columnIndex); + value = ts != null ? ts.toLocalDateTime() : null; + break; + } + case CHAR: + case VARCHAR: + case STRING: + value = rs.getString(columnIndex); + break; + case VARBINARY: + case BYTE: { + byte[] data = rs.getBytes(columnIndex); + return rs.wasNull() ? null : data; + } + default: + value = rs.getString(columnIndex); + break; + } + return rs.wasNull() ? 
null : value; + } + + @Override + public ColumnValueConverter getOutputConverter(ColumnType columnType, String replaceString) { + return null; + } + + // ===================================================================== + // Shared utility methods available to all type handlers + // ===================================================================== + + /** + * Trim trailing spaces from a string. Used by Oracle, PostgreSQL, Gbase + * for CHAR type columns that have right-padding. + */ + protected static String trimSpaces(String str) { + if (str == null) { + return null; + } + int end = str.length(); + while (end > 0 && str.charAt(end - 1) == ' ') { + end--; + } + return end < str.length() ? str.substring(0, end) : str; + } + + /** + * Convert java.sql.Time to a properly formatted String. + * Preserves millisecond precision when present (e.g., "16:49:05.123"). + */ + protected static String timeToString(java.sql.Time time) { + if (time == null) { + return null; + } + long milliseconds = Math.abs(time.getTime() % 1000L); + if (milliseconds != 0) { + return String.format("%s.%03d", time, milliseconds); + } else { + return time.toString(); + } + } + + /** + * Convert byte array to hex string with "0x" prefix. + * Default implementation used by most databases. + */ + protected static String defaultByteArrayToHexString(byte[] bytes) { + if (bytes == null) { + return null; + } + StringBuilder hexString = new StringBuilder("0x"); + for (byte b : bytes) { + String hex = Integer.toHexString(0xFF & b); + if (hex.length() == 1) { + hexString.append('0'); + } + hexString.append(hex.toUpperCase()); + } + return hexString.toString(); + } + + /** + * Helper to create a ColumnValueConverter from a converter function. 
+ */ + protected static ColumnValueConverter createConverter( + java.util.function.Function converterFunction, Class type) { + return column -> { + Object[] result = (Object[]) java.lang.reflect.Array.newInstance(type, column.length); + for (int i = 0; i < column.length; i++) { + result[i] = column[i] != null ? converterFunction.apply(column[i]) : null; + } + return result; + }; + } +} diff --git a/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/GbaseJdbcExecutor.java b/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/GbaseJdbcExecutor.java index e8e29a8178fb82..f0e098480d33c9 100644 --- a/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/GbaseJdbcExecutor.java +++ b/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/GbaseJdbcExecutor.java @@ -27,6 +27,10 @@ import java.sql.Timestamp; import java.util.Objects; +/** + * @deprecated Use {@link GbaseTypeHandler} instead. + */ +@Deprecated public class GbaseJdbcExecutor extends BaseJdbcExecutor { public GbaseJdbcExecutor(byte[] thriftParams) throws Exception { diff --git a/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/GbaseTypeHandler.java b/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/GbaseTypeHandler.java new file mode 100644 index 00000000000000..e83f45e5fa4428 --- /dev/null +++ b/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/GbaseTypeHandler.java @@ -0,0 +1,101 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.doris.jdbc;

import org.apache.doris.common.jni.vec.ColumnType;
import org.apache.doris.common.jni.vec.ColumnValueConverter;

import java.math.BigDecimal;
import java.sql.Date;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.sql.Timestamp;

/**
 * Gbase-specific type handler.
 * Key specializations:
 * - CHAR trimming
 * - Explicit null checks via wasNull() for primitive types
 */
public class GbaseTypeHandler extends DefaultTypeHandler {

    /**
     * Read one column from the current row. Primitive getters box their value
     * and a single wasNull() check maps SQL NULL to Java null.
     */
    @Override
    public Object getColumnValue(ResultSet rs, int columnIndex, ColumnType type,
            ResultSetMetaData metadata) throws SQLException {
        final Object raw;
        switch (type.getType()) {
            case TINYINT:
                raw = rs.getByte(columnIndex);
                break;
            case SMALLINT:
                raw = rs.getShort(columnIndex);
                break;
            case INT:
                raw = rs.getInt(columnIndex);
                break;
            case BIGINT:
                raw = rs.getLong(columnIndex);
                break;
            case FLOAT:
                raw = rs.getFloat(columnIndex);
                break;
            case DOUBLE:
                raw = rs.getDouble(columnIndex);
                break;
            case DECIMALV2:
            case DECIMAL32:
            case DECIMAL64:
            case DECIMAL128:
                raw = rs.getBigDecimal(columnIndex);
                break;
            case DATE:
            case DATEV2: {
                // Dates need the conversion call guarded: toLocalDate() would NPE
                // on a NULL column, so return before the shared check below.
                Date sqlDate = rs.getDate(columnIndex);
                return rs.wasNull() ? null : sqlDate.toLocalDate();
            }
            case DATETIME:
            case DATETIMEV2: {
                Timestamp ts = rs.getTimestamp(columnIndex);
                return rs.wasNull() ? null : ts.toLocalDateTime();
            }
            case CHAR:
            case VARCHAR:
            case STRING:
                raw = (String) rs.getObject(columnIndex);
                break;
            default:
                throw new IllegalArgumentException("Unsupported column type: " + type.getType());
        }
        return rs.wasNull() ? null : raw;
    }

    @Override
    public ColumnValueConverter getOutputConverter(ColumnType columnType, String replaceString) {
        if (columnType.getType() != ColumnType.Type.CHAR) {
            return null;
        }
        // Gbase right-pads CHAR columns with spaces; strip them on output.
        return createConverter(input -> trimSpaces(input.toString()), String.class);
    }
}
+ +package org.apache.doris.jdbc; + +import org.apache.doris.cloud.security.SecurityChecker; +import org.apache.doris.common.jni.JniScanner; +import org.apache.doris.common.jni.vec.ColumnType; + +import com.zaxxer.hikari.HikariDataSource; +import org.apache.log4j.Logger; + +import java.io.IOException; +import java.net.URL; +import java.net.URLClassLoader; +import java.sql.Connection; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.util.Collections; +import java.util.Map; + +/** + * JdbcConnectionTester is a lightweight JNI-invocable class for testing JDBC connections. + * It extends JniScanner to reuse the JniConnector infrastructure on the C++ side. + * + *

Usage: C++ creates a JniConnector with this class name and connection params, + * calls open() to test the connection, then close() to clean up. + * + *

The getNext() method is a no-op since this class is only used for testing. + * + *

Parameters: + *

    + *
  • jdbc_url, jdbc_user, jdbc_password, jdbc_driver_class, jdbc_driver_url
  • + *
  • query_sql — the test query to run
  • + *
  • catalog_id, connection_pool_min_size, connection_pool_max_size, etc.
  • + *
  • clean_datasource — if "true", close the datasource pool on close()
  • + *
+ */ +public class JdbcConnectionTester extends JniScanner { + private static final Logger LOG = Logger.getLogger(JdbcConnectionTester.class); + + private final String jdbcUrl; + private final String jdbcUser; + private final String jdbcPassword; + private final String jdbcDriverClass; + private final String jdbcDriverUrl; + private final String querySql; + private final long catalogId; + private final int connectionPoolMinSize; + private final int connectionPoolMaxSize; + private final int connectionPoolMaxWaitTime; + private final int connectionPoolMaxLifeTime; + private final boolean connectionPoolKeepAlive; + private final boolean cleanDatasource; + private final JdbcTypeHandler typeHandler; + + private HikariDataSource hikariDataSource = null; + private Connection conn = null; + private PreparedStatement stmt = null; + private ClassLoader classLoader = null; + + public JdbcConnectionTester(int batchSize, Map params) { + this.jdbcUrl = params.getOrDefault("jdbc_url", ""); + this.jdbcUser = params.getOrDefault("jdbc_user", ""); + this.jdbcPassword = params.getOrDefault("jdbc_password", ""); + this.jdbcDriverClass = params.getOrDefault("jdbc_driver_class", ""); + this.jdbcDriverUrl = params.getOrDefault("jdbc_driver_url", ""); + this.querySql = params.getOrDefault("query_sql", "SELECT 1"); + this.catalogId = Long.parseLong(params.getOrDefault("catalog_id", "0")); + this.connectionPoolMinSize = Integer.parseInt( + params.getOrDefault("connection_pool_min_size", "1")); + this.connectionPoolMaxSize = Integer.parseInt( + params.getOrDefault("connection_pool_max_size", "10")); + this.connectionPoolMaxWaitTime = Integer.parseInt( + params.getOrDefault("connection_pool_max_wait_time", "5000")); + this.connectionPoolMaxLifeTime = Integer.parseInt( + params.getOrDefault("connection_pool_max_life_time", "1800000")); + this.connectionPoolKeepAlive = "true".equalsIgnoreCase( + params.getOrDefault("connection_pool_keep_alive", "false")); + this.cleanDatasource = 
"true".equalsIgnoreCase( + params.getOrDefault("clean_datasource", "false")); + + // Select database-specific type handler for validation query + String tableType = params.getOrDefault("table_type", ""); + this.typeHandler = JdbcTypeHandlerFactory.create(tableType); + + // Initialize with a dummy schema since this is only for connection testing + initTableInfo(new ColumnType[] {ColumnType.parseType("result", "int")}, + new String[] {"result"}, batchSize); + } + + /** + * Open the connection and execute the test query to verify connectivity. + */ + @Override + public void open() throws IOException { + ClassLoader oldClassLoader = Thread.currentThread().getContextClassLoader(); + try { + URL[] urls = {new URL(jdbcDriverUrl)}; + ClassLoader parent = getClass().getClassLoader(); + this.classLoader = URLClassLoader.newInstance(urls, parent); + Thread.currentThread().setContextClassLoader(classLoader); + + String cacheKey = createCacheKey(); + hikariDataSource = JdbcDataSource.getDataSource().getSource(cacheKey); + if (hikariDataSource == null) { + synchronized (JdbcConnectionTester.class) { + hikariDataSource = JdbcDataSource.getDataSource().getSource(cacheKey); + if (hikariDataSource == null) { + HikariDataSource ds = new HikariDataSource(); + ds.setDriverClassName(jdbcDriverClass); + ds.setJdbcUrl(SecurityChecker.getInstance().getSafeJdbcUrl(jdbcUrl)); + ds.setUsername(jdbcUser); + ds.setPassword(jdbcPassword); + ds.setMinimumIdle(connectionPoolMinSize); + ds.setMaximumPoolSize(connectionPoolMaxSize); + ds.setConnectionTimeout(connectionPoolMaxWaitTime); + ds.setMaxLifetime(connectionPoolMaxLifeTime); + ds.setIdleTimeout(connectionPoolMaxLifeTime / 2L); + // Use type handler for database-specific validation query + // (e.g. 
Oracle: "SELECT 1 FROM dual", DB2: "select 1 from sysibm.sysdummy1") + typeHandler.setValidationQuery(ds); + if (connectionPoolKeepAlive) { + ds.setKeepaliveTime(connectionPoolMaxLifeTime / 5L); + } + hikariDataSource = ds; + JdbcDataSource.getDataSource().putSource(cacheKey, hikariDataSource); + } + } + } + + conn = hikariDataSource.getConnection(); + stmt = conn.prepareStatement(querySql); + ResultSet rs = stmt.executeQuery(); + if (!rs.next()) { + throw new IOException( + "Failed to test connection: query executed but returned no results."); + } + rs.close(); + LOG.info("JdbcConnectionTester: connection test succeeded for " + jdbcUrl); + } catch (Exception e) { + throw new IOException("Failed to test JDBC connection: " + e.getMessage(), e); + } finally { + Thread.currentThread().setContextClassLoader(oldClassLoader); + } + } + + /** + * No-op: connection tester does not read data. + */ + @Override + protected int getNext() throws IOException { + return 0; + } + + @Override + public void close() throws IOException { + try { + if (stmt != null && !stmt.isClosed()) { + stmt.close(); + } + if (conn != null && !conn.isClosed()) { + conn.close(); + } + } catch (SQLException e) { + LOG.warn("JdbcConnectionTester close error: " + e.getMessage(), e); + } finally { + stmt = null; + conn = null; + if (cleanDatasource && hikariDataSource != null) { + hikariDataSource.close(); + JdbcDataSource.getDataSource().getSourcesMap().remove(createCacheKey()); + hikariDataSource = null; + } + } + } + + @Override + public Map getStatistics() { + return Collections.emptyMap(); + } + + private String createCacheKey() { + return catalogId + "#" + jdbcUrl + "#" + jdbcUser + "#" + jdbcDriverClass; + } +} diff --git a/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/JdbcDataSource.java b/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/JdbcDataSource.java index a254924cbb3bc3..9dd1a931293fd3 100644 --- 
a/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/JdbcDataSource.java +++ b/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/JdbcDataSource.java @@ -111,4 +111,19 @@ private synchronized void startCleanupTask() { restartCleanupTask(); } } + + /** + * Create a cache key for connection pool lookup. + * The key includes all connection parameters so that any change + * (e.g. password via ALTER RESOURCE) results in a new pool. + */ + public static String createCacheKey(long catalogId, String jdbcUrl, String jdbcUser, + String jdbcPassword, String jdbcDriverUrl, String jdbcDriverClass, + int connectionPoolMinSize, int connectionPoolMaxSize, + int connectionPoolMaxLifeTime, int connectionPoolMaxWaitTime, + boolean connectionPoolKeepAlive) { + return catalogId + jdbcUrl + jdbcUser + jdbcPassword + jdbcDriverUrl + jdbcDriverClass + + connectionPoolMinSize + connectionPoolMaxSize + connectionPoolMaxLifeTime + + connectionPoolMaxWaitTime + connectionPoolKeepAlive; + } } diff --git a/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/JdbcExecutor.java b/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/JdbcExecutor.java index 65c8c7c7920f05..6adb6d861cdf3d 100644 --- a/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/JdbcExecutor.java +++ b/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/JdbcExecutor.java @@ -19,6 +19,10 @@ import java.util.Map; +/** + * @deprecated Use {@link JdbcJniScanner} for reads and {@link JdbcJniWriter} for writes instead. 
+ */ +@Deprecated public interface JdbcExecutor { int read() throws JdbcExecutorException; diff --git a/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/JdbcExecutorException.java b/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/JdbcExecutorException.java index 7486ee54001c65..98f4c13dc29ec5 100644 --- a/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/JdbcExecutorException.java +++ b/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/JdbcExecutorException.java @@ -17,6 +17,10 @@ package org.apache.doris.jdbc; +/** + * @deprecated Legacy exception class for the old JdbcExecutor framework. + */ +@Deprecated public class JdbcExecutorException extends Exception { public JdbcExecutorException(String msg, Throwable cause) { super(msg, cause); diff --git a/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/JdbcExecutorFactory.java b/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/JdbcExecutorFactory.java index c5866fcc353b45..db1e444cd6efc7 100644 --- a/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/JdbcExecutorFactory.java +++ b/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/JdbcExecutorFactory.java @@ -19,6 +19,13 @@ import org.apache.doris.thrift.TOdbcTableType; +/** + * @deprecated No longer called from C++ side after JdbcConnector removal. + * The old C++ JdbcConnector used this to dynamically load database-specific executor classes. + * New architecture uses JdbcJniScanner (for reads) and JdbcJniWriter (for writes) directly. + * Retained temporarily until BaseJdbcExecutor class hierarchy is fully cleaned up. 
+ */ +@Deprecated public class JdbcExecutorFactory { public static String getExecutorClass(TOdbcTableType type) { switch (type) { diff --git a/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/JdbcJniScanner.java b/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/JdbcJniScanner.java new file mode 100644 index 00000000000000..780abb60e011a4 --- /dev/null +++ b/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/JdbcJniScanner.java @@ -0,0 +1,383 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.doris.jdbc; + +import org.apache.doris.cloud.security.SecurityChecker; +import org.apache.doris.common.jni.JniScanner; +import org.apache.doris.common.jni.vec.ColumnType; +import org.apache.doris.common.jni.vec.ColumnValueConverter; + +import com.zaxxer.hikari.HikariDataSource; +import org.apache.log4j.Logger; + +import java.io.IOException; +import java.lang.reflect.Array; +import java.sql.Connection; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.ResultSetMetaData; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * JdbcJniScanner reads data from JDBC sources via the unified JniScanner framework. + * It extends JniScanner to integrate with the JniConnector/JniReader system on the C++ side, + * following the same pattern as PaimonJniScanner, HudiJniScanner, etc. + * + *

This class uses the {@link JdbcTypeHandler} strategy pattern for database-specific + * type handling. The appropriate handler is selected via {@link JdbcTypeHandlerFactory} + * based on the "table_type" parameter. + * + *

Parameters (passed via constructor params map): + *

    + *
  • jdbc_url - JDBC connection URL
  • + *
  • jdbc_user - database user
  • + *
  • jdbc_password - database password
  • + *
  • jdbc_driver_class - JDBC driver class name
  • + *
  • jdbc_driver_url - path to driver JAR
  • + *
  • query_sql - the SELECT SQL to execute
  • + *
  • catalog_id - catalog ID for connection pool keying
  • + *
  • table_type - database type (MYSQL, ORACLE, POSTGRESQL, etc.)
  • + *
  • connection_pool_min_size - min connection pool size
  • + *
  • connection_pool_max_size - max connection pool size
  • + *
  • connection_pool_max_wait_time - max wait time (ms)
  • + *
  • connection_pool_max_life_time - max lifetime (ms)
  • + *
  • connection_pool_keep_alive - "true"/"false"
  • + *
  • required_fields - comma-separated output field names
  • + *
  • columns_types - #-separated column type strings
  • + *
+ */ +public class JdbcJniScanner extends JniScanner { + private static final Logger LOG = Logger.getLogger(JdbcJniScanner.class); + + private final String jdbcUrl; + private final String jdbcUser; + private final String jdbcPassword; + private final String jdbcDriverClass; + private final String jdbcDriverUrl; + private final String querySql; + private final long catalogId; + private final int connectionPoolMinSize; + private final int connectionPoolMaxSize; + private final int connectionPoolMaxWaitTime; + private final int connectionPoolMaxLifeTime; + private final boolean connectionPoolKeepAlive; + + // Database-specific type handling strategy + private final JdbcTypeHandler typeHandler; + // Per-column output converters, initialized once per scan + private ColumnValueConverter[] outputConverters; + + private HikariDataSource hikariDataSource = null; + private Connection conn = null; + private PreparedStatement stmt = null; + private ResultSet resultSet = null; + private ResultSetMetaData resultSetMetaData = null; + private ClassLoader classLoader = null; + + // Read state + private boolean resultSetOpened = false; + private List block = null; + // Per-column replace strings for special type handling (bitmap, hll) + private String[] replaceStringList; + // Mapping from field index (in types[]/fields[]) to JDBC ResultSet column index (1-based). + // The slot descriptor order may differ from the query SQL column order. 
+ private int[] columnIndexMapping; + + // Statistics + private long readRows = 0; + private long readTime = 0; + + public JdbcJniScanner(int batchSize, Map params) { + this.jdbcUrl = params.getOrDefault("jdbc_url", ""); + this.jdbcUser = params.getOrDefault("jdbc_user", ""); + this.jdbcPassword = params.getOrDefault("jdbc_password", ""); + this.jdbcDriverClass = params.getOrDefault("jdbc_driver_class", ""); + this.jdbcDriverUrl = params.getOrDefault("jdbc_driver_url", ""); + this.querySql = params.getOrDefault("query_sql", ""); + this.catalogId = Long.parseLong(params.getOrDefault("catalog_id", "0")); + this.connectionPoolMinSize = Integer.parseInt( + params.getOrDefault("connection_pool_min_size", "1")); + this.connectionPoolMaxSize = Integer.parseInt( + params.getOrDefault("connection_pool_max_size", "10")); + this.connectionPoolMaxWaitTime = Integer.parseInt( + params.getOrDefault("connection_pool_max_wait_time", "5000")); + this.connectionPoolMaxLifeTime = Integer.parseInt( + params.getOrDefault("connection_pool_max_life_time", "1800000")); + this.connectionPoolKeepAlive = "true".equalsIgnoreCase( + params.getOrDefault("connection_pool_keep_alive", "false")); + + // Select database-specific type handler + String tableType = params.getOrDefault("table_type", ""); + this.typeHandler = JdbcTypeHandlerFactory.create(tableType); + + String requiredFields = params.getOrDefault("required_fields", ""); + String columnsTypes = params.getOrDefault("columns_types", ""); + String[] fieldArr = requiredFields.isEmpty() ? 
new String[0] : requiredFields.split(","); + ColumnType[] typeArr; + if (columnsTypes.isEmpty()) { + typeArr = new ColumnType[0]; + } else { + String[] typeStrs = columnsTypes.split("#"); + typeArr = new ColumnType[typeStrs.length]; + for (int i = 0; i < typeStrs.length; i++) { + typeArr[i] = ColumnType.parseType(fieldArr[i], typeStrs[i]); + } + } + initTableInfo(typeArr, fieldArr, batchSize); + + // Parse replace_string for special type handling (bitmap, hll, etc.) + String replaceString = params.getOrDefault("replace_string", ""); + if (!replaceString.isEmpty()) { + replaceStringList = replaceString.split(","); + } else { + replaceStringList = new String[fieldArr.length]; + for (int i = 0; i < fieldArr.length; i++) { + replaceStringList[i] = "not_replace"; + } + } + } + + @Override + public void open() throws IOException { + ClassLoader oldClassLoader = Thread.currentThread().getContextClassLoader(); + try { + // HikariCP's setDriverClassName() uses the thread context classloader + // to load the driver class. + initializeClassLoaderAndDataSource(); + + conn = hikariDataSource.getConnection(); + + // Use type handler to create the statement with database-specific settings + stmt = typeHandler.initializeStatement(conn, querySql, batchSize); + + LOG.info("JdbcJniScanner: Executing query: " + querySql); + resultSet = stmt.executeQuery(); + resultSetMetaData = resultSet.getMetaData(); + resultSetOpened = true; + + // Build column name -> JDBC ResultSet index mapping. + // The slot descriptors (fields[]) may be in a different order than the + // query SQL columns. We must map by name to avoid reading data with + // the wrong type handler. 
+ columnIndexMapping = new int[fields.length]; + int rsColumnCount = resultSetMetaData.getColumnCount(); + Map rsColumnMap = new HashMap<>(rsColumnCount); + for (int i = 1; i <= rsColumnCount; i++) { + String colName = resultSetMetaData.getColumnLabel(i).toLowerCase(); + rsColumnMap.put(colName, i); + } + for (int i = 0; i < fields.length; i++) { + String fieldName = fields[i].toLowerCase(); + Integer rsIdx = rsColumnMap.get(fieldName); + if (rsIdx != null) { + columnIndexMapping[i] = rsIdx; + } else { + // Fallback to positional mapping if name not found + columnIndexMapping[i] = i + 1; + LOG.warn("Column '" + fields[i] + "' not found in ResultSet by name, " + + "falling back to positional index " + (i + 1)); + } + } + + block = new ArrayList<>(types.length); + + // Initialize per-column output converters once + outputConverters = new ColumnValueConverter[types.length]; + for (int i = 0; i < types.length; i++) { + String replaceStr = (replaceStringList != null && i < replaceStringList.length) + ? replaceStringList[i] : "not_replace"; + outputConverters[i] = typeHandler.getOutputConverter(types[i], replaceStr); + } + } catch (Exception e) { + LOG.warn("JdbcJniScanner " + jdbcUrl + " open failed: " + e.getMessage(), e); + throw new IOException("JdbcJniScanner open failed: " + e.getMessage(), e); + } finally { + Thread.currentThread().setContextClassLoader(oldClassLoader); + } + } + + @Override + protected int getNext() throws IOException { + try { + if (!resultSetOpened || resultSet == null) { + return 0; + } + + long startRead = System.nanoTime(); + + // Initialize block arrays for this batch + block.clear(); + for (int i = 0; i < types.length; i++) { + String replaceStr = (replaceStringList != null && i < replaceStringList.length) + ? 
replaceStringList[i] : "not_replace"; + if ("bitmap".equals(replaceStr) || "hll".equals(replaceStr) + || "quantile_state".equals(replaceStr)) { + // bitmap/hll/quantile_state columns: use byte[][] for raw binary data + block.add(new byte[batchSize][]); + } else if (outputConverters[i] != null) { + // When a converter exists, the raw value from getColumnValue() may have a + // different type than the final column type. For example, MySQL returns ARRAY + // columns as JSON Strings, but newObjectContainerArray() creates ArrayList[]. + // Use Object[] to avoid ArrayStoreException; the converter will produce + // the correctly typed array before data is written to VectorTable. + block.add(new Object[batchSize]); + } else { + block.add(vectorTable.getColumn(i).newObjectContainerArray(batchSize)); + } + } + + int curRows = 0; + while (curRows < batchSize) { + if (!resultSet.next()) { + break; + } + for (int col = 0; col < types.length; col++) { + int columnIndex = columnIndexMapping[col]; + String replaceStr = (replaceStringList != null && col < replaceStringList.length) + ? replaceStringList[col] : "not_replace"; + if ("bitmap".equals(replaceStr) || "hll".equals(replaceStr) + || "quantile_state".equals(replaceStr)) { + // bitmap/hll: read raw bytes directly + byte[] data = resultSet.getBytes(columnIndex); + block.get(col)[curRows] = resultSet.wasNull() ? 
null : data; + } else { + // Use type handler for database-specific value extraction + Object value = typeHandler.getColumnValue( + resultSet, columnIndex, types[col], resultSetMetaData); + block.get(col)[curRows] = value; + } + } + curRows++; + } + + if (curRows > 0) { + for (int col = 0; col < types.length; col++) { + Object[] columnData = block.get(col); + if (curRows < batchSize) { + // Trim to actual size + Class componentType = columnData.getClass().getComponentType(); + Object[] trimmed = (Object[]) Array.newInstance(componentType, curRows); + System.arraycopy(columnData, 0, trimmed, 0, curRows); + columnData = trimmed; + } + // Apply column-level output converter if present + if (outputConverters[col] != null) { + columnData = outputConverters[col].convert(columnData); + } + vectorTable.appendData(col, columnData, null, true); + } + } + + readTime += System.nanoTime() - startRead; + readRows += curRows; + return curRows; + } catch (Exception e) { + LOG.warn("JdbcJniScanner getNext failed: " + e.getMessage(), e); + throw new IOException("JdbcJniScanner getNext failed: " + e.getMessage(), e); + } + } + + @Override + public void close() throws IOException { + try { + // Use type handler for database-specific connection abort + if (conn != null && resultSet != null) { + typeHandler.abortReadConnection(conn, resultSet); + } + } catch (Exception e) { + LOG.warn("JdbcJniScanner abort connection error: " + e.getMessage(), e); + } + try { + if (resultSet != null && !resultSet.isClosed()) { + resultSet.close(); + } + if (stmt != null && !stmt.isClosed()) { + stmt.close(); + } + if (conn != null && !conn.isClosed()) { + conn.close(); + } + } catch (Exception e) { + LOG.warn("JdbcJniScanner close error: " + e.getMessage(), e); + } finally { + resultSet = null; + stmt = null; + conn = null; + if (connectionPoolMinSize == 0 && hikariDataSource != null) { + hikariDataSource.close(); + JdbcDataSource.getDataSource().getSourcesMap().remove(createCacheKey()); + 
hikariDataSource = null; + } + } + } + + @Override + public Map getStatistics() { + Map stats = new HashMap<>(); + stats.put("counter:ReadRows", String.valueOf(readRows)); + stats.put("timer:ReadTime", String.valueOf(readTime)); + return stats; + } + + private void initializeClassLoaderAndDataSource() throws Exception { + java.net.URL[] urls = {new java.net.URL(jdbcDriverUrl)}; + ClassLoader parent = getClass().getClassLoader(); + this.classLoader = java.net.URLClassLoader.newInstance(urls, parent); + // Must set thread context classloader BEFORE creating HikariDataSource, + // because HikariCP's setDriverClassName() loads the driver class from + // the thread context classloader. + Thread.currentThread().setContextClassLoader(classLoader); + + String cacheKey = createCacheKey(); + hikariDataSource = JdbcDataSource.getDataSource().getSource(cacheKey); + if (hikariDataSource == null) { + synchronized (JdbcJniScanner.class) { + hikariDataSource = JdbcDataSource.getDataSource().getSource(cacheKey); + if (hikariDataSource == null) { + HikariDataSource ds = new HikariDataSource(); + ds.setDriverClassName(jdbcDriverClass); + ds.setJdbcUrl(SecurityChecker.getInstance().getSafeJdbcUrl(jdbcUrl)); + ds.setUsername(jdbcUser); + ds.setPassword(jdbcPassword); + ds.setMinimumIdle(connectionPoolMinSize); + ds.setMaximumPoolSize(connectionPoolMaxSize); + ds.setConnectionTimeout(connectionPoolMaxWaitTime); + ds.setMaxLifetime(connectionPoolMaxLifeTime); + ds.setIdleTimeout(connectionPoolMaxLifeTime / 2L); + // Use type handler for database-specific validation query + typeHandler.setValidationQuery(ds); + if (connectionPoolKeepAlive) { + ds.setKeepaliveTime(connectionPoolMaxLifeTime / 5L); + } + hikariDataSource = ds; + JdbcDataSource.getDataSource().putSource(cacheKey, hikariDataSource); + LOG.info("JdbcJniScanner: Created connection pool for " + jdbcUrl); + } + } + } + } + + private String createCacheKey() { + return JdbcDataSource.createCacheKey(catalogId, jdbcUrl, jdbcUser, 
jdbcPassword, + jdbcDriverUrl, jdbcDriverClass, connectionPoolMinSize, connectionPoolMaxSize, + connectionPoolMaxLifeTime, connectionPoolMaxWaitTime, connectionPoolKeepAlive); + } +} diff --git a/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/JdbcJniWriter.java b/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/JdbcJniWriter.java new file mode 100644 index 00000000000000..0ba713399b3e79 --- /dev/null +++ b/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/JdbcJniWriter.java @@ -0,0 +1,408 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.doris.jdbc; + +import org.apache.doris.cloud.security.SecurityChecker; +import org.apache.doris.common.jni.JniWriter; +import org.apache.doris.common.jni.vec.ColumnType; +import org.apache.doris.common.jni.vec.VectorColumn; +import org.apache.doris.common.jni.vec.VectorTable; + +import com.zaxxer.hikari.HikariDataSource; +import org.apache.log4j.Logger; + +import java.io.IOException; +import java.sql.Connection; +import java.sql.Date; +import java.sql.PreparedStatement; +import java.sql.SQLException; +import java.sql.Timestamp; +import java.sql.Types; +import java.util.HashMap; +import java.util.Map; + +/** + * JdbcJniWriter writes C++ Block data to JDBC targets via PreparedStatement batch inserts. + * It extends JniWriter to integrate with the unified VJniFormatTransformer framework, + * following the same pattern as MaxComputeJniWriter. + * + *

<p>Lifecycle (managed by C++ VJniFormatTransformer):
+ * <pre>
+ *   open() -> write() [repeated] -> close()
+ * </pre>
+ *
+ * <p>Transaction control is exposed via getStatistics() responses and
+ * additional JNI method calls from C++ side.
+ *
+ * <p>Parameters (passed via constructor params map):
+ * <ul>
+ *   <li>jdbc_url - JDBC connection URL</li>
+ *   <li>jdbc_user - database user</li>
+ *   <li>jdbc_password - database password</li>
+ *   <li>jdbc_driver_class - JDBC driver class name</li>
+ *   <li>jdbc_driver_url - path to driver JAR</li>
+ *   <li>jdbc_driver_checksum - MD5 checksum of driver JAR</li>
+ *   <li>insert_sql - INSERT SQL with ? placeholders</li>
+ *   <li>use_transaction - "true"/"false"</li>
+ *   <li>catalog_id - catalog ID for connection pool keying</li>
+ *   <li>connection_pool_min_size - min pool size</li>
+ *   <li>connection_pool_max_size - max pool size</li>
+ *   <li>connection_pool_max_wait_time - max wait time (ms)</li>
+ *   <li>connection_pool_max_life_time - max lifetime (ms)</li>
+ *   <li>connection_pool_keep_alive - "true"/"false"</li>
+ * </ul>
+ */ +public class JdbcJniWriter extends JniWriter { + private static final Logger LOG = Logger.getLogger(JdbcJniWriter.class); + + private final String jdbcUrl; + private final String jdbcUser; + private final String jdbcPassword; + private final String jdbcDriverClass; + private final String jdbcDriverUrl; + private final String jdbcDriverChecksum; + private final String insertSql; + private final boolean useTransaction; + private final long catalogId; + private final int connectionPoolMinSize; + private final int connectionPoolMaxSize; + private final int connectionPoolMaxWaitTime; + private final int connectionPoolMaxLifeTime; + private final boolean connectionPoolKeepAlive; + + private HikariDataSource hikariDataSource = null; + private Connection conn = null; + private PreparedStatement preparedStatement = null; + private ClassLoader classLoader = null; + + // Statistics + private long writtenRows = 0; + private long insertTime = 0; + + public JdbcJniWriter(int batchSize, Map params) { + super(batchSize, params); + this.jdbcUrl = params.getOrDefault("jdbc_url", ""); + this.jdbcUser = params.getOrDefault("jdbc_user", ""); + this.jdbcPassword = params.getOrDefault("jdbc_password", ""); + this.jdbcDriverClass = params.getOrDefault("jdbc_driver_class", ""); + this.jdbcDriverUrl = params.getOrDefault("jdbc_driver_url", ""); + this.jdbcDriverChecksum = params.getOrDefault("jdbc_driver_checksum", ""); + this.insertSql = params.getOrDefault("insert_sql", ""); + this.useTransaction = "true".equalsIgnoreCase(params.getOrDefault("use_transaction", "false")); + this.catalogId = Long.parseLong(params.getOrDefault("catalog_id", "0")); + this.connectionPoolMinSize = Integer.parseInt(params.getOrDefault("connection_pool_min_size", "1")); + this.connectionPoolMaxSize = Integer.parseInt(params.getOrDefault("connection_pool_max_size", "10")); + this.connectionPoolMaxWaitTime = Integer.parseInt( + params.getOrDefault("connection_pool_max_wait_time", "5000")); + 
this.connectionPoolMaxLifeTime = Integer.parseInt( + params.getOrDefault("connection_pool_max_life_time", "1800000")); + this.connectionPoolKeepAlive = "true".equalsIgnoreCase( + params.getOrDefault("connection_pool_keep_alive", "false")); + } + + @Override + public void open() throws IOException { + ClassLoader oldClassLoader = Thread.currentThread().getContextClassLoader(); + try { + initializeClassLoaderAndDataSource(); + + conn = hikariDataSource.getConnection(); + + if (useTransaction) { + conn.setAutoCommit(false); + } + + LOG.debug("JdbcJniWriter: Preparing insert statement: " + insertSql); + preparedStatement = conn.prepareStatement(insertSql); + } catch (Exception e) { + throw new IOException("JdbcJniWriter open failed: " + e.getMessage(), e); + } finally { + Thread.currentThread().setContextClassLoader(oldClassLoader); + } + } + + @Override + protected void writeInternal(VectorTable inputTable) throws IOException { + try { + long startInsert = System.nanoTime(); + int numRows = inputTable.getNumRows(); + VectorColumn[] columns = inputTable.getColumns(); + + for (int i = 0; i < numRows; ++i) { + for (int j = 0; j < columns.length; ++j) { + insertColumn(i, j, columns[j]); + } + preparedStatement.addBatch(); + } + preparedStatement.executeBatch(); + preparedStatement.clearBatch(); + + insertTime += System.nanoTime() - startInsert; + writtenRows += numRows; + } catch (SQLException e) { + throw new IOException("JdbcJniWriter write failed: " + e.getMessage(), e); + } + } + + @Override + public void close() throws IOException { + try { + // Commit transaction before closing if useTransaction is enabled. + // autoCommit was set to false in open(), so without explicit commit() + // the JDBC driver will roll back all writes on connection close. 
+ if (useTransaction && conn != null && !conn.isClosed()) { + conn.commit(); + } + if (preparedStatement != null && !preparedStatement.isClosed()) { + preparedStatement.close(); + } + if (conn != null && !conn.isClosed()) { + conn.close(); + } + } catch (SQLException e) { + // If commit or close fails, attempt rollback before rethrowing + try { + if (useTransaction && conn != null && !conn.isClosed()) { + conn.rollback(); + } + } catch (SQLException rollbackEx) { + LOG.warn("JdbcJniWriter rollback on close failure also failed: " + + rollbackEx.getMessage(), rollbackEx); + } + throw new IOException("JdbcJniWriter close failed: " + e.getMessage(), e); + } finally { + preparedStatement = null; + conn = null; + if (connectionPoolMinSize == 0 && hikariDataSource != null) { + hikariDataSource.close(); + JdbcDataSource.getDataSource().getSourcesMap() + .remove(createCacheKey()); + hikariDataSource = null; + } + } + } + + // === Transaction control methods (called by C++ via JNI) === + + public void openTrans() throws IOException { + try { + if (conn != null) { + conn.setAutoCommit(false); + } + } catch (SQLException e) { + throw new IOException("JdbcJniWriter openTrans failed: " + e.getMessage(), e); + } + } + + public void commitTrans() throws IOException { + try { + if (conn != null) { + conn.commit(); + } + } catch (SQLException e) { + throw new IOException("JdbcJniWriter commitTrans failed: " + e.getMessage(), e); + } + } + + public void rollbackTrans() throws IOException { + try { + if (conn != null) { + conn.rollback(); + } + } catch (SQLException e) { + throw new IOException("JdbcJniWriter rollbackTrans failed: " + e.getMessage(), e); + } + } + + @Override + public Map getStatistics() { + Map stats = new HashMap<>(); + stats.put("counter:WrittenRows", String.valueOf(writtenRows)); + stats.put("timer:InsertTime", String.valueOf(insertTime)); + stats.put("timer:WriteTime", String.valueOf(writeTime)); + stats.put("timer:ReadTableTime", String.valueOf(readTableTime)); 
+ return stats; + } + + // ===================================================================== + // Private helpers — adapted from BaseJdbcExecutor.insert/insertColumn + // ===================================================================== + + private void insertColumn(int rowIdx, int colIdx, VectorColumn column) throws SQLException { + int parameterIndex = colIdx + 1; + ColumnType.Type dorisType = column.getColumnPrimitiveType(); + if (column.isNullAt(rowIdx)) { + insertNullColumn(parameterIndex, dorisType); + return; + } + switch (dorisType) { + case BOOLEAN: + preparedStatement.setBoolean(parameterIndex, column.getBoolean(rowIdx)); + break; + case TINYINT: + preparedStatement.setByte(parameterIndex, column.getByte(rowIdx)); + break; + case SMALLINT: + preparedStatement.setShort(parameterIndex, column.getShort(rowIdx)); + break; + case INT: + preparedStatement.setInt(parameterIndex, column.getInt(rowIdx)); + break; + case BIGINT: + preparedStatement.setLong(parameterIndex, column.getLong(rowIdx)); + break; + case LARGEINT: + preparedStatement.setObject(parameterIndex, column.getBigInteger(rowIdx)); + break; + case FLOAT: + preparedStatement.setFloat(parameterIndex, column.getFloat(rowIdx)); + break; + case DOUBLE: + preparedStatement.setDouble(parameterIndex, column.getDouble(rowIdx)); + break; + case DECIMALV2: + case DECIMAL32: + case DECIMAL64: + case DECIMAL128: + preparedStatement.setBigDecimal(parameterIndex, column.getDecimal(rowIdx)); + break; + case DATEV2: + preparedStatement.setDate(parameterIndex, Date.valueOf(column.getDate(rowIdx))); + break; + case DATETIMEV2: + preparedStatement.setTimestamp( + parameterIndex, Timestamp.valueOf(column.getDateTime(rowIdx))); + break; + case TIMESTAMPTZ: + preparedStatement.setObject( + parameterIndex, Timestamp.valueOf(column.getTimeStampTz(rowIdx))); + break; + case CHAR: + case VARCHAR: + case STRING: + preparedStatement.setString(parameterIndex, column.getStringWithOffset(rowIdx)); + break; + case BINARY: + 
case VARBINARY: + preparedStatement.setBytes(parameterIndex, column.getBytesVarbinary(rowIdx)); + break; + default: + throw new RuntimeException("Unknown type value: " + dorisType); + } + } + + private void insertNullColumn(int parameterIndex, ColumnType.Type dorisType) + throws SQLException { + switch (dorisType) { + case BOOLEAN: + preparedStatement.setNull(parameterIndex, Types.BOOLEAN); + break; + case TINYINT: + preparedStatement.setNull(parameterIndex, Types.TINYINT); + break; + case SMALLINT: + preparedStatement.setNull(parameterIndex, Types.SMALLINT); + break; + case INT: + preparedStatement.setNull(parameterIndex, Types.INTEGER); + break; + case BIGINT: + preparedStatement.setNull(parameterIndex, Types.BIGINT); + break; + case LARGEINT: + preparedStatement.setNull(parameterIndex, Types.JAVA_OBJECT); + break; + case FLOAT: + preparedStatement.setNull(parameterIndex, Types.FLOAT); + break; + case DOUBLE: + preparedStatement.setNull(parameterIndex, Types.DOUBLE); + break; + case DECIMALV2: + case DECIMAL32: + case DECIMAL64: + case DECIMAL128: + preparedStatement.setNull(parameterIndex, Types.DECIMAL); + break; + case DATEV2: + preparedStatement.setNull(parameterIndex, Types.DATE); + break; + case DATETIMEV2: + preparedStatement.setNull(parameterIndex, Types.TIMESTAMP); + break; + case TIMESTAMPTZ: + preparedStatement.setNull(parameterIndex, Types.TIMESTAMP_WITH_TIMEZONE); + break; + case CHAR: + case VARCHAR: + case STRING: + preparedStatement.setNull(parameterIndex, Types.VARCHAR); + break; + case BINARY: + case VARBINARY: + preparedStatement.setNull(parameterIndex, Types.VARBINARY); + break; + default: + throw new RuntimeException("Unknown type value: " + dorisType); + } + } + + private void initializeClassLoaderAndDataSource() throws Exception { + java.net.URL[] urls = {new java.net.URL(jdbcDriverUrl)}; + ClassLoader parent = getClass().getClassLoader(); + this.classLoader = java.net.URLClassLoader.newInstance(urls, parent); + // Must set thread context 
classloader BEFORE creating HikariDataSource, + // because HikariCP's setDriverClassName() loads the driver class from + // the thread context classloader. + Thread.currentThread().setContextClassLoader(classLoader); + + String cacheKey = createCacheKey(); + hikariDataSource = JdbcDataSource.getDataSource().getSource(cacheKey); + if (hikariDataSource == null) { + synchronized (JdbcJniWriter.class) { + hikariDataSource = JdbcDataSource.getDataSource().getSource(cacheKey); + if (hikariDataSource == null) { + HikariDataSource ds = new HikariDataSource(); + ds.setDriverClassName(jdbcDriverClass); + ds.setJdbcUrl(SecurityChecker.getInstance().getSafeJdbcUrl(jdbcUrl)); + ds.setUsername(jdbcUser); + ds.setPassword(jdbcPassword); + ds.setMinimumIdle(connectionPoolMinSize); + ds.setMaximumPoolSize(connectionPoolMaxSize); + ds.setConnectionTimeout(connectionPoolMaxWaitTime); + ds.setMaxLifetime(connectionPoolMaxLifeTime); + ds.setIdleTimeout(connectionPoolMaxLifeTime / 2L); + // Do not set connectionTestQuery — HikariCP defaults to JDBC4 isValid() + // which works across all databases (Oracle, MySQL, PostgreSQL, etc.) 
+ if (connectionPoolKeepAlive) { + ds.setKeepaliveTime(connectionPoolMaxLifeTime / 5L); + } + hikariDataSource = ds; + JdbcDataSource.getDataSource().putSource(cacheKey, hikariDataSource); + LOG.info("JdbcJniWriter: Created connection pool for " + jdbcUrl); + } + } + } + } + + private String createCacheKey() { + return JdbcDataSource.createCacheKey(catalogId, jdbcUrl, jdbcUser, jdbcPassword, + jdbcDriverUrl, jdbcDriverClass, connectionPoolMinSize, connectionPoolMaxSize, + connectionPoolMaxLifeTime, connectionPoolMaxWaitTime, connectionPoolKeepAlive); + } +} diff --git a/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/JdbcTypeHandler.java b/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/JdbcTypeHandler.java new file mode 100644 index 00000000000000..9c3be26e2b41d4 --- /dev/null +++ b/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/JdbcTypeHandler.java @@ -0,0 +1,103 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.doris.jdbc; + +import org.apache.doris.common.jni.vec.ColumnType; +import org.apache.doris.common.jni.vec.ColumnValueConverter; + +import com.zaxxer.hikari.HikariDataSource; + +import java.sql.Connection; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.ResultSetMetaData; +import java.sql.SQLException; + +/** + * Strategy interface for database-specific JDBC type handling. + * + *

<p>Each supported database type (MySQL, Oracle, PostgreSQL, etc.) has an implementation
+ * that customizes how column values are read from ResultSets, how connections are configured,
+ * and how output values are converted.
+ *
+ * <p>
Used by {@link JdbcJniScanner} to provide database-specific behavior without + * requiring the old {@link BaseJdbcExecutor} class hierarchy. + */ +public interface JdbcTypeHandler { + + /** + * Extract a column value from the ResultSet with database-specific handling. + * + * @param rs ResultSet positioned at the current row + * @param columnIndex 1-based column index + * @param type Doris column type + * @param metadata ResultSetMetaData (for inspecting JDBC column type) + * @return the column value, or null if the value is SQL NULL + */ + Object getColumnValue(ResultSet rs, int columnIndex, ColumnType type, + ResultSetMetaData metadata) throws SQLException; + + /** + * Create a converter for post-processing column values before writing to VectorTable. + * Returns null if no conversion is needed for this column type. + * + * @param columnType Doris column type + * @param replaceString replacement string indicator ("bitmap", "hll", "jsonb", or "not_replace") + * @return a converter, or null + */ + ColumnValueConverter getOutputConverter(ColumnType columnType, String replaceString); + + /** + * Set the HikariCP validation query for connection health checks. + * Default: "SELECT 1". Override for databases with different syntax + * (e.g., Oracle: "SELECT 1 FROM DUAL", DB2: "SELECT 1 FROM SYSIBM.SYSDUMMY1"). + */ + default void setValidationQuery(HikariDataSource ds) { + ds.setConnectionTestQuery("SELECT 1"); + } + + /** + * Initialize a PreparedStatement with database-specific settings. + * Default: TYPE_FORWARD_ONLY, CONCUR_READ_ONLY, fetchSize as given. + * Override for databases that need special streaming settings (e.g., MySQL: Integer.MIN_VALUE). 
+ */ + default PreparedStatement initializeStatement(Connection conn, String sql, + int fetchSize) throws SQLException { + PreparedStatement stmt = conn.prepareStatement(sql, + ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY); + stmt.setFetchSize(fetchSize); + return stmt; + } + + /** + * Abort a read connection properly before closing resources. + * Some databases (MySQL, SQLServer) need to abort() the connection to avoid + * draining all remaining results, which can cause hangs on large result sets. + */ + default void abortReadConnection(Connection conn, ResultSet rs) throws SQLException { + // default: no-op + } + + /** + * Set JVM-level system properties needed by specific JDBC drivers. + * Called once during scanner initialization. + */ + default void setSystemProperties() { + System.setProperty("com.zaxxer.hikari.useWeakReferences", "true"); + } +} diff --git a/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/JdbcTypeHandlerFactory.java b/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/JdbcTypeHandlerFactory.java new file mode 100644 index 00000000000000..4a6f045fb868c7 --- /dev/null +++ b/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/JdbcTypeHandlerFactory.java @@ -0,0 +1,61 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.jdbc; + +/** + * Factory to create the appropriate {@link JdbcTypeHandler} based on the database type. + */ +public class JdbcTypeHandlerFactory { + + /** + * Create a type handler for the given database type string. + * + * @param tableType database type name (case-insensitive), matching TOdbcTableType enum names + * @return the appropriate type handler + */ + public static JdbcTypeHandler create(String tableType) { + if (tableType == null || tableType.isEmpty()) { + return new DefaultTypeHandler(); + } + switch (tableType.toUpperCase()) { + case "MYSQL": + case "OCEANBASE": + return new MySQLTypeHandler(tableType); + case "ORACLE": + case "OCEANBASE_ORACLE": + return new OracleTypeHandler(); + case "POSTGRESQL": + return new PostgreSQLTypeHandler(); + case "CLICKHOUSE": + return new ClickHouseTypeHandler(); + case "SQLSERVER": + return new SQLServerTypeHandler(); + case "DB2": + return new DB2TypeHandler(); + case "SAP_HANA": + return new SapHanaTypeHandler(); + case "TRINO": + case "PRESTO": + return new TrinoTypeHandler(); + case "GBASE": + return new GbaseTypeHandler(); + default: + return new DefaultTypeHandler(); + } + } +} diff --git a/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/MySQLJdbcExecutor.java b/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/MySQLJdbcExecutor.java index 266eecce933fef..79b523f4bf0e0e 100644 --- a/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/MySQLJdbcExecutor.java +++ b/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/MySQLJdbcExecutor.java @@ -43,6 +43,10 @@ import java.util.List; import java.util.stream.Collectors; +/** + * @deprecated Use {@link MySQLTypeHandler} instead. 
+ */ +@Deprecated public class MySQLJdbcExecutor extends BaseJdbcExecutor { private static final Logger LOG = Logger.getLogger(MySQLJdbcExecutor.class); diff --git a/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/MySQLTypeHandler.java b/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/MySQLTypeHandler.java new file mode 100644 index 00000000000000..45142029f9272e --- /dev/null +++ b/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/MySQLTypeHandler.java @@ -0,0 +1,324 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.doris.jdbc; + +import org.apache.doris.common.jni.vec.ColumnType; +import org.apache.doris.common.jni.vec.ColumnValueConverter; + +import com.google.common.util.concurrent.MoreExecutors; +import com.google.gson.Gson; +import com.google.gson.reflect.TypeToken; + +import java.math.BigDecimal; +import java.math.BigInteger; +import java.sql.Connection; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.ResultSetMetaData; +import java.sql.SQLException; +import java.sql.Types; +import java.time.LocalDate; +import java.time.LocalDateTime; +import java.time.format.DateTimeFormatterBuilder; +import java.time.temporal.ChronoField; +import java.util.List; +import java.util.stream.Collectors; + +/** + * MySQL-specific type handler. + * Key specializations: + * - Streaming result set via Integer.MIN_VALUE fetchSize + * - TIME type uses getString() for >24h values + * - TINYINT/LARGEINT need type conversion + * - byte[] → hex string conversion + * - Connection abort for incomplete result sets + */ +public class MySQLTypeHandler extends DefaultTypeHandler { + + // Store the table type to differentiate MySQL vs OceanBase behavior. + // OceanBase TIME columns lose fractional second precision with rs.getString(), + // so we only use getString() for MySQL TIME (to handle >24h values). + private final String tableType; + + public MySQLTypeHandler() { + this("MYSQL"); + } + + public MySQLTypeHandler(String tableType) { + this.tableType = tableType != null ? 
tableType.toUpperCase() : "MYSQL"; + } + + @Override + public Object getColumnValue(ResultSet rs, int columnIndex, ColumnType type, + ResultSetMetaData metadata) throws SQLException { + switch (type.getType()) { + case BOOLEAN: + return rs.getObject(columnIndex, Boolean.class); + case TINYINT: + case SMALLINT: + case LARGEINT: + return rs.getObject(columnIndex); + case INT: + return rs.getObject(columnIndex, Integer.class); + case BIGINT: + return rs.getObject(columnIndex, Long.class); + case FLOAT: + return rs.getObject(columnIndex, Float.class); + case DOUBLE: + return rs.getObject(columnIndex, Double.class); + case DECIMALV2: + case DECIMAL32: + case DECIMAL64: + case DECIMAL128: + return rs.getObject(columnIndex, BigDecimal.class); + case DATE: + case DATEV2: + return rs.getObject(columnIndex, LocalDate.class); + case DATETIME: + case DATETIMEV2: + return rs.getObject(columnIndex, LocalDateTime.class); + case CHAR: + case VARCHAR: + case ARRAY: + return rs.getObject(columnIndex, String.class); + case STRING: { + int jdbcType = metadata.getColumnType(columnIndex); + // MySQL TIME type needs getString() to handle >24h values. + // But for OceanBase, getString() loses fractional second precision, + // so we use getObject() instead (matching old MySQLJdbcExecutor behavior). + if (jdbcType == Types.TIME && "MYSQL".equals(tableType)) { + return rs.getString(columnIndex); + } + return rs.getObject(columnIndex); + } + case BYTE: + case VARBINARY: { + byte[] data = rs.getBytes(columnIndex); + return rs.wasNull() ? 
null : data; + } + case TIMESTAMPTZ: + return rs.getObject(columnIndex, LocalDateTime.class); + default: + throw new IllegalArgumentException("Unsupported column type: " + type.getType()); + } + } + + @Override + public ColumnValueConverter getOutputConverter(ColumnType columnType, String replaceString) { + switch (columnType.getType()) { + case TINYINT: + return createConverter(input -> { + if (input instanceof Integer) { + return ((Integer) input).byteValue(); + } else if (input instanceof Number) { + return ((Number) input).byteValue(); + } else if (input instanceof String) { + return Byte.parseByte((String) input); + } + return input; + }, Byte.class); + case SMALLINT: + return createConverter(input -> { + if (input instanceof Integer) { + return ((Integer) input).shortValue(); + } else if (input instanceof Number) { + return ((Number) input).shortValue(); + } else if (input instanceof String) { + return Short.parseShort((String) input); + } + return input; + }, Short.class); + case LARGEINT: + return createConverter(input -> { + if (input instanceof BigInteger) { + return input; + } else if (input instanceof String) { + return new BigInteger((String) input); + } else if (input instanceof Number) { + // Use toString() to avoid signed long overflow for BIGINT UNSIGNED + return new BigDecimal(input.toString()).toBigInteger(); + } + return input; + }, BigInteger.class); + case STRING: + if ("bitmap".equals(replaceString) || "hll".equals(replaceString)) { + return null; + } + return createConverter(input -> { + if (input instanceof byte[]) { + return defaultByteArrayToHexString((byte[]) input); + } else if (input instanceof java.sql.Time) { + return timeToString((java.sql.Time) input); + } + return input.toString(); + }, String.class); + case ARRAY: + return createConverter( + input -> convertArray(input, columnType.getChildTypes().get(0)), + List.class); + default: + return null; + } + } + + @Override + public PreparedStatement initializeStatement(Connection conn, 
String sql, + int fetchSize) throws SQLException { + conn.setAutoCommit(false); + PreparedStatement stmt = conn.prepareStatement(sql, + ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY); + // MySQL: Integer.MIN_VALUE signals streaming results to avoid OOM + stmt.setFetchSize(Integer.MIN_VALUE); + return stmt; + } + + @Override + public void abortReadConnection(Connection conn, ResultSet rs) throws SQLException { + if (rs != null && !rs.isAfterLast()) { + // Abort connection to avoid draining all remaining results + conn.abort(MoreExecutors.directExecutor()); + } + } + + @Override + public void setSystemProperties() { + super.setSystemProperties(); + System.setProperty("com.mysql.cj.disableAbandonedConnectionCleanup", "true"); + } + + private static final Gson gson = new Gson(); + + /** + * Parse MySQL JSON array string into a List, applying type-specific conversions. + * MySQL returns ARRAY columns as JSON strings (e.g. "[1,2,3]"); this method + * deserializes them into proper Java List objects matching the expected child type. 
+ */ + private Object convertArray(Object input, ColumnType columnType) { + if (input == null) { + return null; + } + String jsonStr = input.toString(); + ColumnType.Type childTypeEnum = columnType.getType(); + + if (childTypeEnum == ColumnType.Type.BOOLEAN) { + List list = gson.fromJson(jsonStr, List.class); + return list.stream().map(item -> { + if (item instanceof Boolean) { + return item; + } else if (item instanceof Number) { + return ((Number) item).intValue() != 0; + } else { + throw new IllegalArgumentException("Cannot convert " + item + " to Boolean."); + } + }).collect(Collectors.toList()); + } else if (childTypeEnum == ColumnType.Type.DATE || childTypeEnum == ColumnType.Type.DATEV2) { + List list = gson.fromJson(jsonStr, List.class); + return list.stream().map(item -> { + if (item instanceof String) { + return LocalDate.parse((String) item); + } else { + throw new IllegalArgumentException("Cannot convert " + item + " to LocalDate."); + } + }).collect(Collectors.toList()); + } else if (childTypeEnum == ColumnType.Type.DATETIME || childTypeEnum == ColumnType.Type.DATETIMEV2) { + List list = gson.fromJson(jsonStr, List.class); + return list.stream().map(item -> { + if (item instanceof String) { + return LocalDateTime.parse( + (String) item, + new DateTimeFormatterBuilder() + .appendPattern("yyyy-MM-dd HH:mm:ss") + .appendFraction(ChronoField.MILLI_OF_SECOND, + columnType.getPrecision(), + columnType.getPrecision(), true) + .toFormatter()); + } else { + throw new IllegalArgumentException("Cannot convert " + item + " to LocalDateTime."); + } + }).collect(Collectors.toList()); + } else if (childTypeEnum == ColumnType.Type.LARGEINT) { + List list = gson.fromJson(jsonStr, List.class); + return list.stream().map(item -> { + if (item instanceof Number) { + return new BigDecimal(item.toString()).toBigInteger(); + } else if (item instanceof String) { + return new BigDecimal((String) item).toBigInteger(); + } else { + throw new IllegalArgumentException("Cannot 
convert " + item + " to BigInteger."); + } + }).collect(Collectors.toList()); + } else if (childTypeEnum == ColumnType.Type.ARRAY) { + ColumnType nestedChildType = columnType.getChildTypes().get(0); + List rawList = gson.fromJson(jsonStr, List.class); + return rawList.stream() + .map(element -> { + String elementJson = gson.toJson(element); + return convertArray(elementJson, nestedChildType); + }) + .collect(Collectors.toList()); + } else { + java.lang.reflect.Type listType = getListTypeForArray(columnType); + return gson.fromJson(jsonStr, listType); + } + } + + /** + * Map a Doris child ColumnType to the corresponding Gson TypeToken for List deserialization. + */ + private java.lang.reflect.Type getListTypeForArray(ColumnType type) { + switch (type.getType()) { + case BOOLEAN: + return new TypeToken>() { }.getType(); + case TINYINT: + return new TypeToken>() { }.getType(); + case SMALLINT: + return new TypeToken>() { }.getType(); + case INT: + return new TypeToken>() { }.getType(); + case BIGINT: + return new TypeToken>() { }.getType(); + case LARGEINT: + return new TypeToken>() { }.getType(); + case FLOAT: + return new TypeToken>() { }.getType(); + case DOUBLE: + return new TypeToken>() { }.getType(); + case DECIMALV2: + case DECIMAL32: + case DECIMAL64: + case DECIMAL128: + return new TypeToken>() { }.getType(); + case DATE: + case DATEV2: + return new TypeToken>() { }.getType(); + case DATETIME: + case DATETIMEV2: + return new TypeToken>() { }.getType(); + case CHAR: + case VARCHAR: + case STRING: + return new TypeToken>() { }.getType(); + case ARRAY: + java.lang.reflect.Type childType = getListTypeForArray(type.getChildTypes().get(0)); + TypeToken token = TypeToken.getParameterized(List.class, childType); + return token.getType(); + default: + throw new IllegalArgumentException("Unsupported array child type: " + type.getType()); + } + } +} diff --git a/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/OracleJdbcExecutor.java 
b/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/OracleJdbcExecutor.java index c076fdf77e1e1d..f837da7283b9a5 100644 --- a/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/OracleJdbcExecutor.java +++ b/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/OracleJdbcExecutor.java @@ -36,6 +36,10 @@ import java.sql.Timestamp; import java.time.LocalDateTime; +/** + * @deprecated Use {@link OracleTypeHandler} instead. + */ +@Deprecated public class OracleJdbcExecutor extends BaseJdbcExecutor { private static final Logger LOG = Logger.getLogger(OracleJdbcExecutor.class); private final CharsetDecoder utf8Decoder = StandardCharsets.UTF_8.newDecoder(); diff --git a/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/OracleTypeHandler.java b/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/OracleTypeHandler.java new file mode 100644 index 00000000000000..33ff2908660b7c --- /dev/null +++ b/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/OracleTypeHandler.java @@ -0,0 +1,288 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.doris.jdbc; + +import org.apache.doris.common.jni.vec.ColumnType; +import org.apache.doris.common.jni.vec.ColumnValueConverter; + +import com.zaxxer.hikari.HikariDataSource; +import org.apache.log4j.Logger; + +import java.math.BigDecimal; +import java.math.BigInteger; +import java.nio.ByteBuffer; +import java.nio.charset.CharacterCodingException; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.StandardCharsets; +import java.sql.Clob; +import java.sql.Connection; +import java.sql.DatabaseMetaData; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.ResultSetMetaData; +import java.sql.SQLException; +import java.sql.Timestamp; +import java.time.LocalDate; +import java.time.LocalDateTime; + +/** + * Oracle-specific type handler. + * Key specializations: + * - Validation query: "SELECT 1 FROM dual" + * - CHAR trimming + * - LARGEINT: BigDecimal → BigInteger + * - STRING: Clob and byte[] handling + * - Supports both old (ojdbc6, < 12.2.0 JDBC 4.0) and new (>= 12.2.0 JDBC 4.1) drivers. + * Old drivers don't support rs.getObject(int, Class), so we fall back to typed getters. + */ +public class OracleTypeHandler extends DefaultTypeHandler { + private static final Logger LOG = Logger.getLogger(OracleTypeHandler.class); + + // Whether the JDBC driver supports JDBC 4.1 getObject(int, Class) method. + // Determined at runtime from the driver version. Oracle ojdbc6 (< 12.2.0) does not. + private boolean jdbc41Supported = true; + + // Flag to track if we've detected the driver version yet + private volatile boolean versionDetected = false; + + /** + * Detect Oracle JDBC driver version from the connection metadata. + * ojdbc6 (version < 12.2.0) does not support JDBC 4.1 getObject(int, Class). 
+ */ + private void detectDriverVersion(Connection conn) { + if (versionDetected) { + return; + } + try { + DatabaseMetaData meta = conn.getMetaData(); + String driverVersion = meta.getDriverVersion(); + if (driverVersion != null) { + jdbc41Supported = isVersionGreaterThanOrEqual(driverVersion, "12.2.0"); + LOG.info("Oracle JDBC driver version: " + driverVersion + + ", JDBC 4.1 supported: " + jdbc41Supported); + } + } catch (SQLException e) { + LOG.warn("Failed to detect Oracle driver version, assuming JDBC 4.1: " + e.getMessage()); + } + versionDetected = true; + } + + private static boolean isVersionGreaterThanOrEqual(String version, String target) { + try { + String[] vParts = version.split("[^0-9]+"); + String[] tParts = target.split("[^0-9]+"); + for (int i = 0; i < Math.max(vParts.length, tParts.length); i++) { + int v = i < vParts.length && !vParts[i].isEmpty() ? Integer.parseInt(vParts[i]) : 0; + int t = i < tParts.length && !tParts[i].isEmpty() ? Integer.parseInt(tParts[i]) : 0; + if (v > t) { + return true; + } + if (v < t) { + return false; + } + } + return true; // equal + } catch (NumberFormatException e) { + return true; // assume new version if parsing fails + } + } + + @Override + public PreparedStatement initializeStatement(Connection conn, String sql, + int fetchSize) throws SQLException { + // Detect driver version when creating the statement (first time we have access to connection) + detectDriverVersion(conn); + PreparedStatement stmt = conn.prepareStatement(sql, + ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY); + stmt.setFetchSize(fetchSize); + return stmt; + } + + @Override + public Object getColumnValue(ResultSet rs, int columnIndex, ColumnType type, + ResultSetMetaData metadata) throws SQLException { + if (jdbc41Supported) { + return newGetColumnValue(rs, columnIndex, type); + } else { + return oldGetColumnValue(rs, columnIndex, type); + } + } + + /** + * JDBC 4.1+ path: uses rs.getObject(int, Class) for typed retrieval. 
+ */ + private Object newGetColumnValue(ResultSet rs, int columnIndex, ColumnType type) + throws SQLException { + switch (type.getType()) { + case BOOLEAN: + return rs.getObject(columnIndex, Boolean.class); + case TINYINT: + return rs.getObject(columnIndex, Byte.class); + case SMALLINT: + return rs.getObject(columnIndex, Short.class); + case INT: + return rs.getObject(columnIndex, Integer.class); + case BIGINT: + return rs.getObject(columnIndex, Long.class); + case LARGEINT: + return rs.getObject(columnIndex, BigDecimal.class); + case FLOAT: + return rs.getObject(columnIndex, Float.class); + case DOUBLE: + return rs.getObject(columnIndex, Double.class); + case DECIMALV2: + case DECIMAL32: + case DECIMAL64: + case DECIMAL128: + return rs.getObject(columnIndex, BigDecimal.class); + case DATE: + case DATEV2: + return rs.getObject(columnIndex, LocalDate.class); + case DATETIME: + case DATETIMEV2: + return rs.getObject(columnIndex, LocalDateTime.class); + case CHAR: + case VARCHAR: + case STRING: + return rs.getObject(columnIndex); + case VARBINARY: + return rs.getBytes(columnIndex); + case TIMESTAMPTZ: { + Timestamp ts = rs.getObject(columnIndex, Timestamp.class); + return ts == null ? null : LocalDateTime.ofInstant(ts.toInstant(), java.time.ZoneOffset.UTC); + } + default: + throw new IllegalArgumentException("Unsupported column type: " + type.getType()); + } + } + + /** + * JDBC 4.0 fallback path for old Oracle drivers (ojdbc6, < 12.2.0). + * Uses typed getter methods (getByte, getShort, etc.) instead of getObject(int, Class). + */ + private Object oldGetColumnValue(ResultSet rs, int columnIndex, ColumnType type) + throws SQLException { + switch (type.getType()) { + case TINYINT: { + byte val = rs.getByte(columnIndex); + return rs.wasNull() ? null : val; + } + case SMALLINT: { + short val = rs.getShort(columnIndex); + return rs.wasNull() ? null : val; + } + case INT: { + int val = rs.getInt(columnIndex); + return rs.wasNull() ? 
null : val; + } + case BIGINT: { + long val = rs.getLong(columnIndex); + return rs.wasNull() ? null : val; + } + case FLOAT: { + float val = rs.getFloat(columnIndex); + return rs.wasNull() ? null : val; + } + case DOUBLE: { + double val = rs.getDouble(columnIndex); + return rs.wasNull() ? null : val; + } + case LARGEINT: + case DECIMALV2: + case DECIMAL32: + case DECIMAL64: + case DECIMAL128: { + BigDecimal val = rs.getBigDecimal(columnIndex); + return rs.wasNull() ? null : val; + } + case DATE: + case DATEV2: { + java.sql.Date val = rs.getDate(columnIndex); + return val == null ? null : val.toLocalDate(); + } + case DATETIME: + case DATETIMEV2: { + Timestamp val = rs.getTimestamp(columnIndex); + return val == null ? null : val.toLocalDateTime(); + } + case CHAR: + case VARCHAR: + case STRING: { + Object val = rs.getObject(columnIndex); + return rs.wasNull() ? null : val; + } + case VARBINARY: { + byte[] val = rs.getBytes(columnIndex); + return rs.wasNull() ? null : val; + } + case TIMESTAMPTZ: { + Timestamp val = rs.getTimestamp(columnIndex); + return val == null ? 
null : LocalDateTime.ofInstant(val.toInstant(), java.time.ZoneOffset.UTC); + } + default: + throw new IllegalArgumentException("Unsupported column type: " + type.getType()); + } + } + + @Override + public ColumnValueConverter getOutputConverter(ColumnType columnType, String replaceString) { + switch (columnType.getType()) { + case CHAR: + return createConverter(input -> trimSpaces(input.toString()), String.class); + case LARGEINT: + return createConverter( + input -> ((BigDecimal) input).toBigInteger(), BigInteger.class); + case STRING: + return createConverter(input -> { + if (input instanceof Clob) { + try { + return ((Clob) input).getSubString(1, (int) ((Clob) input).length()); + } catch (SQLException e) { + LOG.error("Failed to get string from clob", e); + return null; + } + } else if (input instanceof byte[]) { + return convertByteArrayToString((byte[]) input); + } + return input.toString(); + }, String.class); + default: + return null; + } + } + + /** + * Oracle RAW type returns byte[]. Try to decode as UTF-8 first; + * if it's not valid UTF-8, fall back to hex string with "0x" prefix. + * This matches the behavior of the old OracleJdbcExecutor. 
+ */ + private static String convertByteArrayToString(byte[] bytes) { + CharsetDecoder utf8Decoder = StandardCharsets.UTF_8.newDecoder(); + try { + utf8Decoder.decode(ByteBuffer.wrap(bytes)); + return new String(bytes, StandardCharsets.UTF_8); + } catch (CharacterCodingException e) { + return defaultByteArrayToHexString(bytes); + } + } + + @Override + public void setValidationQuery(HikariDataSource ds) { + ds.setConnectionTestQuery("SELECT 1 FROM dual"); + } +} diff --git a/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/PostgreSQLJdbcExecutor.java b/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/PostgreSQLJdbcExecutor.java index 62fd54a8f673be..eae09b1e924f32 100644 --- a/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/PostgreSQLJdbcExecutor.java +++ b/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/PostgreSQLJdbcExecutor.java @@ -35,6 +35,10 @@ import java.util.ArrayList; import java.util.List; +/** + * @deprecated Use {@link PostgreSQLTypeHandler} instead. + */ +@Deprecated public class PostgreSQLJdbcExecutor extends BaseJdbcExecutor { private static final Logger LOG = Logger.getLogger(PostgreSQLJdbcExecutor.class); diff --git a/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/PostgreSQLTypeHandler.java b/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/PostgreSQLTypeHandler.java new file mode 100644 index 00000000000000..d55896042576f7 --- /dev/null +++ b/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/PostgreSQLTypeHandler.java @@ -0,0 +1,233 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.jdbc; + +import org.apache.doris.common.jni.vec.ColumnType; +import org.apache.doris.common.jni.vec.ColumnValueConverter; + +import java.math.BigDecimal; +import java.sql.Array; +import java.sql.ResultSet; +import java.sql.ResultSetMetaData; +import java.sql.SQLException; +import java.sql.Timestamp; +import java.time.LocalDate; +import java.time.LocalDateTime; +import java.time.OffsetDateTime; +import java.util.ArrayList; +import java.util.List; + +/** + * PostgreSQL-specific type handler. 
+ * Key specializations: + * - TIMESTAMPTZ: OffsetDateTime → LocalDateTime conversion + * - ARRAY: PgArray handling + * - CHAR trimming + * - STRING: Time and byte[] conversion + */ +public class PostgreSQLTypeHandler extends DefaultTypeHandler { + + @Override + public Object getColumnValue(ResultSet rs, int columnIndex, ColumnType type, + ResultSetMetaData metadata) throws SQLException { + switch (type.getType()) { + case BOOLEAN: + return rs.getObject(columnIndex, Boolean.class); + case SMALLINT: + return rs.getObject(columnIndex, Short.class); + case INT: + return rs.getObject(columnIndex, Integer.class); + case BIGINT: + return rs.getObject(columnIndex, Long.class); + case FLOAT: + return rs.getObject(columnIndex, Float.class); + case DOUBLE: + return rs.getObject(columnIndex, Double.class); + case DECIMALV2: + case DECIMAL32: + case DECIMAL64: + case DECIMAL128: + return rs.getObject(columnIndex, BigDecimal.class); + case DATE: + case DATEV2: + return rs.getObject(columnIndex, LocalDate.class); + case DATETIME: + case DATETIMEV2: + return rs.getObject(columnIndex); + case CHAR: + case VARCHAR: + case STRING: + return rs.getObject(columnIndex); + case VARBINARY: + return rs.getBytes(columnIndex); + case TIMESTAMPTZ: { + OffsetDateTime odt = rs.getObject(columnIndex, OffsetDateTime.class); + return odt == null ? null : Timestamp.from(odt.toInstant()); + } + case ARRAY: { + Array array = rs.getArray(columnIndex); + return array == null ? 
null : convertArrayToList(array.getArray()); + } + default: + throw new IllegalArgumentException("Unsupported column type: " + type.getType()); + } + } + + @Override + public ColumnValueConverter getOutputConverter(ColumnType columnType, String replaceString) { + switch (columnType.getType()) { + case DATE: + case DATEV2: + return createConverter(input -> { + if (input instanceof java.sql.Date) { + return ((java.sql.Date) input).toLocalDate(); + } + return input; + }, LocalDate.class); + case DATETIME: + case DATETIMEV2: + return createConverter(input -> { + if (input instanceof Timestamp) { + return ((Timestamp) input).toLocalDateTime(); + } else if (input instanceof OffsetDateTime) { + return ((OffsetDateTime) input).toLocalDateTime(); + } else if (input instanceof java.sql.Date) { + return ((java.sql.Date) input).toLocalDate().atStartOfDay(); + } + return input; + }, LocalDateTime.class); + case TIMESTAMPTZ: + return createConverter(input -> { + if (input instanceof Timestamp) { + return LocalDateTime.ofInstant( + ((Timestamp) input).toInstant(), java.time.ZoneOffset.UTC); + } + return input; + }, LocalDateTime.class); + case CHAR: + return createConverter(input -> trimSpaces(input.toString()), String.class); + case VARCHAR: + case STRING: + return createConverter(input -> { + if (input instanceof java.sql.Time) { + return timeToString((java.sql.Time) input); + } else if (input instanceof byte[]) { + return pgByteArrayToHexString((byte[]) input); + } + return input.toString(); + }, String.class); + case ARRAY: + return createConverter( + input -> convertArray((List) input, columnType.getChildTypes().get(0)), + List.class); + default: + return null; + } + } + + /** + * Convert a JDBC array object to a List. 
+ */ + private static List convertArrayToList(Object array) { + if (array == null) { + return null; + } + List list = new ArrayList<>(); + if (array instanceof Object[]) { + for (Object element : (Object[]) array) { + list.add(element); + } + } + return list; + } + + /** + * PostgreSQL byte[] → hex string (uses \\x prefix format with lowercase). + * PostgreSQL native bytea format: \\xdeadbeef + */ + private static String pgByteArrayToHexString(byte[] bytes) { + StringBuilder hexString = new StringBuilder("\\x"); + for (byte b : bytes) { + hexString.append(String.format("%02x", b & 0xff)); + } + return hexString.toString(); + } + + /** + * Recursively convert array elements for nested ARRAY types. + * Handles DATE/DATETIME/TIMESTAMPTZ element conversion, matching + * the old PostgreSQLJdbcExecutor.convertArray() behavior. + */ + private List convertArray(List input, ColumnType childType) { + if (input == null) { + return null; + } + switch (childType.getType()) { + case DATE: + case DATEV2: { + List result = new ArrayList<>(input.size()); + for (Object element : input) { + if (element == null) { + result.add(null); + } else if (element instanceof java.sql.Date) { + result.add(((java.sql.Date) element).toLocalDate()); + } else if (element instanceof LocalDate) { + result.add((LocalDate) element); + } else { + result.add(LocalDate.parse(element.toString())); + } + } + return result; + } + case DATETIME: + case DATETIMEV2: { + List result = new ArrayList<>(input.size()); + for (Object element : input) { + if (element == null) { + result.add(null); + } else if (element instanceof Timestamp) { + result.add(((Timestamp) element).toLocalDateTime()); + } else if (element instanceof OffsetDateTime) { + result.add(((OffsetDateTime) element).toLocalDateTime()); + } else if (element instanceof java.sql.Date) { + result.add(((java.sql.Date) element).toLocalDate().atStartOfDay()); + } else if (element instanceof LocalDateTime) { + result.add((LocalDateTime) element); + } else { + 
result.add(LocalDateTime.parse(element.toString())); + } + } + return result; + } + case ARRAY: { + List> result = new ArrayList<>(input.size()); + for (Object element : input) { + if (element == null) { + result.add(null); + } else { + List nestedList = convertArrayToList(element); + result.add(convertArray(nestedList, childType.getChildTypes().get(0))); + } + } + return result; + } + default: + return input; + } + } +} diff --git a/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/SQLServerJdbcExecutor.java b/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/SQLServerJdbcExecutor.java index bcb38f7de96988..7cb74e3a2d2a3f 100644 --- a/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/SQLServerJdbcExecutor.java +++ b/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/SQLServerJdbcExecutor.java @@ -32,6 +32,10 @@ import java.time.LocalDate; import java.time.LocalDateTime; +/** + * @deprecated Use {@link SQLServerTypeHandler} instead. + */ +@Deprecated public class SQLServerJdbcExecutor extends BaseJdbcExecutor { public SQLServerJdbcExecutor(byte[] thriftParams) throws Exception { super(thriftParams); diff --git a/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/SQLServerTypeHandler.java b/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/SQLServerTypeHandler.java new file mode 100644 index 00000000000000..47c9a4756c760f --- /dev/null +++ b/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/SQLServerTypeHandler.java @@ -0,0 +1,106 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.jdbc; + +import org.apache.doris.common.jni.vec.ColumnType; +import org.apache.doris.common.jni.vec.ColumnValueConverter; + +import com.google.common.util.concurrent.MoreExecutors; + +import java.sql.Connection; +import java.sql.Date; +import java.sql.ResultSet; +import java.sql.ResultSetMetaData; +import java.sql.SQLException; +import java.sql.Timestamp; +import java.time.LocalDate; +import java.time.LocalDateTime; + +/** + * SQLServer-specific type handler. + * Key specializations: + * - Connection abort for incomplete result sets (avoids driver drain) + * - DATE/DATETIME: explicit type conversion in output converter + * - STRING: Time and byte[] handling + */ +public class SQLServerTypeHandler extends DefaultTypeHandler { + + @Override + public Object getColumnValue(ResultSet rs, int columnIndex, ColumnType type, + ResultSetMetaData metadata) throws SQLException { + switch (type.getType()) { + case DECIMALV2: + case DECIMAL32: + case DECIMAL64: + case DECIMAL128: + return rs.getBigDecimal(columnIndex); + case BOOLEAN: + case SMALLINT: + case INT: + case BIGINT: + case FLOAT: + case DOUBLE: + case DATE: + case DATEV2: + case DATETIME: + case DATETIMEV2: + case CHAR: + case VARCHAR: + case STRING: + return rs.getObject(columnIndex); + case VARBINARY: + return rs.getObject(columnIndex, byte[].class); + default: + throw new IllegalArgumentException("Unsupported column type: " + type.getType()); + } + } + + @Override + public ColumnValueConverter getOutputConverter(ColumnType columnType, String replaceString) { + switch 
(columnType.getType()) { + case DATE: + case DATEV2: + return createConverter( + input -> ((Date) input).toLocalDate(), LocalDate.class); + case DATETIME: + case DATETIMEV2: + return createConverter( + input -> ((Timestamp) input).toLocalDateTime(), LocalDateTime.class); + case STRING: + return createConverter(input -> { + if (input instanceof java.sql.Time) { + return timeToString((java.sql.Time) input); + } else if (input instanceof byte[]) { + return defaultByteArrayToHexString((byte[]) input); + } + return input.toString(); + }, String.class); + default: + return null; + } + } + + @Override + public void abortReadConnection(Connection conn, ResultSet rs) throws SQLException { + if (rs != null && !rs.isAfterLast()) { + // SQLServer driver attempts to drain results on close. + // Abort connection to prevent this behavior. + conn.abort(MoreExecutors.directExecutor()); + } + } +} diff --git a/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/SapHanaJdbcExecutor.java b/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/SapHanaJdbcExecutor.java index 442e4efceda503..36708d73ae2ba3 100644 --- a/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/SapHanaJdbcExecutor.java +++ b/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/SapHanaJdbcExecutor.java @@ -33,6 +33,10 @@ import java.time.LocalDate; import java.time.LocalDateTime; +/** + * @deprecated Use {@link SapHanaTypeHandler} instead. 
+ */ +@Deprecated public class SapHanaJdbcExecutor extends BaseJdbcExecutor { private static final Logger LOG = Logger.getLogger(SapHanaJdbcExecutor.class); diff --git a/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/SapHanaTypeHandler.java b/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/SapHanaTypeHandler.java new file mode 100644 index 00000000000000..ea772f63ac67da --- /dev/null +++ b/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/SapHanaTypeHandler.java @@ -0,0 +1,85 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.jdbc; + +import org.apache.doris.common.jni.vec.ColumnType; + +import com.zaxxer.hikari.HikariDataSource; + +import java.math.BigDecimal; +import java.sql.Date; +import java.sql.ResultSet; +import java.sql.ResultSetMetaData; +import java.sql.SQLException; +import java.sql.Timestamp; + +/** + * SAP HANA-specific type handler. 
+ * Key specializations: + * - Validation query: "SELECT 1 FROM DUMMY" + * - Uses getObject(Class) for all typed columns + */ +public class SapHanaTypeHandler extends DefaultTypeHandler { + + @Override + public Object getColumnValue(ResultSet rs, int columnIndex, ColumnType type, + ResultSetMetaData metadata) throws SQLException { + switch (type.getType()) { + case BOOLEAN: + return rs.getObject(columnIndex, Boolean.class); + case TINYINT: + return rs.getObject(columnIndex, Byte.class); + case SMALLINT: + return rs.getObject(columnIndex, Short.class); + case INT: + return rs.getObject(columnIndex, Integer.class); + case BIGINT: + return rs.getObject(columnIndex, Long.class); + case FLOAT: + return rs.getObject(columnIndex, Float.class); + case DOUBLE: + return rs.getObject(columnIndex, Double.class); + case DECIMALV2: + case DECIMAL32: + case DECIMAL64: + case DECIMAL128: + return rs.getObject(columnIndex, BigDecimal.class); + case DATE: + case DATEV2: { + Date sqlDate = rs.getDate(columnIndex); + return rs.wasNull() ? null : sqlDate.toLocalDate(); + } + case DATETIME: + case DATETIMEV2: { + Timestamp ts = rs.getTimestamp(columnIndex); + return rs.wasNull() ? 
null : ts.toLocalDateTime(); + } + case CHAR: + case VARCHAR: + case STRING: + return rs.getObject(columnIndex); + default: + throw new IllegalArgumentException("Unsupported column type: " + type.getType()); + } + } + + @Override + public void setValidationQuery(HikariDataSource ds) { + ds.setConnectionTestQuery("SELECT 1 FROM DUMMY"); + } +} diff --git a/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/TrinoJdbcExecutor.java b/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/TrinoJdbcExecutor.java index 2e59449af5eae2..7697725ede3cda 100644 --- a/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/TrinoJdbcExecutor.java +++ b/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/TrinoJdbcExecutor.java @@ -35,6 +35,10 @@ import java.util.Collections; import java.util.List; +/** + * @deprecated Use {@link TrinoTypeHandler} instead. + */ +@Deprecated public class TrinoJdbcExecutor extends BaseJdbcExecutor { public TrinoJdbcExecutor(byte[] thriftParams) throws Exception { super(thriftParams); diff --git a/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/TrinoTypeHandler.java b/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/TrinoTypeHandler.java new file mode 100644 index 00000000000000..84e3c90d04737f --- /dev/null +++ b/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/TrinoTypeHandler.java @@ -0,0 +1,113 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.jdbc; + +import org.apache.doris.common.jni.vec.ColumnType; +import org.apache.doris.common.jni.vec.ColumnValueConverter; + +import java.math.BigDecimal; +import java.sql.Array; +import java.sql.ResultSet; +import java.sql.ResultSetMetaData; +import java.sql.SQLException; +import java.sql.Timestamp; +import java.time.LocalDate; +import java.time.LocalDateTime; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +/** + * Trino/Presto-specific type handler. + * Key specializations: + * - DATETIME: uses Timestamp.class then converts in output converter + * - ARRAY: getArray() → Object[] → List + */ +public class TrinoTypeHandler extends DefaultTypeHandler { + + @Override + public Object getColumnValue(ResultSet rs, int columnIndex, ColumnType type, + ResultSetMetaData metadata) throws SQLException { + switch (type.getType()) { + case BOOLEAN: + return rs.getObject(columnIndex, Boolean.class); + case TINYINT: + return rs.getObject(columnIndex, Byte.class); + case SMALLINT: + return rs.getObject(columnIndex, Short.class); + case INT: + return rs.getObject(columnIndex, Integer.class); + case BIGINT: + return rs.getObject(columnIndex, Long.class); + case FLOAT: + return rs.getObject(columnIndex, Float.class); + case DOUBLE: + return rs.getObject(columnIndex, Double.class); + case DECIMALV2: + case DECIMAL32: + case DECIMAL64: + case DECIMAL128: + return rs.getObject(columnIndex, BigDecimal.class); + case DATE: + case DATEV2: + return rs.getObject(columnIndex, LocalDate.class); + case DATETIME: + 
case DATETIMEV2: + return rs.getObject(columnIndex, Timestamp.class); + case CHAR: + case VARCHAR: + case STRING: + return rs.getObject(columnIndex, String.class); + case ARRAY: { + Array array = rs.getArray(columnIndex); + if (array == null) { + return null; + } + Object[] dataArray = (Object[]) array.getArray(); + if (dataArray.length == 0) { + return Collections.emptyList(); + } + return Arrays.asList(dataArray); + } + case VARBINARY: + return rs.getObject(columnIndex, byte[].class); + default: + throw new IllegalArgumentException("Unsupported column type: " + type.getType()); + } + } + + @Override + public ColumnValueConverter getOutputConverter(ColumnType columnType, String replaceString) { + switch (columnType.getType()) { + case DATETIME: + case DATETIMEV2: + return createConverter( + input -> ((Timestamp) input).toLocalDateTime(), LocalDateTime.class); + case ARRAY: + return createConverter( + input -> convertArray((List) input, columnType.getChildTypes().get(0)), + List.class); + default: + return null; + } + } + + private Object convertArray(List input, ColumnType childType) { + return input; + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/client/JdbcMySQLClient.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/client/JdbcMySQLClient.java index 79e7fb65a0e4a2..ef43af25084abc 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/client/JdbcMySQLClient.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/client/JdbcMySQLClient.java @@ -454,6 +454,8 @@ private Type dorisTypeToDoris(JdbcFieldSchema fieldSchema) { return ScalarType.createHllType(); case "BITMAP": return Type.BITMAP; + case "QUANTILE_STATE": + return Type.QUANTILE_STATE; case "VARBINARY": return ScalarType.createVarbinaryType(fieldSchema.requiredColumnSize()); default: diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/source/JdbcScanNode.java 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/source/JdbcScanNode.java index 7e5a636455d952..8e622d4b2d9685 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/source/JdbcScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/source/JdbcScanNode.java @@ -37,28 +37,44 @@ import org.apache.doris.catalog.TableIf.TableType; import org.apache.doris.common.UserException; import org.apache.doris.datasource.ExternalFunctionRules; -import org.apache.doris.datasource.ExternalScanNode; +import org.apache.doris.datasource.FileQueryScanNode; import org.apache.doris.datasource.jdbc.JdbcExternalTable; import org.apache.doris.planner.PlanNodeId; import org.apache.doris.planner.ScanContext; import org.apache.doris.qe.ConnectContext; -import org.apache.doris.statistics.StatisticalType; +import org.apache.doris.qe.SessionVariable; +import org.apache.doris.spi.Split; import org.apache.doris.thrift.TExplainLevel; -import org.apache.doris.thrift.TJdbcScanNode; +import org.apache.doris.thrift.TFileFormatType; +import org.apache.doris.thrift.TFileRangeDesc; import org.apache.doris.thrift.TOdbcTableType; -import org.apache.doris.thrift.TPlanNode; -import org.apache.doris.thrift.TPlanNodeType; +import org.apache.doris.thrift.TTableFormatFileDesc; import com.google.common.base.Joiner; -import com.google.common.base.MoreObjects; import com.google.common.collect.Lists; import org.jetbrains.annotations.NotNull; import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.stream.Collectors; -public class JdbcScanNode extends ExternalScanNode { +/** + * JdbcScanNode extends FileQueryScanNode to integrate JDBC scanning into + * the unified FileScanner framework (FORMAT_JNI path). + * + *

This replaces the old ExternalScanNode-based JDBC_SCAN_NODE with: + *

+ *   FE: JdbcScanNode(extends FileQueryScanNode) → TFileScanRange(FORMAT_JNI)
+ *   BE: FileScanOperatorX → FileScanner → JdbcJniReader → JniConnector → JdbcJniScanner
+ * 
+ * + *

All predicate push-down logic (createJdbcFilters, getJdbcQueryStr, etc.) + * is preserved from the original implementation. + */ +public class JdbcScanNode extends FileQueryScanNode { private final List columns = new ArrayList(); private final List filters = new ArrayList(); @@ -73,7 +89,8 @@ public class JdbcScanNode extends ExternalScanNode { private long catalogId; public JdbcScanNode(PlanNodeId id, TupleDescriptor desc, boolean isJdbcExternalTable, ScanContext scanContext) { - super(id, desc, "JdbcScanNode", StatisticalType.JDBC_SCAN_NODE, scanContext, false); + super(id, desc, "JdbcScanNode", scanContext, false, + ConnectContext.get() != null ? ConnectContext.get().getSessionVariable() : new SessionVariable()); if (isJdbcExternalTable) { JdbcExternalTable jdbcExternalTable = (JdbcExternalTable) (desc.getTable()); tbl = jdbcExternalTable.getJdbcTable(); @@ -86,7 +103,8 @@ public JdbcScanNode(PlanNodeId id, TupleDescriptor desc, boolean isJdbcExternalT public JdbcScanNode(PlanNodeId id, TupleDescriptor desc, boolean isTableValuedFunction, String query, ScanContext scanContext) { - super(id, desc, "JdbcScanNode", StatisticalType.JDBC_SCAN_NODE, scanContext, false); + super(id, desc, "JdbcScanNode", scanContext, false, + ConnectContext.get() != null ? ConnectContext.get().getSessionVariable() : new SessionVariable()); this.isTableValuedFunction = isTableValuedFunction; this.query = query; tbl = (JdbcTable) desc.getTable(); @@ -95,15 +113,113 @@ public JdbcScanNode(PlanNodeId id, TupleDescriptor desc, boolean isTableValuedFu catalogId = tbl.getCatalogId(); } + // ========= FileQueryScanNode abstract method implementations ========= - /** - * Used for Nereids. Should NOT use this function in anywhere else. - */ @Override - public void init() throws UserException { - super.init(); - numNodes = numNodes <= 0 ? 
1 : numNodes; - cardinality = -1; + public TFileFormatType getFileFormatType() { + return TFileFormatType.FORMAT_JNI; + } + + @Override + public List getPathPartitionKeys() { + // JDBC has no file path partitions + return Collections.emptyList(); + } + + @Override + public TableIf getTargetTable() { + return desc.getTable(); + } + + @Override + protected Map getLocationProperties() { + // JDBC does not need storage location properties + return Collections.emptyMap(); + } + + @Override + public List getSplits(int numBackends) throws UserException { + // JDBC always produces a single split — the query cannot be partitioned + createJdbcColumns(); + createJdbcFilters(); + + String querySql = isTableValuedFunction ? query : getJdbcQueryStr(); + + JdbcSplit split = new JdbcSplit( + querySql, + tbl.getJdbcUrl(), + tbl.getJdbcUser(), + tbl.getJdbcPasswd(), + tbl.getDriverClass(), + tbl.getDriverUrl(), + tbl.getCheckSum(), + tbl.getCatalogId(), + jdbcType, + tbl.getConnectionPoolMinSize(), + tbl.getConnectionPoolMaxSize(), + tbl.getConnectionPoolMaxWaitTime(), + tbl.getConnectionPoolMaxLifeTime(), + tbl.isConnectionPoolKeepAlive() + ); + + List splits = new ArrayList<>(); + splits.add(split); + return splits; + } + + @Override + protected void setScanParams(TFileRangeDesc rangeDesc, Split split) { + if (!(split instanceof JdbcSplit)) { + return; + } + JdbcSplit jdbcSplit = (JdbcSplit) split; + + TTableFormatFileDesc tableFormatFileDesc = new TTableFormatFileDesc(); + tableFormatFileDesc.setTableFormatType("jdbc"); + + // Build JDBC params map — these are passed to JdbcJniScanner via JniConnector + Map jdbcParams = new HashMap<>(); + jdbcParams.put("jdbc_url", jdbcSplit.getJdbcUrl()); + jdbcParams.put("jdbc_user", jdbcSplit.getJdbcUser()); + jdbcParams.put("jdbc_password", jdbcSplit.getJdbcPassword()); + jdbcParams.put("jdbc_driver_class", jdbcSplit.getDriverClass()); + jdbcParams.put("jdbc_driver_url", jdbcSplit.getDriverUrl()); + jdbcParams.put("jdbc_driver_checksum", 
jdbcSplit.getDriverChecksum()); + jdbcParams.put("query_sql", jdbcSplit.getQuerySql()); + jdbcParams.put("catalog_id", String.valueOf(jdbcSplit.getCatalogId())); + jdbcParams.put("table_type", jdbcSplit.getTableType().name()); + jdbcParams.put("connection_pool_min_size", + String.valueOf(jdbcSplit.getConnectionPoolMinSize())); + jdbcParams.put("connection_pool_max_size", + String.valueOf(jdbcSplit.getConnectionPoolMaxSize())); + jdbcParams.put("connection_pool_max_wait_time", + String.valueOf(jdbcSplit.getConnectionPoolMaxWaitTime())); + jdbcParams.put("connection_pool_max_life_time", + String.valueOf(jdbcSplit.getConnectionPoolMaxLifeTime())); + jdbcParams.put("connection_pool_keep_alive", + jdbcSplit.isConnectionPoolKeepAlive() ? "true" : "false"); + + tableFormatFileDesc.setJdbcParams(jdbcParams); + rangeDesc.setTableFormatParams(tableFormatFileDesc); + } + + // ========= JDBC-specific query generation (preserved from original) ========= + + @Override + protected void doInitialize() throws UserException { + super.doInitialize(); + } + + @Override + protected void convertPredicate() { + // Predicate push-down is handled in getSplits() via createJdbcFilters() + // Nothing needed here since JDBC manages its own filter push-down + } + + @Override + public int getNumInstances() { + // JDBC always uses a single instance — no parallelism at the data source + return 1; } private void createJdbcFilters() { @@ -243,46 +359,12 @@ public String getNodeExplainString(String prefix, TExplainLevel detailLevel) { @Override public void finalizeForNereids() throws UserException { - createJdbcColumns(); - createJdbcFilters(); - createScanRangeLocations(); - } - - @Override - protected void createScanRangeLocations() throws UserException { - scanRangeLocations = Lists.newArrayList(createSingleScanRangeLocations(backendPolicy)); - } - - @Override - protected void toThrift(TPlanNode msg) { - msg.node_type = TPlanNodeType.JDBC_SCAN_NODE; - msg.jdbc_scan_node = new TJdbcScanNode(); - 
msg.jdbc_scan_node.setTupleId(desc.getId().asInt()); - msg.jdbc_scan_node.setTableName(tableName); - if (isTableValuedFunction) { - msg.jdbc_scan_node.setQueryString(query); - } else { - msg.jdbc_scan_node.setQueryString(getJdbcQueryStr()); - } - msg.jdbc_scan_node.setTableType(jdbcType); - msg.jdbc_scan_node.setIsTvf(isTableValuedFunction); - super.toThrift(msg); - } - - @Override - protected String debugString() { - MoreObjects.ToStringHelper helper = MoreObjects.toStringHelper(this); - return helper.addValue(super.debugString()).toString(); + // FileQueryScanNode.doFinalize() calls convertPredicate() + createScanRangeLocations() + // JDBC-specific predicate preparation (createJdbcColumns/Filters) happens in getSplits() + doFinalize(); } - @Override - public int getNumInstances() { - ConnectContext context = ConnectContext.get(); - if (context == null) { - return 1; - } - return context.getSessionVariable().getParallelExecInstanceNum(scanContext.getClusterName()); - } + // ========= Static helper methods for predicate push-down ========= private static boolean shouldPushDownConjunct(TOdbcTableType tableType, Expr expr) { // Prevent pushing down expressions with NullLiteral to Oracle diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/source/JdbcSplit.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/source/JdbcSplit.java new file mode 100644 index 00000000000000..82ea106ccacff7 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/source/JdbcSplit.java @@ -0,0 +1,90 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.datasource.jdbc.source; + +import org.apache.doris.common.util.LocationPath; +import org.apache.doris.datasource.FileSplit; +import org.apache.doris.thrift.TOdbcTableType; + +import lombok.Getter; + + +/** + * JdbcSplit represents a single JDBC scan "split" (which is always one split + * since JDBC queries cannot be partitioned). + * + *

It extends {@link FileSplit} so it can integrate with the + * {@link org.apache.doris.datasource.FileQueryScanNode} framework. + * The path/start/length fields are set to dummy values since JDBC + * does not read files; the real parameters are in the JDBC-specific fields. + */ +@Getter +public class JdbcSplit extends FileSplit { + + private final String querySql; + private final String jdbcUrl; + private final String jdbcUser; + private final String jdbcPassword; + private final String driverClass; + private final String driverUrl; + private final String driverChecksum; + private final long catalogId; + private final TOdbcTableType tableType; + private final int connectionPoolMinSize; + private final int connectionPoolMaxSize; + private final int connectionPoolMaxWaitTime; + private final int connectionPoolMaxLifeTime; + private final boolean connectionPoolKeepAlive; + + public JdbcSplit(String querySql, String jdbcUrl, String jdbcUser, + String jdbcPassword, String driverClass, String driverUrl, + String driverChecksum, + long catalogId, TOdbcTableType tableType, + int connectionPoolMinSize, int connectionPoolMaxSize, + int connectionPoolMaxWaitTime, int connectionPoolMaxLifeTime, + boolean connectionPoolKeepAlive) { + // Use a dummy path — JDBC does not read actual files. 
+ // start=0, length=0, fileLength=0 + super(LocationPath.of("jdbc://virtual"), + 0, 0, 0, 0, null, null); + this.querySql = querySql; + this.jdbcUrl = jdbcUrl; + this.jdbcUser = jdbcUser; + this.jdbcPassword = jdbcPassword; + this.driverClass = driverClass; + this.driverUrl = driverUrl; + this.driverChecksum = driverChecksum; + this.catalogId = catalogId; + this.tableType = tableType; + this.connectionPoolMinSize = connectionPoolMinSize; + this.connectionPoolMaxSize = connectionPoolMaxSize; + this.connectionPoolMaxWaitTime = connectionPoolMaxWaitTime; + this.connectionPoolMaxLifeTime = connectionPoolMaxLifeTime; + this.connectionPoolKeepAlive = connectionPoolKeepAlive; + } + + @Override + public Object getInfo() { + return null; + } + + @Override + public String getPathString() { + return "jdbc://virtual"; + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java index cb9b4d4a7f2344..ba038e62d55359 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java @@ -852,11 +852,19 @@ public PlanFragment visitPhysicalJdbcScan(PhysicalJdbcScan jdbcScan, PlanTransla table instanceof JdbcExternalTable, context.getScanContext()); jdbcScanNode.setNereidsId(jdbcScan.getId()); context.getNereidsIdToPlanNodeIdMap().put(jdbcScan.getId(), jdbcScanNode.getId()); + + TableNameInfo tableNameInfo = new TableNameInfo(null, "", ""); + TableRefInfo ref = new TableRefInfo(tableNameInfo, null, null); + BaseTableRefInfo tableRefInfo = new BaseTableRefInfo(ref, tableNameInfo, table); + tupleDescriptor.setRef(tableRefInfo); + if (jdbcScan.getStats() != null) { + jdbcScanNode.setCardinality((long) jdbcScan.getStats().getRowCount()); + } Utils.execWithUncheckedException(jdbcScanNode::init); 
context.addScanNode(jdbcScanNode, jdbcScan); translateRuntimeFilter(jdbcScan, jdbcScanNode, context); DataPartition dataPartition = DataPartition.RANDOM; - PlanFragment planFragment = new PlanFragment(context.nextFragmentId(), jdbcScanNode, dataPartition); + PlanFragment planFragment = createPlanFragment(jdbcScanNode, dataPartition, jdbcScan); context.addPlanFragment(planFragment); updateLegacyPlanIdToPhysicalPlan(planFragment.getPlanRoot(), jdbcScan); return planFragment; diff --git a/gensrc/thrift/PlanNodes.thrift b/gensrc/thrift/PlanNodes.thrift index 615b4584bceb4a..08106df7969996 100644 --- a/gensrc/thrift/PlanNodes.thrift +++ b/gensrc/thrift/PlanNodes.thrift @@ -414,6 +414,8 @@ struct TTableFormatFileDesc { 8: optional TLakeSoulFileDesc lakesoul_params 9: optional i64 table_level_row_count = -1 10: optional TRemoteDorisFileDesc remote_doris_params + // JDBC connection parameters (used when table_format_type == "jdbc") + 11: optional map jdbc_params } // Deprecated, hive text talbe is a special format, not a serde type diff --git a/regression-test/data/external_table_p0/jdbc/test_doris_jdbc_catalog_query_hll_quantile.out b/regression-test/data/external_table_p0/jdbc/test_doris_jdbc_catalog_query_hll_quantile.out new file mode 100644 index 00000000000000..47ea1c257cf0f2 --- /dev/null +++ b/regression-test/data/external_table_p0/jdbc/test_doris_jdbc_catalog_query_hll_quantile.out @@ -0,0 +1,27 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !sql -- +internal + +-- !sql -- +internal + +-- !hll_internal -- +1 1 +2 2 +3 1 + +-- !hll_jdbc -- +1 1 +2 2 +3 1 + +-- !quantile_internal -- +1 100 +2 250 +3 400 + +-- !quantile_jdbc -- +1 NaN +2 NaN +3 NaN + diff --git a/regression-test/data/external_table_p0/jdbc/test_mysql_jdbc_catalog_write_transaction.out b/regression-test/data/external_table_p0/jdbc/test_mysql_jdbc_catalog_write_transaction.out new file mode 100644 index 00000000000000..675b98201ada11 --- /dev/null +++ b/regression-test/data/external_table_p0/jdbc/test_mysql_jdbc_catalog_write_transaction.out @@ -0,0 +1,16 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !write_no_txn -- +no_txn_test 100 + +-- !write_txn_1 -- +txn_test_1 200 + +-- !write_txn_2 -- +txn_batch_1 301 +txn_batch_2 302 +txn_batch_3 303 + +-- !write_txn_3 -- +txn_src 401 +txn_src_copy 402 + diff --git a/regression-test/suites/external_table_p0/jdbc/test_doris_jdbc_catalog_query_hll_quantile.groovy b/regression-test/suites/external_table_p0/jdbc/test_doris_jdbc_catalog_query_hll_quantile.groovy new file mode 100644 index 00000000000000..3a45067eed1b67 --- /dev/null +++ b/regression-test/suites/external_table_p0/jdbc/test_doris_jdbc_catalog_query_hll_quantile.groovy @@ -0,0 +1,128 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_doris_jdbc_catalog_query_hll_quantile", "p0,external") { + qt_sql """select current_catalog()""" + + String jdbcUrl = context.config.jdbcUrl + "&sessionVariables=return_object_data_as_binary=true" + String jdbcUser = context.config.jdbcUser + String jdbcPassword = context.config.jdbcPassword + String s3_endpoint = getS3Endpoint() + String bucket = getS3BucketName() + String driver_url = "https://${bucket}.${s3_endpoint}/regression/jdbc_driver/mysql-connector-j-8.4.0.jar" + // String driver_url = "mysql-connector-j-8.4.0.jar" + + String catalog_name = "doris_jdbc_catalog_query_hll_quantile"; + String internal_db_name = "regression_test_jdbc_catalog_p0_query_hll_quantile"; + String hllTable = "test_hll_table" + String quantileTable = "test_quantile_table" + + sql """create database if not exists ${internal_db_name}; """ + + qt_sql """select current_catalog()""" + sql """drop catalog if exists ${catalog_name} """ + + sql """ CREATE CATALOG `${catalog_name}` PROPERTIES ( + "user" = "${jdbcUser}", + "type" = "jdbc", + "password" = "${jdbcPassword}", + "jdbc_url" = "${jdbcUrl}", + "driver_url" = "${driver_url}", + "driver_class" = "com.mysql.cj.jdbc.Driver" + )""" + + // ========== Test HLL type via JDBC catalog ========== + sql """switch internal""" + sql """use ${internal_db_name}""" + + sql """ drop table if exists ${hllTable} """ + sql """ create table `${hllTable}` ( + datekey int, + hour int, + user_id hll HLL_UNION + ) + aggregate key (datekey, hour) + distributed by hash(datekey, hour) buckets 1 + properties( + "replication_num" = 
"1" + ); """ + + sql """ insert into ${hllTable} values(20200622, 1, hll_hash(10001));""" + sql """ insert into ${hllTable} values(20200622, 2, hll_hash(10002));""" + sql """ insert into ${hllTable} values(20200622, 2, hll_hash(10003));""" + sql """ insert into ${hllTable} values(20200622, 3, hll_hash(10004));""" + + sql """ set return_object_data_as_binary=true """ + order_qt_hll_internal """ select hour, HLL_UNION_AGG(user_id) as cnt + from `${hllTable}` + where datekey=20200622 + group by hour order by hour; """ + + // Query HLL via JDBC external catalog + sql """ refresh catalog ${catalog_name} """ + sql """ switch ${catalog_name} """ + sql """ use ${internal_db_name} """ + + order_qt_hll_jdbc """ select hour, HLL_UNION_AGG(user_id) as cnt + from ${catalog_name}.${internal_db_name}.${hllTable} + where datekey=20200622 + group by hour order by hour; """ + + // ========== Test QUANTILE_STATE type via JDBC catalog ========== + sql """switch internal""" + sql """use ${internal_db_name}""" + + sql """ drop table if exists ${quantileTable} """ + sql """ create table `${quantileTable}` ( + datekey int, + hour int, + pv quantile_state QUANTILE_UNION + ) + aggregate key (datekey, hour) + distributed by hash(datekey, hour) buckets 1 + properties( + "replication_num" = "1" + ); """ + + sql """ insert into ${quantileTable} values(20200622, 1, to_quantile_state(100, 2048));""" + sql """ insert into ${quantileTable} values(20200622, 2, to_quantile_state(200, 2048));""" + sql """ insert into ${quantileTable} values(20200622, 2, to_quantile_state(300, 2048));""" + sql """ insert into ${quantileTable} values(20200622, 3, to_quantile_state(400, 2048));""" + + sql """ set return_object_data_as_binary=true """ + order_qt_quantile_internal """ select hour, quantile_percent(quantile_union(pv), 0.5) as median_val + from `${quantileTable}` + where datekey=20200622 + group by hour order by hour; """ + + // Query quantile_state via JDBC external catalog + sql """ refresh catalog 
${catalog_name} """ + sql """ switch ${catalog_name} """ + sql """ use ${internal_db_name} """ + + order_qt_quantile_jdbc """ select hour, quantile_percent(quantile_union(pv), 0.5) as median_val + from ${catalog_name}.${internal_db_name}.${quantileTable} + where datekey=20200622 + group by hour order by hour; """ + + // clean + sql """switch internal""" + sql """use ${internal_db_name}""" + sql """ drop table if exists ${hllTable} """ + sql """ drop table if exists ${quantileTable} """ + sql """drop catalog if exists ${catalog_name} """ +} diff --git a/regression-test/suites/external_table_p0/jdbc/test_mysql_jdbc_catalog_write_transaction.groovy b/regression-test/suites/external_table_p0/jdbc/test_mysql_jdbc_catalog_write_transaction.groovy new file mode 100644 index 00000000000000..2e50d3bc36ebe3 --- /dev/null +++ b/regression-test/suites/external_table_p0/jdbc/test_mysql_jdbc_catalog_write_transaction.groovy @@ -0,0 +1,104 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("test_mysql_jdbc_catalog_write_transaction", "p0,external") { + String enabled = context.config.otherConfigs.get("enableJdbcTest") + String externalEnvIp = context.config.otherConfigs.get("externalEnvIp") + String s3_endpoint = getS3Endpoint() + String bucket = getS3BucketName() + String driver_url = "https://${bucket}.${s3_endpoint}/regression/jdbc_driver/mysql-connector-j-8.4.0.jar" + // String driver_url = "mysql-connector-j-8.4.0.jar" + if (enabled == null || !enabled.equalsIgnoreCase("true")) { + return; + } + + String catalog_name = "mysql_jdbc_write_transaction_test"; + String ex_db_name = "doris_test"; + String mysql_port = context.config.otherConfigs.get("mysql_57_port"); + String test_table = "test_insert"; + + sql """drop catalog if exists ${catalog_name} """ + + sql """create catalog if not exists ${catalog_name} properties( + "type"="jdbc", + "user"="root", + "password"="123456", + "jdbc_url" = "jdbc:mysql://${externalEnvIp}:${mysql_port}/doris_test?useSSL=false", + "driver_url" = "${driver_url}", + "driver_class" = "com.mysql.cj.jdbc.Driver" + );""" + + sql """switch ${catalog_name}""" + sql """use ${ex_db_name}""" + + // The test_insert table already exists in the MySQL test environment. + // It has columns: id varchar, name varchar, age int. + // We use UUID to ensure our test data doesn't conflict with other tests. + + // ========== Test 1: INSERT without transaction (default behavior) ========== + // By default, enable_odbc_transcation is false, so autoCommit stays true + // and each statement auto-commits. This should work regardless of Bug 1. 
+ String uuid1 = UUID.randomUUID().toString(); + sql """ set enable_odbc_transcation = false """ + sql """ insert into ${test_table} values ('${uuid1}', 'no_txn_test', 100) """ + + // Read back — should be visible (auto-committed) + order_qt_write_no_txn """ select name, age from ${test_table} where id = '${uuid1}' order by age """ + + // ========== Test 2: INSERT with transaction enabled ========== + // This is the critical test for Bug 1. + // enable_odbc_transcation = true => JdbcJniWriter sets autoCommit=false + // and the writer must explicitly call conn.commit() before close(). + // Without the Bug 1 fix, the JDBC driver will rollback on close() and + // the data will be silently lost. + String uuid2 = UUID.randomUUID().toString(); + sql """ set enable_odbc_transcation = true """ + sql """ insert into ${test_table} values ('${uuid2}', 'txn_test_1', 200) """ + + // Read back — data should be visible if commit() was called + order_qt_write_txn_1 """ select name, age from ${test_table} where id = '${uuid2}' order by age """ + + // ========== Test 3: Multi-row INSERT with transaction ========== + // All rows should be committed atomically + String uuid3 = UUID.randomUUID().toString(); + sql """ set enable_odbc_transcation = true """ + sql """ insert into ${test_table} values + ('${uuid3}', 'txn_batch_1', 301), + ('${uuid3}', 'txn_batch_2', 302), + ('${uuid3}', 'txn_batch_3', 303) """ + + // All three rows should be visible (not rolled back) + order_qt_write_txn_2 """ select name, age from ${test_table} where id = '${uuid3}' order by age """ + + // ========== Test 4: INSERT INTO SELECT with transaction ========== + String uuid4 = UUID.randomUUID().toString(); + sql """ set enable_odbc_transcation = true """ + sql """ insert into ${test_table} values ('${uuid4}', 'txn_src', 401) """ + sql """ insert into ${test_table} select '${uuid4}', concat(name, '_copy'), age + 1 from ${test_table} where id = '${uuid4}' """ + + // Should have 2 rows: original + copy + 
order_qt_write_txn_3 """ select name, age from ${test_table} where id = '${uuid4}' order by age """ + + // ========== Test 5: Verify row count with assertTrue ========== + def result = sql """ select count(*) from ${test_table} where id = '${uuid3}' """ + assertTrue(result[0][0] == 3, "Expected 3 rows for uuid3 but got ${result[0][0]} — transaction may not have been committed") + + // clean + sql """ set enable_odbc_transcation = false """ + sql """switch internal""" + sql """drop catalog if exists ${catalog_name} """ +}