diff --git a/ci/jobs/scripts/workflow_hooks/filter_job.py b/ci/jobs/scripts/workflow_hooks/filter_job.py index e8921e2b378c..97e6ea6799a8 100644 --- a/ci/jobs/scripts/workflow_hooks/filter_job.py +++ b/ci/jobs/scripts/workflow_hooks/filter_job.py @@ -54,6 +54,15 @@ def should_skip_job(job_name): if _info_cache is None: _info_cache = Info() + # There is no way to prevent GitHub Actions from running the PR workflow on + # release branches, so we skip all jobs here. The ReleaseCI workflow is used + # for testing on release branches instead. + if ( + Labels.RELEASE in _info_cache.pr_labels + or Labels.RELEASE_LTS in _info_cache.pr_labels + ): + return True, "Skipped for release PR" + changed_files = _info_cache.get_kv_data("changed_files") if not changed_files: print("WARNING: no changed files found for PR - do not filter jobs") diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index 2ee6530cdd02..39fa6f300df3 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -2,14 +2,14 @@ # NOTE: VERSION_REVISION has nothing common with DBMS_TCP_PROTOCOL_VERSION, # only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes. -SET(VERSION_REVISION 54516) +SET(VERSION_REVISION 54521) SET(VERSION_MAJOR 25) SET(VERSION_MINOR 8) -SET(VERSION_PATCH 16) -SET(VERSION_GITHASH 7a0b36cf8934881236312e9fea094baaf5c709a4) -SET(VERSION_DESCRIBE v25.8.16.20002.altinityantalya) -SET(VERSION_STRING 25.8.16.20002.altinityantalya) +SET(VERSION_PATCH 21) +SET(VERSION_GITHASH 2e1cd6354ae8898072e5dbf97aa6e5945761e3d7) +SET(VERSION_DESCRIBE v25.8.21.20001.altinityantalya) +SET(VERSION_STRING 25.8.21.20001.altinityantalya) # end of autochange -SET(VERSION_TWEAK 20002) +SET(VERSION_TWEAK 20001) SET(VERSION_FLAVOUR altinityantalya) diff --git a/contrib/libxml2 b/contrib/libxml2 index 74f3154320df..b7fa62cbe8ef 160000 --- a/contrib/libxml2 +++ b/contrib/libxml2 @@ -1 +1 @@ -Subproject commit 74f3154320df8950eceae4951975cc9dfc3a254d +Subproject commit b7fa62cbe8ef0df5869e000d5b690bdedd07f33e diff --git a/contrib/libxml2-cmake/CMakeLists.txt b/contrib/libxml2-cmake/CMakeLists.txt index 482793ee0c35..dfb35933e96c 100644 --- a/contrib/libxml2-cmake/CMakeLists.txt +++ b/contrib/libxml2-cmake/CMakeLists.txt @@ -30,7 +30,6 @@ set(SRCS "${LIBXML2_SOURCE_DIR}/xmlschemas.c" "${LIBXML2_SOURCE_DIR}/xmlschemastypes.c" "${LIBXML2_SOURCE_DIR}/xmlregexp.c" - "${LIBXML2_SOURCE_DIR}/xmlunicode.c" "${LIBXML2_SOURCE_DIR}/relaxng.c" "${LIBXML2_SOURCE_DIR}/catalog.c" "${LIBXML2_SOURCE_DIR}/HTMLparser.c" diff --git a/contrib/libxml2-cmake/README.MD b/contrib/libxml2-cmake/README.MD index 10782eccfc53..439da6b9d6ba 100644 --- a/contrib/libxml2-cmake/README.MD +++ b/contrib/libxml2-cmake/README.MD @@ -1,2 +1,2 @@ -./configure CPPFLAGS="-DHAVE_GETENTROPY=0" \ No newline at end of file +./configure CPPFLAGS="-DHAVE_GETENTROPY=0" diff --git a/contrib/libxml2-cmake/linux_x86_64/include/config.h b/contrib/libxml2-cmake/linux_x86_64/include/config.h index c1fbd48d765b..600e7570e990 100644 --- a/contrib/libxml2-cmake/linux_x86_64/include/config.h +++ b/contrib/libxml2-cmake/linux_x86_64/include/config.h @@ -1,6 +1,10 @@ /* config.h. Generated from config.h.in by configure. */ /* config.h.in. Generated from configure.ac by autoheader. */ +/* Define to 1 if you have the declaration of `getentropy', and to 0 if you + don't. */ +#define HAVE_DECL_GETENTROPY 0 + /* Define to 1 if you have the declaration of `glob', and to 0 if you don't. */ #define HAVE_DECL_GLOB 1 @@ -27,12 +31,6 @@ /* Define if readline library is available */ /* #undef HAVE_LIBREADLINE */ -/* Define to 1 if you have the header file. */ -/* #undef HAVE_LZMA_H */ - -/* Define to 1 if you have the header file. */ -/* #undef HAVE_POLL_H */ - /* Define to 1 if you have the header file. */ #define HAVE_PTHREAD_H 1 @@ -79,7 +77,7 @@ #define PACKAGE_NAME "libxml2" /* Define to the full name and version of this package. */ -#define PACKAGE_STRING "libxml2 2.14.5" +#define PACKAGE_STRING "libxml2 2.15.1" /* Define to the one symbol short name of this package. */ #define PACKAGE_TARNAME "libxml2" @@ -88,7 +86,7 @@ #define PACKAGE_URL "" /* Define to the version of this package. */ -#define PACKAGE_VERSION "2.14.5" +#define PACKAGE_VERSION "2.15.1" /* Define to 1 if all of the C90 standard headers exist (not just the ones required in a freestanding environment). This macro is provided for @@ -96,7 +94,7 @@ #define STDC_HEADERS 1 /* Version number of package */ -#define VERSION "2.14.5" +#define VERSION "2.15.1" /* System configuration directory (/etc) */ #define XML_SYSCONFDIR "/usr/local/etc" diff --git a/contrib/libxml2-cmake/linux_x86_64/include/libxml/xmlversion.h b/contrib/libxml2-cmake/linux_x86_64/include/libxml/xmlversion.h index c8303df8f838..ff8b0ed8011b 100644 --- a/contrib/libxml2-cmake/linux_x86_64/include/libxml/xmlversion.h +++ b/contrib/libxml2-cmake/linux_x86_64/include/libxml/xmlversion.h @@ -1,330 +1,253 @@ -/* - * Summary: compile-time version information - * Description: compile-time version information for the XML library +/** + * @file + * + * @brief compile-time version information + * + * compile-time version information for the XML library * - * Copy: See Copyright for the status of this software. + * @copyright See Copyright for the status of this software. * - * Author: Daniel Veillard + * @author Daniel Veillard */ #ifndef __XML_VERSION_H__ #define __XML_VERSION_H__ /** - * LIBXML_DOTTED_VERSION: - * * the version string like "1.2.3" */ -#define LIBXML_DOTTED_VERSION "2.14.5" +#define LIBXML_DOTTED_VERSION "2.15.1" /** - * LIBXML_VERSION: - * * the version number: 1.2.3 value is 10203 */ -#define LIBXML_VERSION 21403 +#define LIBXML_VERSION 21501 /** - * LIBXML_VERSION_STRING: - * * the version number string, 1.2.3 value is "10203" */ -#define LIBXML_VERSION_STRING "21403" +#define LIBXML_VERSION_STRING "21501" /** - * LIBXML_VERSION_EXTRA: - * * extra version information, used to show a git commit description */ #define LIBXML_VERSION_EXTRA "" /** - * LIBXML_TEST_VERSION: - * * Macro to check that the libxml version in use is compatible with * the version the software has been compiled against */ -#define LIBXML_TEST_VERSION xmlCheckVersion(21403); +#define LIBXML_TEST_VERSION xmlCheckVersion(21501); +#if 1 /** - * LIBXML_THREAD_ENABLED: - * * Whether the thread support is configured in */ -#if 1 #define LIBXML_THREAD_ENABLED #endif +#if 0 /** - * LIBXML_THREAD_ALLOC_ENABLED: - * * Whether the allocation hooks are per-thread */ -#if 0 #define LIBXML_THREAD_ALLOC_ENABLED #endif /** - * LIBXML_TREE_ENABLED: - * * Always enabled since 2.14.0 */ #define LIBXML_TREE_ENABLED +#if 1 /** - * LIBXML_OUTPUT_ENABLED: - * * Whether the serialization/saving support is configured in */ -#if 1 #define LIBXML_OUTPUT_ENABLED #endif +#if 1 /** - * LIBXML_PUSH_ENABLED: - * * Whether the push parsing interfaces are configured in */ -#if 1 #define LIBXML_PUSH_ENABLED #endif +#if 1 /** - * LIBXML_READER_ENABLED: - * * Whether the xmlReader parsing interface is configured in */ -#if 1 #define LIBXML_READER_ENABLED #endif +#if 1 /** - * LIBXML_PATTERN_ENABLED: - * * Whether the xmlPattern node selection interface is configured in */ -#if 1 #define LIBXML_PATTERN_ENABLED #endif +#if 1 /** - * LIBXML_WRITER_ENABLED: - * * Whether the xmlWriter saving interface is configured in */ -#if 1 #define LIBXML_WRITER_ENABLED #endif +#if 1 /** - * LIBXML_SAX1_ENABLED: - * * Whether the older SAX1 interface is configured in */ -#if 1 #define LIBXML_SAX1_ENABLED #endif +#if 0 /** - * LIBXML_HTTP_ENABLED: - * - * Whether the HTTP support is configured in + * HTTP support was removed in 2.15 */ -#if 0 -#define LIBXML_HTTP_ENABLED +#define LIBXML_HTTP_STUBS_ENABLED #endif +#if 1 /** - * LIBXML_VALID_ENABLED: - * * Whether the DTD validation support is configured in */ -#if 1 #define LIBXML_VALID_ENABLED #endif +#if 1 /** - * LIBXML_HTML_ENABLED: - * * Whether the HTML support is configured in */ -#if 1 #define LIBXML_HTML_ENABLED #endif -/** - * LIBXML_LEGACY_ENABLED: - * +/* * Removed in 2.14 */ #undef LIBXML_LEGACY_ENABLED +#if 1 /** - * LIBXML_C14N_ENABLED: - * * Whether the Canonicalization support is configured in */ -#if 1 #define LIBXML_C14N_ENABLED #endif +#if 1 /** - * LIBXML_CATALOG_ENABLED: - * * Whether the Catalog support is configured in */ -#if 1 #define LIBXML_CATALOG_ENABLED +#define LIBXML_SGML_CATALOG_ENABLED #endif +#if 1 /** - * LIBXML_XPATH_ENABLED: - * * Whether XPath is configured in */ -#if 1 #define LIBXML_XPATH_ENABLED #endif +#if 1 /** - * LIBXML_XPTR_ENABLED: - * * Whether XPointer is configured in */ -#if 1 #define LIBXML_XPTR_ENABLED #endif +#if 1 /** - * LIBXML_XINCLUDE_ENABLED: - * * Whether XInclude is configured in */ -#if 1 #define LIBXML_XINCLUDE_ENABLED #endif +#if 1 /** - * LIBXML_ICONV_ENABLED: - * * Whether iconv support is available */ -#if 1 #define LIBXML_ICONV_ENABLED #endif +#if 0 /** - * LIBXML_ICU_ENABLED: - * * Whether icu support is available */ -#if 0 #define LIBXML_ICU_ENABLED #endif +#if 1 /** - * LIBXML_ISO8859X_ENABLED: - * * Whether ISO-8859-* support is made available in case iconv is not */ -#if 1 #define LIBXML_ISO8859X_ENABLED #endif +#if 1 /** - * LIBXML_DEBUG_ENABLED: - * * Whether Debugging module is configured in */ -#if 1 #define LIBXML_DEBUG_ENABLED #endif -/** - * LIBXML_UNICODE_ENABLED: - * +/* * Removed in 2.14 */ #undef LIBXML_UNICODE_ENABLED +#if 1 /** - * LIBXML_REGEXP_ENABLED: - * * Whether the regular expressions interfaces are compiled in */ -#if 1 #define LIBXML_REGEXP_ENABLED #endif +#if 1 /** - * LIBXML_AUTOMATA_ENABLED: - * * Whether the automata interfaces are compiled in */ -#if 1 #define LIBXML_AUTOMATA_ENABLED #endif +#if 1 /** - * LIBXML_RELAXNG_ENABLED: - * * Whether the RelaxNG validation interfaces are compiled in */ -#if 1 #define LIBXML_RELAXNG_ENABLED #endif +#if 1 /** - * LIBXML_SCHEMAS_ENABLED: - * * Whether the Schemas validation interfaces are compiled in */ -#if 1 #define LIBXML_SCHEMAS_ENABLED #endif +#if 0 /** - * LIBXML_SCHEMATRON_ENABLED: - * * Whether the Schematron validation interfaces are compiled in */ -#if 1 #define LIBXML_SCHEMATRON_ENABLED #endif +#if 1 /** - * LIBXML_MODULES_ENABLED: - * * Whether the module interfaces are compiled in */ -#if 1 #define LIBXML_MODULES_ENABLED /** - * LIBXML_MODULE_EXTENSION: - * * the string suffix used by dynamic modules (usually shared libraries) */ -#define LIBXML_MODULE_EXTENSION ".so" +#define LIBXML_MODULE_EXTENSION ".so" #endif +#if 0 /** - * LIBXML_ZLIB_ENABLED: - * * Whether the Zlib support is compiled in */ -#if 0 #define LIBXML_ZLIB_ENABLED #endif -/** - * LIBXML_LZMA_ENABLED: - * - * Whether the Lzma support is compiled in - */ -#if 0 -#define LIBXML_LZMA_ENABLED -#endif - #include #endif diff --git a/contrib/mongo-c-driver b/contrib/mongo-c-driver index 4ee76b070b26..ba0d1dbf2b74 160000 --- a/contrib/mongo-c-driver +++ b/contrib/mongo-c-driver @@ -1 +1 @@ -Subproject commit 4ee76b070b260de5da1e8c8144c028dfc37efbaf +Subproject commit ba0d1dbf2b743a5a96609e7fe6b642876f0900ed diff --git a/contrib/mongo-c-driver-cmake/CMakeLists.txt b/contrib/mongo-c-driver-cmake/CMakeLists.txt index 0139a052aef0..0b608517607c 100644 --- a/contrib/mongo-c-driver-cmake/CMakeLists.txt +++ b/contrib/mongo-c-driver-cmake/CMakeLists.txt @@ -4,14 +4,14 @@ if(NOT USE_MONGODB) return() endif() -set(libbson_VERSION_MAJOR 1) -set(libbson_VERSION_MINOR 27) -set(libbson_VERSION_PATCH 0) -set(libbson_VERSION 1.27.0) -set(libmongoc_VERSION_MAJOR 1) -set(libmongoc_VERSION_MINOR 27) -set(libmongoc_VERSION_PATCH 0) -set(libmongoc_VERSION 1.27.0) +set(libbson_VERSION_MAJOR 2) +set(libbson_VERSION_MINOR 2) +set(libbson_VERSION_PATCH 2) +set(libbson_VERSION 2.2.2) +set(libmongoc_VERSION_MAJOR 2) +set(libmongoc_VERSION_MINOR 2) +set(libmongoc_VERSION_PATCH 2) +set(libmongoc_VERSION 2.2.2) set(LIBBSON_SOURCES_ROOT "${ClickHouse_SOURCE_DIR}/contrib/mongo-c-driver/src") set(LIBBSON_SOURCE_DIR "${LIBBSON_SOURCES_ROOT}/libbson/src") @@ -102,12 +102,12 @@ set(MONGOC_HAVE_SCHED_GETCPU 0) set(MONGOC_HAVE_SS_FAMILY 0) configure_file( - ${LIBBSON_SOURCE_DIR}/bson/bson-config.h.in - ${LIBBSON_BINARY_DIR}/bson/bson-config.h + ${LIBBSON_SOURCE_DIR}/bson/config.h.in + ${LIBBSON_BINARY_DIR}/bson/config.h ) configure_file( - ${LIBBSON_SOURCE_DIR}/bson/bson-version.h.in - ${LIBBSON_BINARY_DIR}/bson/bson-version.h + ${LIBBSON_SOURCE_DIR}/bson/version.h.in + ${LIBBSON_BINARY_DIR}/bson/version.h ) set(COMMON_SOURCE_DIR "${LIBBSON_SOURCES_ROOT}/common/src") @@ -136,6 +136,8 @@ set(UTF8PROC_SOURCE_DIR "${LIBBSON_SOURCES_ROOT}/utf8proc-2.8.0") set(UTF8PROC_SOURCES "${UTF8PROC_SOURCE_DIR}/utf8proc.c") set(UTHASH_SOURCE_DIR "${LIBBSON_SOURCES_ROOT}/uthash") +set(MONGOC_CXX_COMPILER_ID "${CMAKE_CXX_COMPILER_ID}") +set(MONGOC_CXX_COMPILER_VERSION "${CMAKE_CXX_COMPILER_VERSION}") configure_file( ${LIBMONGOC_SOURCE_DIR}/mongoc/mongoc-config.h.in ${LIBMONGOC_BINARY_DIR}/mongoc/mongoc-config.h @@ -144,6 +146,10 @@ configure_file( ${LIBMONGOC_SOURCE_DIR}/mongoc/mongoc-version.h.in ${LIBMONGOC_BINARY_DIR}/mongoc/mongoc-version.h ) +configure_file( + ${LIBMONGOC_SOURCE_DIR}/mongoc/mongoc-config-private.h.in + ${LIBMONGOC_BINARY_DIR}/mongoc/mongoc-config-private.h +) add_library(_libmongoc ${LIBMONGOC_SOURCES} ${COMMON_SOURCES} ${UTF8PROC_SOURCES}) add_library(ch_contrib::libmongoc ALIAS _libmongoc) target_include_directories(_libmongoc SYSTEM PUBLIC ${LIBMONGOC_SOURCE_DIR} ${LIBMONGOC_BINARY_DIR} ${LIBMONGOC_SOURCE_DIR}/mongoc ${LIBMONGOC_BINARY_DIR}/mongoc ${COMMON_SOURCE_DIR} ${UTF8PROC_SOURCE_DIR} ${UTHASH_SOURCE_DIR} ) diff --git a/contrib/mongo-cxx-driver b/contrib/mongo-cxx-driver index 3166bdb49b71..4f5273939b5c 160000 --- a/contrib/mongo-cxx-driver +++ b/contrib/mongo-cxx-driver @@ -1 +1 @@ -Subproject commit 3166bdb49b717ce1bc30f46cc2b274ab1de7005b +Subproject commit 4f5273939b5cde587b34719f7c26364e502c00f4 diff --git a/contrib/mongo-cxx-driver-cmake/CMakeLists.txt b/contrib/mongo-cxx-driver-cmake/CMakeLists.txt index 212e099d378c..8c750e1082a4 100644 --- a/contrib/mongo-cxx-driver-cmake/CMakeLists.txt +++ b/contrib/mongo-cxx-driver-cmake/CMakeLists.txt @@ -8,66 +8,115 @@ endif() set(BSONCXX_SOURCES_DIR "${ClickHouse_SOURCE_DIR}/contrib/mongo-cxx-driver/src/bsoncxx") set(BSONCXX_BINARY_DIR "${ClickHouse_BINARY_DIR}/contrib/mongo-cxx-driver/src/bsoncxx") +include(GenerateExportHeader) + set(BSONCXX_SOURCES + ${BSONCXX_SOURCES_DIR}/lib/bsoncxx/private/itoa.cpp + ${BSONCXX_SOURCES_DIR}/lib/bsoncxx/private/version.cpp + ${BSONCXX_SOURCES_DIR}/lib/bsoncxx/v1/config/config.cpp + ${BSONCXX_SOURCES_DIR}/lib/bsoncxx/v1/config/export.cpp + ${BSONCXX_SOURCES_DIR}/lib/bsoncxx/v1/config/version.cpp + ${BSONCXX_SOURCES_DIR}/lib/bsoncxx/v1/detail/postlude.cpp + ${BSONCXX_SOURCES_DIR}/lib/bsoncxx/v1/detail/prelude.cpp ${BSONCXX_SOURCES_DIR}/lib/bsoncxx/v_noabi/bsoncxx/array/element.cpp ${BSONCXX_SOURCES_DIR}/lib/bsoncxx/v_noabi/bsoncxx/array/value.cpp ${BSONCXX_SOURCES_DIR}/lib/bsoncxx/v_noabi/bsoncxx/array/view.cpp ${BSONCXX_SOURCES_DIR}/lib/bsoncxx/v_noabi/bsoncxx/builder/core.cpp + ${BSONCXX_SOURCES_DIR}/lib/bsoncxx/v_noabi/bsoncxx/config/config.cpp + ${BSONCXX_SOURCES_DIR}/lib/bsoncxx/v_noabi/bsoncxx/config/export.cpp + ${BSONCXX_SOURCES_DIR}/lib/bsoncxx/v_noabi/bsoncxx/config/version.cpp ${BSONCXX_SOURCES_DIR}/lib/bsoncxx/v_noabi/bsoncxx/decimal128.cpp ${BSONCXX_SOURCES_DIR}/lib/bsoncxx/v_noabi/bsoncxx/document/element.cpp ${BSONCXX_SOURCES_DIR}/lib/bsoncxx/v_noabi/bsoncxx/document/value.cpp ${BSONCXX_SOURCES_DIR}/lib/bsoncxx/v_noabi/bsoncxx/document/view.cpp ${BSONCXX_SOURCES_DIR}/lib/bsoncxx/v_noabi/bsoncxx/exception/error_code.cpp + ${BSONCXX_SOURCES_DIR}/lib/bsoncxx/v_noabi/bsoncxx/exception/exception.cpp ${BSONCXX_SOURCES_DIR}/lib/bsoncxx/v_noabi/bsoncxx/json.cpp ${BSONCXX_SOURCES_DIR}/lib/bsoncxx/v_noabi/bsoncxx/oid.cpp - ${BSONCXX_SOURCES_DIR}/lib/bsoncxx/v_noabi/bsoncxx/private/itoa.cpp ${BSONCXX_SOURCES_DIR}/lib/bsoncxx/v_noabi/bsoncxx/string/view_or_value.cpp ${BSONCXX_SOURCES_DIR}/lib/bsoncxx/v_noabi/bsoncxx/types.cpp ${BSONCXX_SOURCES_DIR}/lib/bsoncxx/v_noabi/bsoncxx/types/bson_value/value.cpp ${BSONCXX_SOURCES_DIR}/lib/bsoncxx/v_noabi/bsoncxx/types/bson_value/view.cpp ${BSONCXX_SOURCES_DIR}/lib/bsoncxx/v_noabi/bsoncxx/validate.cpp + ${BSONCXX_SOURCES_DIR}/lib/bsoncxx/v_noabi/bsoncxx/vector.cpp ) + set(BSONCXX_POLY_USE_IMPLS ON) configure_file( - ${BSONCXX_SOURCES_DIR}/lib/bsoncxx/v_noabi/bsoncxx/config/config.hpp.in - ${BSONCXX_BINARY_DIR}/lib/bsoncxx/v_noabi/bsoncxx/config/config.hpp + ${BSONCXX_SOURCES_DIR}/lib/bsoncxx/v1/config/config.hpp.in + ${BSONCXX_BINARY_DIR}/lib/bsoncxx/v1/config/config.hpp ) configure_file( - ${BSONCXX_SOURCES_DIR}/lib/bsoncxx/v_noabi/bsoncxx/config/version.hpp.in - ${BSONCXX_BINARY_DIR}/lib/bsoncxx/v_noabi/bsoncxx/config/version.hpp + ${BSONCXX_SOURCES_DIR}/lib/bsoncxx/v1/config/version.hpp.in + ${BSONCXX_BINARY_DIR}/lib/bsoncxx/v1/config/version.hpp ) configure_file( - ${BSONCXX_SOURCES_DIR}/lib/bsoncxx/v_noabi/bsoncxx/config/private/config.hh.in - ${BSONCXX_BINARY_DIR}/lib/bsoncxx/v_noabi/bsoncxx/config/private/config.hh + ${BSONCXX_SOURCES_DIR}/lib/bsoncxx/private/config/config.hh.in + ${BSONCXX_BINARY_DIR}/lib/bsoncxx/private/config/config.hh ) add_library(_bsoncxx ${BSONCXX_SOURCES}) add_library(ch_contrib::bsoncxx ALIAS _bsoncxx) -target_include_directories(_bsoncxx SYSTEM PUBLIC "${BSONCXX_SOURCES_DIR}/include/bsoncxx/v_noabi" "${BSONCXX_SOURCES_DIR}/lib/bsoncxx/v_noabi" "${BSONCXX_BINARY_DIR}/lib/bsoncxx/v_noabi") -target_compile_definitions(_bsoncxx PUBLIC BSONCXX_STATIC) +target_include_directories(_bsoncxx SYSTEM PUBLIC + "${BSONCXX_SOURCES_DIR}/include" + "${BSONCXX_SOURCES_DIR}/include/bsoncxx/v_noabi" + "${BSONCXX_SOURCES_DIR}/lib" + "${BSONCXX_SOURCES_DIR}/lib/bsoncxx/v_noabi" + + "${BSONCXX_BINARY_DIR}/lib" + "${BSONCXX_BINARY_DIR}/lib/bsoncxx/v_noabi" +) target_link_libraries(_bsoncxx ch_contrib::libbson) -include(GenerateExportHeader) +# Taken from mongo-cxx-driver/src/bsoncxx/CMakeLists.txt +set(bsoncxx_export_header_custom_content "") +string(APPEND bsoncxx_export_header_custom_content [[ + +#undef BSONCXX_DEPRECATED_EXPORT +#undef BSONCXX_DEPRECATED_NO_EXPORT + +#if defined(_MSC_VER) +#define BSONCXX_ABI_CDECL __cdecl +#else +#define BSONCXX_ABI_CDECL +#endif + +#define BSONCXX_ABI_EXPORT_CDECL(...) BSONCXX_ABI_EXPORT __VA_ARGS__ BSONCXX_ABI_CDECL + +]] +) generate_export_header(_bsoncxx - BASE_NAME BSONCXX - EXPORT_MACRO_NAME BSONCXX_API - NO_EXPORT_MACRO_NAME BSONCXX_PRIVATE - EXPORT_FILE_NAME ${BSONCXX_BINARY_DIR}/lib/bsoncxx/v_noabi/bsoncxx/config/export.hpp + BASE_NAME BSONCXX_ABI + EXPORT_MACRO_NAME BSONCXX_ABI_EXPORT + DEPRECATED_MACRO_NAME BSONCXX_DEPRECATED + EXPORT_FILE_NAME ${BSONCXX_BINARY_DIR}/lib/bsoncxx/v_noabi/bsoncxx/v1/config/export.hpp STATIC_DEFINE BSONCXX_STATIC + CUSTOM_CONTENT_FROM_VARIABLE bsoncxx_export_header_custom_content ) - set(MONGOCXX_SOURCES_DIR "${ClickHouse_SOURCE_DIR}/contrib/mongo-cxx-driver/src/mongocxx") set(MONGOCXX_BINARY_DIR "${ClickHouse_BINARY_DIR}/contrib/mongo-cxx-driver/src/mongocxx") set(MONGOCXX_SOURCES + ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/private/bson.cpp + ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/private/conversions.cpp + ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/private/mongoc.cpp + ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/private/numeric_casting.cpp + ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v1/config/config.cpp + ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v1/config/export.cpp + ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v1/config/version.cpp + ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v1/detail/postlude.cpp + ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v1/detail/prelude.cpp ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/bulk_write.cpp ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/change_stream.cpp ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/client.cpp ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/client_encryption.cpp ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/client_session.cpp ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/collection.cpp + ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/config/config.cpp + ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/config/export.cpp + ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/config/version.cpp ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/cursor.cpp ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/database.cpp ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/events/command_failed_event.cpp @@ -84,9 +133,16 @@ set(MONGOCXX_SOURCES ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/events/topology_closed_event.cpp ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/events/topology_description.cpp ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/events/topology_opening_event.cpp + ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/exception/authentication_exception.cpp + ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/exception/bulk_write_exception.cpp ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/exception/error_code.cpp + ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/exception/exception.cpp + ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/exception/gridfs_exception.cpp + ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/exception/logic_error.cpp ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/exception/operation_exception.cpp + ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/exception/query_exception.cpp ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/exception/server_error_code.cpp + ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/exception/write_exception.cpp ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/gridfs/bucket.cpp ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/gridfs/downloader.cpp ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/gridfs/uploader.cpp @@ -111,7 +167,6 @@ set(MONGOCXX_SOURCES ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/options/client_encryption.cpp ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/options/client_session.cpp ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/options/count.cpp - ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/options/create_collection.cpp ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/options/data_key.cpp ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/options/delete.cpp ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/options/distinct.cpp @@ -136,10 +191,6 @@ set(MONGOCXX_SOURCES ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/options/update.cpp ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/pipeline.cpp ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/pool.cpp - ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/private/conversions.cpp - ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/private/libbson.cpp - ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/private/libmongoc.cpp - ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/private/numeric_casting.cpp ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/read_concern.cpp ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/read_preference.cpp ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/result/bulk_write.cpp @@ -156,37 +207,65 @@ set(MONGOCXX_SOURCES ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/validation_criteria.cpp ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/write_concern.cpp ) + set(MONGOCXX_COMPILER_VERSION "${CMAKE_CXX_COMPILER_VERSION}") set(MONGOCXX_COMPILER_ID "${CMAKE_CXX_COMPILER_ID}") -set(MONGOCXX_LINK_WITH_STATIC_MONGOC 1) -set(MONGOCXX_BUILD_STATIC 1) + if(ENABLE_SSL) set(MONGOCXX_ENABLE_SSL 1) endif() +set(BSONCXX_STATIC 1) +set(MONGOCXX_STATIC 1) + configure_file( - ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/config/config.hpp.in - ${MONGOCXX_BINARY_DIR}/lib/mongocxx/v_noabi/mongocxx/config/config.hpp + ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v1/config/config.hpp.in + ${MONGOCXX_BINARY_DIR}/lib/mongocxx/v1/config/config.hpp ) configure_file( - ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/config/version.hpp.in - ${MONGOCXX_BINARY_DIR}/lib/mongocxx/v_noabi/mongocxx/config/version.hpp + ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v1/config/version.hpp.in + ${MONGOCXX_BINARY_DIR}/lib/mongocxx/v1/config/version.hpp ) configure_file( - ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/config/private/config.hh.in - ${MONGOCXX_BINARY_DIR}/lib/mongocxx/v_noabi/mongocxx/config/private/config.hh + ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/private/config/config.hh.in + ${MONGOCXX_BINARY_DIR}/lib/mongocxx/private/config/config.hh ) add_library(_mongocxx ${MONGOCXX_SOURCES}) add_library(ch_contrib::mongocxx ALIAS _mongocxx) -target_include_directories(_mongocxx SYSTEM PUBLIC "${MONGOCXX_SOURCES_DIR}/include/mongocxx/v_noabi" "${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi" "${MONGOCXX_BINARY_DIR}/lib/mongocxx/v_noabi") -target_compile_definitions(_mongocxx PUBLIC MONGOCXX_STATIC) +target_include_directories(_mongocxx SYSTEM PUBLIC + "${MONGOCXX_SOURCES_DIR}/include" + "${MONGOCXX_SOURCES_DIR}/include/mongocxx/v_noabi" + "${MONGOCXX_SOURCES_DIR}/lib" + "${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi" + + "${MONGOCXX_BINARY_DIR}/lib" + "${MONGOCXX_BINARY_DIR}/lib/mongocxx/v_noabi" +) target_link_libraries(_mongocxx ch_contrib::bsoncxx ch_contrib::libmongoc) +# Taken from mongo-cxx-driver/src/mongocxx/CMakeLists.txt +set(mongocxx_export_header_custom_content "") +string(APPEND mongocxx_export_header_custom_content [[ + +#undef MONGOCXX_DEPRECATED_EXPORT +#undef MONGOCXX_DEPRECATED_NO_EXPORT + +#if defined(_MSC_VER) +#define MONGOCXX_ABI_CDECL __cdecl +#else +#define MONGOCXX_ABI_CDECL +#endif + +#define MONGOCXX_ABI_EXPORT_CDECL(...) MONGOCXX_ABI_EXPORT __VA_ARGS__ MONGOCXX_ABI_CDECL + +]] +) generate_export_header(_mongocxx - BASE_NAME MONGOCXX - EXPORT_MACRO_NAME MONGOCXX_API - NO_EXPORT_MACRO_NAME MONGOCXX_PRIVATE - EXPORT_FILE_NAME ${MONGOCXX_BINARY_DIR}/lib/mongocxx/v_noabi/mongocxx/config/export.hpp + BASE_NAME MONGOCXX_ABI + EXPORT_MACRO_NAME MONGOCXX_ABI_EXPORT + DEPRECATED_MACRO_NAME MONGOCXX_DEPRECATED + EXPORT_FILE_NAME ${MONGOCXX_BINARY_DIR}/lib/mongocxx/v_noabi/mongocxx/v1/config/export.hpp STATIC_DEFINE MONGOCXX_STATIC + CUSTOM_CONTENT_FROM_VARIABLE mongocxx_export_header_custom_content ) diff --git a/contrib/postgres b/contrib/postgres index 5ad0c31d0c3a..c37596dd61c5 160000 --- a/contrib/postgres +++ b/contrib/postgres @@ -1 +1 @@ -Subproject commit 5ad0c31d0c3a76ed64655f4d397934b5ecc9696f +Subproject commit c37596dd61c5f2b8b7521fdbcdabc651bd9412c4 diff --git a/contrib/postgres-cmake/pg_config.h b/contrib/postgres-cmake/pg_config.h index 169b0af039ea..12767588b94d 100644 --- a/contrib/postgres-cmake/pg_config.h +++ b/contrib/postgres-cmake/pg_config.h @@ -593,7 +593,7 @@ #define PACKAGE_NAME "PostgreSQL" /* Define to the full name and version of this package. */ -#define PACKAGE_STRING "PostgreSQL 18.0" +#define PACKAGE_STRING "PostgreSQL 18.3" /* Define to the one symbol short name of this package. */ #define PACKAGE_TARNAME "postgresql" @@ -602,7 +602,7 @@ #define PACKAGE_URL "https://www.postgresql.org/" /* Define to the version of this package. */ -#define PACKAGE_VERSION "18.0" +#define PACKAGE_VERSION "18.3" /* Define to the name of a signed 128-bit integer type. */ #define PG_INT128_TYPE __int128 @@ -618,19 +618,19 @@ #define PG_MAJORVERSION_NUM 18 /* PostgreSQL minor version number */ -#define PG_MINORVERSION_NUM 0 +#define PG_MINORVERSION_NUM 3 /* Define to best printf format archetype, usually gnu_printf if available. */ #define PG_PRINTF_ATTRIBUTE gnu_printf /* PostgreSQL version as a string */ -#define PG_VERSION "18.0" +#define PG_VERSION "18.3" /* PostgreSQL version as a number */ -#define PG_VERSION_NUM 180000 +#define PG_VERSION_NUM 180003 /* A string containing the version number, platform, and C compiler */ -#define PG_VERSION_STR "PostgreSQL 18.0 on x86_64-pc-linux-gnu, compiled by gcc (GCC) 15.2.1 20250813, 64-bit" +#define PG_VERSION_STR "PostgreSQL 18.3 on x86_64-pc-linux-gnu, compiled by gcc (GCC) 15.2.1 20250813, 64-bit" /* Define to 1 to allow profiling output to be saved separately for each process. */ diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 93350ee596b2..1c3a58c94c89 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1200,8 +1200,8 @@ try server_settings[ServerSetting::max_thread_pool_size], server_settings[ServerSetting::max_thread_pool_free_size], server_settings[ServerSetting::thread_pool_queue_size], - has_trace_collector ? server_settings[ServerSetting::global_profiler_real_time_period_ns] : 0, - has_trace_collector ? server_settings[ServerSetting::global_profiler_cpu_time_period_ns] : 0); + has_trace_collector ? server_settings[ServerSetting::global_profiler_real_time_period_ns].value : 0, + has_trace_collector ? server_settings[ServerSetting::global_profiler_cpu_time_period_ns].value : 0); if (has_trace_collector) { diff --git a/src/Access/AccessRights.cpp b/src/Access/AccessRights.cpp index 979434eaa0d0..50901ce22154 100644 --- a/src/Access/AccessRights.cpp +++ b/src/Access/AccessRights.cpp @@ -562,12 +562,50 @@ struct AccessRights::Node friend bool operator!=(const Node & left, const Node & right) { return !(left == right); } + /// Checks whether `this` node's access rights are a superset of `other`'s bool contains(const Node & other) const { - Node tmp_node = *this; - tmp_node.makeIntersection(other); - /// If we get the same node after the intersection, our node is fully covered by the given one. - return tmp_node == other; + /// The check traverses children from both sides: + /// 1) For each child in `other`, find the matching node in `this` and verify containment. + /// 2) For each child in `this`, find the matching node in `other` and verify containment. + /// + /// The reverse traversal (step 2) is needed to handle partial revokes correctly. + /// Example: GRANT SELECT ON *.*, REVOKE SELECT ON foo.* + /// + /// this: other: + /// root (SELECT) root (SELECT) + /// | + /// "foo" (SELECT) + /// | + /// "" (leaf, USAGE) + /// + /// Step 1 alone would pass because `other` has no children to check against, but `this` does not contain `other` because + /// `this` has revoked SELECT on "foo". + + if (!flags.contains(other.flags)) + return false; + + if (other.children) + { + for (const auto & other_child : *other.children) + { + Node this_child = tryGetLeaf(other_child.node_name, other_child.level, !other_child.isLeaf()); + if (!this_child.contains(other_child)) + return false; + } + } + + if (children) + { + for (const auto & this_child : *children) + { + Node other_child = other.tryGetLeaf(this_child.node_name, this_child.level, !this_child.isLeaf()); + if (!this_child.contains(other_child)) + return false; + } + } + + return true; } void makeUnion(const Node & other) diff --git a/src/Access/tests/gtest_access_rights_ops.cpp b/src/Access/tests/gtest_access_rights_ops.cpp index b7d1c102085f..7e6f3362a902 100644 --- a/src/Access/tests/gtest_access_rights_ops.cpp +++ b/src/Access/tests/gtest_access_rights_ops.cpp @@ -924,6 +924,59 @@ TEST(AccessRights, ContainsWithWildcardsAndPartialRevokes) lhs.grantWildcard(AccessType::SELECT, "testing"); rhs.grantWildcard(AccessType::SELECT, "test"); ASSERT_FALSE(lhs.contains(rhs)); + + lhs = {}; + rhs = {}; + lhs.grantWithGrantOption(AccessType::SET_DEFINER); + lhs.revoke(AccessType::SET_DEFINER, "internal-user-1"); + rhs.grantWithGrantOption(AccessType::SET_DEFINER); + rhs.revoke(AccessType::SET_DEFINER, "internal-user-1"); + rhs.revoke(AccessType::SET_DEFINER, "internal-user-2"); + rhs.revoke(AccessType::SET_DEFINER, "internal-user-3"); + ASSERT_TRUE(lhs.contains(rhs)); + + lhs = {}; + rhs = {}; + rhs.grantWithGrantOption(AccessType::SET_DEFINER); + rhs.revoke(AccessType::SET_DEFINER, "internal-user-1"); + lhs.grantWithGrantOption(AccessType::SET_DEFINER); + lhs.revoke(AccessType::SET_DEFINER, "internal-user-1"); + lhs.revoke(AccessType::SET_DEFINER, "internal-user-2"); + lhs.revoke(AccessType::SET_DEFINER, "internal-user-3"); + ASSERT_FALSE(lhs.contains(rhs)); + + lhs = {}; + rhs = {}; + lhs.grantWithGrantOption(AccessType::SET_DEFINER); + lhs.revoke(AccessType::SET_DEFINER, "internal-user-1"); + lhs.revoke(AccessType::SET_DEFINER, "internal-user-2"); + rhs.grantWithGrantOption(AccessType::SET_DEFINER); + rhs.revoke(AccessType::SET_DEFINER, "internal-user-1"); + rhs.revoke(AccessType::SET_DEFINER, "internal-user-2"); + ASSERT_TRUE(lhs.contains(rhs)); + + lhs = {}; + rhs = {}; + lhs.grant(AccessType::CREATE_ROLE); + lhs.grant(AccessType::ROLE_ADMIN); + lhs.grantWithGrantOption(AccessType::SET_DEFINER); + lhs.revoke(AccessType::SET_DEFINER, "internal-user-1"); + rhs.grantWithGrantOption(AccessType::SET_DEFINER); + rhs.revoke(AccessType::SET_DEFINER, "internal-user-1"); + rhs.revoke(AccessType::SET_DEFINER, "internal-user-2"); + rhs.revoke(AccessType::SET_DEFINER, "internal-user-3"); + ASSERT_TRUE(lhs.contains(rhs)); + + lhs = {}; + rhs = {}; + lhs.grant(AccessType::SELECT); + lhs.revoke(AccessType::SELECT, "secret_db1"); + rhs.grant(AccessType::SELECT); + rhs.revoke(AccessType::SELECT, "secret_db1"); + rhs.revoke(AccessType::SELECT, "secret_db2"); + rhs.revoke(AccessType::SELECT, "secret_db3"); + ASSERT_TRUE(lhs.contains(rhs)); + ASSERT_FALSE(rhs.contains(lhs)); } TEST(AccessRights, ColumnLevelWildcardOperations) diff --git a/src/AggregateFunctions/AggregateFunctionGroupNumericIndexedVectorDataBSI.h b/src/AggregateFunctions/AggregateFunctionGroupNumericIndexedVectorDataBSI.h index a65a1fb1ca12..ffcf8816b915 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupNumericIndexedVectorDataBSI.h +++ b/src/AggregateFunctions/AggregateFunctionGroupNumericIndexedVectorDataBSI.h @@ -462,6 +462,19 @@ class BSINumericIndexedVector */ void pointwiseAddInplace(const BSINumericIndexedVector & rhs) { + /// Self-addition requires a deep copy because the full adder logic below + /// performs in-place XOR on shared bitmaps (`sum->rb_xor(*addend)` where + /// `sum` and `addend` alias the same Roaring bitmap via `shallowCopyFrom`), + /// which triggers an assertion in CRoaring (`assert(x1 != x2)`) and would + /// produce incorrect results (A XOR A = 0) in release builds. + if (this == &rhs) + { + BSINumericIndexedVector copy; + copy.deepCopyFrom(rhs); + pointwiseAddInplace(copy); + return; + } + if (isEmpty()) { deepCopyFrom(rhs); @@ -538,6 +551,16 @@ class BSINumericIndexedVector */ void pointwiseSubtractInplace(const BSINumericIndexedVector & rhs) { + /// Self-subtraction requires a deep copy for the same reason as + /// `pointwiseAddInplace`: in-place XOR on aliased bitmaps is undefined. + if (this == &rhs) + { + BSINumericIndexedVector copy; + copy.deepCopyFrom(rhs); + pointwiseSubtractInplace(copy); + return; + } + auto total_indexes = getAllIndex(); total_indexes->rb_or(*rhs.getAllIndex()); diff --git a/src/Analyzer/Passes/CrossToInnerJoinPass.cpp b/src/Analyzer/Passes/CrossToInnerJoinPass.cpp index 0776099685b9..b246a7861553 100644 --- a/src/Analyzer/Passes/CrossToInnerJoinPass.cpp +++ b/src/Analyzer/Passes/CrossToInnerJoinPass.cpp @@ -152,8 +152,8 @@ class CrossToInnerJoinVisitor : public InDepthQueryTreeVisitorWithContext -#include -#include #include #include #include #include #include #include +#include #include #include #include @@ -35,28 +34,13 @@ class RegexpFunctionRewriteVisitor : public InDepthQueryTreeVisitorWithContextas(); - if (!function_node || !function_node->isOrdinaryFunction() || !isString(function_node->getResultType())) + if (!function_node || !function_node->isOrdinaryFunction() || !isString(removeNullable(function_node->getResultType()))) return; /// If a regular expression without alternatives starts with ^ or ends with an unescaped $, rewrite /// replaceRegexpAll with replaceRegexpOne. if (function_node->getFunctionName() == "replaceRegexpAll" || Poco::toLower(function_node->getFunctionName()) == "regexp_replace") - { - if (!handleReplaceRegexpAll(*function_node)) - return; - - /// After optimization, function_node might now be "replaceRegexpOne", so continue processing - } - - /// If a replaceRegexpOne function has a regexp that matches entire haystack, and a replacement of nothing other - /// than \1 and some subpatterns in the regexp, or \0 and no subpatterns in the regexp, rewrite it with extract. - if (function_node->getFunctionName() == "replaceRegexpOne") - { - if (!handleReplaceRegexpOne(*function_node)) - return; - - /// After optimization, function_node might now be "extract", so continue processing - } + handleReplaceRegexpAll(*function_node); /// If an extract function has a regexp with some subpatterns and the regexp starts with ^.* or ending with an /// unescaped .*$, remove this prefix and/or suffix. @@ -114,62 +98,6 @@ class RegexpFunctionRewriteVisitor : public InDepthQueryTreeVisitorWithContextas(); - if (!constant_node) - return false; - - if (auto constant_type = constant_node->getResultType(); !isString(constant_type)) - return false; - - String replacement = constant_node->getValue().safeGet(); - bool replacement_zero = replacement == "\\0"; - bool replacement_one = replacement == "\\1"; - if (!replacement_zero && !replacement_one) - return false; - - const auto * regexp_node = function_node_arguments_nodes[1]->as(); - if (!regexp_node) - return false; - - if (auto regexp_type = regexp_node->getResultType(); !isString(regexp_type)) - return false; - - String regexp = regexp_node->getValue().safeGet(); - - /// Currently only look for ^...$ patterns without alternatives. - bool starts_with_caret = regexp.front() == '^'; - if (!starts_with_caret) - return false; - - bool ends_with_unescaped_dollar = false; - if (!regexp.empty() && regexp.back() == '$') - ends_with_unescaped_dollar = isUnescaped(regexp, regexp.size() - 1); - - if (!ends_with_unescaped_dollar) - return false; - - /// Analyze the regular expression to detect presence of alternatives (e.g., 'a|b'). If any alternatives are - /// found, return false to indicate the regexp is not suitable for optimization. - RegexpAnalysisResult result = OptimizedRegularExpression::analyze(regexp); - if (!result.alternatives.empty()) - return false; - - if ((replacement_one && result.has_capture) || (replacement_zero && !result.has_capture)) - { - function_node_arguments_nodes.resize(2); - resolveOrdinaryFunctionNodeByName(function_node, "extract", getContext()); - return true; - } - - return false; - } - void handleExtract(FunctionNode & function_node) { auto & function_node_arguments_nodes = function_node.getArguments().getNodes(); diff --git a/src/Analyzer/Resolve/IdentifierResolver.cpp b/src/Analyzer/Resolve/IdentifierResolver.cpp index 216d5a90bf24..745f9c974ab1 100644 --- a/src/Analyzer/Resolve/IdentifierResolver.cpp +++ b/src/Analyzer/Resolve/IdentifierResolver.cpp @@ -997,8 +997,17 @@ IdentifierResolveResult IdentifierResolver::tryResolveIdentifierFromJoin(const I { auto & resolved_column = resolved_identifier_candidate->as(); auto using_column_node_it = using_column_name_to_column_node.find(resolved_column.getColumnName()); + if (using_column_node_it == using_column_name_to_column_node.end()) + return; + + const auto & using_column_list = using_column_node_it->second->as().getExpressionOrThrow()->as(); + auto matches_using_column = [&](const auto & node) { return node->isEqual(*resolved_identifier_candidate); }; + if (std::ranges::none_of(using_column_list.getNodes(), matches_using_column)) + return; + if (using_column_node_it != using_column_name_to_column_node.end() && !using_column_node_it->second->getColumnType()->equals(*resolved_column.getColumnType())) + { // std::cerr << "... fixing type for " << resolved_column.dumpTree() << std::endl; auto resolved_column_clone = std::static_pointer_cast(resolved_column.clone()); diff --git a/src/Analyzer/Resolve/QueryAnalyzer.cpp b/src/Analyzer/Resolve/QueryAnalyzer.cpp index 26d34be495f3..00d004e33426 100644 --- a/src/Analyzer/Resolve/QueryAnalyzer.cpp +++ b/src/Analyzer/Resolve/QueryAnalyzer.cpp @@ -5385,56 +5385,88 @@ void QueryAnalyzer::resolveCrossJoin(QueryTreeNodePtr & cross_join_node, Identif } } -static NameSet getColumnsFromTableExpression(const QueryTreeNodePtr & table_expression) +static NameSet getColumnsFromTableExpression(const QueryTreeNodePtr & root_table_expression) { NameSet existing_columns; - switch (table_expression->getNodeType()) + std::stack nodes_to_process; + nodes_to_process.push(root_table_expression.get()); + + while (!nodes_to_process.empty()) { - case QueryTreeNodeType::TABLE: { - const auto * table_node = table_expression->as(); - chassert(table_node); + const auto * table_expression = nodes_to_process.top(); + nodes_to_process.pop(); - auto get_column_options = GetColumnsOptions(GetColumnsOptions::AllPhysical).withSubcolumns(); - for (const auto & column : table_node->getStorageSnapshot()->getColumns(get_column_options)) - existing_columns.insert(column.name); + switch (table_expression->getNodeType()) + { + case QueryTreeNodeType::TABLE: + { + const auto * table_node = table_expression->as(); + chassert(table_node); - return existing_columns; - } - case QueryTreeNodeType::TABLE_FUNCTION: { - const auto * table_function_node = table_expression->as(); - chassert(table_function_node); + auto get_column_options = GetColumnsOptions(GetColumnsOptions::AllPhysical).withSubcolumns(); + for (const auto & column : table_node->getStorageSnapshot()->getColumns(get_column_options)) + existing_columns.insert(column.name); - auto get_column_options = GetColumnsOptions(GetColumnsOptions::AllPhysical).withSubcolumns(); - for (const auto & column : table_function_node->getStorageSnapshot()->getColumns(get_column_options)) - existing_columns.insert(column.name); + break; + } + case QueryTreeNodeType::TABLE_FUNCTION: + { + const auto * table_function_node = table_expression->as(); + chassert(table_function_node); - return existing_columns; - } - case QueryTreeNodeType::QUERY: { - const auto * query_node = table_expression->as(); - chassert(query_node); + auto get_column_options = GetColumnsOptions(GetColumnsOptions::AllPhysical).withSubcolumns(); + for (const auto & column : table_function_node->getStorageSnapshot()->getColumns(get_column_options)) + existing_columns.insert(column.name); - for (const auto & column : query_node->getProjectionColumns()) - existing_columns.insert(column.name); + break; + } + case QueryTreeNodeType::QUERY: + { + const auto * query_node = table_expression->as(); + chassert(query_node); - return existing_columns; - } - case QueryTreeNodeType::UNION: { - const auto * union_node = table_expression->as(); - chassert(union_node); + for (const auto & column : query_node->getProjectionColumns()) + existing_columns.insert(column.name); - for (const auto & column : union_node->computeProjectionColumns()) - existing_columns.insert(column.name); + break; + } + case QueryTreeNodeType::UNION: + { + const auto * union_node = table_expression->as(); + chassert(union_node); - return existing_columns; + for (const auto & column : union_node->computeProjectionColumns()) + existing_columns.insert(column.name); + break; + } + case QueryTreeNodeType::JOIN: + { + const auto * join_node = table_expression->as(); + chassert(join_node); + + nodes_to_process.push(join_node->getLeftTableExpression().get()); + nodes_to_process.push(join_node->getRightTableExpression().get()); + break; + } + case QueryTreeNodeType::CROSS_JOIN: + { + const auto * cross_join_node = table_expression->as(); + chassert(cross_join_node); + for (const auto & table_expr : cross_join_node->getTableExpressions()) + nodes_to_process.push(table_expr.get()); + break; + } + default: + { + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Expected TableNode, TableFunctionNode, QueryNode or UnionNode, got {}: {}", + table_expression->getNodeTypeName(), + table_expression->formatASTForErrorMessage()); + } } - default: - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "Expected TableNode, TableFunctionNode, QueryNode or UnionNode, got {}: {}", - table_expression->getNodeTypeName(), - table_expression->formatASTForErrorMessage()); } + return existing_columns; } /// Resolve join node in scope @@ -5528,8 +5560,14 @@ void QueryAnalyzer::resolveJoin(QueryTreeNodePtr & join_node, IdentifierResolveS while (existing_columns.contains(column_name_type.name)) column_name_type.name = "_" + column_name_type.name; + auto [expression_source, is_single_source] = getExpressionSource(resolved_nodes.front()); + /// Do not support `SELECT t1.a + t2.a AS id ... USING id` + if (!is_single_source) + return nullptr; + /// Create ColumnNode with expression from parent projection - return std::make_shared(std::move(column_name_type), resolved_nodes.front(), left_table_expression); + return std::make_shared(std::move(column_name_type), resolved_nodes.front(), + expression_source ? expression_source : left_table_expression); } } } diff --git a/src/Analyzer/Utils.cpp b/src/Analyzer/Utils.cpp index a873091f7960..7c2142c90f4c 100644 --- a/src/Analyzer/Utils.cpp +++ b/src/Analyzer/Utils.cpp @@ -104,7 +104,7 @@ bool isStorageUsedInTree(const StoragePtr & storage, const IQueryTreeNode * root if (table_node || table_function_node) { const auto & table_storage = table_node ? table_node->getStorage() : table_function_node->getStorage(); - if (table_storage->getStorageID() == storage->getStorageID()) + if (table_storage && table_storage->getStorageID() == storage->getStorageID()) return true; } @@ -975,12 +975,7 @@ void resolveAggregateFunctionNodeByName(FunctionNode & function_node, const Stri function_node.resolveAsAggregateFunction(std::move(aggregate_function)); } -/** Returns: - * {_, false} - multiple sources - * {nullptr, true} - no sources (for constants) - * {source, true} - single source - */ -std::pair getExpressionSourceImpl(const QueryTreeNodePtr & node) +std::pair getExpressionSource(const QueryTreeNodePtr & node) { if (const auto * column = node->as()) { @@ -996,7 +991,7 @@ std::pair getExpressionSourceImpl(const QueryTreeNodePtr const auto & args = func->getArguments().getNodes(); for (const auto & arg : args) { - auto [arg_source, is_ok] = getExpressionSourceImpl(arg); + auto [arg_source, is_ok] = getExpressionSource(arg); if (!is_ok) return {nullptr, false}; @@ -1015,14 +1010,6 @@ std::pair getExpressionSourceImpl(const QueryTreeNodePtr return {nullptr, false}; } -QueryTreeNodePtr getExpressionSource(const QueryTreeNodePtr & node) -{ - auto [source, is_ok] = getExpressionSourceImpl(node); - if (!is_ok) - return nullptr; - return source; -} - /** There are no limits on the maximum size of the result for the subquery. * Since the result of the query is not the result of the entire query. */ diff --git a/src/Analyzer/Utils.h b/src/Analyzer/Utils.h index 215bc816cccc..9a19af2b4e0d 100644 --- a/src/Analyzer/Utils.h +++ b/src/Analyzer/Utils.h @@ -157,8 +157,10 @@ void resolveOrdinaryFunctionNodeByName(FunctionNode & function_node, const Strin /// Arguments and parameters are taken from the node. void resolveAggregateFunctionNodeByName(FunctionNode & function_node, const String & function_name); -/// Checks that node has only one source and returns it -QueryTreeNodePtr getExpressionSource(const QueryTreeNodePtr & node); +/// Returns single source of expression node. +/// First element of pair is source node, can be nullptr if there are no sources or multiple sources. +/// Second element of pair is true if there is at most one source, false if there are multiple sources. +std::pair getExpressionSource(const QueryTreeNodePtr & node); /// Update mutable context for subquery execution void updateContextForSubqueryExecution(ContextMutablePtr & mutable_context); diff --git a/src/Backups/RestorerFromBackup.cpp b/src/Backups/RestorerFromBackup.cpp index ba0e6718d6f1..48b1fcd96f50 100644 --- a/src/Backups/RestorerFromBackup.cpp +++ b/src/Backups/RestorerFromBackup.cpp @@ -833,6 +833,11 @@ void RestorerFromBackup::createDatabase(const String & database_name) const auto create_query_context = Context::createCopy(query_context); create_query_context->setSetting("allow_deprecated_database_ordinary", 1); + /// We shouldn't use the progress callback copied from the `query_context` because it was set in a protocol handler (e.g. HTTPHandler) + /// for the "RESTORE ASYNC" query which could have already finished (the restore process is working in the background). + /// TODO: Get rid of using `query_context` in class RestorerFromBackup. + create_query_context->setProgressCallback(nullptr); + #if CLICKHOUSE_CLOUD if (shared_catalog && SharedDatabaseCatalog::instance().shouldRestoreDatabase(create_database_query)) { @@ -1075,6 +1080,11 @@ void RestorerFromBackup::createTable(const QualifiedTableName & table_name) create_query_context->setUnderRestore(true); + /// We shouldn't use the progress callback copied from the `query_context` because it was set in a protocol handler (e.g. HTTPHandler) + /// for the "RESTORE ASYNC" query which could have already finished (the restore process is working in the background). + /// TODO: Get rid of using `query_context` in class RestorerFromBackup. + create_query_context->setProgressCallback(nullptr); + /// Execute CREATE TABLE query (we call IDatabase::createTableRestoredFromBackup() to allow the database to do some /// database-specific things). database->createTableRestoredFromBackup( diff --git a/src/Columns/ColumnDynamic.h b/src/Columns/ColumnDynamic.h index 4e1bbe3232f1..710be4059b83 100644 --- a/src/Columns/ColumnDynamic.h +++ b/src/Columns/ColumnDynamic.h @@ -212,7 +212,7 @@ class ColumnDynamic final : public COWHelper, Colum ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override { - return create(variant_column_ptr->filter(filt, result_size_hint), variant_info, max_dynamic_types, global_max_dynamic_types); + return create(variant_column_ptr->filter(filt, result_size_hint), variant_info, max_dynamic_types, global_max_dynamic_types, statistics); } void expand(const Filter & mask, bool inverted) override @@ -222,17 +222,17 @@ class ColumnDynamic final : public COWHelper, Colum ColumnPtr permute(const Permutation & perm, size_t limit) const override { - return create(variant_column_ptr->permute(perm, limit), variant_info, max_dynamic_types, global_max_dynamic_types); + return create(variant_column_ptr->permute(perm, limit), variant_info, max_dynamic_types, global_max_dynamic_types, statistics); } ColumnPtr index(const IColumn & indexes, size_t limit) const override { - return create(variant_column_ptr->index(indexes, limit), variant_info, max_dynamic_types, global_max_dynamic_types); + return create(variant_column_ptr->index(indexes, limit), variant_info, max_dynamic_types, global_max_dynamic_types, statistics); } ColumnPtr replicate(const Offsets & replicate_offsets) const override { - return create(variant_column_ptr->replicate(replicate_offsets), variant_info, max_dynamic_types, global_max_dynamic_types); + return create(variant_column_ptr->replicate(replicate_offsets), variant_info, max_dynamic_types, global_max_dynamic_types, statistics); } MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override @@ -241,7 +241,7 @@ class ColumnDynamic final : public COWHelper, Colum MutableColumns scattered_columns; scattered_columns.reserve(num_columns); for (auto & scattered_variant_column : scattered_variant_columns) - scattered_columns.emplace_back(create(std::move(scattered_variant_column), variant_info, max_dynamic_types, global_max_dynamic_types)); + scattered_columns.emplace_back(create(std::move(scattered_variant_column), variant_info, max_dynamic_types, global_max_dynamic_types, statistics)); return scattered_columns; } diff --git a/src/Columns/ColumnObject.cpp b/src/Columns/ColumnObject.cpp index 7ca7701a1e83..d5d6d8876817 100644 --- a/src/Columns/ColumnObject.cpp +++ b/src/Columns/ColumnObject.cpp @@ -1149,7 +1149,7 @@ ColumnPtr ColumnObject::filter(const Filter & filt, ssize_t result_size_hint) co filtered_dynamic_paths[path] = column->filter(filt, result_size_hint); auto filtered_shared_data = shared_data->filter(filt, result_size_hint); - return ColumnObject::create(filtered_typed_paths, filtered_dynamic_paths, filtered_shared_data, max_dynamic_paths, global_max_dynamic_paths, max_dynamic_types); + return ColumnObject::create(filtered_typed_paths, filtered_dynamic_paths, filtered_shared_data, max_dynamic_paths, global_max_dynamic_paths, max_dynamic_types, statistics); } void ColumnObject::expand(const Filter & mask, bool inverted) @@ -1174,7 +1174,7 @@ ColumnPtr ColumnObject::permute(const Permutation & perm, size_t limit) const permuted_dynamic_paths[path] = column->permute(perm, limit); auto permuted_shared_data = shared_data->permute(perm, limit); - return ColumnObject::create(permuted_typed_paths, permuted_dynamic_paths, permuted_shared_data, max_dynamic_paths, global_max_dynamic_paths, max_dynamic_types); + return ColumnObject::create(permuted_typed_paths, permuted_dynamic_paths, permuted_shared_data, max_dynamic_paths, global_max_dynamic_paths, max_dynamic_types, statistics); } ColumnPtr ColumnObject::index(const IColumn & indexes, size_t limit) const @@ -1190,7 +1190,7 @@ ColumnPtr ColumnObject::index(const IColumn & indexes, size_t limit) const indexed_dynamic_paths[path] = column->index(indexes, limit); auto indexed_shared_data = shared_data->index(indexes, limit); - return ColumnObject::create(indexed_typed_paths, indexed_dynamic_paths, indexed_shared_data, max_dynamic_paths, global_max_dynamic_paths, max_dynamic_types); + return ColumnObject::create(indexed_typed_paths, indexed_dynamic_paths, indexed_shared_data, max_dynamic_paths, global_max_dynamic_paths, max_dynamic_types, statistics); } ColumnPtr ColumnObject::replicate(const Offsets & replicate_offsets) const @@ -1206,7 +1206,7 @@ ColumnPtr ColumnObject::replicate(const Offsets & replicate_offsets) const replicated_dynamic_paths[path] = column->replicate(replicate_offsets); auto replicated_shared_data = shared_data->replicate(replicate_offsets); - return ColumnObject::create(replicated_typed_paths, replicated_dynamic_paths, replicated_shared_data, max_dynamic_paths, global_max_dynamic_paths, max_dynamic_types); + return ColumnObject::create(replicated_typed_paths, replicated_dynamic_paths, replicated_shared_data, max_dynamic_paths, global_max_dynamic_paths, max_dynamic_types, statistics); } MutableColumns ColumnObject::scatter(ColumnIndex num_columns, const Selector & selector) const @@ -1237,7 +1237,7 @@ MutableColumns ColumnObject::scatter(ColumnIndex num_columns, const Selector & s MutableColumns result_columns; result_columns.reserve(num_columns); for (size_t i = 0; i != num_columns; ++i) - result_columns.emplace_back(ColumnObject::create(std::move(scattered_typed_paths[i]), std::move(scattered_dynamic_paths[i]), std::move(scattered_shared_data_columns[i]), max_dynamic_paths, global_max_dynamic_paths, max_dynamic_types)); + result_columns.emplace_back(ColumnObject::create(std::move(scattered_typed_paths[i]), std::move(scattered_dynamic_paths[i]), std::move(scattered_shared_data_columns[i]), max_dynamic_paths, global_max_dynamic_paths, max_dynamic_types, statistics)); return result_columns; } diff --git a/src/Columns/ColumnVariant.cpp b/src/Columns/ColumnVariant.cpp index fc3f00915eb8..2d8ebcfc5c40 100644 --- a/src/Columns/ColumnVariant.cpp +++ b/src/Columns/ColumnVariant.cpp @@ -203,6 +203,8 @@ ColumnVariant::ColumnVariant(DB::MutableColumnPtr local_discriminators_, DB::Mut global_to_local_discriminators[local_to_global_discriminators[i]] = i; } } + + validateState(); } namespace @@ -1759,5 +1761,35 @@ void ColumnVariant::fixDynamicStructure() variant->fixDynamicStructure(); } +void ColumnVariant::validateState() const +{ + const auto & local_discriminators_data = getLocalDiscriminators(); + const auto & offsets_data = getOffsets(); + if (local_discriminators_data.size() != offsets_data.size()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of discriminators and offsets should be equal, but {} and {} were given", local_discriminators_data.size(), offsets_data.size()); + + std::vector actual_variant_sizes(variants.size()); + for (size_t i = 0; i != variants.size(); ++i) + actual_variant_sizes[i] = variants[i]->size(); + + std::vector expected_variant_sizes(variants.size(), 0); + for (size_t i = 0; i != local_discriminators_data.size(); ++i) + { + auto local_discr = local_discriminators_data[i]; + if (local_discr != NULL_DISCRIMINATOR) + { + ++expected_variant_sizes[local_discr]; + if (offsets_data[i] >= actual_variant_sizes[local_discr]) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Offset at position {} is {}, but variant {} ({}) has size {}", i, offsets_data[i], static_cast(local_discr), variants[local_discr]->getName(), variants[local_discr]->size()); + } + } + + for (size_t i = 0; i != variants.size(); ++i) + { + if (variants[i]->size() != expected_variant_sizes[i]) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Variant {} ({}) has size {}, but expected {}", i, variants[i]->getName(), variants[i]->size(), expected_variant_sizes[i]); + } +} + } diff --git a/src/Columns/ColumnVariant.h b/src/Columns/ColumnVariant.h index a1bdbe7d4039..c537397d46cb 100644 --- a/src/Columns/ColumnVariant.h +++ b/src/Columns/ColumnVariant.h @@ -347,6 +347,8 @@ class ColumnVariant final : public COWHelper, Colum void takeDynamicStructureFromColumn(const ColumnPtr & source_column) override; void fixDynamicStructure() override; + void validateState() const; + private: void insertFromImpl(const IColumn & src_, size_t n, const std::vector * global_discriminators_mapping); void insertRangeFromImpl(const IColumn & src_, size_t start, size_t length, const std::vector * global_discriminators_mapping, const Discriminator * skip_discriminator); diff --git a/src/Common/FailPoint.cpp b/src/Common/FailPoint.cpp index 476738471079..216407b8907d 100644 --- a/src/Common/FailPoint.cpp +++ b/src/Common/FailPoint.cpp @@ -129,6 +129,7 @@ static struct InitFiu ONCE(database_iceberg_gcs) \ REGULAR(rmt_delay_execute_drop_range) \ REGULAR(rmt_delay_commit_part) \ + REGULAR(patch_parts_reverse_column_order) \ ONCE(smt_commit_exception_before_op) \ ONCE(backup_add_empty_memory_table) \ REGULAR(refresh_task_stop_racing_for_running_refresh) diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index d31e3eec9c36..e306e270f8d6 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -689,6 +689,7 @@ The server successfully detected this situation and will download merged part fr M(CachedReadBufferReadFromCacheHits, "Number of times the read from filesystem cache hit the cache.", ValueType::Number) \ M(CachedReadBufferReadFromCacheMisses, "Number of times the read from filesystem cache miss the cache.", ValueType::Number) \ M(CachedReadBufferReadFromSourceMicroseconds, "Time reading from filesystem cache source (from remote filesystem, etc)", ValueType::Microseconds) \ + M(CachedReadBufferWaitReadBufferMicroseconds, "Time spend waiting for internal read buffer (includes cache waiting)", ValueType::Microseconds) \ M(CachedReadBufferReadFromCacheMicroseconds, "Time reading from filesystem cache", ValueType::Microseconds) \ M(CachedReadBufferReadFromSourceBytes, "Bytes read from filesystem cache source (from remote fs, etc)", ValueType::Bytes) \ M(CachedReadBufferReadFromCacheBytes, "Bytes read from filesystem cache", ValueType::Bytes) \ @@ -759,12 +760,6 @@ The server successfully detected this situation and will download merged part fr M(ThreadpoolReaderSubmitLookupInCacheMicroseconds, "How much time we spent checking if content is cached", ValueType::Microseconds) \ M(AsynchronousReaderIgnoredBytes, "Number of bytes ignored during asynchronous reading", ValueType::Bytes) \ \ - M(FileSegmentWaitReadBufferMicroseconds, "Metric per file segment. Time spend waiting for internal read buffer (includes cache waiting)", ValueType::Microseconds) \ - M(FileSegmentReadMicroseconds, "Metric per file segment. Time spend reading from file", ValueType::Microseconds) \ - M(FileSegmentCacheWriteMicroseconds, "Metric per file segment. Time spend writing data to cache", ValueType::Microseconds) \ - M(FileSegmentPredownloadMicroseconds, "Metric per file segment. Time spent pre-downloading data to cache (pre-downloading - finishing file segment download (after someone who failed to do that) up to the point current thread was requested to do)", ValueType::Microseconds) \ - M(FileSegmentUsedBytes, "Metric per file segment. How many bytes were actually used from current file segment", ValueType::Bytes) \ - \ M(ReadBufferSeekCancelConnection, "Number of seeks which lead to new connection (s3, http)", ValueType::Number) \ \ M(SleepFunctionCalls, "Number of times a sleep function (sleep, sleepEachRow) has been called.", ValueType::Number) \ diff --git a/src/Common/ZooKeeper/ZooKeeperImpl.cpp b/src/Common/ZooKeeper/ZooKeeperImpl.cpp index 55501ea1d568..20db52cca1f2 100644 --- a/src/Common/ZooKeeper/ZooKeeperImpl.cpp +++ b/src/Common/ZooKeeper/ZooKeeperImpl.cpp @@ -789,6 +789,15 @@ void ZooKeeper::sendThread() /// After we popped element from the queue, we must register callbacks (even in the case when expired == true right now), /// because they must not be lost (callbacks must be called because the user will wait for them). + if (info.watch) + info.request->has_watch = true; + + if (info.request->add_root_path) + info.request->addRootPath(args.chroot); + + /// Insert into operations AFTER mutating the request (has_watch, addRootPath) + /// to avoid a data race: receiveThread reads from operations concurrently, + /// and the request object is shared via shared_ptr. if (info.request->xid != close_xid) { CurrentMetrics::add(CurrentMetrics::ZooKeeperRequest); @@ -796,19 +805,11 @@ void ZooKeeper::sendThread() operations[info.request->xid] = info; } - if (info.watch) - { - info.request->has_watch = true; - } - if (requests_queue.isFinished()) { break; } - if (info.request->add_root_path) - info.request->addRootPath(args.chroot); - info.request->probably_sent = true; info.request->write(getWriteBuffer(), use_xid_64); flushWriteBuffer(); diff --git a/src/Coordination/KeeperConstants.cpp b/src/Coordination/KeeperConstants.cpp index d15c142df6f0..5da770f67566 100644 --- a/src/Coordination/KeeperConstants.cpp +++ b/src/Coordination/KeeperConstants.cpp @@ -209,12 +209,6 @@ M(ThreadpoolReaderSubmitReadSynchronouslyMicroseconds) \ M(ThreadpoolReaderSubmitLookupInCacheMicroseconds) \ M(AsynchronousReaderIgnoredBytes) \ -\ - M(FileSegmentWaitReadBufferMicroseconds) \ - M(FileSegmentReadMicroseconds) \ - M(FileSegmentCacheWriteMicroseconds) \ - M(FileSegmentPredownloadMicroseconds) \ - M(FileSegmentUsedBytes) \ \ M(ReadBufferSeekCancelConnection) \ \ diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index cdac684740aa..11be234d7584 100644 --- a/src/Coordination/KeeperStorage.cpp +++ b/src/Coordination/KeeperStorage.cpp @@ -559,6 +559,37 @@ struct KeeperStorageBase::Delta Operation operation; }; +std::string_view deltaTypeToString(const Operation & operation) +{ + /// Using std::visit ensures compile-time exhaustiveness checking - + /// adding a new type to Operation will cause a compilation error until handled here + return std::visit([](const T &) -> std::string_view + { + if constexpr (std::is_same_v) + return "CreateNodeDelta"; + else if constexpr (std::is_same_v) + return "RemoveNodeDelta"; + else if constexpr (std::is_same_v) + return "UpdateNodeStatDelta"; + else if constexpr (std::is_same_v) + return "UpdateNodeDataDelta"; + else if constexpr (std::is_same_v) + return "SetACLDelta"; + else if constexpr (std::is_same_v) + return "AddAuthDelta"; + else if constexpr (std::is_same_v) + return "ErrorDelta"; + else if constexpr (std::is_same_v) + return "SubDeltaEnd"; + else if constexpr (std::is_same_v) + return "FailedMultiDelta"; + else if constexpr (std::is_same_v) + return "CloseSessionDelta"; + else + static_assert(sizeof(T) == 0, "Unhandled Operation type in deltaTypeToString"); + }, operation); +} + KeeperStorageBase::DeltaIterator KeeperStorageBase::DeltaRange::begin() const { return begin_it; @@ -702,7 +733,7 @@ void KeeperStorage::UncommittedState::UncommittedNode::materializeACL template void KeeperStorage::UncommittedState::applyDelta(const Delta & delta, uint64_t * digest) { - chassert(!delta.path.empty()); + chassert(!delta.path.empty(), fmt::format("Path is empty for delta of type '{}'", deltaTypeToString(delta.operation))); UncommittedNode * uncommitted_node = nullptr; auto node_it = nodes.end(); @@ -822,7 +853,7 @@ bool KeeperStorage::UncommittedState::hasACL(int64_t session_id, bool template void KeeperStorage::UncommittedState::rollbackDelta(const Delta & delta) { - chassert(!delta.path.empty()); + chassert(!delta.path.empty(), fmt::format("Path is empty for delta of type '{}'", deltaTypeToString(delta.operation))); std::visit( [&](const DeltaType & operation) @@ -1195,16 +1226,17 @@ void KeeperStorage::applyUncommittedState(KeeperStorage & other, int6 zxids_to_apply.insert(transaction.zxid); } - auto it = uncommitted_state.deltas.begin(); - - for (; it != uncommitted_state.deltas.end(); ++it) + std::list uncommitted_deltas_to_apply; + for (const auto & uncommitted_delta : uncommitted_state.deltas) { - if (!zxids_to_apply.contains(it->zxid)) + if (!zxids_to_apply.contains(uncommitted_delta.zxid)) continue; - other.uncommitted_state.applyDelta(*it, /*digest=*/nullptr); - other.uncommitted_state.deltas.push_back(*it); + uncommitted_deltas_to_apply.push_back(uncommitted_delta); } + + other.uncommitted_state.applyDeltas(uncommitted_deltas_to_apply, /*digest=*/nullptr); + other.uncommitted_state.addDeltas(std::move(uncommitted_deltas_to_apply)); } template diff --git a/src/Core/BaseSettings.h b/src/Core/BaseSettings.h index 3f9e2ed2c010..39c54ecac96c 100644 --- a/src/Core/BaseSettings.h +++ b/src/Core/BaseSettings.h @@ -532,6 +532,10 @@ void BaseSettings::readBinary(ReadBuffer & in) size_t index = accessor.find(name); std::ignore = BaseSettingsHelpers::readFlags(in); + + if (index == static_cast(-1)) + BaseSettingsHelpers::throwSettingNotFound(name); + accessor.readBinary(*this, index, in); } } diff --git a/src/DataTypes/DataTypeFunction.cpp b/src/DataTypes/DataTypeFunction.cpp index 51eb20f023b5..6c2386610cfd 100644 --- a/src/DataTypes/DataTypeFunction.cpp +++ b/src/DataTypes/DataTypeFunction.cpp @@ -36,10 +36,16 @@ bool DataTypeFunction::equals(const IDataType & rhs) const void DataTypeFunction::updateHashImpl(SipHash & hash) const { + /// Argument types and return type can be nullptr when the lambda is not yet resolved. hash.update(argument_types.size()); for (const auto & arg_type : argument_types) - arg_type->updateHash(hash); + { + hash.update(arg_type != nullptr); + if (arg_type) + arg_type->updateHash(hash); + } + hash.update(return_type != nullptr); if (return_type) return_type->updateHash(hash); } diff --git a/src/DataTypes/Serializations/SerializationObject.cpp b/src/DataTypes/Serializations/SerializationObject.cpp index e164e10d6bd1..b8b7d49bfedb 100644 --- a/src/DataTypes/Serializations/SerializationObject.cpp +++ b/src/DataTypes/Serializations/SerializationObject.cpp @@ -217,8 +217,10 @@ void SerializationObject::enumerateStreams(EnumerateStreamsSettings & settings, { shared_data_serialization_version = SerializationObjectSharedData::SerializationVersion(settings.object_shared_data_serialization_version); /// Avoid creating buckets in shared data for Wide part if shared data is empty. - if (settings.data_part_type != MergeTreeDataPartType::Wide || !column_object->getStatistics() || !column_object->getStatistics()->shared_data_paths_statistics.empty()) + if (settings.data_part_type != MergeTreeDataPartType::Wide || !column_object->getStatistics() + || !column_object->getStatistics()->shared_data_paths_statistics.empty()) num_buckets = settings.object_shared_data_buckets; + } shared_data_serialization = std::make_shared(shared_data_serialization_version, dynamic_type, num_buckets); diff --git a/src/DataTypes/Serializations/SerializationObjectSharedData.cpp b/src/DataTypes/Serializations/SerializationObjectSharedData.cpp index 52676c4c09d2..78aa333eef86 100644 --- a/src/DataTypes/Serializations/SerializationObjectSharedData.cpp +++ b/src/DataTypes/Serializations/SerializationObjectSharedData.cpp @@ -682,10 +682,10 @@ std::shared_ptr SerializationO structure_state.last_granule_structure.clear(); size_t rows_to_read = limit + rows_offset; - StructureGranule current_granule; - std::swap(structure_state.last_granule_structure, current_granule); while (rows_to_read != 0) { + auto & current_granule = structure_state.last_granule_structure; + /// Calculate remaining rows in current granule that can be read. size_t remaining_rows_in_granule = current_granule.num_rows - current_granule.limit - current_granule.offset; @@ -736,12 +736,7 @@ std::shared_ptr SerializationO } result->push_back(current_granule); - current_granule.clear(); } - - /// Remember the state of the last read granule because it can be partially read. - if (!result->empty()) - structure_state.last_granule_structure = result->back(); } /// Add deserialized data into cache. diff --git a/src/DataTypes/Serializations/SerializationSparse.cpp b/src/DataTypes/Serializations/SerializationSparse.cpp index 5e6638a7df4b..2e8e8559e005 100644 --- a/src/DataTypes/Serializations/SerializationSparse.cpp +++ b/src/DataTypes/Serializations/SerializationSparse.cpp @@ -68,7 +68,10 @@ size_t deserializeOffsets(IColumn::Offsets & offsets, skipped_values_rows = 0; size_t max_rows_to_read = offset + limit; - if (max_rows_to_read && state.num_trailing_defaults >= max_rows_to_read) + if (max_rows_to_read == 0) + return 0; + + if (state.num_trailing_defaults >= max_rows_to_read) { state.num_trailing_defaults -= max_rows_to_read; return limit; @@ -111,7 +114,7 @@ size_t deserializeOffsets(IColumn::Offsets & offsets, size_t next_total_rows = total_rows + group_size; group_size += state.num_trailing_defaults; - if (max_rows_to_read && next_total_rows >= max_rows_to_read) + if (next_total_rows >= max_rows_to_read) { /// If it was not last group in granule, /// we have to add current non-default value at further reads. diff --git a/src/DataTypes/Serializations/SerializationVariant.cpp b/src/DataTypes/Serializations/SerializationVariant.cpp index 8bf1a829abe2..31dec1e5ab09 100644 --- a/src/DataTypes/Serializations/SerializationVariant.cpp +++ b/src/DataTypes/Serializations/SerializationVariant.cpp @@ -257,6 +257,9 @@ void SerializationVariant::serializeBinaryBulkWithMultipleStreamsAndUpdateVarian size_t & total_size_of_variants) const { const ColumnVariant & col = assert_cast(column); + if (offset == 0) + col.validateState(); + if (const size_t size = col.size(); limit == 0 || offset + limit > size) limit = size - offset; @@ -315,9 +318,17 @@ void SerializationVariant::serializeBinaryBulkWithMultipleStreamsAndUpdateVarian addVariantElementToPath(settings.path, i); /// We can use the same offset/limit as for whole Variant column if (i == non_empty_global_discr) - variant_serializations[i]->serializeBinaryBulkWithMultipleStreams(col.getVariantByGlobalDiscriminator(i), offset, limit, settings, variant_state->variant_states[i]); + { + const auto & variant_column = col.getVariantByGlobalDiscriminator(i); + if (variant_column.size() < offset + limit) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Variant {} has less rows ({}) than expected rows to serialize ({})", variant_names[i], variant_column.size(), offset + limit); + + variant_serializations[i]->serializeBinaryBulkWithMultipleStreams(variant_column, offset, limit, settings, variant_state->variant_states[i]); + } else + { variant_serializations[i]->serializeBinaryBulkWithMultipleStreams(col.getVariantByGlobalDiscriminator(i), col.getVariantByGlobalDiscriminator(i).size(), 0, settings, variant_state->variant_states[i]); + } settings.path.pop_back(); } variants_statistics[variant_names[non_empty_global_discr]] += limit; @@ -442,6 +453,10 @@ void SerializationVariant::serializeBinaryBulkWithMultipleStreamsAndUpdateVarian settings.path.push_back(Substream::VariantElements); for (size_t i = 0; i != variant_serializations.size(); ++i) { + const auto & variant_column = col.getVariantByGlobalDiscriminator(i); + if (variant_column.size() < variant_offsets_and_limits[i].first + variant_offsets_and_limits[i].second) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Variant {} has less rows ({}) than expected rows to serialize ({})", variant_names[i], variant_column.size(), variant_offsets_and_limits[i].first + variant_offsets_and_limits[i].second); + addVariantElementToPath(settings.path, i); variant_serializations[i]->serializeBinaryBulkWithMultipleStreams( col.getVariantByGlobalDiscriminator(i), @@ -639,6 +654,9 @@ void SerializationVariant::deserializeBinaryBulkWithMultipleStreams( last_non_empty_discr = i; } + if (col.getVariantByLocalDiscriminator(i).size() < variant_limits[i]) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of variant {} is expected to be not less than {} according to discriminators, but it is {}", variant_names[i], variant_limits[i], col.getVariantByLocalDiscriminator(i).size()); + variant_offsets.push_back(col.getVariantByLocalDiscriminator(i).size() - variant_limits[i]); } @@ -676,6 +694,8 @@ void SerializationVariant::deserializeBinaryBulkWithMultipleStreams( addColumnWithNumReadRowsToSubstreamsCache(cache, settings.path, col.getOffsetsPtr(), col.getOffsetsPtr()->size() - prev_size); } settings.path.pop_back(); + + col.validateState(); } std::pair, std::vector> SerializationVariant::deserializeCompactDiscriminators( diff --git a/src/Databases/DataLake/DatabaseDataLake.cpp b/src/Databases/DataLake/DatabaseDataLake.cpp index decc4da5ef0c..41693567bbd8 100644 --- a/src/Databases/DataLake/DatabaseDataLake.cpp +++ b/src/Databases/DataLake/DatabaseDataLake.cpp @@ -554,8 +554,12 @@ DatabaseTablesIteratorPtr DatabaseDataLake::getTablesIterator( if (filter_by_table_name && !filter_by_table_name(table_name)) continue; - [[maybe_unused]] bool inserted = tables.emplace(table_name, futures[future_index].get()).second; - chassert(inserted); + auto table_ptr = futures[future_index].get(); + if (table_ptr) + { + [[maybe_unused]] bool inserted = tables.emplace(table_name, table_ptr).second; + chassert(inserted); + } future_index++; } return std::make_unique(tables, getDatabaseName()); diff --git a/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp b/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp index 1775113d1bd1..3ffd3781d86a 100644 --- a/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp +++ b/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp @@ -19,12 +19,7 @@ namespace ProfileEvents { -extern const Event FileSegmentWaitReadBufferMicroseconds; -extern const Event FileSegmentReadMicroseconds; -extern const Event FileSegmentCacheWriteMicroseconds; -extern const Event FileSegmentPredownloadMicroseconds; -extern const Event FileSegmentUsedBytes; - +extern const Event CachedReadBufferWaitReadBufferMicroseconds; extern const Event CachedReadBufferReadFromSourceMicroseconds; extern const Event CachedReadBufferReadFromCacheMicroseconds; extern const Event CachedReadBufferCacheWriteMicroseconds; @@ -72,7 +67,7 @@ CachedOnDiskReadBufferFromFile::CachedOnDiskReadBufferFromFile( , cache(cache_) , settings(settings_) , read_until_position(read_until_position_ ? *read_until_position_ : file_size_) - , implementation_buffer_creator(implementation_buffer_creator_) + , implementation_buffer_creator(std::move(implementation_buffer_creator_)) , query_id(query_id_) , current_buffer_id(getRandomASCIIString(8)) , user(user_) @@ -106,13 +101,9 @@ void CachedOnDiskReadBufferFromFile::appendFilesystemCacheLog( .file_segment_size = range.size(), .read_from_cache_attempted = true, .read_buffer_id = current_buffer_id, - .profile_counters = std::make_shared( - current_file_segment_counters.getPartiallyAtomicSnapshot()), .user_id = user.user_id, }; - current_file_segment_counters.reset(); - switch (type) { case CachedOnDiskReadBufferFromFile::ReadType::CACHED: @@ -443,10 +434,7 @@ CachedOnDiskReadBufferFromFile::getImplementationBuffer(FileSegment & file_segme read_buffer_for_file_segment->getFileOffsetOfBufferEnd(), file_segment.getInfoForLog()); - current_file_segment_counters.increment( - ProfileEvents::FileSegmentWaitReadBufferMicroseconds, watch.elapsedMicroseconds()); - - ProfileEvents::increment(ProfileEvents::FileSegmentWaitReadBufferMicroseconds, watch.elapsedMicroseconds()); + ProfileEvents::increment(ProfileEvents::CachedReadBufferWaitReadBufferMicroseconds, watch.elapsedMicroseconds()); [[maybe_unused]] auto download_current_segment = read_type == ReadType::REMOTE_FS_READ_AND_PUT_IN_CACHE; chassert(download_current_segment == file_segment.isDownloader()); @@ -582,8 +570,6 @@ bool CachedOnDiskReadBufferFromFile::predownload(FileSegment & file_segment) Stopwatch predownload_watch(CLOCK_MONOTONIC); SCOPE_EXIT({ predownload_watch.stop(); - current_file_segment_counters.increment( - ProfileEvents::FileSegmentPredownloadMicroseconds, predownload_watch.elapsedMicroseconds()); }); OpenTelemetry::SpanHolder span("CachedOnDiskReadBufferFromFile::predownload"); @@ -615,7 +601,6 @@ bool CachedOnDiskReadBufferFromFile::predownload(FileSegment & file_segment) watch.stop(); auto elapsed = watch.elapsedMicroseconds(); - current_file_segment_counters.increment(ProfileEvents::FileSegmentReadMicroseconds, elapsed); ProfileEvents::increment(ProfileEvents::CachedReadBufferReadFromSourceMicroseconds, elapsed); } @@ -801,7 +786,6 @@ bool CachedOnDiskReadBufferFromFile::writeCache(char * data, size_t size, size_t watch.stop(); auto elapsed = watch.elapsedMicroseconds(); - current_file_segment_counters.increment(ProfileEvents::FileSegmentCacheWriteMicroseconds, elapsed); ProfileEvents::increment(ProfileEvents::CachedReadBufferCacheWriteMicroseconds, elapsed); ProfileEvents::increment(ProfileEvents::CachedReadBufferCacheWriteBytes, size); @@ -1013,7 +997,6 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep() watch.stop(); auto elapsed = watch.elapsedMicroseconds(); - current_file_segment_counters.increment(ProfileEvents::FileSegmentReadMicroseconds, elapsed); // We don't support implementation_buffer implementations that use nextimpl_working_buffer_offset. chassert(implementation_buffer->position() == implementation_buffer->buffer().begin()); @@ -1117,8 +1100,6 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep() swap.reset(); - current_file_segment_counters.increment(ProfileEvents::FileSegmentUsedBytes, available()); - if (size == 0 && file_offset_of_buffer_end < read_until_position) { size_t cache_file_size = getFileSizeFromReadBuffer(*implementation_buffer); @@ -1178,6 +1159,8 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep() file_segment.getInfoForLog()); } + swap.reset(); + // No necessary because of the SCOPE_EXIT above, but useful for logging below. if (download_current_segment) file_segment.completePartAndResetDownloader(); diff --git a/src/Disks/IO/CachedOnDiskReadBufferFromFile.h b/src/Disks/IO/CachedOnDiskReadBufferFromFile.h index 4f26a94f91f8..455b454fa39c 100644 --- a/src/Disks/IO/CachedOnDiskReadBufferFromFile.h +++ b/src/Disks/IO/CachedOnDiskReadBufferFromFile.h @@ -142,9 +142,8 @@ class CachedOnDiskReadBufferFromFile : public ReadBufferFromFileBase FileCacheUserInfo user; bool allow_seeks_after_first_read; - [[maybe_unused]]bool use_external_buffer; + bool use_external_buffer; CurrentMetrics::Increment metric_increment{CurrentMetrics::FilesystemCacheReadBuffers}; - ProfileEvents::Counters current_file_segment_counters; FileCacheQueryLimit::QueryContextHolderPtr query_context_holder; diff --git a/src/Disks/IO/CachedOnDiskWriteBufferFromFile.cpp b/src/Disks/IO/CachedOnDiskWriteBufferFromFile.cpp index 566a57a0871d..0d7adc934dc2 100644 --- a/src/Disks/IO/CachedOnDiskWriteBufferFromFile.cpp +++ b/src/Disks/IO/CachedOnDiskWriteBufferFromFile.cpp @@ -215,7 +215,6 @@ void FileSegmentRangeWriter::appendFilesystemCacheLog(const FileSegment & file_s .file_segment_size = file_segment_range.size(), .read_from_cache_attempted = false, .read_buffer_id = {}, - .profile_counters = nullptr, }; cache_log->add(std::move(elem)); diff --git a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp index 8d483a5581dd..911273d57a7d 100644 --- a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp +++ b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp @@ -85,7 +85,7 @@ std::unique_ptr CachedObjectStorage::readObject( /// NOL auto global_context = Context::getGlobalContextInstance(); auto modified_read_settings = read_settings.withNestedBuffer(); - auto read_buffer_creator = [=, this]() + auto read_buffer_creator = [this, object, read_settings, read_hint, file_size]() { return object_storage->readObject(object, patchSettings(read_settings), read_hint, file_size); }; diff --git a/src/Formats/JSONExtractTree.cpp b/src/Formats/JSONExtractTree.cpp index ae60f9ce7e0a..e77fa113fa4f 100644 --- a/src/Formats/JSONExtractTree.cpp +++ b/src/Formats/JSONExtractTree.cpp @@ -1996,7 +1996,7 @@ class ObjectJSONNode : public JSONExtractTreeNode if (!sorted_paths_to_skip.empty()) { auto it = std::lower_bound(sorted_paths_to_skip.begin(), sorted_paths_to_skip.end(), path); - if (it != sorted_paths_to_skip.begin() && path.starts_with(*std::prev(it))) + if (it != sorted_paths_to_skip.begin() && path.starts_with(*std::prev(it) + ".")) return true; } diff --git a/src/Functions/DateTimeTransforms.h b/src/Functions/DateTimeTransforms.h index 745ac3650300..20d096330965 100644 --- a/src/Functions/DateTimeTransforms.h +++ b/src/Functions/DateTimeTransforms.h @@ -396,6 +396,7 @@ struct ToYearWeekImpl return yw.first * 100 + yw.second; } + static constexpr bool hasMonotonicity() { return true; } using FactorTransform = ZeroTransform; }; @@ -431,6 +432,7 @@ struct ToStartOfWeekImpl return time_zone.toFirstDayNumOfWeek(ExtendedDayNum(d), week_mode); } + static constexpr bool hasMonotonicity() { return true; } using FactorTransform = ZeroTransform; }; @@ -464,6 +466,7 @@ struct ToLastDayOfWeekImpl return time_zone.toLastDayNumOfWeek(ExtendedDayNum(d), week_mode); } + static constexpr bool hasMonotonicity() { return true; } using FactorTransform = ZeroTransform; }; @@ -494,6 +497,11 @@ struct ToWeekImpl return yw.second; } + /// toWeek() is not monotonic because week numbers can wrap at year boundaries + /// (e.g. ISO week 52 -> week 1 in late December), depending on the week_mode. + /// See https://github.com/ClickHouse/ClickHouse/issues/90240 + static constexpr bool hasMonotonicity() { return false; } + using FactorTransform = ToStartOfYearImpl; }; @@ -1591,6 +1599,7 @@ struct ToDayOfWeekImpl return time_zone.toDayOfWeek(DayNum(d), mode); } + static constexpr bool hasMonotonicity() { return true; } using FactorTransform = ToMondayImpl; }; diff --git a/src/Functions/FunctionFile.cpp b/src/Functions/FunctionFile.cpp index 1b3fc680171d..648b8b97d124 100644 --- a/src/Functions/FunctionFile.cpp +++ b/src/Functions/FunctionFile.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -31,7 +32,14 @@ class FunctionFile : public IFunction, WithContext { public: static constexpr auto name = "file"; - static FunctionPtr create(ContextPtr context_) { return std::make_shared(context_); } + + static FunctionPtr create(ContextPtr context_) + { + if (context_ && context_->getApplicationType() != Context::ApplicationType::LOCAL) + context_->checkAccess(AccessType::READ, toStringSource(AccessTypeObjects::Source::FILE)); + + return std::make_shared(context_); + } explicit FunctionFile(ContextPtr context_) : WithContext(context_) {} bool isVariadic() const override { return true; } diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h index ec7e23d9f587..ba33c9e5abe2 100644 --- a/src/Functions/FunctionsConversion.h +++ b/src/Functions/FunctionsConversion.h @@ -3588,13 +3588,13 @@ struct ToDateMonotonicity } else if ( ((left.getType() == Field::Types::UInt64 || left.isNull()) && (right.getType() == Field::Types::UInt64 || right.isNull()) - && ((left.isNull() || left.safeGet() < 0xFFFF) && (right.isNull() || right.safeGet() >= 0xFFFF))) + && ((left.isNull() || left.safeGet() <= DATE_LUT_MAX_DAY_NUM) && (right.isNull() || right.safeGet() > DATE_LUT_MAX_DAY_NUM))) || ((left.getType() == Field::Types::Int64 || left.isNull()) && (right.getType() == Field::Types::Int64 || right.isNull()) - && ((left.isNull() || left.safeGet() < 0xFFFF) && (right.isNull() || right.safeGet() >= 0xFFFF))) + && ((left.isNull() || left.safeGet() <= DATE_LUT_MAX_DAY_NUM) && (right.isNull() || right.safeGet() > DATE_LUT_MAX_DAY_NUM))) || (( (left.getType() == Field::Types::Float64 || left.isNull()) && (right.getType() == Field::Types::Float64 || right.isNull()) - && ((left.isNull() || left.safeGet() < 0xFFFF) && (right.isNull() || right.safeGet() >= 0xFFFF)))) + && ((left.isNull() || left.safeGet() <= DATE_LUT_MAX_DAY_NUM) && (right.isNull() || right.safeGet() > DATE_LUT_MAX_DAY_NUM)))) || !isNativeNumber(type)) { return {}; diff --git a/src/Functions/IFunctionCustomWeek.h b/src/Functions/IFunctionCustomWeek.h index 99941e5c186c..9978586506c0 100644 --- a/src/Functions/IFunctionCustomWeek.h +++ b/src/Functions/IFunctionCustomWeek.h @@ -41,7 +41,7 @@ class IFunctionCustomWeek : public IFunction return true; } - bool hasInformationAboutMonotonicity() const override { return true; } + bool hasInformationAboutMonotonicity() const override { return Transform::hasMonotonicity(); } Monotonicity getMonotonicityForRange(const IDataType & type, const Field & left, const Field & right) const override { diff --git a/src/Functions/array/arrayIndex.h b/src/Functions/array/arrayIndex.h index d829240946a5..162fd90d7bec 100644 --- a/src/Functions/array/arrayIndex.h +++ b/src/Functions/array/arrayIndex.h @@ -125,9 +125,10 @@ struct Main return left[i] >= right; } - static constexpr bool lessOrEqual(const IColumn & left, const Result & right, size_t i, size_t) noexcept { return left[i] >= right; } + static bool lessOrEqual(const IColumn & left, const Result & right, size_t i, size_t) { return left[i] >= right; } - static constexpr bool lessOrEqual(const Array& arr, const Field& rhs, size_t pos, size_t) noexcept { + static bool lessOrEqual(const Array & arr, const Field & rhs, size_t pos, size_t) + { return accurateLessOrEqual(rhs, arr[pos]); } @@ -666,7 +667,7 @@ class FunctionArrayIndex : public IFunction * @return {nullptr, null_map_item} if there are four arguments but the third is missing. * @return {null_map_data, null_map_item} if there are four arguments. */ - static NullMaps getNullMaps(const ColumnsWithTypeAndName & arguments) noexcept + static NullMaps getNullMaps(const ColumnsWithTypeAndName & arguments) { if (arguments.size() < 3) return {nullptr, nullptr}; diff --git a/src/Functions/bech32.cpp b/src/Functions/bech32.cpp index 3391d164b5e2..b469d2615e20 100644 --- a/src/Functions/bech32.cpp +++ b/src/Functions/bech32.cpp @@ -99,6 +99,7 @@ namespace ErrorCodes extern const int ILLEGAL_COLUMN; extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION; extern const int TOO_MANY_ARGUMENTS_FOR_FUNCTION; + extern const int BAD_ARGUMENTS; } /// Encode string to Bech32 or Bech32m address @@ -278,6 +279,24 @@ class EncodeToBech32Representation : public IFunction uint8_t witness_version = have_witness_version ? witness_version_col->getUInt(i) : default_witness_version; + /** Witness version is a versioning mechanism for Bitcoin SegWit addresses: + * - Version 0: Original SegWit (BIP-141, BIP-173), uses Bech32 encoding + * - Version 1: Taproot (BIP-341, BIP-350), uses Bech32m encoding + * - Versions 2-16: Reserved for future protocol upgrades + * + * The witness version must be in range [0, 16] per the SegWit specification. + * It also must fit in the bech32 charset which is 5 bits (0-31), otherwise + * indexing into the CHARSET array in bech32::encode will cause a buffer overflow. + */ + if (witness_version > 16) + { + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Invalid witness version {} for function {}, expected value in range [0, 16]", + witness_version, + name); + } + bech32_data input_5bit; input_5bit.push_back(witness_version); convertbits<8, 5, true>(input_5bit, input); /// squash input from 8-bit -> 5-bit bytes diff --git a/src/Functions/formatDateTime.cpp b/src/Functions/formatDateTime.cpp index f4fc2ba6de96..5e1a9a210950 100644 --- a/src/Functions/formatDateTime.cpp +++ b/src/Functions/formatDateTime.cpp @@ -603,7 +603,9 @@ class FunctionFormatDateTimeImpl : public IFunction for (UInt32 i = scale; i > 0; --i) { - dest[i - 1] += fractional_second % 10; + /// Use assignment instead of `+=` to avoid reading uninitialized memory + /// when the output buffer is not pre-filled with the template (variable-width formatters path). + dest[i - 1] = '0' + (fractional_second % 10); fractional_second /= 10; } return scale; @@ -617,7 +619,9 @@ class FunctionFormatDateTimeImpl : public IFunction for (UInt32 i = scale; i > 0; --i) { - dest[i - 1] += fractional_second % 10; + /// Use assignment instead of `+=` to avoid reading uninitialized memory + /// when the output buffer is not pre-filled with the template (variable-width formatters path). + dest[i - 1] = '0' + (fractional_second % 10); fractional_second /= 10; } return scale; @@ -819,6 +823,11 @@ class FunctionFormatDateTimeImpl : public IFunction return min_represent_digits; } auto str = toString(fractional_second); + /// Left-pad with zeros to `scale` digits, because `toString` does not preserve leading zeros + /// (e.g. fractional_second=5, scale=3 gives "5" but we need "005"). + /// Without this, the buffer would be left partially uninitialized. + if (str.size() < scale) + str.insert(0, scale - str.size(), '0'); if (min_represent_digits > scale) { for (UInt64 i = 0; i < min_represent_digits - scale; ++i) diff --git a/src/Functions/reverseUTF8.cpp b/src/Functions/reverseUTF8.cpp index 15deed86c256..00d400b11f0a 100644 --- a/src/Functions/reverseUTF8.cpp +++ b/src/Functions/reverseUTF8.cpp @@ -46,26 +46,31 @@ struct ReverseUTF8Impl ColumnString::Offset j = prev_offset; while (j < offsets[i]) { + size_t remaining = offsets[i] - j; + + unsigned int char_len; if (data[j] < 0xC0) - { - res_data[offsets[i] + prev_offset - 1 - j] = data[j]; - j += 1; - } + char_len = 1; else if (data[j] < 0xE0) - { - memcpy(&res_data[offsets[i] + prev_offset - 1 - j - 1], &data[j], 2); - j += 2; - } + char_len = 2; else if (data[j] < 0xF0) + char_len = 3; + else + char_len = 4; + + /// If not enough bytes remaining, treat as single byte (invalid UTF-8). + if (char_len > remaining) + char_len = 1; + + if (char_len == 1) { - memcpy(&res_data[offsets[i] + prev_offset - 1 - j - 2], &data[j], 3); - j += 3; + res_data[offsets[i] + prev_offset - 1 - j] = data[j]; } else { - memcpy(&res_data[offsets[i] + prev_offset - 1 - j - 3], &data[j], 4); - j += 4; + memcpy(&res_data[offsets[i] + prev_offset - j - char_len], &data[j], char_len); } + j += char_len; } prev_offset = offsets[i]; diff --git a/src/IO/AsynchronousReadBufferFromFileDescriptor.cpp b/src/IO/AsynchronousReadBufferFromFileDescriptor.cpp index c3ffa173cac3..ce8d3aa7091a 100644 --- a/src/IO/AsynchronousReadBufferFromFileDescriptor.cpp +++ b/src/IO/AsynchronousReadBufferFromFileDescriptor.cpp @@ -209,7 +209,7 @@ off_t AsynchronousReadBufferFromFileDescriptor::seek(off_t offset, int whence) } else if (whence == SEEK_CUR) { - new_pos = file_offset_of_buffer_end - (working_buffer.end() - pos) + offset; + new_pos = static_cast(getPosition()) + offset; } else { @@ -217,13 +217,15 @@ off_t AsynchronousReadBufferFromFileDescriptor::seek(off_t offset, int whence) } /// Position is unchanged. - if (new_pos + (working_buffer.end() - pos) == file_offset_of_buffer_end) + if (new_pos == static_cast(getPosition())) return new_pos; bool read_from_prefetch = false; while (true) { - if (file_offset_of_buffer_end - working_buffer.size() <= new_pos && new_pos <= file_offset_of_buffer_end) + if (bytes_to_ignore == 0 + && file_offset_of_buffer_end - working_buffer.size() <= new_pos + && new_pos <= file_offset_of_buffer_end) { /// Position is still inside the buffer. /// Probably it is at the end of the buffer - then we will load data on the following 'next' call. @@ -289,6 +291,7 @@ void AsynchronousReadBufferFromFileDescriptor::rewind() working_buffer.resize(0); pos = working_buffer.begin(); file_offset_of_buffer_end = 0; + bytes_to_ignore = 0; } std::optional AsynchronousReadBufferFromFileDescriptor::tryGetFileSize() diff --git a/src/IO/AsynchronousReadBufferFromFileDescriptor.h b/src/IO/AsynchronousReadBufferFromFileDescriptor.h index e15a41474256..fe4fa55d886f 100644 --- a/src/IO/AsynchronousReadBufferFromFileDescriptor.h +++ b/src/IO/AsynchronousReadBufferFromFileDescriptor.h @@ -62,7 +62,7 @@ class AsynchronousReadBufferFromFileDescriptor : public ReadBufferFromFileBase off_t getPosition() override { - return file_offset_of_buffer_end - (working_buffer.end() - pos); + return file_offset_of_buffer_end - (working_buffer.end() - pos) + bytes_to_ignore; } /// If 'offset' is small enough to stay in buffer after seek, then true seek in file does not happen. diff --git a/src/Interpreters/Access/InterpreterGrantQuery.cpp b/src/Interpreters/Access/InterpreterGrantQuery.cpp index 2b5381e96400..1b43109a05f3 100644 --- a/src/Interpreters/Access/InterpreterGrantQuery.cpp +++ b/src/Interpreters/Access/InterpreterGrantQuery.cpp @@ -319,9 +319,20 @@ namespace if (!roles_to_revoke.empty()) { if (admin_option) + { grantee.granted_roles.revokeAdminOption(grantee.granted_roles.findGrantedWithAdminOption(roles_to_revoke)); + } else - grantee.granted_roles.revoke(grantee.granted_roles.findGranted(roles_to_revoke)); + { + auto found_roles_to_revoke = grantee.granted_roles.findGranted(roles_to_revoke); + grantee.granted_roles.revoke(found_roles_to_revoke); + + if constexpr (std::is_same_v) + { + for (const auto & id : found_roles_to_revoke) + grantee.default_roles.ids.erase(id); + } + } } if (!roles_to_grant.empty()) diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index 300a43f3d779..32971bad2ef2 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -842,6 +842,9 @@ static ColumnWithTypeAndName executeActionForPartialResult(const ActionsDAG::Nod case ActionsDAG::ActionType::ARRAY_JOIN: { auto key = arguments.at(0); + if (!key.column) + break; + key.column = key.column->convertToFullColumnIfConst(); const auto * array = getArrayJoinColumnRawPtr(key.column); diff --git a/src/Interpreters/CancellationChecker.cpp b/src/Interpreters/CancellationChecker.cpp index 1d037a5f14bf..5812ae193ca8 100644 --- a/src/Interpreters/CancellationChecker.cpp +++ b/src/Interpreters/CancellationChecker.cpp @@ -10,6 +10,14 @@ namespace DB { +/// Align all timeouts to a grid to allow batching of timeout processing. +/// Tasks may be cancelled slightly later than their exact timeout, but never before. +static constexpr UInt64 CANCELLATION_GRID_MS = 100; + +/// Maximum allowed timeout is 1 year in milliseconds. +/// This prevents overflow in chrono calculations and ensures reasonable behavior. +static constexpr Int64 MAX_TIMEOUT_MS = 365LL * 24 * 60 * 60 * 1000; + struct CancellationChecker::QueryToTrack { QueryToTrack(QueryStatusPtr query_, UInt64 timeout_, UInt64 endtime_, OverflowMode overflow_mode_) @@ -63,38 +71,46 @@ void CancellationChecker::terminateThread() cond_var.notify_all(); } -bool CancellationChecker::removeQueryFromSet(QueryStatusPtr query) -{ - auto it = std::ranges::find(query_set, query, &QueryToTrack::query); - - if (it == query_set.end()) - return false; - - LOG_TEST(log, "Removing query {} from done tasks", query->getClientInfo().current_query_id); - query_set.erase(it); - return true; -} - -void CancellationChecker::appendTask(const QueryStatusPtr & query, const Int64 timeout, OverflowMode overflow_mode) +bool CancellationChecker::appendTask(const QueryStatusPtr & query, const Int64 timeout, OverflowMode overflow_mode) { if (timeout <= 0) // Avoid cases when the timeout is less or equal zero { LOG_TEST(log, "Did not add the task because the timeout is 0, query_id: {}", query->getClientInfo().current_query_id); - return; + return false; } + + /// Cap timeout to 1 year to prevent overflow in chrono calculations. + /// std::condition_variable::wait_for converts milliseconds to nanoseconds internally + /// (multiplying by 1,000,000), which overflows for values close to INT64_MAX. + const Int64 capped_timeout = std::min(timeout, MAX_TIMEOUT_MS); + std::unique_lock lock(m); - LOG_TEST(log, "Added to set. query: {}, timeout: {} milliseconds", query->getInfo().query, timeout); + LOG_TEST(log, "Added to set. query: {}, timeout: {} milliseconds", query->getInfo().query, capped_timeout); const auto now = std::chrono::steady_clock::now(); - const UInt64 end_time = std::chrono::duration_cast(now.time_since_epoch()).count() + timeout; - query_set.emplace(query, timeout, end_time, overflow_mode); - cond_var.notify_all(); + const UInt64 now_ms = std::chrono::duration_cast(now.time_since_epoch()).count(); + /// Round up to the next grid boundary to enable batching of timeout checks. + /// This ensures tasks are never cancelled before their timeout, only slightly after. + const UInt64 end_time = ((now_ms + capped_timeout + CANCELLATION_GRID_MS - 1) / CANCELLATION_GRID_MS) * CANCELLATION_GRID_MS; + auto iter = query_set.emplace(query, capped_timeout, end_time, overflow_mode); + if (iter == query_set.begin()) // Only notify if the new task is the earliest one + cond_var.notify_all(); + return true; } void CancellationChecker::appendDoneTasks(const QueryStatusPtr & query) { - std::unique_lock lock(m); - removeQueryFromSet(query); - cond_var.notify_all(); + std::unique_lock lock(m); + + auto it = std::ranges::find(query_set, query, &QueryToTrack::query); + if (it == query_set.end()) + return; + + LOG_TEST(log, "Removing query {} from done tasks", query->getClientInfo().current_query_id); + query_set.erase(it); + + // Note that there is no need to notify the worker thread here. Even if we have just removed the earliest task, + // it will wake up before the next task anyway and fix its timeout to a proper value on wake-up. + // This optimization avoids unnecessary contention on the mutex. } void CancellationChecker::workerFunction() @@ -107,20 +123,19 @@ void CancellationChecker::workerFunction() while (!stop_thread) { UInt64 now_ms = 0; - std::chrono::steady_clock::duration duration_milliseconds = std::chrono::milliseconds(0); - if (!query_set.empty()) { - const auto next_task_it = query_set.begin(); - - // Convert UInt64 timeout to std::chrono::steady_clock::time_point - duration_milliseconds = std::chrono::milliseconds(next_task_it->timeout); - - auto end_time_ms = next_task_it->endtime; auto now = std::chrono::steady_clock::now(); now_ms = std::chrono::duration_cast(now.time_since_epoch()).count(); - if ((end_time_ms <= now_ms && duration_milliseconds.count() != 0)) + + /// Batch all tasks that have reached their deadline. + /// Since deadlines are aligned to a grid, multiple tasks often expire together. + while (!query_set.empty()) { + auto next_task_it = query_set.begin(); + if (next_task_it->endtime > now_ms || next_task_it->timeout == 0) + break; + LOG_DEBUG( log, "Cancelling the task because of the timeout: {} ms, query_id: {}", @@ -129,7 +144,6 @@ void CancellationChecker::workerFunction() tasks_to_cancel.push_back(*next_task_it); query_set.erase(next_task_it); - continue; } } @@ -142,20 +156,24 @@ void CancellationChecker::workerFunction() continue; } - /// if last time we checked there were no queries, + /// if there are no queries, /// wakeup on first query that was added so we can setup /// proper timeout for waking up the thread - if (!now_ms) + if (query_set.empty()) { cond_var.wait(lock, [&] { return stop_thread || !query_set.empty(); }); } else { - chassert(duration_milliseconds.count()); + chassert(!query_set.empty()); cond_var.wait_for( lock, - duration_milliseconds, - [&, now_ms] { return stop_thread || (!query_set.empty() && query_set.begin()->endtime < now_ms); }); + std::chrono::milliseconds(query_set.begin()->endtime - now_ms), + [&] { + /// Use fresh time to avoid spinning when the predicate is re-evaluated after spurious wakeups. + UInt64 fresh_now_ms = std::chrono::duration_cast(std::chrono::steady_clock::now().time_since_epoch()).count(); + return stop_thread || (!query_set.empty() && query_set.begin()->endtime <= fresh_now_ms); + }); } } } diff --git a/src/Interpreters/CancellationChecker.h b/src/Interpreters/CancellationChecker.h index fcc9d43f1543..7a4f0e089de6 100644 --- a/src/Interpreters/CancellationChecker.h +++ b/src/Interpreters/CancellationChecker.h @@ -42,9 +42,6 @@ class CancellationChecker std::mutex m; std::condition_variable cond_var; - // Function to execute when a task's endTime is reached - bool removeQueryFromSet(QueryStatusPtr query); - static void cancelTask(CancellationChecker::QueryToTrack task); const LoggerPtr log; @@ -59,8 +56,8 @@ class CancellationChecker void terminateThread(); - // Method to add a new task to the multiset - void appendTask(const QueryStatusPtr & query, Int64 timeout, OverflowMode overflow_mode); + // Method to add a new task to the multiset. Returns true if the task was added. + [[nodiscard]] bool appendTask(const QueryStatusPtr & query, Int64 timeout, OverflowMode overflow_mode); // Used when some task is done void appendDoneTasks(const QueryStatusPtr & query); diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 455ffe68508c..77e144f91b2c 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -5033,25 +5033,32 @@ void Context::startClusterDiscovery() /// On repeating calls updates existing clusters and adds new clusters, doesn't delete old clusters void Context::setClustersConfig(const ConfigurationPtr & config, bool enable_discovery, const String & config_name) { - std::lock_guard lock(shared->clusters_mutex); - if (ConfigHelper::getBool(*config, "allow_experimental_cluster_discovery") && enable_discovery && !shared->cluster_discovery) { - shared->cluster_discovery = std::make_unique(*config, getGlobalContext(), getMacros()); - } + std::lock_guard lock(shared->clusters_mutex); + if (ConfigHelper::getBool(*config, "allow_experimental_cluster_discovery") && enable_discovery && !shared->cluster_discovery) + { + shared->cluster_discovery = std::make_unique(*config, getGlobalContext(), getMacros()); + } - /// Do not update clusters if this part of config wasn't changed. - if (shared->clusters && isSameConfiguration(*config, *shared->clusters_config, config_name)) - return; + /// Do not update clusters if this part of config wasn't changed. + if (shared->clusters && isSameConfiguration(*config, *shared->clusters_config, config_name)) + return; - auto old_clusters_config = shared->clusters_config; - shared->clusters_config = config; + auto old_clusters_config = shared->clusters_config; + shared->clusters_config = config; - if (!shared->clusters) - shared->clusters = std::make_shared(*shared->clusters_config, *settings, getMacros(), config_name); - else - shared->clusters->updateClusters(*shared->clusters_config, *settings, config_name, old_clusters_config); + if (!shared->clusters) + shared->clusters = std::make_shared(*shared->clusters_config, *settings, getMacros(), config_name); + else + shared->clusters->updateClusters(*shared->clusters_config, *settings, config_name, old_clusters_config); - ++shared->clusters_version; + ++shared->clusters_version; + } + { + SharedLockGuard lock(shared->mutex); + if (shared->ddl_worker) + shared->ddl_worker->notifyHostIDsUpdated(); + } } size_t Context::getClustersVersion() const diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp index a0b48e0d1cfb..53efd9b77975 100644 --- a/src/Interpreters/DDLWorker.cpp +++ b/src/Interpreters/DDLWorker.cpp @@ -2,13 +2,13 @@ #include #include #include +#include #include #include #include #include #include #include -#include #include #include #include @@ -188,6 +188,26 @@ ZooKeeperPtr DDLWorker::getAndSetZooKeeper() return current_zookeeper; } +void DDLWorker::notifyHostIDsUpdated() +{ + LOG_INFO(log, "Host IDs updated"); + host_ids_updated = true; +} + +void DDLWorker::updateHostIDs(const std::vector & hosts) +{ + std::lock_guard lock{checked_host_id_set_mutex}; + for (const auto & host : hosts) + { + if (!checked_host_id_set.contains(host.toString())) + { + LOG_INFO(log, "Found new host ID: {}", host.toString()); + notifyHostIDsUpdated(); + return; + } + } +} + DDLTaskPtr DDLWorker::initAndCheckTask(const String & entry_name, String & out_reason, const ZooKeeperPtr & zookeeper, bool /*dry_run*/) { @@ -223,6 +243,7 @@ DDLTaskPtr DDLWorker::initAndCheckTask(const String & entry_name, String & out_r { /// Stage 1: parse entry task->entry.parse(node_data); + updateHostIDs(task->entry.hosts); } catch (...) { @@ -1203,14 +1224,18 @@ void DDLWorker::runMainThread() bool reinitialized = !initialized; /// Reinitialize DDLWorker state (including ZooKeeper connection) if required - if (!initialized) + if (reinitialized) { /// Stopped if (!initializeMainThread()) break; + LOG_DEBUG(log, "Initialized DDLWorker thread"); } + if (host_ids_updated.exchange(false)) + markReplicasActive(/*reinitialized=*/false); + cleanup_event->set(); try { @@ -1277,25 +1302,16 @@ void DDLWorker::runMainThread() void DDLWorker::initializeReplication() { auto zookeeper = getZooKeeper(); - zookeeper->createAncestors(fs::path(replicas_dir) / ""); - - NameSet host_id_set; - for (const auto & it : context->getClusters()) - { - auto cluster = it.second; - for (const auto & host_ids : cluster->getHostIDs()) - for (const auto & host_id : host_ids) - host_id_set.emplace(host_id); - } - - createReplicaDirs(zookeeper, host_id_set); } void DDLWorker::createReplicaDirs(const ZooKeeperPtr & zookeeper, const NameSet & host_ids) { for (const auto & host_id : host_ids) + { + LOG_INFO(log, "Creating replica dir for host id {}", host_id); zookeeper->createAncestors(fs::path(replicas_dir) / host_id / ""); + } } void DDLWorker::markReplicasActive(bool reinitialized) @@ -1319,22 +1335,86 @@ void DDLWorker::markReplicasActive(bool reinitialized) const auto maybe_secure_port = context->getTCPPortSecure(); const auto port = context->getTCPPort(); + auto all_host_ids = getAllHostIDsFromClusters(); + + // Add interserver IO host IDs for Replicated DBs + try + { + auto host_port = context->getInterserverIOAddress(); + HostID interserver_io_host_id = {host_port.first, port}; + all_host_ids.emplace(interserver_io_host_id.toString()); + LOG_INFO(log, "Add interserver IO host ID {}", interserver_io_host_id.toString()); + if (maybe_secure_port) + { + HostID interserver_io_secure_host_id = {host_port.first, *maybe_secure_port}; + all_host_ids.emplace(interserver_io_secure_host_id.toString()); + LOG_INFO(log, "Add interserver IO secure host ID {}", interserver_io_secure_host_id.toString()); + } + } + catch (const Exception & e) + { + LOG_INFO(log, "Unable to get interserver IO address, error {}", e.what()); + } + + createReplicaDirs(zookeeper, all_host_ids); + + if (reinitialized) + { + // Reset all active_node_holders + for (auto & it : active_node_holders) + { + auto & active_node_holder = it.second.second; + if (active_node_holder) + active_node_holder->setAlreadyRemoved(); + active_node_holder.reset(); + } + active_node_holders.clear(); + } + + Coordination::Stat replicas_stat; Strings host_ids = zookeeper->getChildren(replicas_dir, &replicas_stat); NameSet local_host_ids; + NameSet checking_host_ids; + checking_host_ids.reserve(host_ids.size()); for (const auto & host_id : host_ids) { + bool is_self_host = false; try { HostID host = HostID::fromString(host_id); - if (DDLTask::isSelfHostID(log, host, maybe_secure_port, port)) - local_host_ids.emplace(host_id); + checking_host_ids.insert(host.toString()); + + is_self_host = DDLTask::isSelfHostID(log, host, maybe_secure_port, port); } catch (const Exception & e) { LOG_WARNING(log, "Unable to check if host {} is a local address, exception: {}", host_id, e.displayText()); continue; } + + LOG_INFO(log, "Self host_id ({}) = {}", host_id, is_self_host); + if (is_self_host) + { + local_host_ids.emplace(host_id); + continue; + } + + if (!reinitialized) + { + /// Remove this host_id from active_node_holders + auto it = active_node_holders.find(host_id); + if (it != active_node_holders.end()) + { + auto & active_node_holder = it->second.second; + if (active_node_holder) + active_node_holder->setAlreadyRemoved(); + active_node_holder.reset(); + + active_node_holders.erase(it); + } + continue; + } } for (const auto & host_id : local_host_ids) @@ -1377,17 +1457,9 @@ void DDLWorker::markReplicasActive(bool reinitialized) active_node_holders[host_id] = {active_node_holder_zookeeper, active_node_holder}; } - if (active_node_holders.empty()) { - for (const auto & it : context->getClusters()) - { - const auto & cluster = it.second; - if (!cluster->getHostIDs().empty()) - { - LOG_WARNING(log, "There are clusters with host ids but no local host found for this replica."); - break; - } - } + std::lock_guard lock{checked_host_id_set_mutex}; + checked_host_id_set = checking_host_ids; } } @@ -1454,4 +1526,16 @@ void DDLWorker::runCleanupThread() } } +NameSet DDLWorker::getAllHostIDsFromClusters() const +{ + NameSet host_id_set; + for (const auto & it : context->getClusters()) + { + auto cluster = it.second; + for (const auto & host_ids : cluster->getHostIDs()) + for (const auto & host_id : host_ids) + host_id_set.emplace(host_id); + } + return host_id_set; +} } diff --git a/src/Interpreters/DDLWorker.h b/src/Interpreters/DDLWorker.h index abf0a9f84098..46289ee92f89 100644 --- a/src/Interpreters/DDLWorker.h +++ b/src/Interpreters/DDLWorker.h @@ -1,15 +1,17 @@ #pragma once +#include +#include +#include #include #include +#include #include #include #include #include #include #include -#include -#include #include #include @@ -94,6 +96,9 @@ class DDLWorker /// Should be called in `initializeMainThread` only, so if it is expired, `runMainThread` will reinitialized the state. ZooKeeperPtr getAndSetZooKeeper(); + void notifyHostIDsUpdated(); + void updateHostIDs(const std::vector & hosts); + protected: class ConcurrentSet @@ -173,6 +178,8 @@ class DDLWorker void runMainThread(); void runCleanupThread(); + NameSet getAllHostIDsFromClusters() const; + ContextMutablePtr context; LoggerPtr log; @@ -209,6 +216,7 @@ class DDLWorker /// Cleaning starts after new node event is received if the last cleaning wasn't made sooner than N seconds ago Int64 cleanup_delay_period = 60; // minute (in seconds) + std::atomic_bool host_ids_updated{false}; /// Delete node if its age is greater than that Int64 task_max_lifetime = 7 * 24 * 60 * 60; // week (in seconds) /// How many tasks could be in the queue @@ -221,6 +229,10 @@ class DDLWorker std::atomic_uint64_t subsequent_errors_count = 0; String last_unexpected_error; + mutable std::mutex checked_host_id_set_mutex; + NameSet checked_host_id_set TSA_GUARDED_BY(checked_host_id_set_mutex); + + const CurrentMetrics::Metric * max_entry_metric; const CurrentMetrics::Metric * max_pushed_entry_metric; diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index be910cf3de23..62d4b07b2242 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -89,6 +89,7 @@ namespace ErrorCodes namespace Setting { extern const SettingsBool fsync_metadata; + extern const SettingsBool allow_experimental_analyzer; } namespace MergeTreeSetting @@ -383,6 +384,7 @@ DatabaseAndTable DatabaseCatalog::getTableImpl( return {}; } + bool analyzer = context_->getSettingsRef()[Setting::allow_experimental_analyzer]; if (table_id.hasUUID()) { /// Shortcut for tables which have persistent UUID @@ -401,7 +403,8 @@ DatabaseAndTable DatabaseCatalog::getTableImpl( } return {}; } - else + /// In old analyzer resolving done in multiple places, so we ignore TABLE_UUID_MISMATCH error. + else if (analyzer) { const auto & table_storage_id = db_and_table.second->getStorageID(); if (db_and_table.first->getDatabaseName() != table_id.database_name || diff --git a/src/Interpreters/DistributedQueryStatusSource.cpp b/src/Interpreters/DistributedQueryStatusSource.cpp index 59a82959d0d0..ac1f8fa19604 100644 --- a/src/Interpreters/DistributedQueryStatusSource.cpp +++ b/src/Interpreters/DistributedQueryStatusSource.cpp @@ -102,7 +102,8 @@ NameSet DistributedQueryStatusSource::getOfflineHosts(const NameSet & hosts_to_w if (offline.size() == hosts_to_wait.size()) { /// Avoid reporting that all hosts are offline - LOG_WARNING(log, "Did not find active hosts, will wait for all {} hosts. This should not happen often", offline.size()); + LOG_WARNING( + log, "Did not find active hosts, will wait for all hosts: {}. This should not happen often", fmt::join(hosts_to_wait, ", ")); return {}; } diff --git a/src/Interpreters/FilesystemCacheLog.cpp b/src/Interpreters/FilesystemCacheLog.cpp index f518ef46ba37..d88168f16617 100644 --- a/src/Interpreters/FilesystemCacheLog.cpp +++ b/src/Interpreters/FilesystemCacheLog.cpp @@ -42,7 +42,6 @@ ColumnsDescription FilesystemCacheLogElement::getColumnsDescription() {"size", std::make_shared(), "Read size"}, {"read_type", std::make_shared(), "Read type: READ_FROM_CACHE, READ_FROM_FS_AND_DOWNLOADED_TO_CACHE, READ_FROM_FS_BYPASSING_CACHE"}, {"read_from_cache_attempted", std::make_shared(), "Whether reading from cache was attempted"}, - {"ProfileEvents", std::make_shared(low_cardinality_string, std::make_shared()), "Profile events collected while reading this file segment"}, {"read_buffer_id", std::make_shared(), "Internal implementation read buffer id"}, {"user_id", std::make_shared(), "User id of the user which created the file segment"}, }; @@ -66,17 +65,6 @@ void FilesystemCacheLogElement::appendToBlock(MutableColumns & columns) const columns[i++]->insert(file_segment_size); columns[i++]->insert(typeToString(cache_type)); columns[i++]->insert(read_from_cache_attempted); - - if (profile_counters) - { - auto * column = columns[i++].get(); - ProfileEvents::dumpToMapColumn(*profile_counters, column, true); - } - else - { - columns[i++]->insertDefault(); - } - columns[i++]->insert(read_buffer_id); columns[i++]->insert(user_id); } diff --git a/src/Interpreters/FilesystemCacheLog.h b/src/Interpreters/FilesystemCacheLog.h index 94c3986a1ab2..3d5393f15ab5 100644 --- a/src/Interpreters/FilesystemCacheLog.h +++ b/src/Interpreters/FilesystemCacheLog.h @@ -35,7 +35,6 @@ struct FilesystemCacheLogElement size_t file_segment_size = 0; bool read_from_cache_attempted; String read_buffer_id{}; - std::shared_ptr profile_counters = nullptr; String user_id{}; static std::string name() { return "FilesystemCacheLog"; } diff --git a/src/Interpreters/GraceHashJoin.cpp b/src/Interpreters/GraceHashJoin.cpp index f957b218c4c9..231c7ec02dd6 100644 --- a/src/Interpreters/GraceHashJoin.cpp +++ b/src/Interpreters/GraceHashJoin.cpp @@ -528,6 +528,17 @@ class GraceHashJoin::DelayedBlocks : public IBlocksStream if (not_processed) { auto res = not_processed->next(); + if (res.is_last && res.next_block) + { + res.next_block->filterBySelector(); + auto next_block = std::move(*res.next_block).getSourceBlock(); + if (next_block.rows() > 0) + { + auto new_res = hash_join->joinBlock(std::move(next_block)); + std::lock_guard lock(extra_block_mutex); + not_processed_results.emplace_back(std::move(new_res)); + } + } if (!res.is_last) { std::lock_guard lock(extra_block_mutex); @@ -602,6 +613,17 @@ class GraceHashJoin::DelayedBlocks : public IBlocksStream auto res = hash_join->joinBlock(block); auto next = res->next(); + if (next.is_last && next.next_block) + { + next.next_block->filterBySelector(); + auto next_block = std::move(*next.next_block).getSourceBlock(); + if (next_block.rows() > 0) + { + auto new_res = hash_join->joinBlock(std::move(next_block)); + std::lock_guard lock(extra_block_mutex); + not_processed_results.emplace_back(std::move(new_res)); + } + } if (!next.is_last) { std::lock_guard lock(extra_block_mutex); diff --git a/src/Interpreters/HashJoin/HashJoinMethods.h b/src/Interpreters/HashJoin/HashJoinMethods.h index 4241c4e129ee..ffec6ef4910d 100644 --- a/src/Interpreters/HashJoin/HashJoinMethods.h +++ b/src/Interpreters/HashJoin/HashJoinMethods.h @@ -193,7 +193,7 @@ class HashJoinMethods /// First to collect all matched rows refs by join keys, then filter out rows which are not true in additional filter expression. template - static size_t joinRightColumnsWithAddtitionalFilter( + static size_t joinRightColumnsWithAdditionalFilter( std::vector && key_getter_vector, const std::vector & mapv, AddedColumns & added_columns, diff --git a/src/Interpreters/HashJoin/HashJoinMethodsImpl.h b/src/Interpreters/HashJoin/HashJoinMethodsImpl.h index ee4e90f8da04..13e7a07aed93 100644 --- a/src/Interpreters/HashJoin/HashJoinMethodsImpl.h +++ b/src/Interpreters/HashJoin/HashJoinMethodsImpl.h @@ -306,7 +306,7 @@ size_t HashJoinMethods::joinRightColumnsSwitchMu if (added_columns.additional_filter_expression) { const bool mark_per_row_used = join_features.right || join_features.full || mapv.size() > 1; - return joinRightColumnsWithAddtitionalFilter( + return joinRightColumnsWithAdditionalFilter( std::forward>(key_getter_vector), mapv, added_columns, @@ -815,7 +815,7 @@ static ColumnPtr buildAdditionalFilter( template template -size_t HashJoinMethods::joinRightColumnsWithAddtitionalFilter( +size_t HashJoinMethods::joinRightColumnsWithAdditionalFilter( std::vector && key_getter_vector, const std::vector & mapv, AddedColumns & added_columns, diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 273b76768fcb..9ff7b85ae85f 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -1518,7 +1518,7 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) create.sql_security = std::make_shared(); if (create.sql_security) - processSQLSecurityOption(getContext(), create.sql_security->as(), create.is_materialized_view, /* skip_check_permissions= */ mode >= LoadingStrictnessLevel::SECONDARY_CREATE); + processSQLSecurityOption(getContext(), create.sql_security->as(), create.is_materialized_view, mode); DDLGuardPtr ddl_guard; @@ -2453,7 +2453,7 @@ void InterpreterCreateQuery::addColumnsDescriptionToCreateQueryIfNecessary(ASTCr } } -void InterpreterCreateQuery::processSQLSecurityOption(ContextMutablePtr context_, ASTSQLSecurity & sql_security, bool is_materialized_view, bool skip_check_permissions) +void InterpreterCreateQuery::processSQLSecurityOption(ContextMutablePtr context_, ASTSQLSecurity & sql_security, bool is_materialized_view, LoadingStrictnessLevel mode) { /// If no SQL security is specified, apply default from default_*_view_sql_security setting. if (!sql_security.type) @@ -2494,27 +2494,30 @@ void InterpreterCreateQuery::processSQLSecurityOption(ContextMutablePtr context_ } /// Checks the permissions for the specified definer user. - if (sql_security.definer && !skip_check_permissions) + if (sql_security.definer) { auto definer_name = sql_security.definer->toString(); if (definer_name != current_user_name) context_->checkAccess(AccessType::SET_DEFINER, definer_name); - auto & access_control = context_->getAccessControl(); - const auto user = access_control.read(definer_name); - if (access_control.isEphemeral(access_control.getID(definer_name))) + if (mode <= LoadingStrictnessLevel::CREATE) { - definer_name = user->getName() + ":definer"; - sql_security.definer = std::make_shared(definer_name); - auto new_user = typeid_cast>(user->clone()); - new_user->setName(definer_name); - new_user->authentication_methods.clear(); - new_user->authentication_methods.emplace_back(AuthenticationType::NO_AUTHENTICATION); - access_control.insertOrReplace(new_user); + auto & access_control = context_->getAccessControl(); + const auto user = access_control.read(definer_name); + if (access_control.isEphemeral(access_control.getID(definer_name))) + { + definer_name = user->getName() + ":definer"; + sql_security.definer = std::make_shared(definer_name); + auto new_user = typeid_cast>(user->clone()); + new_user->setName(definer_name); + new_user->authentication_methods.clear(); + new_user->authentication_methods.emplace_back(AuthenticationType::NO_AUTHENTICATION); + access_control.insertOrReplace(new_user); + } } } - if (sql_security.type == SQLSecurityType::NONE && !skip_check_permissions) + if (sql_security.type == SQLSecurityType::NONE) context_->checkAccess(AccessType::ALLOW_SQL_SECURITY_NONE); } diff --git a/src/Interpreters/InterpreterCreateQuery.h b/src/Interpreters/InterpreterCreateQuery.h index 65e7ac5962a2..ab5f77a9fd36 100644 --- a/src/Interpreters/InterpreterCreateQuery.h +++ b/src/Interpreters/InterpreterCreateQuery.h @@ -84,8 +84,7 @@ class InterpreterCreateQuery : public IInterpreter, WithMutableContext void extendQueryLogElemImpl(QueryLogElement & elem, const ASTPtr & ast, ContextPtr) const override; /// Check access right, validate definer statement and replace `CURRENT USER` with actual name. - static void processSQLSecurityOption( - ContextMutablePtr context_, ASTSQLSecurity & sql_security, bool is_materialized_view = false, bool skip_check_permissions = false); + static void processSQLSecurityOption(ContextMutablePtr context_, ASTSQLSecurity & sql_security, bool is_materialized_view = false, LoadingStrictnessLevel mode = LoadingStrictnessLevel::CREATE); private: struct TableProperties diff --git a/src/Interpreters/ProcessList.cpp b/src/Interpreters/ProcessList.cpp index 32c5ecd0a164..be8912cc183e 100644 --- a/src/Interpreters/ProcessList.cpp +++ b/src/Interpreters/ProcessList.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -329,11 +330,11 @@ ProcessList::EntryPtr ProcessList::insert( processes.end(), query); - CancellationChecker::getInstance().appendTask(query, query_context->getSettingsRef()[Setting::max_execution_time].totalMilliseconds(), query_context->getSettingsRef()[Setting::timeout_overflow_mode]); + bool registered_in_cancellation_checker = CancellationChecker::getInstance().appendTask(query, query_context->getSettingsRef()[Setting::max_execution_time].totalMilliseconds(), query_context->getSettingsRef()[Setting::timeout_overflow_mode]); increaseQueryKindAmount(query_kind); - res = std::make_shared(*this, process_it); + res = std::make_shared(*this, process_it, registered_in_cancellation_checker); (*process_it)->setUserProcessList(&user_process_list); (*process_it)->setProcessListEntry(res); @@ -367,6 +368,14 @@ ProcessList::EntryPtr ProcessList::insert( ProcessListEntry::~ProcessListEntry() { + if (registered_in_cancellation_checker) + { + /// We need to block the overcommit tracker here to avoid lock inversion because OvercommitTracker takes a lock on the ProcessList::mutex. + /// When task is added, we lock the ProcessList::mutex, and then the CancellationChecker mutex. + OvercommitTrackerBlockerInThread blocker; + CancellationChecker::getInstance().appendDoneTasks(*it); + } + LockAndOverCommitTrackerBlocker lock(parent.getMutex()); String user = (*it)->getClientInfo().current_user; @@ -401,8 +410,6 @@ ProcessListEntry::~ProcessListEntry() if (auto query_user = parent.queries_to_user.find(query_id); query_user != parent.queries_to_user.end()) parent.queries_to_user.erase(query_user); - CancellationChecker::getInstance().appendDoneTasks(*it); - /// This removes the memory_tracker of one request. parent.processes.erase(it); diff --git a/src/Interpreters/ProcessList.h b/src/Interpreters/ProcessList.h index 8b6cd93946c1..645acf0f67ea 100644 --- a/src/Interpreters/ProcessList.h +++ b/src/Interpreters/ProcessList.h @@ -343,10 +343,11 @@ class ProcessListEntry ProcessList & parent; Container::iterator it; + bool registered_in_cancellation_checker = false; public: - ProcessListEntry(ProcessList & parent_, Container::iterator it_) - : parent(parent_), it(it_) {} + ProcessListEntry(ProcessList & parent_, Container::iterator it_, bool registered_in_cancellation_checker_) + : parent(parent_), it(it_), registered_in_cancellation_checker(registered_in_cancellation_checker_) {} ~ProcessListEntry(); diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index 05768ab19284..fe68def3d353 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -1362,7 +1362,14 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect( result.analyzed_join = std::make_shared(); if (remove_duplicates) + { + Aliases aliases; + NameSet name_set; + + normalize(query, aliases, name_set, select_options.ignore_alias, settings, /* allow_self_aliases = */ true, getContext(), select_options.is_create_parameterized_view); renameDuplicatedColumns(select_query); + } + /// Perform it before analyzing JOINs, because it may change number of columns with names unique and break some logic inside JOINs if (settings[Setting::optimize_normalize_count_variants]) diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index 38d0b4c19efd..243fe3fcba0b 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -1706,6 +1706,10 @@ bool ParserCreateViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec if (!sql_security) sql_security_p.parse(pos, sql_security, expected); + /// Accept COMMENT before AS SELECT for forward compatibility with newer versions + /// that may format views as: CREATE VIEW ... COMMENT 'text' AS SELECT ... + auto comment = parseComment(pos, expected); + /// AS SELECT ... if (!s_as.ignore(pos, expected)) return false; @@ -1713,7 +1717,8 @@ bool ParserCreateViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec if (!select_p.parse(pos, select, expected)) return false; - auto comment = parseComment(pos, expected); + if (!comment) + comment = parseComment(pos, expected); auto query = std::make_shared(); node = query; diff --git a/src/Planner/PlannerExpressionAnalysis.cpp b/src/Planner/PlannerExpressionAnalysis.cpp index bc1f16d8d18a..2bdb0651f230 100644 --- a/src/Planner/PlannerExpressionAnalysis.cpp +++ b/src/Planner/PlannerExpressionAnalysis.cpp @@ -11,6 +11,7 @@ #include #include +#include #include #include @@ -368,6 +369,47 @@ std::optional analyzeWindow( } } + /// When `group_by_use_nulls = 1` with CUBE/ROLLUP/GROUPING SETS, GROUP BY keys become Nullable + /// in the data flowing into window functions. But the aggregate function was created during analysis + /// with the original (non-nullable) argument types. We need to re-create the aggregate function + /// with the actual (nullable) argument types so that the Null combinator is properly applied. + for (auto & window_description : window_descriptions) + { + for (auto & window_function : window_description.window_functions) + { + bool types_changed = false; + DataTypes actual_argument_types; + actual_argument_types.reserve(window_function.argument_names.size()); + + for (size_t i = 0; i < window_function.argument_names.size(); ++i) + { + const auto * dag_node = before_window_actions->dag.tryFindInOutputs(window_function.argument_names[i]); + if (dag_node && !window_function.argument_types[i]->equals(*dag_node->result_type)) + { + actual_argument_types.push_back(dag_node->result_type); + types_changed = true; + } + else + { + actual_argument_types.push_back(window_function.argument_types[i]); + } + } + + if (types_changed) + { + AggregateFunctionProperties properties; + auto new_function = AggregateFunctionFactory::instance().get( + window_function.aggregate_function->getName(), + NullsAction::EMPTY, + actual_argument_types, + window_function.function_parameters, + properties); + window_function.aggregate_function = std::move(new_function); + window_function.argument_types = std::move(actual_argument_types); + } + } + } + ColumnsWithTypeAndName window_functions_additional_columns; for (auto & window_description : window_descriptions) diff --git a/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp b/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp index 938f25b89fa8..c0e92c2989aa 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp @@ -344,7 +344,7 @@ const ActionsDAG::Node * addMonotonicChain(ActionsDAG & dag, const ActionsDAG::N args.push_back(&dag.addColumn({child->column, child->result_type, child->result_name})); } - return &dag.addFunction(node->function_base, std::move(args), {}); + return &dag.addFunction(node->function_base, std::move(args), node->result_name); } struct SortingInputOrder diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 3f23bf393f07..309fcf1beeed 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -2002,7 +2002,7 @@ ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToRead( find_exact_ranges, query_info_.isFinal()); - MergeTreeDataSelectExecutor::filterPartsByQueryConditionCache(result.parts_with_ranges, query_info_, vector_search_parameters, context_, log); + MergeTreeDataSelectExecutor::filterPartsByQueryConditionCache(result.parts_with_ranges, query_info_, vector_search_parameters, mutations_snapshot, context_, log); if (indexes->use_skip_indexes && !indexes->skip_indexes.useful_indices.empty() && query_info_.isFinal() && settings[Setting::use_skip_indexes_if_final_exact_mode]) diff --git a/src/Processors/Transforms/IntersectOrExceptTransform.cpp b/src/Processors/Transforms/IntersectOrExceptTransform.cpp index 180b0c11a3cc..412cd6c425d6 100644 --- a/src/Processors/Transforms/IntersectOrExceptTransform.cpp +++ b/src/Processors/Transforms/IntersectOrExceptTransform.cpp @@ -9,15 +9,11 @@ IntersectOrExceptTransform::IntersectOrExceptTransform(SharedHeader header_, Ope : IProcessor(InputPorts(2, header_), {header_}) , current_operator(operator_) { - const Names & columns = header_->getNames(); - size_t num_columns = columns.empty() ? header_->columns() : columns.size(); + size_t num_columns = header_->columns(); - key_columns_pos.reserve(columns.size()); + key_columns_pos.reserve(num_columns); for (size_t i = 0; i < num_columns; ++i) - { - auto pos = columns.empty() ? i : header_->getPositionByName(columns[i]); - key_columns_pos.emplace_back(pos); - } + key_columns_pos.emplace_back(i); } diff --git a/src/Storages/Kafka/StorageKafka2.cpp b/src/Storages/Kafka/StorageKafka2.cpp index c55da9c24790..6a1a1cde5e84 100644 --- a/src/Storages/Kafka/StorageKafka2.cpp +++ b/src/Storages/Kafka/StorageKafka2.cpp @@ -206,6 +206,11 @@ void StorageKafka2::partialShutdown() task->holder->deactivate(); } is_active = false; + /// Reset the active node holder while the old ZooKeeper session is still alive (even if expired). + /// EphemeralNodeHolder stores a raw ZooKeeper reference, so resetting it here prevents a + /// use-after-free: setZooKeeper() called afterwards may free the old session, and the holder's + /// destructor would then access a dangling reference when checking zookeeper.expired(). + replica_is_active_node = nullptr; } bool StorageKafka2::activate() diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 63d2d0177fb6..cf81e3f86f92 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -207,6 +207,9 @@ IMergeTreeDataPart::MinMaxIndex::WrittenFiles IMergeTreeDataPart::MinMaxIndex::s void IMergeTreeDataPart::MinMaxIndex::update(const Block & block, const Names & column_names) { + if (block.rows() == 0) + return; + if (!initialized) hyperrectangle.reserve(column_names.size()); diff --git a/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp b/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp index b767f5b623c8..5287e609cd91 100644 --- a/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp +++ b/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -28,6 +29,11 @@ namespace ErrorCodes extern const int NO_SUCH_COLUMN_IN_TABLE; } +namespace FailPoints +{ + extern const char patch_parts_reverse_column_order[]; +} + namespace { @@ -328,6 +334,17 @@ void addPatchPartsColumns( required_virtuals.insert(patch_system_columns.begin(), patch_system_columns.end()); Names patch_columns_to_read_names(patch_columns_to_read_set.begin(), patch_columns_to_read_set.end()); + + fiu_do_on(FailPoints::patch_parts_reverse_column_order, + { + /// Simulate non-deterministic NameSet iteration producing different column + /// orderings for different patches. This reproduces the bug fixed in + /// getUpdatedHeader (applyPatches.cpp) where sortColumns() normalizes order + /// before the positional assertCompatibleHeader comparison. + if (i % 2 == 1) + std::reverse(patch_columns_to_read_names.begin(), patch_columns_to_read_names.end()); + }); + result.patch_columns[i] = storage_snapshot->getColumnsByNames(options, patch_columns_to_read_names); } diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 63c09ad78d56..26d7fe0cdfd8 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -10067,6 +10067,9 @@ bool MergeTreeData::supportsTrivialCountOptimization(const StorageSnapshotPtr & const auto & snapshot_data = assert_cast(*storage_snapshot->data); const auto & mutations_snapshot = snapshot_data.mutations_snapshot; + if (!mutations_snapshot) + return !settings[Setting::apply_mutations_on_fly] && !settings[Setting::apply_patch_parts]; + return !mutations_snapshot->hasDataMutations() && !mutations_snapshot->hasPatchParts(); } diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 9f6f19239945..c6e8b7bac16e 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -1060,6 +1060,7 @@ void MergeTreeDataSelectExecutor::filterPartsByQueryConditionCache( RangesInDataParts & parts_with_ranges, const SelectQueryInfo & select_query_info, const std::optional & vector_search_parameters, + const MergeTreeData::MutationsSnapshotPtr & mutations_snapshot, const ContextPtr & context, LoggerPtr log) { @@ -1067,7 +1068,9 @@ void MergeTreeDataSelectExecutor::filterPartsByQueryConditionCache( if (!settings[Setting::use_query_condition_cache] || !settings[Setting::allow_experimental_analyzer] || (!select_query_info.prewhere_info && !select_query_info.filter_actions_dag) - || (vector_search_parameters.has_value())) /// vector search has filter in the ORDER BY + || (vector_search_parameters.has_value()) /// vector search has filter in the ORDER BY + || select_query_info.isFinal() + || (mutations_snapshot->hasDataMutations() || mutations_snapshot->hasPatchParts())) return; QueryConditionCachePtr query_condition_cache = context->getQueryConditionCache(); diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h index 7be2415ed133..7d2b5ab231e3 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h @@ -215,6 +215,7 @@ class MergeTreeDataSelectExecutor RangesInDataParts & parts_with_ranges, const SelectQueryInfo & select_query_info, const std::optional & vector_search_parameters, + const MergeTreeData::MutationsSnapshotPtr & mutations_snapshot, const ContextPtr & context, LoggerPtr log); diff --git a/src/Storages/MergeTree/MergeTreeIndexBloomFilterText.cpp b/src/Storages/MergeTree/MergeTreeIndexBloomFilterText.cpp index af1ba6953f81..59d6dd6e1192 100644 --- a/src/Storages/MergeTree/MergeTreeIndexBloomFilterText.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexBloomFilterText.cpp @@ -493,7 +493,7 @@ bool MergeTreeConditionBloomFilterText::traverseTreeEquals( { if (function_name == "has" || function_name == "mapContainsKey" || function_name == "mapContains") { - out.key_column = *key_index; + out.key_column = *map_key_index; out.function = RPNElement::FUNCTION_HAS; out.bloom_filter = std::make_unique(params); auto & value = const_value.safeGet(); @@ -502,7 +502,7 @@ bool MergeTreeConditionBloomFilterText::traverseTreeEquals( } if (function_name == "mapContainsKeyLike") { - out.key_column = *key_index; + out.key_column = *map_key_index; out.function = RPNElement::FUNCTION_HAS; out.bloom_filter = std::make_unique(params); auto & value = const_value.safeGet(); diff --git a/src/Storages/MergeTree/PatchParts/PatchPartsUtils.cpp b/src/Storages/MergeTree/PatchParts/PatchPartsUtils.cpp index 15bda514a3d0..e00bd7be6e51 100644 --- a/src/Storages/MergeTree/PatchParts/PatchPartsUtils.cpp +++ b/src/Storages/MergeTree/PatchParts/PatchPartsUtils.cpp @@ -68,6 +68,14 @@ StorageMetadataPtr getPatchPartMetadata(ColumnsDescription patch_part_desc, Cont { StorageInMemoryMetadata part_metadata; + /// Ensure patch part system columns are present. + /// They may be missing when creating empty coverage parts + /// (e.g. DROP PART for a patch part), because createEmptyPart + /// only includes data columns from table metadata. + for (const auto & col : getPatchPartSystemColumns()) + if (!patch_part_desc.has(col.name)) + patch_part_desc.add(ColumnDescription(col.name, col.type)); + /// Use hash of column names to put patch parts with different structure to different partitions. auto part_identifier = std::make_shared("_part"); auto columns_hash = getColumnsHash(patch_part_desc.getNamesOfPhysical()); diff --git a/src/Storages/MergeTree/PatchParts/applyPatches.cpp b/src/Storages/MergeTree/PatchParts/applyPatches.cpp index 6b5149b92844..0cf987543c02 100644 --- a/src/Storages/MergeTree/PatchParts/applyPatches.cpp +++ b/src/Storages/MergeTree/PatchParts/applyPatches.cpp @@ -230,9 +230,13 @@ IColumn::Patch CombinedPatchBuilder::createPatchForColumn(const String & column_ for (const auto & patch_block : all_patch_blocks) { + const auto & patch_column = patch_block.getByName(column_name).column; + if (!patch_column) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Column {} has null data in patch block", column_name); + IColumn::Patch::Source source = { - .column = *patch_block.getByName(column_name).column, + .column = *patch_column, .versions = getColumnUInt64Data(patch_block, PartDataVersionColumn::name), }; @@ -266,12 +270,18 @@ Block getUpdatedHeader(const PatchesToApply & patches, const NameSet & updated_c for (const auto & column : patch->patch_blocks[0]) { - /// Ignore columns that are not updated. - if (!updated_columns.contains(column.name)) + /// Ignore columns that are not updated or have no data. + if (!updated_columns.contains(column.name) || !column.column) header.erase(column.name); } - headers.push_back(std::move(header)); + /// Sort columns by name so that assertCompatibleHeader below compares + /// matching columns at the same positions. Patch blocks may arrive with + /// different column orderings because addPatchPartsColumns collects names + /// from a NameSet (unordered_set) whose iteration order is non-deterministic. + /// Downstream consumers use name-based lookups, so order does not matter + /// for correctness — only for this positional compatibility check. + headers.push_back(header.sortColumns()); } for (size_t i = 1; i < headers.size(); ++i) @@ -293,7 +303,7 @@ bool canApplyPatchesRaw(const PatchesToApply & patches) { for (const auto & column : patch->patch_blocks.front()) { - if (!isPatchPartSystemColumn(column.name) && !canApplyPatchInplace(*column.column)) + if (!isPatchPartSystemColumn(column.name) && column.column && !canApplyPatchInplace(*column.column)) return false; } } @@ -325,9 +335,16 @@ void applyPatchesToBlockRaw( chassert(patch_to_apply->patch_blocks.size() == 1); const auto & patch_block = patch_to_apply->patch_blocks.front(); + if (!patch_block.has(result_column.name)) + continue; + + const auto & patch_column = patch_block.getByName(result_column.name).column; + if (!patch_column) + continue; + IColumn::Patch::Source source = { - .column = *patch_block.getByName(result_column.name).column, + .column = *patch_column, .versions = getColumnUInt64Data(patch_block, PartDataVersionColumn::name), }; diff --git a/src/Storages/MergeTree/StorageFromMergeTreeProjection.cpp b/src/Storages/MergeTree/StorageFromMergeTreeProjection.cpp index 9e8240b61955..74ddfa19ef9a 100644 --- a/src/Storages/MergeTree/StorageFromMergeTreeProjection.cpp +++ b/src/Storages/MergeTree/StorageFromMergeTreeProjection.cpp @@ -1,5 +1,7 @@ #include +#include +#include #include #include #include @@ -30,6 +32,8 @@ void StorageFromMergeTreeProjection::read( size_t max_block_size, size_t num_streams) { + context->checkAccess(AccessType::SELECT, parent_storage->getStorageID()); + const auto & snapshot_data = assert_cast(*storage_snapshot->data); const auto & parts = snapshot_data.parts; diff --git a/src/Storages/ObjectStorage/DataLakes/DataLakeConfiguration.h b/src/Storages/ObjectStorage/DataLakes/DataLakeConfiguration.h index eee1b4dbaae1..2735064ab4f9 100644 --- a/src/Storages/ObjectStorage/DataLakes/DataLakeConfiguration.h +++ b/src/Storages/ObjectStorage/DataLakes/DataLakeConfiguration.h @@ -176,12 +176,22 @@ class DataLakeConfiguration : public BaseStorageConfiguration, public std::enabl return std::nullopt; } + bool supportsTotalRows() const override + { + return DataLakeMetadata::supportsTotalRows(); + } + std::optional totalRows(ContextPtr local_context) override { assertInitializedDL(); return current_metadata->totalRows(local_context); } + bool supportsTotalBytes() const override + { + return DataLakeMetadata::supportsTotalBytes(); + } + std::optional totalBytes(ContextPtr local_context) override { assertInitializedDL(); diff --git a/src/Storages/ObjectStorage/DataLakes/IDataLakeMetadata.h b/src/Storages/ObjectStorage/DataLakes/IDataLakeMetadata.h index 28d7faf1a765..c29681a51742 100644 --- a/src/Storages/ObjectStorage/DataLakes/IDataLakeMetadata.h +++ b/src/Storages/ObjectStorage/DataLakes/IDataLakeMetadata.h @@ -139,7 +139,9 @@ class IDataLakeMetadata : boost::noncopyable virtual void modifyFormatSettings(FormatSettings &, const Context &) const {} + static constexpr bool supportsTotalRows() { return false; } virtual std::optional totalRows(ContextPtr) const { return {}; } + static constexpr bool supportsTotalBytes() { return false; } virtual std::optional totalBytes(ContextPtr) const { return {}; } /// Some data lakes specify information for reading files from disks. diff --git a/src/Storages/ObjectStorage/DataLakes/Iceberg/IcebergMetadata.h b/src/Storages/ObjectStorage/DataLakes/Iceberg/IcebergMetadata.h index ad1b60e86820..67900904de5f 100644 --- a/src/Storages/ObjectStorage/DataLakes/Iceberg/IcebergMetadata.h +++ b/src/Storages/ObjectStorage/DataLakes/Iceberg/IcebergMetadata.h @@ -93,7 +93,9 @@ class IcebergMetadata : public IDataLakeMetadata IcebergHistory getHistory(ContextPtr local_context) const; + static constexpr bool supportsTotalRows() { return true; } std::optional totalRows(ContextPtr Local_context) const override; + static constexpr bool supportsTotalBytes() { return true; } std::optional totalBytes(ContextPtr Local_context) const override; ColumnMapperPtr getColumnMapperForObject(ObjectInfoPtr object_info) const override; diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.cpp b/src/Storages/ObjectStorage/StorageObjectStorage.cpp index 1fe70fc5d96e..889186adb022 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorage.cpp @@ -329,6 +329,9 @@ bool StorageObjectStorage::updateExternalDynamicMetadataIfExists(ContextPtr quer std::optional StorageObjectStorage::totalRows(ContextPtr query_context) const { + if (!configuration->supportsTotalRows()) + return std::nullopt; + configuration->update( object_storage, query_context, @@ -340,6 +343,9 @@ std::optional StorageObjectStorage::totalRows(ContextPtr query_context) std::optional StorageObjectStorage::totalBytes(ContextPtr query_context) const { + if (!configuration->supportsTotalBytes()) + return std::nullopt; + configuration->update( object_storage, query_context, diff --git a/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.h b/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.h index e13e76bb90f6..38d76f851245 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.h @@ -141,7 +141,9 @@ class StorageObjectStorageConfiguration virtual bool isDataLakeConfiguration() const { return false; } + virtual bool supportsTotalRows() const { return false; } virtual std::optional totalRows(ContextPtr) { return {}; } + virtual bool supportsTotalBytes() const { return false; } virtual std::optional totalBytes(ContextPtr) { return {}; } virtual bool hasExternalDynamicMetadata() { return false; } diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp index 1fc4860831e1..ebab6fe533b4 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -437,6 +437,23 @@ Chunk StorageObjectStorageSource::generate() } #endif + /// Convert any Const columns to full columns before returning. + /// This is necessary because when chunks with different Const values (e.g., partition columns + /// from different files in DeltaLake) are squashed together during INSERT, the squashing code + /// doesn't properly handle merging Const columns with different constant values. + /// By converting to full columns here, we ensure the values are preserved correctly. + if (chunk.hasColumns()) + { + size_t chunk_num_rows = chunk.getNumRows(); + auto columns = chunk.detachColumns(); + for (auto & column : columns) + { + if (column->isConst()) + column = column->cloneResized(chunk_num_rows)->convertToFullColumnIfConst(); + } + chunk.setColumns(std::move(columns), chunk_num_rows); + } + return chunk; } diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index 21a5e11f52df..7e7e43db21c7 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -553,7 +554,6 @@ void StorageBuffer::read( static void appendBlock(LoggerPtr log, const Block & from, Block & to) { size_t rows = from.rows(); - size_t old_rows = to.rows(); size_t old_bytes = to.bytes(); if (to.empty()) @@ -564,7 +564,15 @@ static void appendBlock(LoggerPtr log, const Block & from, Block & to) from.checkNumberOfRows(); to.checkNumberOfRows(); + /// Take checkpoints of all destination columns before any modifications + /// to be able to rollback in case of an exception in the middle of insertion. + ColumnCheckpoints checkpoints; + checkpoints.reserve(to.columns()); + for (size_t column_no = 0; column_no < to.columns(); ++column_no) + checkpoints.push_back(to.getByPosition(column_no).column->getCheckpoint()); + MutableColumnPtr last_col; + size_t mutated_columns = 0; try { MemoryTrackerBlockerInThread temporarily_disable_memory_tracker; @@ -590,6 +598,7 @@ static void appendBlock(LoggerPtr log, const Block & from, Block & to) LockMemoryExceptionInThread temporarily_ignore_any_memory_limits(VariableContext::Global); last_col = IColumn::mutate(std::move(to.getByPosition(column_no).column)); } + ++mutated_columns; /// In case of ColumnAggregateFunction aggregate states will /// be allocated from the query context but can be destroyed from the @@ -622,10 +631,11 @@ static void appendBlock(LoggerPtr log, const Block & from, Block & to) try { - for (size_t column_no = 0, columns = to.columns(); column_no < columns; ++column_no) + for (size_t column_no = 0; column_no < mutated_columns; ++column_no) { ColumnPtr & col_to = to.getByPosition(column_no).column; - /// If there is no column, then the exception was thrown in the middle of append, in the insertRangeFrom() + /// If there is no column, the exception was thrown in the middle of append, + /// during insertRangeFrom() — move last_col back so we can roll it back. if (!col_to) { col_to = std::move(last_col); @@ -635,8 +645,11 @@ static void appendBlock(LoggerPtr log, const Block & from, Block & to) /// But if there is still nothing, abort if (!col_to) throw Exception(ErrorCodes::LOGICAL_ERROR, "No column to rollback"); - if (col_to->size() != old_rows) - col_to = col_to->cut(0, old_rows); + + /// Rollback to the state before the exception. + auto mutable_col = IColumn::mutate(std::move(col_to)); + mutable_col->rollback(*checkpoints[column_no]); + col_to = std::move(mutable_col); } } catch (...) diff --git a/src/Storages/StorageKeeperMap.cpp b/src/Storages/StorageKeeperMap.cpp index f77244086486..0c10675356cf 100644 --- a/src/Storages/StorageKeeperMap.cpp +++ b/src/Storages/StorageKeeperMap.cpp @@ -1085,10 +1085,11 @@ void StorageKeeperMap::backupData(BackupEntriesCollector & backup_entries_collec auto temp_disk = backup_entries_collector.getContext()->getGlobalTemporaryVolume()->getDisk(0); auto max_compress_block_size = backup_entries_collector.getContext()->getSettingsRef()[Setting::max_compress_block_size]; + auto self = std::static_pointer_cast(shared_from_this()); auto with_retries = std::make_shared ( getLogger(fmt::format("StorageKeeperMapBackup ({})", getStorageID().getNameForLogs())), - [&] { return getClient(); }, + [self] { return self->getClient(); }, BackupKeeperSettings(backup_entries_collector.getContext()), backup_entries_collector.getContext()->getProcessListElement() ); diff --git a/src/Storages/System/StorageSystemAsynchronousInserts.cpp b/src/Storages/System/StorageSystemAsynchronousInserts.cpp index 64775988ac05..2385a67ffa30 100644 --- a/src/Storages/System/StorageSystemAsynchronousInserts.cpp +++ b/src/Storages/System/StorageSystemAsynchronousInserts.cpp @@ -8,6 +8,8 @@ #include #include #include +#include +#include namespace DB { @@ -37,6 +39,9 @@ void StorageSystemAsynchronousInserts::fillData(MutableColumns & res_columns, Co if (!insert_queue) return; + const auto current_user_id = context->getUserID(); + const bool show_all = context->getAccess()->isGranted(AccessType::SHOW_USERS); + for (size_t shard_num = 0; shard_num < insert_queue->getPoolSize(); ++shard_num) { auto [queue, queue_lock] = insert_queue->getQueueLocked(shard_num); @@ -45,6 +50,9 @@ void StorageSystemAsynchronousInserts::fillData(MutableColumns & res_columns, Co { const auto & [key, data] = elem; + if (!show_all && key.user_id != current_user_id) + continue; + auto time_in_microseconds = [](const time_point & timestamp) { auto time_diff = duration_cast(steady_clock::now() - timestamp); diff --git a/src/Storages/System/StorageSystemJemalloc.cpp b/src/Storages/System/StorageSystemJemalloc.cpp index f486973be6d1..5706a1b7ec76 100644 --- a/src/Storages/System/StorageSystemJemalloc.cpp +++ b/src/Storages/System/StorageSystemJemalloc.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -47,12 +48,20 @@ void fillJemallocBins(MutableColumns & res_columns) auto ndalloc = getJeMallocValue(fmt::format("stats.arenas.{}.bins.{}.ndalloc", MALLCTL_ARENAS_ALL, bin).c_str()); auto nmalloc = getJeMallocValue(fmt::format("stats.arenas.{}.bins.{}.nmalloc", MALLCTL_ARENAS_ALL, bin).c_str()); + auto nregs = getJeMallocValue(fmt::format("arenas.bin.{}.nregs", bin).c_str()); + auto curslabs = getJeMallocValue(fmt::format("stats.arenas.{}.bins.{}.curslabs", MALLCTL_ARENAS_ALL, bin).c_str()); + auto curregs = getJeMallocValue(fmt::format("stats.arenas.{}.bins.{}.curregs", MALLCTL_ARENAS_ALL, bin).c_str()); + size_t col_num = 0; res_columns.at(col_num++)->insert(bin_index); res_columns.at(col_num++)->insert(0); res_columns.at(col_num++)->insert(size); res_columns.at(col_num++)->insert(nmalloc); res_columns.at(col_num++)->insert(ndalloc); + + res_columns.at(col_num++)->insert(nregs); + res_columns.at(col_num++)->insert(curslabs); + res_columns.at(col_num++)->insert(curregs); } /// Bins for large allocations @@ -69,6 +78,10 @@ void fillJemallocBins(MutableColumns & res_columns) res_columns.at(col_num++)->insert(size); res_columns.at(col_num++)->insert(nmalloc); res_columns.at(col_num++)->insert(ndalloc); + + res_columns.at(col_num++)->insertDefault(); + res_columns.at(col_num++)->insertDefault(); + res_columns.at(col_num++)->insertDefault(); } } @@ -93,14 +106,24 @@ StorageSystemJemallocBins::StorageSystemJemallocBins(const StorageID & table_id_ ColumnsDescription StorageSystemJemallocBins::getColumnsDescription() { - return ColumnsDescription + auto description = ColumnsDescription { { "index", std::make_shared(), "Index of the bin ordered by size."}, { "large", std::make_shared(), "True for large allocations and False for small."}, { "size", std::make_shared(), "Size of allocations in this bin."}, { "allocations", std::make_shared(), "Number of allocations."}, { "deallocations", std::make_shared(), "Number of deallocations."}, + { "nregs", std::make_shared(), "Number of regions per slab."}, + { "curslabs", std::make_shared(), "Current number of slabs."}, + { "curregs", std::make_shared(), "Current number of regions for this size class."}, }; + + description.setAliases({ + {"availregs", std::make_shared(), "nregs * curslabs"}, + {"util", std::make_shared(), "curregs / availregs"}, + }); + + return description; } Pipe StorageSystemJemallocBins::read( diff --git a/src/Storages/System/StorageSystemJemalloc.h b/src/Storages/System/StorageSystemJemalloc.h index 0cd29d991310..d457998b5c6f 100644 --- a/src/Storages/System/StorageSystemJemalloc.h +++ b/src/Storages/System/StorageSystemJemalloc.h @@ -6,8 +6,6 @@ namespace DB { -class Context; - class StorageSystemJemallocBins final : public IStorage { public: diff --git a/src/TableFunctions/TableFunctionFile.cpp b/src/TableFunctions/TableFunctionFile.cpp index 0d6a6ab176a2..728167de8807 100644 --- a/src/TableFunctions/TableFunctionFile.cpp +++ b/src/TableFunctions/TableFunctionFile.cpp @@ -107,6 +107,9 @@ ColumnsDescription TableFunctionFile::getActualTableStructure(ContextPtr context throw Exception(ErrorCodes::BAD_ARGUMENTS, "Schema inference is not supported for table function '{}' with file descriptor", getName()); size_t total_bytes_to_read = 0; + if (context->getApplicationType() != Context::ApplicationType::LOCAL) + context->checkAccess(AccessType::READ, toStringSource(AccessTypeObjects::Source::FILE)); + Strings paths; std::optional archive_info; if (path_to_archive.empty()) diff --git a/src/TableFunctions/TableFunctionRemote.cpp b/src/TableFunctions/TableFunctionRemote.cpp index 94b754ca77a3..552850dc9a69 100644 --- a/src/TableFunctions/TableFunctionRemote.cpp +++ b/src/TableFunctions/TableFunctionRemote.cpp @@ -21,6 +21,7 @@ #include #include #include +#include namespace DB @@ -315,6 +316,22 @@ StoragePtr TableFunctionRemote::executeImpl(const ASTPtr & /*ast_function*/, Con cached_columns = getActualTableStructure(context, is_insert_query); assert(cluster); + + bool has_local_shard = false; + for (const auto & shard_info : cluster->getShardsInfo()) + { + if (shard_info.isLocal()) + { + has_local_shard = true; + break; + } + } + + if (has_local_shard && !is_insert_query) + context->checkAccess(AccessType::SELECT, remote_table_id); + else if (has_local_shard) + context->checkAccess(AccessType::INSERT, remote_table_id); + StoragePtr res = std::make_shared( StorageID(getDatabaseName(), table_name), cached_columns, diff --git a/tests/integration/test_distributed_ddl_on_database_cluster/__init__.py b/tests/integration/test_distributed_ddl_on_database_cluster/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/integration/test_distributed_ddl_on_database_cluster/configs/config.d/remote_servers.xml b/tests/integration/test_distributed_ddl_on_database_cluster/configs/config.d/remote_servers.xml new file mode 100644 index 000000000000..d6a932c56c3c --- /dev/null +++ b/tests/integration/test_distributed_ddl_on_database_cluster/configs/config.d/remote_servers.xml @@ -0,0 +1,4 @@ + + + + diff --git a/tests/integration/test_distributed_ddl_on_database_cluster/configs/config.d/settings.xml b/tests/integration/test_distributed_ddl_on_database_cluster/configs/config.d/settings.xml new file mode 100644 index 000000000000..02708c22026a --- /dev/null +++ b/tests/integration/test_distributed_ddl_on_database_cluster/configs/config.d/settings.xml @@ -0,0 +1,8 @@ + + + /clickhouse/task_queue/ddl + 10 + 3600 + 5 + + \ No newline at end of file diff --git a/tests/integration/test_distributed_ddl_on_database_cluster/configs/users.d/query_log.xml b/tests/integration/test_distributed_ddl_on_database_cluster/configs/users.d/query_log.xml new file mode 100644 index 000000000000..ef8abbd91741 --- /dev/null +++ b/tests/integration/test_distributed_ddl_on_database_cluster/configs/users.d/query_log.xml @@ -0,0 +1,9 @@ + + + + + 1 + 1 + + + diff --git a/tests/integration/test_distributed_ddl_on_database_cluster/test.py b/tests/integration/test_distributed_ddl_on_database_cluster/test.py new file mode 100755 index 000000000000..c54170a56e23 --- /dev/null +++ b/tests/integration/test_distributed_ddl_on_database_cluster/test.py @@ -0,0 +1,76 @@ +import os +import sys +import time +import uuid +import pytest + +from helpers.cluster import ClickHouseCluster +from helpers.test_tools import assert_eq_with_retry + +from helpers.test_tools import TSV + +cluster = ClickHouseCluster(__file__) + +node1 = cluster.add_instance( + "node1", + main_configs=["configs/config.d/settings.xml"], + user_configs=["configs/users.d/query_log.xml"], + with_zookeeper=True, + macros={"shard": 1, "replica": 1}, +) +node2 = cluster.add_instance( + "node2", + main_configs=["configs/config.d/settings.xml"], + user_configs=["configs/users.d/query_log.xml"], + with_zookeeper=True, + macros={"shard": 1, "replica": 2}, +) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + + yield cluster + + finally: + cluster.shutdown() + + +def test_waiting_replicated_database_hosts(started_cluster): + node1.query("DROP DATABASE IF EXISTS db SYNC") + node2.query("DROP DATABASE IF EXISTS db SYNC") + + node1.query("DROP TABLE IF EXISTS t SYNC") + node2.query("DROP TABLE IF EXISTS t SYNC") + + node1.query( + "CREATE DATABASE db ENGINE=Replicated('/test/db', '{shard}', '{replica}')" + ) + node2.query( + "CREATE DATABASE db ENGINE=Replicated('/test/db', '{shard}', '{replica}')" + ) + + query_id = str(uuid.uuid4()) + node1.query( + "CREATE TABLE t ON CLUSTER 'db' (x INT, y INT) ENGINE=MergeTree ORDER BY x", + settings={"distributed_ddl_output_mode": "throw_only_active"}, + query_id=query_id, + ) + assert ( + node2.query("SELECT count() FROM system.tables WHERE name='t'").strip() == "1" + ) + node1.query("SYSTEM FLUSH LOGS") + assert ( + node1.query( + f"SELECT count() FROM system.text_log WHERE query_id='{query_id}' AND level='Warning' AND message LIKE '%Did not find active hosts%'" + ).strip() + == "0" + ) + + node1.query("DROP DATABASE IF EXISTS db SYNC") + node2.query("DROP DATABASE IF EXISTS db SYNC") + + node1.query("DROP TABLE IF EXISTS t SYNC") + node2.query("DROP TABLE IF EXISTS t SYNC") diff --git a/tests/integration/test_storage_delta/test.py b/tests/integration/test_storage_delta/test.py index 47a82eedc8e4..dfdcd876b5c2 100644 --- a/tests/integration/test_storage_delta/test.py +++ b/tests/integration/test_storage_delta/test.py @@ -3577,3 +3577,129 @@ def test_write_column_order(started_cluster): ) assert num_rows * 2 == int(instance.query(f"SELECT count() FROM {table_name}")) + + +def test_network_activity_with_system_tables(started_cluster): + instance = started_cluster.instances["node1"] + bucket = started_cluster.minio_bucket + table_name = randomize_table_name("test_network_activity_with_system_tables") + result_file = f"{table_name}_data" + + schema = pa.schema([("id", pa.int32(), False), ("name", pa.string(), False)]) + empty_arrays = [pa.array([], type=pa.int32()), pa.array([], type=pa.string())] + write_deltalake( + f"s3://root/{result_file}", + pa.Table.from_arrays(empty_arrays, schema=schema), + storage_options=get_storage_options(started_cluster), + mode="overwrite", + ) + + instance.query( + f""" + CREATE TABLE {table_name} (id Int32, name String) ENGINE = DeltaLake('http://{started_cluster.minio_ip}:{started_cluster.minio_port}/{bucket}/{result_file}/', 'minio', '{minio_secret_key}') + """ + ) + + instance.query( + f"INSERT INTO {table_name} SELECT number as name, toString(number) as id from numbers(10)" + ) + + query_id = f"{table_name}_query" + instance.query( + f"SELECT * FROM system.tables WHERE name = '{table_name}'", query_id=query_id + ) + + instance.query("SYSTEM FLUSH LOGS text_log") + + assert 0 == int( + instance.query( + f"SELECT count() FROM system.text_log WHERE query_id = '{query_id}' AND message LIKE '%Initialized scan state%'" + ) + ) + + +@pytest.mark.parametrize("cluster", [False, True]) +def test_partition_columns_3(started_cluster, cluster): + """Test for bug https://github.com/ClickHouse/ClickHouse/issues/95526 + + Reproduces issue where partition column values become incorrect when inserting + from DeltaLake into ClickHouse with many columns and type conversions. + """ + node = started_cluster.instances["node1"] + table_name = randomize_table_name("test_partition_columns_jumbled") + + schema = pa.schema( + [ + ("id", pa.int32()), + ("region", pa.string()), + ("state", pa.string()), + ] + ) + + data = [ + pa.array([1, 2], type=pa.int32()), + pa.array(["west", "east"], type=pa.string()), + pa.array(["CA", "NY"], type=pa.string()), + ] + + storage_options = { + "AWS_ENDPOINT_URL": f"http://{started_cluster.minio_ip}:{started_cluster.minio_port}", + "AWS_ACCESS_KEY_ID": minio_access_key, + "AWS_SECRET_ACCESS_KEY": minio_secret_key, + "AWS_ALLOW_HTTP": "true", + "AWS_S3_ALLOW_UNSAFE_RENAME": "true", + } + path = f"s3://root/{table_name}" + table = pa.Table.from_arrays(data, schema=schema) + + write_deltalake( + path, table, storage_options=storage_options, partition_by=["region", "state"] + ) + + if cluster: + delta_function = f""" + deltaLakeCluster( + cluster, + 'http://{started_cluster.minio_ip}:{started_cluster.minio_port}/root/{table_name}' , + '{minio_access_key}', + '{minio_secret_key}') + """ + else: + delta_function = f""" + deltaLake( + 'http://{started_cluster.minio_ip}:{started_cluster.minio_port}/root/{table_name}' , + '{minio_access_key}', + '{minio_secret_key}', + SETTINGS allow_experimental_delta_kernel_rs=0) + """ + + dst_table = f"{table_name}_dst" + node.query(f""" + CREATE TABLE {dst_table} ( + id Int32, + region String, + state String + ) ENGINE = MergeTree() + ORDER BY id + """) + + node.query(f""" + INSERT INTO {dst_table} + SELECT * FROM {delta_function} + """) + + result_from_delta = node.query( + f"SELECT * FROM {delta_function} ORDER BY id", + settings={"allow_experimental_delta_kernel_rs": 1, "use_hive_partitioning": 0}, + ).strip() + + result_from_table = node.query( + f"SELECT * FROM {dst_table} ORDER BY id" + ).strip() + + assert result_from_delta == result_from_table, \ + f"Partition columns jumbled!\nFrom DeltaLake:\n{result_from_delta}\n\nFrom table:\n{result_from_table}" + + expected = "1\twest\tCA\n2\teast\tNY" + assert result_from_table == expected, \ + f"Data doesn't match!\nExpected:\n{expected}\n\nGot:\n{result_from_table}" diff --git a/tests/queries/0_stateless/02253_empty_part_checksums.reference b/tests/queries/0_stateless/02253_empty_part_checksums.reference index 65a8c9ee65e7..d5a418ff4619 100644 --- a/tests/queries/0_stateless/02253_empty_part_checksums.reference +++ b/tests/queries/0_stateless/02253_empty_part_checksums.reference @@ -5,4 +5,4 @@ 0 1 0 -0_0_0_0 Wide 370db59d5dcaef5d762b11d319c368c7 514a8be2dac94fd039dbd230065e58a4 b324ada5cd6bb14402c1e59200bd003a +0_0_0_0 Wide 85adbaf60cad8c08f040d4cb27830cf4 e73297470a3016870e8f281b48b2dd68 b324ada5cd6bb14402c1e59200bd003a diff --git a/tests/queries/0_stateless/02374_analyzer_join_using.reference b/tests/queries/0_stateless/02374_analyzer_join_using.reference index e83f8f37aba6..63e648c14d68 100644 --- a/tests/queries/0_stateless/02374_analyzer_join_using.reference +++ b/tests/queries/0_stateless/02374_analyzer_join_using.reference @@ -102,12 +102,12 @@ SELECT t1.value AS t1_value, toTypeName(t1_value), t2.value AS t2_value, toTypeN FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 USING (test_value); -- { serverError UNKNOWN_IDENTIFIER } SELECT 'First JOIN INNER second JOIN INNER'; First JOIN INNER second JOIN INNER -SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), -t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value), t3.id AS t3_id, toTypeName(t3_id), t3.value AS t3_value, toTypeName(t3_value) +SELECT id AS using_id, toTypeName(using_id), t1.id + 1 AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), +t2.id + 1 AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value), t3.id + 1 AS t3_id, toTypeName(t3_id), t3.value AS t3_value, toTypeName(t3_value) FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id) INNER JOIN test_table_join_3 AS t3 USING(id) ORDER BY ALL; -0 UInt64 0 UInt64 Join_1_Value_0 String 0 UInt64 Join_2_Value_0 String 0 UInt64 Join_3_Value_0 String -1 UInt64 1 UInt64 Join_1_Value_1 String 1 UInt64 Join_2_Value_1 String 1 UInt64 Join_3_Value_1 String +0 UInt64 1 UInt64 Join_1_Value_0 String 1 UInt32 Join_2_Value_0 String 1 UInt64 Join_3_Value_0 String +1 UInt64 2 UInt64 Join_1_Value_1 String 2 UInt32 Join_2_Value_1 String 2 UInt64 Join_3_Value_1 String SELECT '--'; -- SELECT t1.value AS t1_value, toTypeName(t1_value), t2.value AS t2_value, toTypeName(t2_value), t3.value AS t3_value, toTypeName(t3_value) @@ -123,12 +123,12 @@ SELECT 1 FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING ( SELECT id FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id INNER JOIN test_table_join_3 AS t3 USING (id); -- { serverError AMBIGUOUS_IDENTIFIER } SELECT 'First JOIN INNER second JOIN LEFT'; First JOIN INNER second JOIN LEFT -SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), -t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value), t3.id AS t3_id, toTypeName(t3_id), t3.value AS t3_value, toTypeName(t3_value) +SELECT id AS using_id, toTypeName(using_id), t1.id + 1 AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), +t2.id + 1 AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value), t3.id + 1 AS t3_id, toTypeName(t3_id), t3.value AS t3_value, toTypeName(t3_value) FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id) LEFT JOIN test_table_join_3 AS t3 USING(id) ORDER BY ALL; -0 UInt64 0 UInt64 Join_1_Value_0 String 0 UInt64 Join_2_Value_0 String 0 UInt64 Join_3_Value_0 String -1 UInt64 1 UInt64 Join_1_Value_1 String 1 UInt64 Join_2_Value_1 String 1 UInt64 Join_3_Value_1 String +0 UInt64 1 UInt64 Join_1_Value_0 String 1 UInt32 Join_2_Value_0 String 1 UInt64 Join_3_Value_0 String +1 UInt64 2 UInt64 Join_1_Value_1 String 2 UInt32 Join_2_Value_1 String 2 UInt64 Join_3_Value_1 String SELECT '--'; -- SELECT t1.value AS t1_value, toTypeName(t1_value), t2.value AS t2_value, toTypeName(t2_value), t3.value AS t3_value, toTypeName(t3_value) @@ -144,13 +144,13 @@ SELECT 1 FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING ( SELECT id FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id LEFT JOIN test_table_join_3 AS t3 USING (id); -- { serverError AMBIGUOUS_IDENTIFIER } SELECT 'First JOIN INNER second JOIN RIGHT'; First JOIN INNER second JOIN RIGHT -SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), -t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value), t3.id AS t3_id, toTypeName(t3_id), t3.value AS t3_value, toTypeName(t3_value) +SELECT id AS using_id, toTypeName(using_id), t1.id + 1 AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), +t2.id + 1 AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value), t3.id + 1 AS t3_id, toTypeName(t3_id), t3.value AS t3_value, toTypeName(t3_value) FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id) RIGHT JOIN test_table_join_3 AS t3 USING(id) ORDER BY ALL; -0 UInt64 0 UInt64 Join_1_Value_0 String 0 UInt64 Join_2_Value_0 String 0 UInt64 Join_3_Value_0 String -1 UInt64 1 UInt64 Join_1_Value_1 String 1 UInt64 Join_2_Value_1 String 1 UInt64 Join_3_Value_1 String -4 UInt64 0 UInt64 String 0 UInt64 String 4 UInt64 Join_3_Value_4 String +0 UInt64 1 UInt64 Join_1_Value_0 String 1 UInt32 Join_2_Value_0 String 1 UInt64 Join_3_Value_0 String +1 UInt64 2 UInt64 Join_1_Value_1 String 2 UInt32 Join_2_Value_1 String 2 UInt64 Join_3_Value_1 String +4 UInt64 1 UInt64 String 1 UInt32 String 5 UInt64 Join_3_Value_4 String SELECT '--'; -- SELECT t1.value AS t1_value, toTypeName(t1_value), t2.value AS t2_value, toTypeName(t2_value), t3.value AS t3_value, toTypeName(t3_value) @@ -168,13 +168,13 @@ SELECT 1 FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING ( SELECT id FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id RIGHT JOIN test_table_join_3 AS t3 USING (id); -- { serverError AMBIGUOUS_IDENTIFIER } SELECT 'First JOIN INNER second JOIN FULL'; First JOIN INNER second JOIN FULL -SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), -t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value), t3.id AS t3_id, toTypeName(t3_id), t3.value AS t3_value, toTypeName(t3_value) +SELECT id AS using_id, toTypeName(using_id), t1.id + 1 AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), +t2.id + 1 AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value), t3.id + 1 AS t3_id, toTypeName(t3_id), t3.value AS t3_value, toTypeName(t3_value) FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id) FULL JOIN test_table_join_3 AS t3 USING(id) ORDER BY ALL; -0 UInt64 0 UInt64 String 0 UInt64 String 4 UInt64 Join_3_Value_4 String -0 UInt64 0 UInt64 Join_1_Value_0 String 0 UInt64 Join_2_Value_0 String 0 UInt64 Join_3_Value_0 String -1 UInt64 1 UInt64 Join_1_Value_1 String 1 UInt64 Join_2_Value_1 String 1 UInt64 Join_3_Value_1 String +0 UInt64 1 UInt64 String 1 UInt32 String 5 UInt64 Join_3_Value_4 String +0 UInt64 1 UInt64 Join_1_Value_0 String 1 UInt32 Join_2_Value_0 String 1 UInt64 Join_3_Value_0 String +1 UInt64 2 UInt64 Join_1_Value_1 String 2 UInt32 Join_2_Value_1 String 2 UInt64 Join_3_Value_1 String SELECT '--'; -- SELECT t1.value AS t1_value, toTypeName(t1_value), t2.value AS t2_value, toTypeName(t2_value), t3.value AS t3_value, toTypeName(t3_value) @@ -192,12 +192,12 @@ SELECT 1 FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING ( SELECT id FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id FULL JOIN test_table_join_3 AS t3 USING (id); -- { serverError AMBIGUOUS_IDENTIFIER } SELECT 'First JOIN LEFT second JOIN INNER'; First JOIN LEFT second JOIN INNER -SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), -t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value), t3.id AS t3_id, toTypeName(t3_id), t3.value AS t3_value, toTypeName(t3_value) +SELECT id AS using_id, toTypeName(using_id), t1.id + 1 AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), +t2.id + 1 AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value), t3.id + 1 AS t3_id, toTypeName(t3_id), t3.value AS t3_value, toTypeName(t3_value) FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 USING (id) INNER JOIN test_table_join_3 AS t3 USING(id) ORDER BY ALL; -0 UInt64 0 UInt64 Join_1_Value_0 String 0 UInt64 Join_2_Value_0 String 0 UInt64 Join_3_Value_0 String -1 UInt64 1 UInt64 Join_1_Value_1 String 1 UInt64 Join_2_Value_1 String 1 UInt64 Join_3_Value_1 String +0 UInt64 1 UInt64 Join_1_Value_0 String 1 UInt32 Join_2_Value_0 String 1 UInt64 Join_3_Value_0 String +1 UInt64 2 UInt64 Join_1_Value_1 String 2 UInt32 Join_2_Value_1 String 2 UInt64 Join_3_Value_1 String SELECT '--'; -- SELECT t1.value AS t1_value, toTypeName(t1_value), t2.value AS t2_value, toTypeName(t2_value), t3.value AS t3_value, toTypeName(t3_value) @@ -213,13 +213,13 @@ SELECT 1 FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 USING (i SELECT id FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id INNER JOIN test_table_join_3 AS t3 USING (id); -- { serverError AMBIGUOUS_IDENTIFIER } SELECT 'First JOIN LEFT second JOIN LEFT'; First JOIN LEFT second JOIN LEFT -SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), -t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value), t3.id AS t3_id, toTypeName(t3_id), t3.value AS t3_value, toTypeName(t3_value) +SELECT id AS using_id, toTypeName(using_id), t1.id + 1 AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), +t2.id + 1 AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value), t3.id + 1 AS t3_id, toTypeName(t3_id), t3.value AS t3_value, toTypeName(t3_value) FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 USING (id) LEFT JOIN test_table_join_3 AS t3 USING(id) ORDER BY ALL; -0 UInt64 0 UInt64 Join_1_Value_0 String 0 UInt64 Join_2_Value_0 String 0 UInt64 Join_3_Value_0 String -1 UInt64 1 UInt64 Join_1_Value_1 String 1 UInt64 Join_2_Value_1 String 1 UInt64 Join_3_Value_1 String -2 UInt64 2 UInt64 Join_1_Value_2 String 0 UInt64 String 0 UInt64 String +0 UInt64 1 UInt64 Join_1_Value_0 String 1 UInt32 Join_2_Value_0 String 1 UInt64 Join_3_Value_0 String +1 UInt64 2 UInt64 Join_1_Value_1 String 2 UInt32 Join_2_Value_1 String 2 UInt64 Join_3_Value_1 String +2 UInt64 3 UInt64 Join_1_Value_2 String 1 UInt32 String 1 UInt64 String SELECT '--'; -- SELECT t1.value AS t1_value, toTypeName(t1_value), t2.value AS t2_value, toTypeName(t2_value), t3.value AS t3_value, toTypeName(t3_value) @@ -237,13 +237,13 @@ SELECT 1 FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 USING (i SELECT id FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id LEFT JOIN test_table_join_3 AS t3 USING (id); -- { serverError AMBIGUOUS_IDENTIFIER } SELECT 'First JOIN LEFT second JOIN RIGHT'; First JOIN LEFT second JOIN RIGHT -SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), -t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value), t3.id AS t3_id, toTypeName(t3_id), t3.value AS t3_value, toTypeName(t3_value) +SELECT id AS using_id, toTypeName(using_id), t1.id + 1 AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), +t2.id + 1 AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value), t3.id + 1 AS t3_id, toTypeName(t3_id), t3.value AS t3_value, toTypeName(t3_value) FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 USING (id) RIGHT JOIN test_table_join_3 AS t3 USING(id) ORDER BY ALL; -0 UInt64 0 UInt64 Join_1_Value_0 String 0 UInt64 Join_2_Value_0 String 0 UInt64 Join_3_Value_0 String -1 UInt64 1 UInt64 Join_1_Value_1 String 1 UInt64 Join_2_Value_1 String 1 UInt64 Join_3_Value_1 String -4 UInt64 0 UInt64 String 0 UInt64 String 4 UInt64 Join_3_Value_4 String +0 UInt64 1 UInt64 Join_1_Value_0 String 1 UInt32 Join_2_Value_0 String 1 UInt64 Join_3_Value_0 String +1 UInt64 2 UInt64 Join_1_Value_1 String 2 UInt32 Join_2_Value_1 String 2 UInt64 Join_3_Value_1 String +4 UInt64 1 UInt64 String 1 UInt32 String 5 UInt64 Join_3_Value_4 String SELECT '--'; -- SELECT t1.value AS t1_value, toTypeName(t1_value), t2.value AS t2_value, toTypeName(t2_value), t3.value AS t3_value, toTypeName(t3_value) @@ -261,14 +261,14 @@ SELECT 1 FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 USING (i SELECT id FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id RIGHT JOIN test_table_join_3 AS t3 USING (id); -- { serverError AMBIGUOUS_IDENTIFIER } SELECT 'First JOIN LEFT second JOIN FULL'; First JOIN LEFT second JOIN FULL -SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), -t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value), t3.id AS t3_id, toTypeName(t3_id), t3.value AS t3_value, toTypeName(t3_value) +SELECT id AS using_id, toTypeName(using_id), t1.id + 1 AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), +t2.id + 1 AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value), t3.id + 1 AS t3_id, toTypeName(t3_id), t3.value AS t3_value, toTypeName(t3_value) FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 USING (id) FULL JOIN test_table_join_3 AS t3 USING(id) ORDER BY ALL; -0 UInt64 0 UInt64 String 0 UInt64 String 4 UInt64 Join_3_Value_4 String -0 UInt64 0 UInt64 Join_1_Value_0 String 0 UInt64 Join_2_Value_0 String 0 UInt64 Join_3_Value_0 String -1 UInt64 1 UInt64 Join_1_Value_1 String 1 UInt64 Join_2_Value_1 String 1 UInt64 Join_3_Value_1 String -2 UInt64 2 UInt64 Join_1_Value_2 String 0 UInt64 String 0 UInt64 String +0 UInt64 1 UInt64 String 1 UInt32 String 5 UInt64 Join_3_Value_4 String +0 UInt64 1 UInt64 Join_1_Value_0 String 1 UInt32 Join_2_Value_0 String 1 UInt64 Join_3_Value_0 String +1 UInt64 2 UInt64 Join_1_Value_1 String 2 UInt32 Join_2_Value_1 String 2 UInt64 Join_3_Value_1 String +2 UInt64 3 UInt64 Join_1_Value_2 String 1 UInt32 String 1 UInt64 String SELECT '--'; -- SELECT t1.value AS t1_value, toTypeName(t1_value), t2.value AS t2_value, toTypeName(t2_value), t3.value AS t3_value, toTypeName(t3_value) @@ -288,12 +288,12 @@ SELECT 1 FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 USING (i SELECT id FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id FULL JOIN test_table_join_3 AS t3 USING (id); -- { serverError AMBIGUOUS_IDENTIFIER } SELECT 'First JOIN RIGHT second JOIN INNER'; First JOIN RIGHT second JOIN INNER -SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), -t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value), t3.id AS t3_id, toTypeName(t3_id), t3.value AS t3_value, toTypeName(t3_value) +SELECT id AS using_id, toTypeName(using_id), t1.id + 1 AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), +t2.id + 1 AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value), t3.id + 1 AS t3_id, toTypeName(t3_id), t3.value AS t3_value, toTypeName(t3_value) FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 USING (id) INNER JOIN test_table_join_3 AS t3 USING(id) ORDER BY ALL; -0 UInt64 0 UInt64 Join_1_Value_0 String 0 UInt64 Join_2_Value_0 String 0 UInt64 Join_3_Value_0 String -1 UInt64 1 UInt64 Join_1_Value_1 String 1 UInt64 Join_2_Value_1 String 1 UInt64 Join_3_Value_1 String +0 UInt64 1 UInt32 Join_1_Value_0 String 1 UInt64 Join_2_Value_0 String 1 UInt64 Join_3_Value_0 String +1 UInt64 2 UInt32 Join_1_Value_1 String 2 UInt64 Join_2_Value_1 String 2 UInt64 Join_3_Value_1 String SELECT '--'; -- SELECT t1.value AS t1_value, toTypeName(t1_value), t2.value AS t2_value, toTypeName(t2_value), t3.value AS t3_value, toTypeName(t3_value) @@ -309,13 +309,13 @@ SELECT 1 FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 USING ( SELECT id FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id INNER JOIN test_table_join_3 AS t3 USING (id); -- { serverError AMBIGUOUS_IDENTIFIER } SELECT 'First JOIN RIGHT second JOIN LEFT'; First JOIN RIGHT second JOIN LEFT -SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), -t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value), t3.id AS t3_id, toTypeName(t3_id), t3.value AS t3_value, toTypeName(t3_value) +SELECT id AS using_id, toTypeName(using_id), t1.id + 1 AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), +t2.id + 1 AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value), t3.id + 1 AS t3_id, toTypeName(t3_id), t3.value AS t3_value, toTypeName(t3_value) FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 USING (id) LEFT JOIN test_table_join_3 AS t3 USING(id) ORDER BY ALL; -0 UInt64 0 UInt64 Join_1_Value_0 String 0 UInt64 Join_2_Value_0 String 0 UInt64 Join_3_Value_0 String -1 UInt64 1 UInt64 Join_1_Value_1 String 1 UInt64 Join_2_Value_1 String 1 UInt64 Join_3_Value_1 String -3 UInt64 0 UInt64 String 3 UInt64 Join_2_Value_3 String 0 UInt64 String +0 UInt64 1 UInt32 Join_1_Value_0 String 1 UInt64 Join_2_Value_0 String 1 UInt64 Join_3_Value_0 String +1 UInt64 2 UInt32 Join_1_Value_1 String 2 UInt64 Join_2_Value_1 String 2 UInt64 Join_3_Value_1 String +3 UInt64 1 UInt32 String 4 UInt64 Join_2_Value_3 String 1 UInt64 String SELECT '--'; -- SELECT t1.value AS t1_value, toTypeName(t1_value), t2.value AS t2_value, toTypeName(t2_value), t3.value AS t3_value, toTypeName(t3_value) @@ -333,13 +333,13 @@ SELECT 1 FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 USING ( SELECT id FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id LEFT JOIN test_table_join_3 AS t3 USING (id); -- { serverError AMBIGUOUS_IDENTIFIER } SELECT 'First JOIN RIGHT second JOIN RIGHT'; First JOIN RIGHT second JOIN RIGHT -SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), -t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value), t3.id AS t3_id, toTypeName(t3_id), t3.value AS t3_value, toTypeName(t3_value) +SELECT id AS using_id, toTypeName(using_id), t1.id + 1 AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), +t2.id + 1 AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value), t3.id + 1 AS t3_id, toTypeName(t3_id), t3.value AS t3_value, toTypeName(t3_value) FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 USING (id) RIGHT JOIN test_table_join_3 AS t3 USING(id) ORDER BY ALL; -0 UInt64 0 UInt64 Join_1_Value_0 String 0 UInt64 Join_2_Value_0 String 0 UInt64 Join_3_Value_0 String -1 UInt64 1 UInt64 Join_1_Value_1 String 1 UInt64 Join_2_Value_1 String 1 UInt64 Join_3_Value_1 String -4 UInt64 0 UInt64 String 0 UInt64 String 4 UInt64 Join_3_Value_4 String +0 UInt64 1 UInt32 Join_1_Value_0 String 1 UInt64 Join_2_Value_0 String 1 UInt64 Join_3_Value_0 String +1 UInt64 2 UInt32 Join_1_Value_1 String 2 UInt64 Join_2_Value_1 String 2 UInt64 Join_3_Value_1 String +4 UInt64 1 UInt32 String 1 UInt64 String 5 UInt64 Join_3_Value_4 String SELECT '--'; -- SELECT t1.value AS t1_value, toTypeName(t1_value), t2.value AS t2_value, toTypeName(t2_value), t3.value AS t3_value, toTypeName(t3_value) @@ -357,14 +357,14 @@ SELECT 1 FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 USING ( SELECT id FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id RIGHT JOIN test_table_join_3 AS t3 USING (id); -- { serverError AMBIGUOUS_IDENTIFIER } SELECT 'First JOIN RIGHT second JOIN FULL'; First JOIN RIGHT second JOIN FULL -SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), -t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value), t3.id AS t3_id, toTypeName(t3_id), t3.value AS t3_value, toTypeName(t3_value) +SELECT id AS using_id, toTypeName(using_id), t1.id + 1 AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), +t2.id + 1 AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value), t3.id + 1 AS t3_id, toTypeName(t3_id), t3.value AS t3_value, toTypeName(t3_value) FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 USING (id) FULL JOIN test_table_join_3 AS t3 USING(id) ORDER BY ALL; -0 UInt64 0 UInt64 String 0 UInt64 String 4 UInt64 Join_3_Value_4 String -0 UInt64 0 UInt64 Join_1_Value_0 String 0 UInt64 Join_2_Value_0 String 0 UInt64 Join_3_Value_0 String -1 UInt64 1 UInt64 Join_1_Value_1 String 1 UInt64 Join_2_Value_1 String 1 UInt64 Join_3_Value_1 String -3 UInt64 0 UInt64 String 3 UInt64 Join_2_Value_3 String 0 UInt64 String +0 UInt64 1 UInt32 String 1 UInt64 String 5 UInt64 Join_3_Value_4 String +0 UInt64 1 UInt32 Join_1_Value_0 String 1 UInt64 Join_2_Value_0 String 1 UInt64 Join_3_Value_0 String +1 UInt64 2 UInt32 Join_1_Value_1 String 2 UInt64 Join_2_Value_1 String 2 UInt64 Join_3_Value_1 String +3 UInt64 1 UInt32 String 4 UInt64 Join_2_Value_3 String 1 UInt64 String SELECT '--'; -- SELECT t1.value AS t1_value, toTypeName(t1_value), t2.value AS t2_value, toTypeName(t2_value), t3.value AS t3_value, toTypeName(t3_value) @@ -384,13 +384,13 @@ SELECT 1 FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 USING ( SELECT id FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id FULL JOIN test_table_join_3 AS t3 USING (id); -- { serverError AMBIGUOUS_IDENTIFIER } SELECT 'First JOIN FULL second JOIN INNER'; First JOIN FULL second JOIN INNER -SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), -t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value), t3.id AS t3_id, toTypeName(t3_id), t3.value AS t3_value, toTypeName(t3_value) +SELECT id AS using_id, toTypeName(using_id), t1.id + 1 AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), +t2.id + 1 AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value), t3.id + 1 AS t3_id, toTypeName(t3_id), t3.value AS t3_value, toTypeName(t3_value) FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 USING (id) INNER JOIN test_table_join_3 AS t3 USING(id) ORDER BY ALL; -0 UInt64 0 UInt64 String 3 UInt64 Join_2_Value_3 String 0 UInt64 Join_3_Value_0 String -0 UInt64 0 UInt64 Join_1_Value_0 String 0 UInt64 Join_2_Value_0 String 0 UInt64 Join_3_Value_0 String -1 UInt64 1 UInt64 Join_1_Value_1 String 1 UInt64 Join_2_Value_1 String 1 UInt64 Join_3_Value_1 String +0 UInt64 1 UInt64 String 4 UInt32 Join_2_Value_3 String 1 UInt64 Join_3_Value_0 String +0 UInt64 1 UInt64 Join_1_Value_0 String 1 UInt32 Join_2_Value_0 String 1 UInt64 Join_3_Value_0 String +1 UInt64 2 UInt64 Join_1_Value_1 String 2 UInt32 Join_2_Value_1 String 2 UInt64 Join_3_Value_1 String SELECT '--'; -- SELECT t1.value AS t1_value, toTypeName(t1_value), t2.value AS t2_value, toTypeName(t2_value), t3.value AS t3_value, toTypeName(t3_value) @@ -406,14 +406,14 @@ SELECT 1 FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 USING (i SELECT id FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id INNER JOIN test_table_join_3 AS t3 USING (id); -- { serverError AMBIGUOUS_IDENTIFIER } SELECT 'First JOIN FULL second JOIN LEFT'; First JOIN FULL second JOIN LEFT -SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), -t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value), t3.id AS t3_id, toTypeName(t3_id), t3.value AS t3_value, toTypeName(t3_value) +SELECT id AS using_id, toTypeName(using_id), t1.id + 1 AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), +t2.id + 1 AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value), t3.id + 1 AS t3_id, toTypeName(t3_id), t3.value AS t3_value, toTypeName(t3_value) FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 USING (id) LEFT JOIN test_table_join_3 AS t3 USING(id) ORDER BY ALL; -0 UInt64 0 UInt64 String 3 UInt64 Join_2_Value_3 String 0 UInt64 Join_3_Value_0 String -0 UInt64 0 UInt64 Join_1_Value_0 String 0 UInt64 Join_2_Value_0 String 0 UInt64 Join_3_Value_0 String -1 UInt64 1 UInt64 Join_1_Value_1 String 1 UInt64 Join_2_Value_1 String 1 UInt64 Join_3_Value_1 String -2 UInt64 2 UInt64 Join_1_Value_2 String 0 UInt64 String 0 UInt64 String +0 UInt64 1 UInt64 String 4 UInt32 Join_2_Value_3 String 1 UInt64 Join_3_Value_0 String +0 UInt64 1 UInt64 Join_1_Value_0 String 1 UInt32 Join_2_Value_0 String 1 UInt64 Join_3_Value_0 String +1 UInt64 2 UInt64 Join_1_Value_1 String 2 UInt32 Join_2_Value_1 String 2 UInt64 Join_3_Value_1 String +2 UInt64 3 UInt64 Join_1_Value_2 String 1 UInt32 String 1 UInt64 String SELECT '--'; -- SELECT t1.value AS t1_value, toTypeName(t1_value), t2.value AS t2_value, toTypeName(t2_value), t3.value AS t3_value, toTypeName(t3_value) @@ -433,14 +433,14 @@ SELECT 1 FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 USING (i SELECT id FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id LEFT JOIN test_table_join_3 AS t3 USING (id); -- { serverError AMBIGUOUS_IDENTIFIER } SELECT 'First JOIN FULL second JOIN RIGHT'; First JOIN FULL second JOIN RIGHT -SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), -t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value), t3.id AS t3_id, toTypeName(t3_id), t3.value AS t3_value, toTypeName(t3_value) +SELECT id AS using_id, toTypeName(using_id), t1.id + 1 AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), +t2.id + 1 AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value), t3.id + 1 AS t3_id, toTypeName(t3_id), t3.value AS t3_value, toTypeName(t3_value) FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 USING (id) RIGHT JOIN test_table_join_3 AS t3 USING(id) ORDER BY ALL; -0 UInt64 0 UInt64 String 3 UInt64 Join_2_Value_3 String 0 UInt64 Join_3_Value_0 String -0 UInt64 0 UInt64 Join_1_Value_0 String 0 UInt64 Join_2_Value_0 String 0 UInt64 Join_3_Value_0 String -1 UInt64 1 UInt64 Join_1_Value_1 String 1 UInt64 Join_2_Value_1 String 1 UInt64 Join_3_Value_1 String -4 UInt64 0 UInt64 String 0 UInt64 String 4 UInt64 Join_3_Value_4 String +0 UInt64 1 UInt64 String 4 UInt32 Join_2_Value_3 String 1 UInt64 Join_3_Value_0 String +0 UInt64 1 UInt64 Join_1_Value_0 String 1 UInt32 Join_2_Value_0 String 1 UInt64 Join_3_Value_0 String +1 UInt64 2 UInt64 Join_1_Value_1 String 2 UInt32 Join_2_Value_1 String 2 UInt64 Join_3_Value_1 String +4 UInt64 1 UInt64 String 1 UInt32 String 5 UInt64 Join_3_Value_4 String SELECT '--'; -- SELECT t1.value AS t1_value, toTypeName(t1_value), t2.value AS t2_value, toTypeName(t2_value), t3.value AS t3_value, toTypeName(t3_value) @@ -458,15 +458,15 @@ SELECT 1 FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 USING (i SELECT id FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id RIGHT JOIN test_table_join_3 AS t3 USING (id); -- { serverError AMBIGUOUS_IDENTIFIER } SELECT 'First JOIN FULL second JOIN FULL'; First JOIN FULL second JOIN FULL -SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), -t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value), t3.id AS t3_id, toTypeName(t3_id), t3.value AS t3_value, toTypeName(t3_value) +SELECT id AS using_id, toTypeName(using_id), t1.id + 1 AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), +t2.id + 1 AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value), t3.id + 1 AS t3_id, toTypeName(t3_id), t3.value AS t3_value, toTypeName(t3_value) FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 USING (id) FULL JOIN test_table_join_3 AS t3 USING(id) ORDER BY ALL; -0 UInt64 0 UInt64 String 0 UInt64 String 4 UInt64 Join_3_Value_4 String -0 UInt64 0 UInt64 String 3 UInt64 Join_2_Value_3 String 0 UInt64 Join_3_Value_0 String -0 UInt64 0 UInt64 Join_1_Value_0 String 0 UInt64 Join_2_Value_0 String 0 UInt64 Join_3_Value_0 String -1 UInt64 1 UInt64 Join_1_Value_1 String 1 UInt64 Join_2_Value_1 String 1 UInt64 Join_3_Value_1 String -2 UInt64 2 UInt64 Join_1_Value_2 String 0 UInt64 String 0 UInt64 String +0 UInt64 1 UInt64 String 1 UInt32 String 5 UInt64 Join_3_Value_4 String +0 UInt64 1 UInt64 String 4 UInt32 Join_2_Value_3 String 1 UInt64 Join_3_Value_0 String +0 UInt64 1 UInt64 Join_1_Value_0 String 1 UInt32 Join_2_Value_0 String 1 UInt64 Join_3_Value_0 String +1 UInt64 2 UInt64 Join_1_Value_1 String 2 UInt32 Join_2_Value_1 String 2 UInt64 Join_3_Value_1 String +2 UInt64 3 UInt64 Join_1_Value_2 String 1 UInt32 String 1 UInt64 String SELECT '--'; -- SELECT t1.value AS t1_value, toTypeName(t1_value), t2.value AS t2_value, toTypeName(t2_value), t3.value AS t3_value, toTypeName(t3_value) diff --git a/tests/queries/0_stateless/02374_analyzer_join_using.sql.j2 b/tests/queries/0_stateless/02374_analyzer_join_using.sql.j2 index 20e452d3e0d2..f1be214407b6 100644 --- a/tests/queries/0_stateless/02374_analyzer_join_using.sql.j2 +++ b/tests/queries/0_stateless/02374_analyzer_join_using.sql.j2 @@ -64,8 +64,8 @@ FROM test_table_join_1 AS t1 {{ join_type }} JOIN test_table_join_2 AS t2 USING SELECT 'First JOIN {{ first_join_type }} second JOIN {{ second_join_type }}'; -SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), -t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value), t3.id AS t3_id, toTypeName(t3_id), t3.value AS t3_value, toTypeName(t3_value) +SELECT id AS using_id, toTypeName(using_id), t1.id + 1 AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), +t2.id + 1 AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value), t3.id + 1 AS t3_id, toTypeName(t3_id), t3.value AS t3_value, toTypeName(t3_value) FROM test_table_join_1 AS t1 {{ first_join_type }} JOIN test_table_join_2 AS t2 USING (id) {{ second_join_type }} JOIN test_table_join_3 AS t3 USING(id) ORDER BY ALL; diff --git a/tests/queries/0_stateless/03100_lwu_45_query_condition_cache.reference b/tests/queries/0_stateless/03100_lwu_45_query_condition_cache.reference new file mode 100644 index 000000000000..ddc60e3ba0f5 --- /dev/null +++ b/tests/queries/0_stateless/03100_lwu_45_query_condition_cache.reference @@ -0,0 +1,2 @@ +0 +100000 diff --git a/tests/queries/0_stateless/03100_lwu_45_query_condition_cache.sql b/tests/queries/0_stateless/03100_lwu_45_query_condition_cache.sql new file mode 100644 index 000000000000..f1db5ce9adda --- /dev/null +++ b/tests/queries/0_stateless/03100_lwu_45_query_condition_cache.sql @@ -0,0 +1,23 @@ +DROP TABLE IF EXISTS t_lwu_condition_cache; + +SET use_query_condition_cache = 1; +SET enable_lightweight_update = 1; +SET apply_patch_parts = 1; + +CREATE TABLE t_lwu_condition_cache +( + id UInt64 DEFAULT generateSnowflakeID(), + exists UInt8 +) +ENGINE = MergeTree ORDER BY id +SETTINGS index_granularity = 8192, enable_block_number_column = 1, enable_block_offset_column = 1; + +INSERT INTO t_lwu_condition_cache (exists) SELECT 0 FROM numbers(100000); + +SELECT count() FROM t_lwu_condition_cache WHERE exists; + +UPDATE t_lwu_condition_cache SET exists = 1 WHERE 1; + +SELECT count() FROM t_lwu_condition_cache WHERE exists; + +DROP TABLE IF EXISTS t_lwu_condition_cache; diff --git a/tests/queries/0_stateless/03389_regexp_rewrite_nullable_group_by.reference b/tests/queries/0_stateless/03389_regexp_rewrite_nullable_group_by.reference new file mode 100644 index 000000000000..d099bc72639f --- /dev/null +++ b/tests/queries/0_stateless/03389_regexp_rewrite_nullable_group_by.reference @@ -0,0 +1,4 @@ +abc123 +abc123 +\N +\N diff --git a/tests/queries/0_stateless/03389_regexp_rewrite_nullable_group_by.sql b/tests/queries/0_stateless/03389_regexp_rewrite_nullable_group_by.sql new file mode 100644 index 000000000000..47b44703fbff --- /dev/null +++ b/tests/queries/0_stateless/03389_regexp_rewrite_nullable_group_by.sql @@ -0,0 +1,4 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/88218 +-- RegexpFunctionRewritePass must handle Nullable result types from group_by_use_nulls +SET enable_analyzer = 1; +SELECT replaceRegexpOne(identity('abc123'), '^(abc)$', '\\1') GROUP BY 1, toLowCardinality(9), 1 WITH CUBE SETTINGS group_by_use_nulls=1; diff --git a/tests/queries/0_stateless/03538_optimize_rewrite_regexp_functions.reference b/tests/queries/0_stateless/03538_optimize_rewrite_regexp_functions.reference index 54c9b5d8fda7..12102a657fa5 100644 --- a/tests/queries/0_stateless/03538_optimize_rewrite_regexp_functions.reference +++ b/tests/queries/0_stateless/03538_optimize_rewrite_regexp_functions.reference @@ -38,62 +38,9 @@ FROM system.one AS __table1 EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT replaceRegexpAll(identity('abc123'), '^123|456$', ''); SELECT replaceRegexpAll(identity(\'abc123\'), \'^123|456$\', \'\') AS `replaceRegexpAll(identity(\'abc123\'), \'^123|456$\', \'\')` FROM system.one AS __table1 --- Rule 2: If a replaceRegexpOne function has a replacement of nothing other than \1 and some subpatterns in the regexp, or \0 and no subpatterns in the regexp, rewrite it with extract. +-- Rule 2 (replaceRegexpOne -> extract) was removed because extract returns empty string on non-match, +-- while replaceRegexpOne returns the original string, making them semantically different. --- NOTE: \0 is specially treated as NUL instead of capture group reference. Need to use \\0 instead. - --- Only \0, no capture group (should rewrite) -EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT replaceRegexpOne(identity('abc123'), '^abc123$', '\\0'); -SELECT extract(identity(\'abc123\'), \'^abc123$\') AS `replaceRegexpOne(identity(\'abc123\'), \'^abc123$\', \'\\\\\\\\0\')` -FROM system.one AS __table1 --- Only \1, with one capture group (should rewrite) -EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT replaceRegexpOne(identity('abc123'), '^(abc)$', '\1'); -SELECT extract(identity(\'abc123\'), \'^(abc)$\') AS `replaceRegexpOne(identity(\'abc123\'), \'^(abc)$\', \'\\\\\\\\1\')` -FROM system.one AS __table1 --- Only \1, no capture group (should NOT rewrite) -EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT replaceRegexpOne(identity('abc123'), '^abc$', '\1'); -SELECT replaceRegexpOne(identity(\'abc123\'), \'^abc$\', \'\\\\1\') AS `replaceRegexpOne(identity(\'abc123\'), \'^abc$\', \'\\\\\\\\1\')` -FROM system.one AS __table1 --- Pattern not full (should NOT rewrite) -EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT replaceRegexpOne(identity('abc123'), '^abc', '\\0'); -SELECT replaceRegexpOne(identity(\'abc123\'), \'^abc\', \'\\\\0\') AS `replaceRegexpOne(identity(\'abc123\'), \'^abc\', \'\\\\\\\\0\')` -FROM system.one AS __table1 --- Pattern not full (should NOT rewrite) -EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT replaceRegexpOne(identity('abc123'), 'abc$', '\\0'); -SELECT replaceRegexpOne(identity(\'abc123\'), \'abc$\', \'\\\\0\') AS `replaceRegexpOne(identity(\'abc123\'), \'abc$\', \'\\\\\\\\0\')` -FROM system.one AS __table1 --- Pattern not full (should NOT rewrite) -EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT replaceRegexpOne(identity('abc123'), 'abc', '\\0'); -SELECT replaceRegexpOne(identity(\'abc123\'), \'abc\', \'\\\\0\') AS `replaceRegexpOne(identity(\'abc123\'), \'abc\', \'\\\\\\\\0\')` -FROM system.one AS __table1 --- Pattern not full (should NOT rewrite) -EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT replaceRegexpOne(identity('abc123'), '^abc\\$', '\\0'); -SELECT replaceRegexpOne(identity(\'abc123\'), \'^abc\\\\$\', \'\\\\0\') AS `replaceRegexpOne(identity(\'abc123\'), \'^abc\\\\\\\\$\', \'\\\\\\\\0\')` -FROM system.one AS __table1 --- Pattern not full (should NOT rewrite) -EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT replaceRegexpOne(identity('abc123'), '^ab|c$', '\\0'); -SELECT replaceRegexpOne(identity(\'abc123\'), \'^ab|c$\', \'\\\\0\') AS `replaceRegexpOne(identity(\'abc123\'), \'^ab|c$\', \'\\\\\\\\0\')` -FROM system.one AS __table1 --- \0 with extra characters (should NOT rewrite) -EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT replaceRegexpOne(identity('abc123'), '^abc123$', 'pre\\0post'); -SELECT replaceRegexpOne(identity(\'abc123\'), \'^abc123$\', \'pre\\\\0post\') AS `replaceRegexpOne(identity(\'abc123\'), \'^abc123$\', \'pre\\\\\\\\0post\')` -FROM system.one AS __table1 --- \1 with two capture groups (should rewrite — only \1 used) -EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT replaceRegexpOne(identity('abc123'), '^(a)(b)$', '\1'); -SELECT extract(identity(\'abc123\'), \'^(a)(b)$\') AS `replaceRegexpOne(identity(\'abc123\'), \'^(a)(b)$\', \'\\\\\\\\1\')` -FROM system.one AS __table1 --- \2 used (should NOT rewrite) -EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT replaceRegexpOne(identity('abc123'), '^(a)(b)$', '\2'); -SELECT replaceRegexpOne(identity(\'abc123\'), \'^(a)(b)$\', \'\\\\2\') AS `replaceRegexpOne(identity(\'abc123\'), \'^(a)(b)$\', \'\\\\\\\\2\')` -FROM system.one AS __table1 --- Mixed content in replacement (should NOT rewrite) -EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT replaceRegexpOne(identity('abc123'), '^(abc)$', 'X\1Y'); -SELECT replaceRegexpOne(identity(\'abc123\'), \'^(abc)$\', \'X\\\\1Y\') AS `replaceRegexpOne(identity(\'abc123\'), \'^(abc)$\', \'X\\\\\\\\1Y\')` -FROM system.one AS __table1 --- Escaped backslash in replacement (should NOT rewrite) -EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT replaceRegexpOne(identity('abc123'), '^(abc)$', '\\\\1'); -SELECT replaceRegexpOne(identity(\'abc123\'), \'^(abc)$\', \'\\\\\\\\1\') AS `replaceRegexpOne(identity(\'abc123\'), \'^(abc)$\', \'\\\\\\\\\\\\\\\\1\')` -FROM system.one AS __table1 -- Rule 3: If an extract function has a regexp with some subpatterns and the regexp starts with ^.* or ending with an unescaped .*$, remove this prefix and/or suffix. -- Starts with ^.* (should strip prefix) @@ -134,19 +81,11 @@ SELECT extract(identity(\'abc123\'), \'(abc).*\') AS `extract(identity(\'abc123\ FROM system.one AS __table1 -- Cascade tests --- Rule 1 + Rule 2: replaceRegexpAll to replaceRegexpOne to extract +-- Rule 1 only: replaceRegexpAll to replaceRegexpOne (Rule 2 removed) EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT replaceRegexpAll(identity('abc'), '^(abc)', '\1'); SELECT replaceRegexpOne(identity(\'abc\'), \'^(abc)\', \'\\\\1\') AS `replaceRegexpAll(identity(\'abc\'), \'^(abc)\', \'\\\\\\\\1\')` FROM system.one AS __table1 --- Rule 2 + 3: replaceRegexpOne -> extract -> simplified extract -EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT replaceRegexpOne(identity('abc'), '^.*(abc).*$','\1'); -SELECT extract(identity(\'abc\'), \'(abc)\') AS `replaceRegexpOne(identity(\'abc\'), \'^.*(abc).*$\', \'\\\\\\\\1\')` -FROM system.one AS __table1 --- Rule 1 + 2 + 3: replaceRegexpAll -> replaceRegexpOne -> extract -> simplified extract -EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT replaceRegexpAll(identity('abc'), '^.*(abc).*$','\1'); -SELECT extract(identity(\'abc\'), \'(abc)\') AS `replaceRegexpAll(identity(\'abc\'), \'^.*(abc).*$\', \'\\\\\\\\1\')` -FROM system.one AS __table1 --- ClickBench Q28 +-- ClickBench Q28: Rule 1 only: regexp_replace to replaceRegexpOne EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT REGEXP_REPLACE(identity('some referer'), '^https?://(?:www\.)?([^/]+)/.*$', '\1'); -SELECT extract(identity(\'some referer\'), \'^https?://(?:www\\\\.)?([^/]+)/\') AS `REGEXP_REPLACE(identity(\'some referer\'), \'^https?://(?:www\\\\\\\\.)?([^/]+)/.*$\', \'\\\\\\\\1\')` +SELECT replaceRegexpOne(identity(\'some referer\'), \'^https?://(?:www\\\\.)?([^/]+)/.*$\', \'\\\\1\') AS `REGEXP_REPLACE(identity(\'some referer\'), \'^https?://(?:www\\\\\\\\.)?([^/]+)/.*$\', \'\\\\\\\\1\')` FROM system.one AS __table1 diff --git a/tests/queries/0_stateless/03538_optimize_rewrite_regexp_functions.sql b/tests/queries/0_stateless/03538_optimize_rewrite_regexp_functions.sql index e5f37eb54c9e..3e0e3194442d 100644 --- a/tests/queries/0_stateless/03538_optimize_rewrite_regexp_functions.sql +++ b/tests/queries/0_stateless/03538_optimize_rewrite_regexp_functions.sql @@ -28,49 +28,8 @@ EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT replaceRegexpAll(identity( -- Pattern with alternatives (should NOT rewrite) EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT replaceRegexpAll(identity('abc123'), '^123|456$', ''); --- Rule 2: If a replaceRegexpOne function has a replacement of nothing other than \1 and some subpatterns in the regexp, or \0 and no subpatterns in the regexp, rewrite it with extract. - --- NOTE: \0 is specially treated as NUL instead of capture group reference. Need to use \\0 instead. - --- Only \0, no capture group (should rewrite) -EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT replaceRegexpOne(identity('abc123'), '^abc123$', '\\0'); - --- Only \1, with one capture group (should rewrite) -EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT replaceRegexpOne(identity('abc123'), '^(abc)$', '\1'); - --- Only \1, no capture group (should NOT rewrite) -EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT replaceRegexpOne(identity('abc123'), '^abc$', '\1'); - --- Pattern not full (should NOT rewrite) -EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT replaceRegexpOne(identity('abc123'), '^abc', '\\0'); - --- Pattern not full (should NOT rewrite) -EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT replaceRegexpOne(identity('abc123'), 'abc$', '\\0'); - --- Pattern not full (should NOT rewrite) -EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT replaceRegexpOne(identity('abc123'), 'abc', '\\0'); - --- Pattern not full (should NOT rewrite) -EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT replaceRegexpOne(identity('abc123'), '^abc\\$', '\\0'); - --- Pattern not full (should NOT rewrite) -EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT replaceRegexpOne(identity('abc123'), '^ab|c$', '\\0'); - --- \0 with extra characters (should NOT rewrite) -EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT replaceRegexpOne(identity('abc123'), '^abc123$', 'pre\\0post'); - --- \1 with two capture groups (should rewrite — only \1 used) -EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT replaceRegexpOne(identity('abc123'), '^(a)(b)$', '\1'); - --- \2 used (should NOT rewrite) -EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT replaceRegexpOne(identity('abc123'), '^(a)(b)$', '\2'); - --- Mixed content in replacement (should NOT rewrite) -EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT replaceRegexpOne(identity('abc123'), '^(abc)$', 'X\1Y'); - --- Escaped backslash in replacement (should NOT rewrite) -EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT replaceRegexpOne(identity('abc123'), '^(abc)$', '\\\\1'); - +-- Rule 2 (replaceRegexpOne -> extract) was removed because extract returns empty string on non-match, +-- while replaceRegexpOne returns the original string, making them semantically different. -- Rule 3: If an extract function has a regexp with some subpatterns and the regexp starts with ^.* or ending with an unescaped .*$, remove this prefix and/or suffix. @@ -104,14 +63,8 @@ EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT extract(identity('abc123') -- Cascade tests --- Rule 1 + Rule 2: replaceRegexpAll to replaceRegexpOne to extract +-- Rule 1 only: replaceRegexpAll to replaceRegexpOne (Rule 2 removed) EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT replaceRegexpAll(identity('abc'), '^(abc)', '\1'); --- Rule 2 + 3: replaceRegexpOne -> extract -> simplified extract -EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT replaceRegexpOne(identity('abc'), '^.*(abc).*$','\1'); - --- Rule 1 + 2 + 3: replaceRegexpAll -> replaceRegexpOne -> extract -> simplified extract -EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT replaceRegexpAll(identity('abc'), '^.*(abc).*$','\1'); - --- ClickBench Q28 +-- ClickBench Q28: Rule 1 only: regexp_replace to replaceRegexpOne EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT REGEXP_REPLACE(identity('some referer'), '^https?://(?:www\.)?([^/]+)/.*$', '\1'); diff --git a/tests/queries/0_stateless/03716_multiple_joins_using_top_level_identifier.reference b/tests/queries/0_stateless/03716_multiple_joins_using_top_level_identifier.reference new file mode 100644 index 000000000000..5b5f70835c39 --- /dev/null +++ b/tests/queries/0_stateless/03716_multiple_joins_using_top_level_identifier.reference @@ -0,0 +1,4 @@ +a_1 v +b_1 w +_1 v +b_1 w diff --git a/tests/queries/0_stateless/03716_multiple_joins_using_top_level_identifier.sql b/tests/queries/0_stateless/03716_multiple_joins_using_top_level_identifier.sql new file mode 100644 index 000000000000..820fc46048bb --- /dev/null +++ b/tests/queries/0_stateless/03716_multiple_joins_using_top_level_identifier.sql @@ -0,0 +1,36 @@ +SET analyzer_compatibility_join_using_top_level_identifier = 1; + +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; +DROP TABLE IF EXISTS t3; + +CREATE TABLE t1 (id String, val String) ENGINE = MergeTree() ORDER BY id; +CREATE TABLE t2 (id String, code String) ENGINE = MergeTree() ORDER BY id; +CREATE TABLE t3 (id String, code String) ENGINE = MergeTree() ORDER BY id; + +INSERT INTO t1 VALUES ('a', 'v'), ('b', 'w'); +INSERT INTO t2 VALUES ('b', 'c'); +INSERT INTO t3 VALUES ('a_1', 'c'), ('b_1', 'd'); + +SET enable_analyzer = 1; + +SELECT t1.id || '_1' AS id, t1.val +FROM t1 +LEFT JOIN t2 ON t1.id = t2.id +LEFT JOIN t3 USING (id) +ORDER BY t1.val +; + +SELECT t2.id || '_1' AS id, t1.val +FROM t1 +LEFT JOIN t2 ON t1.id = t2.id +LEFT JOIN t3 USING (id) +ORDER BY t1.val +; + +SELECT t1.id || t2.id || '_1' AS id, t1.val +FROM t1 +INNER JOIN t2 ON t1.id = t2.id +LEFT JOIN t3 USING (id) +ORDER BY t1.val +; -- { serverError AMBIGUOUS_IDENTIFIER } diff --git a/tests/queries/0_stateless/03727_alter_with_localhost_remote.reference b/tests/queries/0_stateless/03727_alter_with_localhost_remote.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/03727_alter_with_localhost_remote.sql b/tests/queries/0_stateless/03727_alter_with_localhost_remote.sql new file mode 100644 index 000000000000..7051909669d6 --- /dev/null +++ b/tests/queries/0_stateless/03727_alter_with_localhost_remote.sql @@ -0,0 +1,3 @@ +-- Tags: no-replicated-database, no-parallel + +-- The test has been removed from the backport because it requires `EXECUTE AS` feature. diff --git a/tests/queries/0_stateless/03732_toweek_partition_pruning.reference b/tests/queries/0_stateless/03732_toweek_partition_pruning.reference new file mode 100644 index 000000000000..02af9a39522c --- /dev/null +++ b/tests/queries/0_stateless/03732_toweek_partition_pruning.reference @@ -0,0 +1,2 @@ +49 2 +52 2 diff --git a/tests/queries/0_stateless/03732_toweek_partition_pruning.sql b/tests/queries/0_stateless/03732_toweek_partition_pruning.sql new file mode 100644 index 000000000000..bd1ce72d7713 --- /dev/null +++ b/tests/queries/0_stateless/03732_toweek_partition_pruning.sql @@ -0,0 +1,21 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/90240 +-- toWeek() incorrectly claimed monotonicity, causing partition pruning +-- to skip December partitions for weeks 49-52. + +DROP TABLE IF EXISTS test_toweek_pruning; + +CREATE TABLE test_toweek_pruning (date Date, value String) +ENGINE = MergeTree +PARTITION BY toYYYYMM(date) +ORDER BY date; + +INSERT INTO test_toweek_pruning VALUES + ('2025-11-30', 'x'), ('2025-12-01', 'x'), ('2025-12-07', 'x'), + ('2025-12-08', 'x'), ('2025-12-14', 'x'), ('2025-12-15', 'x'), + ('2025-12-21', 'x'), ('2025-12-22', 'x'), ('2025-12-28', 'x'), + ('2025-12-29', 'x'), ('2025-12-31', 'x'); + +SELECT toWeek(date, 3), count() FROM test_toweek_pruning WHERE toWeek(date, 3) = 49 GROUP BY 1; +SELECT toWeek(date, 3), count() FROM test_toweek_pruning WHERE toWeek(date, 3) = 52 GROUP BY 1; + +DROP TABLE test_toweek_pruning; diff --git a/tests/queries/0_stateless/03789_to_year_week_monotonicity_key_condition.reference b/tests/queries/0_stateless/03789_to_year_week_monotonicity_key_condition.reference index 7e87f95616da..a8960301ac05 100644 --- a/tests/queries/0_stateless/03789_to_year_week_monotonicity_key_condition.reference +++ b/tests/queries/0_stateless/03789_to_year_week_monotonicity_key_condition.reference @@ -33,8 +33,11 @@ CREATE TABLE t (dt DateTime) ENGINE=MergeTree ORDER BY dt SETTINGS index_granula INSERT INTO t SELECT toDateTime('2020-01-01 00:00:00') + number * 3600 FROM numbers(24 * 40); SELECT count() FROM t -WHERE toWeek(dt) = toWeek(toDateTime('2020-01-15 00:00:00')) SETTINGS force_primary_key = 1, max_rows_to_read = 169; +WHERE toWeek(dt) = toWeek(toDateTime('2020-01-15 00:00:00')); 168 +SELECT count() +FROM t +WHERE toWeek(dt) = toWeek(toDateTime('2020-01-15 00:00:00')) SETTINGS force_primary_key = 1; -- { serverError INDEX_NOT_USED } DROP TABLE IF EXISTS t; CREATE TABLE t (s LowCardinality(String)) ENGINE = MergeTree ORDER BY s; INSERT INTO t VALUES ('2020-01-10 00:00:00'), ('2020-01-2 00:00:00'); diff --git a/tests/queries/0_stateless/03789_to_year_week_monotonicity_key_condition.sql b/tests/queries/0_stateless/03789_to_year_week_monotonicity_key_condition.sql index fb550e72051f..5e81bfe84ce5 100644 --- a/tests/queries/0_stateless/03789_to_year_week_monotonicity_key_condition.sql +++ b/tests/queries/0_stateless/03789_to_year_week_monotonicity_key_condition.sql @@ -40,7 +40,11 @@ INSERT INTO t SELECT toDateTime('2020-01-01 00:00:00') + number * 3600 FROM numb SELECT count() FROM t -WHERE toWeek(dt) = toWeek(toDateTime('2020-01-15 00:00:00')) SETTINGS force_primary_key = 1, max_rows_to_read = 169; +WHERE toWeek(dt) = toWeek(toDateTime('2020-01-15 00:00:00')); + +SELECT count() +FROM t +WHERE toWeek(dt) = toWeek(toDateTime('2020-01-15 00:00:00')) SETTINGS force_primary_key = 1; -- { serverError INDEX_NOT_USED } DROP TABLE IF EXISTS t; CREATE TABLE t (s LowCardinality(String)) ENGINE = MergeTree ORDER BY s; diff --git a/tests/queries/0_stateless/03801_attach_view_with_sql_security.reference b/tests/queries/0_stateless/03801_attach_view_with_sql_security.reference new file mode 100644 index 000000000000..4a703b3be841 --- /dev/null +++ b/tests/queries/0_stateless/03801_attach_view_with_sql_security.reference @@ -0,0 +1,2 @@ +ACCESS_DENIED +ACCESS_DENIED diff --git a/tests/queries/0_stateless/03801_attach_view_with_sql_security.sh b/tests/queries/0_stateless/03801_attach_view_with_sql_security.sh new file mode 100755 index 000000000000..98b791ffcffe --- /dev/null +++ b/tests/queries/0_stateless/03801_attach_view_with_sql_security.sh @@ -0,0 +1,41 @@ +#!/usr/bin/env bash +# Tags: no-parallel + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +user="user03801_${CLICKHOUSE_DATABASE}_$RANDOM" +db=${CLICKHOUSE_DATABASE} + +${CLICKHOUSE_CLIENT} <&1 | grep -q "ACCESS_DENIED" && echo "ACCESS_DENIED" || echo "NO ERROR" + +${CLICKHOUSE_CLIENT} --user $user --query " + ATTACH VIEW $db.test_mv UUID '8025ef9c-d735-4c16-ab4c-7f1f5110d049' + (s String) SQL SECURITY NONE + AS SELECT * FROM $db.test_table; +" 2>&1 | grep -q "ACCESS_DENIED" && echo "ACCESS_DENIED" || echo "NO ERROR" + +${CLICKHOUSE_CLIENT} --query "GRANT ALLOW SQL SECURITY NONE ON *.* TO $user;" + +${CLICKHOUSE_CLIENT} --user $user --query " + ATTACH VIEW $db.test_mv UUID '7025ef9c-d735-4c16-ab4c-7f1f5110d049' + (s String) SQL SECURITY NONE + AS SELECT * FROM $db.test_table + SETTINGS send_logs_level = 'error'; +" + +${CLICKHOUSE_CLIENT} --query "DROP TABLE $db.test_mv;" +${CLICKHOUSE_CLIENT} --query "DROP USER $user;" diff --git a/tests/queries/0_stateless/03813_mergetree_projection_grants.reference b/tests/queries/0_stateless/03813_mergetree_projection_grants.reference new file mode 100644 index 000000000000..a6219591f168 --- /dev/null +++ b/tests/queries/0_stateless/03813_mergetree_projection_grants.reference @@ -0,0 +1,4 @@ +=== mergeTreeProjection without grant === +ACCESS_DENIED +=== mergeTreeProjection with grant === +1 diff --git a/tests/queries/0_stateless/03813_mergetree_projection_grants.sh b/tests/queries/0_stateless/03813_mergetree_projection_grants.sh new file mode 100755 index 000000000000..f49a45e513ea --- /dev/null +++ b/tests/queries/0_stateless/03813_mergetree_projection_grants.sh @@ -0,0 +1,42 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, no-parallel + +# Test that mergeTreeProjection checks table grants correctly. +# This function should require SELECT permission on the source table. + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS test_proj_grants_mt" +$CLICKHOUSE_CLIENT -q "DROP USER IF EXISTS test_user_03813" + +$CLICKHOUSE_CLIENT -q " +CREATE TABLE test_proj_grants_mt (key Int, value Int, PROJECTION proj_sum (SELECT key, sum(value) GROUP BY key)) +ENGINE = MergeTree() ORDER BY key +" + +$CLICKHOUSE_CLIENT -q "INSERT INTO test_proj_grants_mt SELECT number % 10, number FROM numbers(1000)" +$CLICKHOUSE_CLIENT -q "OPTIMIZE TABLE test_proj_grants_mt FINAL" + +# Create user without any grants +$CLICKHOUSE_CLIENT -q "CREATE USER test_user_03813" + +# Test mergeTreeProjection - should fail without SELECT grant +echo "=== mergeTreeProjection without grant ===" +$CLICKHOUSE_CLIENT --user test_user_03813 -q " +SELECT count() FROM mergeTreeProjection(currentDatabase(), test_proj_grants_mt, proj_sum) +" 2>&1 | grep -o 'ACCESS_DENIED' | head -1 + +# Grant SELECT permission +$CLICKHOUSE_CLIENT -q "GRANT SELECT ON ${CLICKHOUSE_DATABASE}.test_proj_grants_mt TO test_user_03813" + +# Test mergeTreeProjection - should work with SELECT grant +echo "=== mergeTreeProjection with grant ===" +$CLICKHOUSE_CLIENT --user test_user_03813 -q " +SELECT count() > 0 FROM mergeTreeProjection(currentDatabase(), test_proj_grants_mt, proj_sum) +" + +# Cleanup +$CLICKHOUSE_CLIENT -q "DROP TABLE test_proj_grants_mt" +$CLICKHOUSE_CLIENT -q "DROP USER test_user_03813" diff --git a/tests/queries/0_stateless/03821_json_skip_path_fix.reference b/tests/queries/0_stateless/03821_json_skip_path_fix.reference new file mode 100644 index 000000000000..be9e7e81710a --- /dev/null +++ b/tests/queries/0_stateless/03821_json_skip_path_fix.reference @@ -0,0 +1 @@ +{"path_1":42,"path_2":42} diff --git a/tests/queries/0_stateless/03821_json_skip_path_fix.sql b/tests/queries/0_stateless/03821_json_skip_path_fix.sql new file mode 100644 index 000000000000..8c770fd14e29 --- /dev/null +++ b/tests/queries/0_stateless/03821_json_skip_path_fix.sql @@ -0,0 +1,2 @@ +select '{"path_1" : 42, "path_2" : 42, "path" : {"a" : 42}}'::JSON(SKIP path); + diff --git a/tests/queries/0_stateless/03822_file_function_read_on_file_grant.reference b/tests/queries/0_stateless/03822_file_function_read_on_file_grant.reference new file mode 100644 index 000000000000..865d4ff5230b --- /dev/null +++ b/tests/queries/0_stateless/03822_file_function_read_on_file_grant.reference @@ -0,0 +1,4 @@ +ACCESS_DENIED +ACCESS_DENIED +FILE_DOESNT_EXIST +CANNOT_STAT diff --git a/tests/queries/0_stateless/03822_file_function_read_on_file_grant.sh b/tests/queries/0_stateless/03822_file_function_read_on_file_grant.sh new file mode 100755 index 000000000000..6db574849a79 --- /dev/null +++ b/tests/queries/0_stateless/03822_file_function_read_on_file_grant.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +user="user_03822_${CLICKHOUSE_DATABASE}_$RANDOM" +missing_txt="missing_03822_${CLICKHOUSE_DATABASE}_$RANDOM.txt" +missing_csv="missing_03822_${CLICKHOUSE_DATABASE}_$RANDOM.csv" + +${CLICKHOUSE_CLIENT} <&1 | grep -c "ACCESS_DENIED") >= 1 )) && echo "ACCESS_DENIED" || echo "UNEXPECTED"; +(( $(${CLICKHOUSE_CLIENT} --user $user --query "DESCRIBE TABLE file('$missing_csv', 'CSV')" 2>&1 | grep -c "ACCESS_DENIED") >= 1 )) && echo "ACCESS_DENIED" || echo "UNEXPECTED"; + +${CLICKHOUSE_CLIENT} --query "GRANT READ ON FILE TO $user"; + +(( $(${CLICKHOUSE_CLIENT} --user $user --query "SELECT file('$missing_txt')" 2>&1 | grep -c "FILE_DOESNT_EXIST") >= 1 )) && echo "FILE_DOESNT_EXIST" || echo "UNEXPECTED"; +(( $(${CLICKHOUSE_CLIENT} --user $user --query "DESCRIBE TABLE file('$missing_csv', 'CSV')" 2>&1 | grep -c "CANNOT_STAT") >= 1 )) && echo "CANNOT_STAT" || echo "UNEXPECTED"; + +${CLICKHOUSE_CLIENT} --query "DROP USER IF EXISTS $user"; diff --git a/tests/queries/0_stateless/03822_revoke_default_role.reference b/tests/queries/0_stateless/03822_revoke_default_role.reference new file mode 100644 index 000000000000..1d674b07c13c --- /dev/null +++ b/tests/queries/0_stateless/03822_revoke_default_role.reference @@ -0,0 +1,4 @@ +CREATE USER user_03822_default IDENTIFIED WITH no_password DEFAULT ROLE role_03822_default +role_03822_default 0 1 +After revoke: +CREATE USER user_03822_default IDENTIFIED WITH no_password DEFAULT ROLE NONE diff --git a/tests/queries/0_stateless/03822_revoke_default_role.sh b/tests/queries/0_stateless/03822_revoke_default_role.sh new file mode 100755 index 000000000000..aa51bd26e643 --- /dev/null +++ b/tests/queries/0_stateless/03822_revoke_default_role.sh @@ -0,0 +1,27 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +user_name="user_03822_${CLICKHOUSE_DATABASE}" +role_name="role_03822_${CLICKHOUSE_DATABASE}" + +$CLICKHOUSE_CLIENT -q "DROP USER IF EXISTS ${user_name}" +$CLICKHOUSE_CLIENT -q "DROP ROLE IF EXISTS ${role_name}" + +$CLICKHOUSE_CLIENT -q "CREATE USER ${user_name}" +$CLICKHOUSE_CLIENT -q "CREATE ROLE ${role_name}" + +$CLICKHOUSE_CLIENT -q "GRANT ${role_name} TO ${user_name}" +$CLICKHOUSE_CLIENT -q "SET DEFAULT ROLE ${role_name} TO ${user_name}" +$CLICKHOUSE_CLIENT -q "SHOW CREATE USER ${user_name}" +$CLICKHOUSE_CLIENT --user ${user_name} -q "SHOW CURRENT ROLES" + +echo "After revoke:" +$CLICKHOUSE_CLIENT -q "REVOKE ${role_name} FROM ${user_name}" +$CLICKHOUSE_CLIENT -q "SHOW CREATE USER ${user_name}" +$CLICKHOUSE_CLIENT --user ${user_name} -q "SHOW CURRENT ROLES" + +$CLICKHOUSE_CLIENT -q "DROP USER ${user_name}" +$CLICKHOUSE_CLIENT -q "DROP ROLE ${role_name}" diff --git a/tests/queries/0_stateless/03831_index_of_assume_sorted_const_exception.reference b/tests/queries/0_stateless/03831_index_of_assume_sorted_const_exception.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/03831_index_of_assume_sorted_const_exception.sql b/tests/queries/0_stateless/03831_index_of_assume_sorted_const_exception.sql new file mode 100644 index 000000000000..6bbc1f0ccf33 --- /dev/null +++ b/tests/queries/0_stateless/03831_index_of_assume_sorted_const_exception.sql @@ -0,0 +1,4 @@ +-- indexOfAssumeSorted with incompatible types in constant array should throw an exception, not crash (std::terminate from noexcept). +-- https://github.com/ClickHouse/ClickHouse/issues/92975 +SELECT indexOfAssumeSorted(['1.1.1.1'::IPv4], 0); -- { serverError BAD_TYPE_OF_FIELD } +SELECT indexOfAssumeSorted(['172.181.59.225'::IPv4], 3350671033650519441::Int8); -- { serverError BAD_TYPE_OF_FIELD } diff --git a/tests/queries/0_stateless/03835_todate_monotonicity_boundary.reference b/tests/queries/0_stateless/03835_todate_monotonicity_boundary.reference new file mode 100644 index 000000000000..d00491fd7e5b --- /dev/null +++ b/tests/queries/0_stateless/03835_todate_monotonicity_boundary.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/03835_todate_monotonicity_boundary.sql b/tests/queries/0_stateless/03835_todate_monotonicity_boundary.sql new file mode 100644 index 000000000000..4460c5da7b68 --- /dev/null +++ b/tests/queries/0_stateless/03835_todate_monotonicity_boundary.sql @@ -0,0 +1,18 @@ +-- Regression test for off-by-one in ToDateMonotonicity boundary check. +-- The toDate function treats values <= DATE_LUT_MAX_DAY_NUM (65535) as day numbers +-- and values > 65535 as unix timestamps. The monotonicity check must correctly +-- identify ranges crossing this boundary as non-monotonic. +-- Previously caused LOGICAL_ERROR "Invalid binary search result in MergeTreeSetIndex" in debug builds. +-- https://github.com/ClickHouse/ClickHouse/issues/90461 + +DROP TABLE IF EXISTS t_todate_mono; + +CREATE TABLE t_todate_mono (x UInt64) ENGINE = MergeTree ORDER BY x SETTINGS index_granularity = 1; +INSERT INTO t_todate_mono SELECT number FROM numbers(100000); + +-- With index_granularity=1, mark 65535 covers the range [65535, 65536], +-- which crosses the DATE_LUT_MAX_DAY_NUM boundary. +-- The toDate conversion in the key condition chain must report this range as non-monotonic. +SELECT count() > 0 FROM t_todate_mono WHERE toDate(x) IN (toDate(12345), toDate(67890)); + +DROP TABLE t_todate_mono; diff --git a/tests/queries/0_stateless/03902_format_datetime_fractional_msan.reference b/tests/queries/0_stateless/03902_format_datetime_fractional_msan.reference new file mode 100644 index 000000000000..9ab82215084b --- /dev/null +++ b/tests/queries/0_stateless/03902_format_datetime_fractional_msan.reference @@ -0,0 +1,9 @@ +January 12345678 +January 1234 +January 000000 +January 1234 +January 0 +005 +050 +005000 +00 diff --git a/tests/queries/0_stateless/03902_format_datetime_fractional_msan.sql b/tests/queries/0_stateless/03902_format_datetime_fractional_msan.sql new file mode 100644 index 000000000000..d3fa89558188 --- /dev/null +++ b/tests/queries/0_stateless/03902_format_datetime_fractional_msan.sql @@ -0,0 +1,18 @@ +-- Test for use-of-uninitialized-value in formatDateTime fractional second formatters. +-- %M is a variable-width formatter, so the output buffer is not pre-filled with the template. +-- The fractional second formatters must fully initialize their output bytes. + +-- mysqlFractionalSecondScaleNumDigits (formatdatetime_f_prints_scale_number_of_digits = 1) +SELECT formatDateTime(toDateTime64('2024-01-01 12:00:00.12345678', 8, 'UTC'), '%M %f') SETTINGS formatdatetime_f_prints_scale_number_of_digits = 1; +SELECT formatDateTime(toDateTime64('2024-01-01 12:00:00.1234', 4, 'UTC'), '%M %f') SETTINGS formatdatetime_f_prints_scale_number_of_digits = 1; +SELECT formatDateTime(toDateTime64('2024-01-01 12:00:00', 0, 'UTC'), '%M %f') SETTINGS formatdatetime_f_prints_scale_number_of_digits = 1; + +-- mysqlFractionalSecondSingleZero (formatdatetime_f_prints_single_zero = 1) +SELECT formatDateTime(toDateTime64('2024-01-01 12:00:00.1234', 4, 'UTC'), '%M %f') SETTINGS formatdatetime_f_prints_single_zero = 1; +SELECT formatDateTime(toDateTime64('2024-01-01 12:00:00', 0, 'UTC'), '%M %f') SETTINGS formatdatetime_f_prints_single_zero = 1; + +-- jodaFractionOfSecond: leading zeros must be preserved (e.g. fractional_second=5, scale=3 -> "005") +SELECT formatDateTimeInJodaSyntax(toDateTime64('2024-01-01 12:00:00.005', 3, 'UTC'), 'SSS'); +SELECT formatDateTimeInJodaSyntax(toDateTime64('2024-01-01 12:00:00.050', 3, 'UTC'), 'SSS'); +SELECT formatDateTimeInJodaSyntax(toDateTime64('2024-01-01 12:00:00.005', 3, 'UTC'), 'SSSSSS'); +SELECT formatDateTimeInJodaSyntax(toDateTime64('2024-01-01 12:00:00.005', 3, 'UTC'), 'SS'); diff --git a/tests/queries/0_stateless/03903_cancellation_checker_large_timeout.reference b/tests/queries/0_stateless/03903_cancellation_checker_large_timeout.reference new file mode 100644 index 000000000000..e8183f05f5db --- /dev/null +++ b/tests/queries/0_stateless/03903_cancellation_checker_large_timeout.reference @@ -0,0 +1,3 @@ +1 +1 +1 diff --git a/tests/queries/0_stateless/03903_cancellation_checker_large_timeout.sql b/tests/queries/0_stateless/03903_cancellation_checker_large_timeout.sql new file mode 100644 index 000000000000..edadd3e08f05 --- /dev/null +++ b/tests/queries/0_stateless/03903_cancellation_checker_large_timeout.sql @@ -0,0 +1,9 @@ +-- Tags: no-fasttest +-- Test that extremely large max_execution_time values don't cause livelock in CancellationChecker. +-- The timeout is internally capped to 1 year to prevent overflow in std::condition_variable::wait_for. +-- This query should complete quickly without hanging, regardless of the huge timeout value. + +SET max_execution_time = 9223372041; -- Close to INT64_MAX / 1000000000, would overflow when converted to nanoseconds +SELECT 1; +SELECT 1; +SELECT 1; diff --git a/tests/queries/0_stateless/03903_join_alias_dups.reference b/tests/queries/0_stateless/03903_join_alias_dups.reference new file mode 100644 index 000000000000..30ea790176ca --- /dev/null +++ b/tests/queries/0_stateless/03903_join_alias_dups.reference @@ -0,0 +1,4 @@ +42 +1 g +42 +1 g diff --git a/tests/queries/0_stateless/03903_join_alias_dups.sql.j2 b/tests/queries/0_stateless/03903_join_alias_dups.sql.j2 new file mode 100644 index 000000000000..71aad85c3d14 --- /dev/null +++ b/tests/queries/0_stateless/03903_join_alias_dups.sql.j2 @@ -0,0 +1,32 @@ +{% for enable_analyzer in [0, 1] -%} + +SET enable_analyzer = {{ enable_analyzer }}; +SET join_algorithm = 'hash'; + +SELECT A.g +FROM ( SELECT 1::Int8 AS d ) AS B +JOIN ( SELECT 1::Int8 as d, g, 42::Int32 AS g FROM ( SELECT '128' AS g ) ) AS A +USING (d); + +WITH B AS ( +SELECT + 1 AS d + ), + A AS ( +SELECT + g, d, + MAX(IF(m = 'A', g, NULL)) AS g +FROM + ( + SELECT + 'g' AS g, 1 d, + 'A' m + ) +GROUP BY ALL ) +SELECT + B.*, + A.g +FROM + B +LEFT JOIN A USING d; +{% endfor -%} diff --git a/tests/queries/0_stateless/03913_data_type_function_null_arg_hash.reference b/tests/queries/0_stateless/03913_data_type_function_null_arg_hash.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/03913_data_type_function_null_arg_hash.sql b/tests/queries/0_stateless/03913_data_type_function_null_arg_hash.sql new file mode 100644 index 000000000000..956fce12ae8d --- /dev/null +++ b/tests/queries/0_stateless/03913_data_type_function_null_arg_hash.sql @@ -0,0 +1,3 @@ +-- Regression test: DataTypeFunction::updateHashImpl must handle null argument types +-- https://s3.amazonaws.com/clickhouse-test-reports/json.html?REF=master&sha=b9e68f4b9b0b33c7db43b00afb3eff4ff2050694&name_0=MasterCI&name_1=AST%20fuzzer%20%28amd_ubsan%29 +SELECT arrayFold((acc, x) -> plus(acc, toString(NULL, toLowCardinality(toUInt128(4)), materialize(4), 'aaaa', materialize(4), 4, 4, 1), x), range(number), ((acc, x) -> if(x % 2, arrayPushFront(acc, x), arrayPushBack(acc, x)))) FROM system.numbers LIMIT 0; -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } diff --git a/tests/queries/0_stateless/03915_bech32_invalid_witness_version.reference b/tests/queries/0_stateless/03915_bech32_invalid_witness_version.reference new file mode 100644 index 000000000000..55b1c2ff759b --- /dev/null +++ b/tests/queries/0_stateless/03915_bech32_invalid_witness_version.reference @@ -0,0 +1,3 @@ +bc1qw3jhxaq2azfhz +bc1pw3jhxaqz7j562 +bc1sw3jhxaq2aj0ly diff --git a/tests/queries/0_stateless/03915_bech32_invalid_witness_version.sql b/tests/queries/0_stateless/03915_bech32_invalid_witness_version.sql new file mode 100644 index 000000000000..2182f04eba2e --- /dev/null +++ b/tests/queries/0_stateless/03915_bech32_invalid_witness_version.sql @@ -0,0 +1,19 @@ +-- Tags: no-fasttest +-- Test valid witness versions (0-16) +SELECT bech32Encode('bc', 'test', 0); +SELECT bech32Encode('bc', 'test', 1); +SELECT bech32Encode('bc', 'test', 16); + +-- Test invalid witness versions (should throw BAD_ARGUMENTS exception) +SELECT bech32Encode('bc', 'test', 17); -- { serverError BAD_ARGUMENTS } +SELECT bech32Encode('bc', 'test', 32); -- { serverError BAD_ARGUMENTS } +SELECT bech32Encode('bc', 'test', 40); -- { serverError BAD_ARGUMENTS } +SELECT bech32Encode('bc', 'test', 255); -- { serverError BAD_ARGUMENTS } + +-- Test the original fuzzer repro case (witness version 40 causing buffer overflow) +DROP TABLE IF EXISTS hex_data_test; +SET allow_suspicious_low_cardinality_types=1; +CREATE TABLE hex_data_test (hrp String, data String, witver LowCardinality(UInt8)) ENGINE = Memory; +INSERT INTO hex_data_test VALUES ('bc', 'test_data', 0); +SELECT bech32Encode(hrp, toFixedString('751e76e8199196d454941c45d1b3a323f1433bd6', 40), 40) FROM hex_data_test; -- { serverError BAD_ARGUMENTS } +DROP TABLE hex_data_test; diff --git a/tests/queries/0_stateless/03916_ttl_minmax_projection_epoch_bug.reference b/tests/queries/0_stateless/03916_ttl_minmax_projection_epoch_bug.reference new file mode 100644 index 000000000000..b261da18d51a --- /dev/null +++ b/tests/queries/0_stateless/03916_ttl_minmax_projection_epoch_bug.reference @@ -0,0 +1,2 @@ +1 +0 diff --git a/tests/queries/0_stateless/03916_ttl_minmax_projection_epoch_bug.sql b/tests/queries/0_stateless/03916_ttl_minmax_projection_epoch_bug.sql new file mode 100644 index 000000000000..b56e198112e2 --- /dev/null +++ b/tests/queries/0_stateless/03916_ttl_minmax_projection_epoch_bug.sql @@ -0,0 +1,29 @@ + + +DROP TABLE IF EXISTS test_ttl_minmax_epoch; + +CREATE TABLE test_ttl_minmax_epoch +( + timestamp DateTime64(9, 'UTC') +) +ENGINE = MergeTree +PARTITION BY toYYYYMMDD(timestamp) +ORDER BY (timestamp) +TTL timestamp + INTERVAL 1 MINUTE SETTINGS index_granularity = 1; + +-- rows from ~1-60 seconds ago, some will expire during merge +INSERT INTO test_ttl_minmax_epoch +SELECT + now64(9, 'UTC') - toIntervalSecond(1 + rand() % 60) AS timestamp +FROM numbers(1000); + +OPTIMIZE TABLE test_ttl_minmax_epoch FINAL; + +SELECT (SELECT min(timestamp) FROM test_ttl_minmax_epoch) = + (SELECT min(timestamp) FROM test_ttl_minmax_epoch SETTINGS optimize_use_implicit_projections = 0); + +SELECT countIf(min_time < '1971-01-01') AS parts_with_epoch_mintime +FROM system.parts +WHERE table = 'test_ttl_minmax_epoch' AND database = currentDatabase() AND active; + +DROP TABLE test_ttl_minmax_epoch; diff --git a/tests/queries/0_stateless/03916_window_functions_group_by_use_nulls.reference b/tests/queries/0_stateless/03916_window_functions_group_by_use_nulls.reference new file mode 100644 index 000000000000..cf4a6235a6b3 --- /dev/null +++ b/tests/queries/0_stateless/03916_window_functions_group_by_use_nulls.reference @@ -0,0 +1,31 @@ + +a +--- +a +a +a +a +a +a +a +a + +a +--- +a +\N +--- +hello world +hello world +hello world +--- +test +test +test +--- +\N x +x \N +--- +1 42 3 +1 42 3 +1 42 3 diff --git a/tests/queries/0_stateless/03916_window_functions_group_by_use_nulls.sql b/tests/queries/0_stateless/03916_window_functions_group_by_use_nulls.sql new file mode 100644 index 000000000000..2cd62789abe3 --- /dev/null +++ b/tests/queries/0_stateless/03916_window_functions_group_by_use_nulls.sql @@ -0,0 +1,35 @@ +SET enable_analyzer = 1; + +-- https://github.com/ClickHouse/ClickHouse/issues/82499 +-- Window functions with `group_by_use_nulls = 1` and CUBE/ROLLUP/GROUPING SETS +-- could crash because the aggregate function was created with non-nullable argument +-- types, but the actual data columns became nullable after GROUP BY. + +-- Original reproducer from the issue: +CREATE DICTIONARY d0 (c0 Int) PRIMARY KEY (c0) SOURCE(NULL()) LAYOUT(HASHED()) LIFETIME(1); +SELECT min('a') OVER () FROM d0 GROUP BY 'a', c0 WITH CUBE WITH TOTALS SETTINGS group_by_use_nulls = 1; +DROP DICTIONARY d0; + +SELECT '---'; + +SELECT min('a') OVER () FROM numbers(3) GROUP BY 'a', number WITH CUBE WITH TOTALS SETTINGS group_by_use_nulls = 1; + +SELECT '---'; + +WITH 'a' AS x SELECT leadInFrame(x) OVER (ORDER BY x NULLS FIRST ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) GROUP BY ROLLUP(x) ORDER BY 1 NULLS LAST SETTINGS group_by_use_nulls = 1; + +SELECT '---'; + +SELECT max('hello') OVER (), min('world') OVER () FROM numbers(2) GROUP BY number WITH ROLLUP SETTINGS group_by_use_nulls = 1; + +SELECT '---'; + +SELECT any('test') OVER () FROM numbers(2) GROUP BY GROUPING SETS(('test', number), ('test')) SETTINGS group_by_use_nulls = 1; + +SELECT '---'; + +WITH 'x' AS v SELECT lag(v) OVER (ORDER BY v), lead(v) OVER (ORDER BY v) GROUP BY ROLLUP(v) ORDER BY 1 NULLS FIRST SETTINGS group_by_use_nulls = 1, enable_analyzer = 1; -- lag/lead require the analyzer + +SELECT '---'; + +SELECT min(1) OVER (), max(42) OVER (), sum(1) OVER () FROM numbers(2) GROUP BY number WITH ROLLUP SETTINGS group_by_use_nulls = 1; diff --git a/tests/queries/0_stateless/03925_json_shared_data_buckets_missing_stream_bug.reference b/tests/queries/0_stateless/03925_json_shared_data_buckets_missing_stream_bug.reference new file mode 100644 index 000000000000..e9c6b09c7b3c --- /dev/null +++ b/tests/queries/0_stateless/03925_json_shared_data_buckets_missing_stream_bug.reference @@ -0,0 +1,20 @@ +0 {"a":42} +0 {"a":42} +1 {"a":42} +1 {"a":42} +2 {"a":42} +2 {"a":42} +3 {"a":42} +3 {"a":42} +4 {"a":42} +4 {"a":42} +5 {"a":42} +5 {"a":42} +6 {"a":42} +6 {"a":42} +7 {"a":42} +7 {"a":42} +8 {"a":42} +8 {"a":42} +9 {"a":42} +9 {"a":42} diff --git a/tests/queries/0_stateless/03925_json_shared_data_buckets_missing_stream_bug.sql b/tests/queries/0_stateless/03925_json_shared_data_buckets_missing_stream_bug.sql new file mode 100644 index 000000000000..dafa2b2065fb --- /dev/null +++ b/tests/queries/0_stateless/03925_json_shared_data_buckets_missing_stream_bug.sql @@ -0,0 +1,11 @@ +drop table if exists src; +drop table if exists dst; +create table src (id UInt64, json JSON) engine=MergeTree order by id settings min_bytes_for_wide_part=1, object_serialization_version='v3', object_shared_data_serialization_version_for_zero_level_parts='map_with_buckets'; +create table dst (id UInt64, json JSON) engine=MergeTree order by id settings min_bytes_for_wide_part=1, object_serialization_version='v3', object_shared_data_serialization_version_for_zero_level_parts='map_with_buckets'; +insert into src select number, '{"a" : 42}' from numbers(10); +insert into src select number, '{"a" : 42}' from numbers(10); +insert into dst select * from src order by id desc; +select * from dst order by id; +drop table src; +drop table dst; + diff --git a/tests/queries/0_stateless/03928_json_advanced_shared_data_bug.reference b/tests/queries/0_stateless/03928_json_advanced_shared_data_bug.reference new file mode 100644 index 000000000000..c0d2ee3f3b2a --- /dev/null +++ b/tests/queries/0_stateless/03928_json_advanced_shared_data_bug.reference @@ -0,0 +1,30 @@ +{"a":[{"b":42}]} +{"a":[{"b":42}]} +{"a":[{"b":42}]} +{"a":[{"b":42}]} +{"a":[{"b":42}]} +{"a":[{"b":42}]} +{"a":[{"b":42}]} +{"a":[{"b":42}]} +{"a":[{"b":42}]} +{"a":[{"b":42}]} +{} +{} +{} +{} +{} +{} +{} +{} +{} +{} +{"a":[{"b":42}]} +{"a":[{"b":42}]} +{"a":[{"b":42}]} +{"a":[{"b":42}]} +{"a":[{"b":42}]} +{"a":[{"b":42}]} +{"a":[{"b":42}]} +{"a":[{"b":42}]} +{"a":[{"b":42}]} +{"a":[{"b":42}]} diff --git a/tests/queries/0_stateless/03928_json_advanced_shared_data_bug.sql b/tests/queries/0_stateless/03928_json_advanced_shared_data_bug.sql new file mode 100644 index 000000000000..c54c8378ae3c --- /dev/null +++ b/tests/queries/0_stateless/03928_json_advanced_shared_data_bug.sql @@ -0,0 +1,16 @@ +DROP TABLE IF EXISTS test; + +CREATE TABLE test +( + `json` JSON(max_dynamic_paths = 1) +) +ENGINE = MergeTree +ORDER BY tuple() +SETTINGS min_bytes_for_wide_part = 1, min_rows_for_wide_part = 1, write_marks_for_substreams_in_compact_parts = 1, object_serialization_version = 'v3', object_shared_data_serialization_version = 'advanced', object_shared_data_serialization_version_for_zero_level_parts = 'advanced', object_shared_data_buckets_for_wide_part = 1, index_granularity = 100; + +INSERT INTO test SELECT multiIf(number < 10, '{"a" : [{"b" : 42}]}', number < 20, '{}', '{"a" : [{"b" : 42}]}') from numbers(30); + +SELECT * FROM test SETTINGS max_block_size=10; + +DROP TABLE test; + diff --git a/tests/queries/0_stateless/03988_grace_hash_join_leftover_blocks.reference b/tests/queries/0_stateless/03988_grace_hash_join_leftover_blocks.reference new file mode 100644 index 000000000000..3894b6e08587 --- /dev/null +++ b/tests/queries/0_stateless/03988_grace_hash_join_leftover_blocks.reference @@ -0,0 +1,6 @@ +9 +9 +9 +9 +9 +9 diff --git a/tests/queries/0_stateless/03988_grace_hash_join_leftover_blocks.sql b/tests/queries/0_stateless/03988_grace_hash_join_leftover_blocks.sql new file mode 100644 index 000000000000..521ca6af3cf3 --- /dev/null +++ b/tests/queries/0_stateless/03988_grace_hash_join_leftover_blocks.sql @@ -0,0 +1,79 @@ +DROP TABLE IF EXISTS test; +DROP TABLE IF EXISTS test2; + +SET max_joined_block_size_rows = 5; +SET enable_analyzer = 1; + +CREATE TABLE test +( + c0 Int, + c1 Date +) +ENGINE = MergeTree() +ORDER BY (c1); + +INSERT INTO test (c0, c1) VALUES +(1,'1995-01-28'), +(1,'1995-01-29'), +(1,'1995-01-30'); + +CREATE TABLE test2 +( + c0 Int, + c1 Date +) +ENGINE = MergeTree() +ORDER BY (c0); + +INSERT INTO test2 (c1, c0) VALUES +('1992-12-14',1), +('1992-12-14',1), +('1989-05-06',1); + +SELECT + count() +FROM test +LEFT JOIN test2 + ON test.c0 = test2.c0 + AND test.c1 >= test2.c1 +SETTINGS join_algorithm='parallel_hash'; + +SELECT + count() +FROM test2 +LEFT JOIN test + ON test.c0 = test2.c0 + AND test.c1 >= test2.c1 +SETTINGS join_algorithm='grace_hash', grace_hash_join_initial_buckets = 2; + +SELECT + count() +FROM test +RIGHT JOIN test2 + ON test.c0 = test2.c0 + AND test.c1 >= test2.c1 +SETTINGS join_algorithm='parallel_hash'; + +SELECT + count() +FROM test2 +RIGHT JOIN test + ON test.c0 = test2.c0 + AND test.c1 >= test2.c1 +SETTINGS join_algorithm='grace_hash', grace_hash_join_initial_buckets = 2; + +SELECT + count() +FROM test +FULL JOIN test2 + ON test.c0 = test2.c0 + AND test.c1 >= test2.c1 +SETTINGS join_algorithm='parallel_hash'; + +SELECT + count() +FROM test2 +FULL JOIN test + ON test.c0 = test2.c0 + AND test.c1 >= test2.c1 +SETTINGS join_algorithm='grace_hash', grace_hash_join_initial_buckets = 2; \ No newline at end of file diff --git a/tests/queries/0_stateless/03988_map_contains_key_like_tokenbf.reference b/tests/queries/0_stateless/03988_map_contains_key_like_tokenbf.reference new file mode 100644 index 000000000000..599d91218700 --- /dev/null +++ b/tests/queries/0_stateless/03988_map_contains_key_like_tokenbf.reference @@ -0,0 +1,18 @@ +1 +0 +1 +1 +1 +1 +1 +1 +0 +hostname +Verify skip index is used +1 +1 +1 +1 +1 +1 +1 diff --git a/tests/queries/0_stateless/03988_map_contains_key_like_tokenbf.sql b/tests/queries/0_stateless/03988_map_contains_key_like_tokenbf.sql new file mode 100644 index 000000000000..d14c3fc91371 --- /dev/null +++ b/tests/queries/0_stateless/03988_map_contains_key_like_tokenbf.sql @@ -0,0 +1,65 @@ +-- Test for issue https://github.com/ClickHouse/ClickHouse/issues/97792 + +SET parallel_replicas_local_plan = 1; + +DROP TABLE IF EXISTS t_map_tokenbf; + +CREATE TABLE t_map_tokenbf +( + metadata Map(String, String), + created_at DateTime64(3), + INDEX index_metadata_keys mapKeys(metadata) TYPE tokenbf_v1(32768, 3, 0) GRANULARITY 1, + INDEX index_metadata_vals mapValues(metadata) TYPE tokenbf_v1(32768, 3, 0) GRANULARITY 1 +) +ENGINE = MergeTree +ORDER BY created_at; + +INSERT INTO t_map_tokenbf VALUES ({'hostname': 'myhost', 'env': 'prod'}, now()); + +SELECT count() FROM t_map_tokenbf WHERE mapContainsKeyLike(metadata, '%host%'); -- 1 +SELECT count() FROM t_map_tokenbf WHERE mapContainsKeyLike(metadata, '%bad%'); -- 0 +SELECT count() FROM t_map_tokenbf WHERE mapContains(metadata, 'hostname'); -- 1 +SELECT count() FROM t_map_tokenbf WHERE mapContainsKey(metadata, 'env'); -- 1 +SELECT count() FROM t_map_tokenbf WHERE has(mapKeys(metadata), 'env'); -- 1 +SELECT count() FROM t_map_tokenbf WHERE has(metadata, 'hostname'); -- 1 +SELECT count() FROM t_map_tokenbf WHERE mapContainsValue(metadata, 'prod'); -- 1 +SELECT count() FROM t_map_tokenbf WHERE mapContainsValueLike(metadata, '%host%'); -- 1 +SELECT count() FROM t_map_tokenbf WHERE mapContainsValueLike(metadata, '%random%'); -- 0 + +SELECT arrayJoin(mapKeys(mapExtractKeyLike(metadata, '%host%'))) as extracted_metadata +FROM t_map_tokenbf +WHERE mapContainsKeyLike(metadata, '%host%') +GROUP BY extracted_metadata; + +-- Verify that skip index was used - all should return 1 +SELECT 'Verify skip index is used'; + +SELECT COUNT(*) FROM ( + EXPLAIN indexes=1 SELECT count() FROM t_map_tokenbf WHERE mapContainsKeyLike(metadata, '%host%') + ) WHERE explain LIKE '%index_metadata%'; + +SELECT COUNT(*) FROM ( + EXPLAIN indexes=1 SELECT count() FROM t_map_tokenbf WHERE mapContains(metadata, 'hostname') + ) WHERE explain LIKE '%index_metadata%'; + +SELECT COUNT(*) FROM ( + EXPLAIN indexes=1 SELECT count() FROM t_map_tokenbf WHERE mapContainsKey(metadata, 'env') + ) WHERE explain LIKE '%index_metadata%'; + +SELECT COUNT(*) FROM ( + EXPLAIN indexes=1 SELECT count() FROM t_map_tokenbf WHERE has(mapKeys(metadata), 'env') + ) WHERE explain LIKE '%index_metadata%'; + +SELECT COUNT(*) FROM ( + EXPLAIN indexes=1 SELECT count() FROM t_map_tokenbf WHERE has(metadata, 'hostname') + ) WHERE explain LIKE '%index_metadata%'; + +SELECT COUNT(*) FROM ( + EXPLAIN indexes=1 SELECT count() FROM t_map_tokenbf WHERE mapContainsValue(metadata, 'prod') + ) WHERE explain LIKE '%index_metadata%'; + +SELECT COUNT(*) FROM ( + EXPLAIN indexes=1 SELECT count() FROM t_map_tokenbf WHERE mapContainsValueLike(metadata, '%random%') + ) WHERE explain LIKE '%index_metadata%'; + +DROP TABLE t_map_tokenbf; diff --git a/tests/queries/0_stateless/03988_zookeeper_send_receive_race.reference b/tests/queries/0_stateless/03988_zookeeper_send_receive_race.reference new file mode 100644 index 000000000000..d86bac9de59a --- /dev/null +++ b/tests/queries/0_stateless/03988_zookeeper_send_receive_race.reference @@ -0,0 +1 @@ +OK diff --git a/tests/queries/0_stateless/03988_zookeeper_send_receive_race.sh b/tests/queries/0_stateless/03988_zookeeper_send_receive_race.sh new file mode 100755 index 000000000000..8b4a8a3d92f4 --- /dev/null +++ b/tests/queries/0_stateless/03988_zookeeper_send_receive_race.sh @@ -0,0 +1,42 @@ +#!/usr/bin/env bash +# Tags: zookeeper, no-fasttest + +# Regression test for a data race in ZooKeeper client between sendThread and receiveThread. +# +# sendThread used to mutate the request (addRootPath, has_watch) AFTER copying it +# into the operations map, while receiveThread could concurrently read from the +# same shared request object via the operations map. This caused a data race on +# the request's path string (std::string reallocation during addRootPath vs +# concurrent getPath() read), leading to SIGBUS/use-after-free crashes. +# +# Under TSAN this test reliably detects the race before the fix. +# The key is to generate many concurrent ZooKeeper requests through the server's +# shared ZK session so sendThread and receiveThread are both actively working on +# the operations map at the same time. + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q " + DROP TABLE IF EXISTS t_zk_race; + CREATE TABLE t_zk_race (key UInt64) + ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/t_zk_race', 'r1') + ORDER BY key; +" + +ZK_PATH="/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/t_zk_race" + +# Flood the server's shared ZK connection with concurrent reads from +# system.zookeeper. Each SELECT issues ZK list/get requests that go through +# sendThread (addRootPath + operations map insert) and receiveThread +# (operations map read for timeout + response handling) on the same session. +# +# Use clickhouse-benchmark for maximum ZK operations/sec on a single session. +# --timelimit ensures the test runs long enough for TSAN to catch the race. +echo "SELECT count() FROM system.zookeeper WHERE path = '$ZK_PATH' FORMAT Null" | \ + ${CLICKHOUSE_BENCHMARK} --concurrency 30 --iterations 100000 --timelimit 10 2>&1 | grep -q "Executed" || true + +echo "OK" + +$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS t_zk_race" diff --git a/tests/queries/0_stateless/04001_virtual_row_conversions_join_column_names.reference b/tests/queries/0_stateless/04001_virtual_row_conversions_join_column_names.reference new file mode 100644 index 000000000000..3d454cd4efb8 --- /dev/null +++ b/tests/queries/0_stateless/04001_virtual_row_conversions_join_column_names.reference @@ -0,0 +1,16 @@ +-10 +-1 +0 +0 +1 +10 +- +0 1 +0 1 +0 2 +0 2 +- +0 1 +0 1 +0 2 +0 2 diff --git a/tests/queries/0_stateless/04001_virtual_row_conversions_join_column_names.sql b/tests/queries/0_stateless/04001_virtual_row_conversions_join_column_names.sql new file mode 100644 index 000000000000..30e1af99e362 --- /dev/null +++ b/tests/queries/0_stateless/04001_virtual_row_conversions_join_column_names.sql @@ -0,0 +1,27 @@ +DROP TABLE IF EXISTS t0; +DROP TABLE IF EXISTS t1; + +SET allow_suspicious_low_cardinality_types = 1; +SET enable_analyzer = 1; +CREATE TABLE t0 (c0 LowCardinality(Int)) ENGINE = MergeTree() ORDER BY (c0); +CREATE TABLE t1 (c0 Nullable(Int)) ENGINE = MergeTree() ORDER BY tuple(); + +INSERT INTO TABLE t0 (c0) VALUES (0), (1); +INSERT INTO TABLE t0 (c0) VALUES (-10), (10); +INSERT INTO TABLE t0 (c0) VALUES (0), (-1); +INSERT INTO TABLE t1 (c0) VALUES (1), (2); + +SET read_in_order_use_virtual_row = 1; + + +SELECT CAST(c0, 'Int32') a FROM t0 ORDER BY a; + +SELECT '-'; +SELECT * FROM t0 JOIN t1 ON t1.c0.null = t0.c0 +ORDER BY t0.c0, t1.c0; + +SELECT '-'; + +SELECT * FROM t0 JOIN t1 ON t1.c0.null = t0.c0 +ORDER BY t0.c0, t1.c0 +SETTINGS join_algorithm = 'full_sorting_merge'; diff --git a/tests/queries/0_stateless/04003_array_join_in_filter_outer_to_inner_join.reference b/tests/queries/0_stateless/04003_array_join_in_filter_outer_to_inner_join.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/04003_array_join_in_filter_outer_to_inner_join.sql b/tests/queries/0_stateless/04003_array_join_in_filter_outer_to_inner_join.sql new file mode 100644 index 000000000000..3d63ae1a423d --- /dev/null +++ b/tests/queries/0_stateless/04003_array_join_in_filter_outer_to_inner_join.sql @@ -0,0 +1,19 @@ +-- Regression: segfault in executeActionForPartialResult when filter expression contains arrayJoin +-- and the convertOuterJoinToInnerJoin optimization tries to evaluate the filter with partial (null) arguments. + +SET enable_analyzer = 1; +SELECT DISTINCT + 2, + 1048575 +FROM numbers(1) AS l, + numbers(2, isZeroOrNull(assumeNotNull(1))) AS r +ANY INNER JOIN r AS alias37 ON equals(alias37.number, r.number) +RIGHT JOIN l AS alias44 ON equals(alias44.number, alias37.number) +ANY INNER JOIN alias44 AS alias48 ON equals(alias48.number, r.number) +ANY RIGHT JOIN r AS alias52 ON equals(alias52.number, alias37.number) +WHERE equals(isNull(toLowCardinality(toUInt128(2))), arrayJoin([*, 13, 13, 13, toNullable(13), 13])) +GROUP BY + materialize(1), + isNull(toUInt128(2)), + and(and(1048575, isZeroOrNull(1), isNullable(isNull(1))), materialize(13), isNull(toUInt256(materialize(2))), *, and(*, and(1, nan, isNull(isNull(1)), isZeroOrNull(1), 1048575), 13)) +WITH CUBE; diff --git a/tests/queries/0_stateless/04004_view_comment_before_as_select.reference b/tests/queries/0_stateless/04004_view_comment_before_as_select.reference new file mode 100644 index 000000000000..6353405096a4 --- /dev/null +++ b/tests/queries/0_stateless/04004_view_comment_before_as_select.reference @@ -0,0 +1,2 @@ +CREATE VIEW v\nAS (SELECT 1)\nCOMMENT \'test\' +CREATE MATERIALIZED VIEW v\nENGINE = MergeTree\nORDER BY c\nAS (SELECT 1 AS c)\nCOMMENT \'test\' diff --git a/tests/queries/0_stateless/04004_view_comment_before_as_select.sql b/tests/queries/0_stateless/04004_view_comment_before_as_select.sql new file mode 100644 index 000000000000..91b761945067 --- /dev/null +++ b/tests/queries/0_stateless/04004_view_comment_before_as_select.sql @@ -0,0 +1,3 @@ +-- Forward compatibility: accept COMMENT before AS SELECT (syntax produced by 26.2+). +SELECT formatQuery('CREATE VIEW v COMMENT \'test\' AS SELECT 1'); +SELECT formatQuery('CREATE MATERIALIZED VIEW v ENGINE = MergeTree ORDER BY c COMMENT \'test\' AS SELECT 1 AS c'); diff --git a/tests/queries/0_stateless/04023_issue_98484_drop_patch_part.reference b/tests/queries/0_stateless/04023_issue_98484_drop_patch_part.reference new file mode 100644 index 000000000000..d86bac9de59a --- /dev/null +++ b/tests/queries/0_stateless/04023_issue_98484_drop_patch_part.reference @@ -0,0 +1 @@ +OK diff --git a/tests/queries/0_stateless/04023_issue_98484_drop_patch_part.sh b/tests/queries/0_stateless/04023_issue_98484_drop_patch_part.sh new file mode 100755 index 000000000000..c416c1405b33 --- /dev/null +++ b/tests/queries/0_stateless/04023_issue_98484_drop_patch_part.sh @@ -0,0 +1,65 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, no-replicated-database +# Tag no-fasttest: requires lightweight_delete_mode setting + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +# Test for issue #98484: DROP PART on patch part should not crash server. +# The bug was that getPatchPartMetadata() built a partition key expression +# referencing _part column, but the ColumnsDescription passed from +# createEmptyPart() only contained data columns, causing UNKNOWN_IDENTIFIER +# inside a NOEXCEPT_SCOPE which triggered std::terminate(). + +${CLICKHOUSE_CLIENT} --query " + CREATE TABLE t_98484 (c0 Int32, c1 String, c2 Int8) + ENGINE = MergeTree() ORDER BY tuple() + SETTINGS enable_block_offset_column = 1, enable_block_number_column = 1 +" + +${CLICKHOUSE_CLIENT} --query "INSERT INTO t_98484 VALUES (1, 'hello', 10)" +${CLICKHOUSE_CLIENT} --query "INSERT INTO t_98484 VALUES (2, 'world', 20)" +${CLICKHOUSE_CLIENT} --query "INSERT INTO t_98484 VALUES (3, 'test', 30)" + +# Create patch parts via lightweight delete +${CLICKHOUSE_CLIENT} --query "SET lightweight_delete_mode = 'lightweight_update_force'; DELETE FROM t_98484 WHERE c0 = 1" + +# Wait for mutations to complete +for _ in $(seq 1 30); do + result=$(${CLICKHOUSE_CLIENT} --query "SELECT count() FROM system.parts WHERE database = currentDatabase() AND table = 't_98484' AND name LIKE 'patch-%' AND active = 1") + if [ "$result" -ge 1 ]; then + break + fi + sleep 0.5 +done + +# Add column to change columns description (original trigger condition) +${CLICKHOUSE_CLIENT} --query "ALTER TABLE t_98484 ADD COLUMN c9 Nullable(Bool)" + +# Get the first active patch part name +PATCH_PART=$(${CLICKHOUSE_CLIENT} --query " + SELECT name FROM system.parts + WHERE database = currentDatabase() AND table = 't_98484' + AND name LIKE 'patch-%' AND active = 1 + ORDER BY name LIMIT 1 +") + +if [ -z "$PATCH_PART" ]; then + echo "FAIL: No patch parts found" + exit 1 +fi + +# DROP PART on the patch part - this should not crash the server +${CLICKHOUSE_CLIENT} --query "ALTER TABLE t_98484 DROP PART '$PATCH_PART'" 2>&1 + +# Verify server is still alive +${CLICKHOUSE_CLIENT} --query "SELECT 1" > /dev/null 2>&1 +if [ $? -ne 0 ]; then + echo "FAIL: Server crashed" + exit 1 +fi + +echo "OK" + +${CLICKHOUSE_CLIENT} --query "DROP TABLE t_98484" diff --git a/tests/queries/0_stateless/04027_reverseUTF8_invalid_utf8.reference b/tests/queries/0_stateless/04027_reverseUTF8_invalid_utf8.reference new file mode 100644 index 000000000000..1f0eb51aa4f8 --- /dev/null +++ b/tests/queries/0_stateless/04027_reverseUTF8_invalid_utf8.reference @@ -0,0 +1,3 @@ +esuoHkcilC +тевирП +はちにんこ diff --git a/tests/queries/0_stateless/04027_reverseUTF8_invalid_utf8.sql b/tests/queries/0_stateless/04027_reverseUTF8_invalid_utf8.sql new file mode 100644 index 000000000000..0267c46640d4 --- /dev/null +++ b/tests/queries/0_stateless/04027_reverseUTF8_invalid_utf8.sql @@ -0,0 +1,15 @@ +-- Test that reverseUTF8 does not crash on invalid UTF-8 (truncated multi-byte sequences) +SELECT reverseUTF8(unhex('C0')) FORMAT Null; +SELECT reverseUTF8(unhex('E0')) FORMAT Null; +SELECT reverseUTF8(unhex('F0')) FORMAT Null; +SELECT reverseUTF8(unhex('E0A0')) FORMAT Null; +SELECT reverseUTF8(unhex('F09F')) FORMAT Null; +SELECT reverseUTF8(unhex('F09F98')) FORMAT Null; + +-- The original crash query from the AST fuzzer +SELECT DISTINCT reverseUTF8(maxMergeDistinct(x) IGNORE NULLS), toNullable(1) FROM (SELECT DISTINCT dictHas(tuple(toUInt16(NULL)), 13, toUInt32(6), NULL), CAST(concat(unhex('00001000'), randomString(intDiv(1048576, toNullable(1))), toLowCardinality(toFixedString('\0', 1))), 'AggregateFunction(max, String)') AS x) WITH TOTALS FORMAT Null; + +-- Verify correct behavior on valid UTF-8 +SELECT reverseUTF8('ClickHouse'); +SELECT reverseUTF8('Привет'); +SELECT reverseUTF8('こんにちは'); diff --git a/tests/queries/0_stateless/04028_recursive_cte_remote_view_segfault.reference b/tests/queries/0_stateless/04028_recursive_cte_remote_view_segfault.reference new file mode 100644 index 000000000000..6ed281c757a9 --- /dev/null +++ b/tests/queries/0_stateless/04028_recursive_cte_remote_view_segfault.reference @@ -0,0 +1,2 @@ +1 +1 diff --git a/tests/queries/0_stateless/04028_recursive_cte_remote_view_segfault.sql b/tests/queries/0_stateless/04028_recursive_cte_remote_view_segfault.sql new file mode 100644 index 000000000000..f397bff986a7 --- /dev/null +++ b/tests/queries/0_stateless/04028_recursive_cte_remote_view_segfault.sql @@ -0,0 +1,13 @@ +-- Tags: no-fasttest +-- Regression test: recursive CTE with remote() + view() used to segfault +-- because isStorageUsedInTree tried to call getStorageID() on an unresolved +-- view() TableFunctionNode whose storage was null. + +SET enable_analyzer=1; + +WITH RECURSIVE x AS ( + (SELECT 1 FROM remote('127.0.0.1', view(SELECT 1))) + UNION ALL + (SELECT 1) +) +SELECT 1 FROM x; diff --git a/tests/queries/0_stateless/04034_patch_parts_column_order_mismatch.reference b/tests/queries/0_stateless/04034_patch_parts_column_order_mismatch.reference new file mode 100644 index 000000000000..4287567495fe --- /dev/null +++ b/tests/queries/0_stateless/04034_patch_parts_column_order_mismatch.reference @@ -0,0 +1,6 @@ +1 updated1 99 9.9 999 upd1 +2 updated1 99 9.9 999 upd1 +1 updated2 88 8.8 888 upd2 +2 updated2 88 8.8 888 upd2 +1 updated2 88 8.8 888 upd2 +2 updated2 88 8.8 888 upd2 diff --git a/tests/queries/0_stateless/04034_patch_parts_column_order_mismatch.sql b/tests/queries/0_stateless/04034_patch_parts_column_order_mismatch.sql new file mode 100644 index 000000000000..3c1d29ede8d4 --- /dev/null +++ b/tests/queries/0_stateless/04034_patch_parts_column_order_mismatch.sql @@ -0,0 +1,50 @@ +-- Regression test for https://github.com/ClickHouse/clickhouse-core-incidents/issues/1021 +-- When multiple patch parts (Merge + Join mode) update the same columns, +-- the column ordering in patch blocks must be deterministic to avoid +-- LOGICAL_ERROR "Block structure mismatch in patch parts stream". +-- +-- The failpoint reverses column order for odd-indexed patches to expose any +-- code relying on positional column matching. Without the sort in +-- getUpdatedHeader, this triggers the bug. + +SET enable_lightweight_update = 1; + +SYSTEM ENABLE FAILPOINT patch_parts_reverse_column_order; + +DROP TABLE IF EXISTS t_patch_order; + +CREATE TABLE t_patch_order (id UInt64, a_col String, b_col UInt64, c_col Float64, d_col UInt32, e_col String) +ENGINE = MergeTree ORDER BY id +SETTINGS + enable_block_number_column = 1, + enable_block_offset_column = 1, + apply_patches_on_merge = 0; + +-- Insert two separate blocks to create two base parts. +INSERT INTO t_patch_order VALUES (1, 'hello', 10, 1.5, 100, 'world'); +INSERT INTO t_patch_order VALUES (2, 'foo', 20, 2.5, 200, 'bar'); + +-- First UPDATE: creates Merge-mode patch parts for both base parts. +UPDATE t_patch_order SET a_col = 'updated1', b_col = 99, c_col = 9.9, d_col = 999, e_col = 'upd1' WHERE 1; + +-- Verify patch application works in Merge mode. +SELECT * FROM t_patch_order ORDER BY id; + +-- Merge base parts; patches become Join-mode (apply_patches_on_merge = 0). +OPTIMIZE TABLE t_patch_order FINAL; + +-- Second UPDATE: creates new Merge-mode patch parts for the merged base part. +UPDATE t_patch_order SET a_col = 'updated2', b_col = 88, c_col = 8.8, d_col = 888, e_col = 'upd2' WHERE 1; + +-- This SELECT must apply both Join-mode and Merge-mode patches simultaneously. +-- The failpoint reverses column order for odd-indexed patches. Without the fix, +-- getUpdatedHeader throws LOGICAL_ERROR because it compares patch headers positionally. +SELECT * FROM t_patch_order ORDER BY id; + +-- Materialize patches and verify final state. +ALTER TABLE t_patch_order APPLY PATCHES SETTINGS mutations_sync = 2; +SELECT * FROM t_patch_order ORDER BY id SETTINGS apply_patch_parts = 0; + +SYSTEM DISABLE FAILPOINT patch_parts_reverse_column_order; + +DROP TABLE t_patch_order; diff --git a/tests/queries/0_stateless/04038_check_table_sparse_tuple_dynamic.reference b/tests/queries/0_stateless/04038_check_table_sparse_tuple_dynamic.reference new file mode 100644 index 000000000000..d00491fd7e5b --- /dev/null +++ b/tests/queries/0_stateless/04038_check_table_sparse_tuple_dynamic.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/04038_check_table_sparse_tuple_dynamic.sql b/tests/queries/0_stateless/04038_check_table_sparse_tuple_dynamic.sql new file mode 100644 index 000000000000..c406eda00b2e --- /dev/null +++ b/tests/queries/0_stateless/04038_check_table_sparse_tuple_dynamic.sql @@ -0,0 +1,12 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/96588 +-- CHECK TABLE on a Tuple with a Dynamic element and a sparse-serialized element +-- used to fail with "Unexpected size of tuple element" because deserializeOffsets +-- in SerializationSparse treated limit=0 as "read everything" instead of "read nothing". + +DROP TABLE IF EXISTS t0; + +CREATE TABLE t0 (c0 Tuple(c1 Dynamic, c2 Tuple(c3 Int))) ENGINE = MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 1, ratio_of_defaults_for_sparse_serialization = 0.9; +INSERT INTO TABLE t0 (c0) SELECT (1, (number, ), ) FROM numbers(1); +CHECK TABLE t0; + +DROP TABLE t0; diff --git a/tests/queries/0_stateless/04039_intersect_except_duplicate_column_names.reference b/tests/queries/0_stateless/04039_intersect_except_duplicate_column_names.reference new file mode 100644 index 000000000000..9ac08789a5c9 --- /dev/null +++ b/tests/queries/0_stateless/04039_intersect_except_duplicate_column_names.reference @@ -0,0 +1,4 @@ +1 1 hello world world +2 2 foo bar bar +1 1 hello world world +2 2 foo bar bar diff --git a/tests/queries/0_stateless/04039_intersect_except_duplicate_column_names.sql b/tests/queries/0_stateless/04039_intersect_except_duplicate_column_names.sql new file mode 100644 index 000000000000..66d1f76d3dcc --- /dev/null +++ b/tests/queries/0_stateless/04039_intersect_except_duplicate_column_names.sql @@ -0,0 +1,20 @@ +-- Reproducer for heap-use-after-free in IntersectOrExceptTransform +-- when the header has duplicate column names (e.g., from SELECT col, *, col). +-- The bug was that getPositionByName returned the same position for duplicate names, +-- creating duplicate entries in key_columns_pos. Then convertToFullColumnIfConst +-- on the same position freed the column a raw pointer still referenced. + +DROP TABLE IF EXISTS t_intersect_except; +CREATE TABLE t_intersect_except (id UInt32, a String, b String) ENGINE = Memory; +INSERT INTO t_intersect_except VALUES (1, 'hello', 'world'), (2, 'foo', 'bar'); + +-- SELECT id, *, b produces duplicate column names: id appears twice, b appears twice. +(SELECT id, *, b FROM t_intersect_except ORDER BY id LIMIT 10) EXCEPT DISTINCT (SELECT id, *, b FROM t_intersect_except ORDER BY id LIMIT 10); + +(SELECT id, *, b FROM t_intersect_except ORDER BY id LIMIT 10) INTERSECT DISTINCT (SELECT id, *, b FROM t_intersect_except ORDER BY id LIMIT 10); + +(SELECT id, *, b FROM t_intersect_except ORDER BY id LIMIT 10) EXCEPT ALL (SELECT id, *, b FROM t_intersect_except ORDER BY id LIMIT 10); + +(SELECT id, *, b FROM t_intersect_except ORDER BY id LIMIT 10) INTERSECT ALL (SELECT id, *, b FROM t_intersect_except ORDER BY id LIMIT 10); + +DROP TABLE t_intersect_except; diff --git a/tests/queries/0_stateless/04041_variant_read_with_direct_io.reference b/tests/queries/0_stateless/04041_variant_read_with_direct_io.reference new file mode 100644 index 000000000000..3cd40e317c88 --- /dev/null +++ b/tests/queries/0_stateless/04041_variant_read_with_direct_io.reference @@ -0,0 +1,3 @@ +500000 +100000 +100000 diff --git a/tests/queries/0_stateless/04041_variant_read_with_direct_io.sh b/tests/queries/0_stateless/04041_variant_read_with_direct_io.sh new file mode 100755 index 000000000000..d50fa6d943b2 --- /dev/null +++ b/tests/queries/0_stateless/04041_variant_read_with_direct_io.sh @@ -0,0 +1,37 @@ +#!/usr/bin/env bash +# Tags: long + +# Regression test for incorrect seek in AsynchronousReadBufferFromFileDescriptor +# with O_DIRECT (min_bytes_to_use_direct_io=1). The bug was that getPosition() +# and seek NOOP/in-buffer checks did not account for bytes_to_ignore set by +# O_DIRECT alignment, causing corrupted reads of Variant subcolumns. + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_suspicious_variant_types=1 --max_threads 2 --min_bytes_to_use_direct_io 1" + +$CH_CLIENT -q "drop table if exists test_variant_direct_io;" + +$CH_CLIENT -q "create table test_variant_direct_io (id UInt64, v Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, index_granularity_bytes=10485760, index_granularity=8192;" + +$CH_CLIENT -mq "insert into test_variant_direct_io select number, NULL from numbers(100000); +insert into test_variant_direct_io select number + 100000, number from numbers(100000); +insert into test_variant_direct_io select number + 200000, ('str_' || toString(number))::Variant(String) from numbers(100000); +insert into test_variant_direct_io select number + 300000, ('lc_str_' || toString(number))::LowCardinality(String) from numbers(100000); +insert into test_variant_direct_io select number + 400000, tuple(number, number + 1)::Tuple(a UInt32, b UInt32) from numbers(100000); +insert into test_variant_direct_io select number + 500000, range(number % 20 + 1)::Array(UInt64) from numbers(100000);" + +$CH_CLIENT -q "optimize table test_variant_direct_io final settings mutations_sync=1;" + +# Without the fix, reading v.String here would fail with: +# "Size of deserialized variant column less than the limit" +$CH_CLIENT -q "select v.String from test_variant_direct_io format Null;" + +# Also check that subcolumn reads return the correct count +$CH_CLIENT -q "select count() from test_variant_direct_io where v is not null;" +$CH_CLIENT -q "select count() from test_variant_direct_io where v.String is not null;" +$CH_CLIENT -q "select count() from test_variant_direct_io where v.UInt64 is not null;" + +$CH_CLIENT -q "drop table test_variant_direct_io;" diff --git a/tests/queries/0_stateless/04043_system_asynchronous_inserts_user_filter.reference b/tests/queries/0_stateless/04043_system_asynchronous_inserts_user_filter.reference new file mode 100644 index 000000000000..e9130c6855ee --- /dev/null +++ b/tests/queries/0_stateless/04043_system_asynchronous_inserts_user_filter.reference @@ -0,0 +1,6 @@ +restricted_user sees: +0 +secret_user sees: +1 +admin sees: +1 diff --git a/tests/queries/0_stateless/04043_system_asynchronous_inserts_user_filter.sh b/tests/queries/0_stateless/04043_system_asynchronous_inserts_user_filter.sh new file mode 100755 index 000000000000..9acc6585241c --- /dev/null +++ b/tests/queries/0_stateless/04043_system_asynchronous_inserts_user_filter.sh @@ -0,0 +1,53 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +# Regression test: system.asynchronous_inserts must not leak cross-user insert metadata. +# A user without SHOW_USERS privilege must only see their own pending inserts. + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} -q " + DROP USER IF EXISTS secret_user_${CLICKHOUSE_DATABASE}; + DROP USER IF EXISTS restricted_user_${CLICKHOUSE_DATABASE}; + CREATE USER secret_user_${CLICKHOUSE_DATABASE}; + CREATE USER restricted_user_${CLICKHOUSE_DATABASE}; + DROP TABLE IF EXISTS ${CLICKHOUSE_DATABASE}.async_insert_test; + CREATE TABLE ${CLICKHOUSE_DATABASE}.async_insert_test (x UInt64) ENGINE=MergeTree ORDER BY x; + GRANT INSERT ON ${CLICKHOUSE_DATABASE}.async_insert_test TO secret_user_${CLICKHOUSE_DATABASE}; + GRANT SELECT ON system.asynchronous_inserts TO secret_user_${CLICKHOUSE_DATABASE}; + GRANT SELECT ON system.asynchronous_inserts TO restricted_user_${CLICKHOUSE_DATABASE}; +" + +# secret_user inserts with async_insert enabled and a very long flush timeout so the entry stays in the queue. +${CLICKHOUSE_CLIENT} \ + --user "secret_user_${CLICKHOUSE_DATABASE}" \ + --async_insert 1 \ + --async_insert_busy_timeout_max_ms 600000 \ + --async_insert_busy_timeout_min_ms 600000 \ + --wait_for_async_insert 0 \ + -q "INSERT INTO ${CLICKHOUSE_DATABASE}.async_insert_test VALUES (42)" + +# restricted_user must see 0 rows (no cross-user visibility). +echo "restricted_user sees:" +${CLICKHOUSE_CLIENT} \ + --user "restricted_user_${CLICKHOUSE_DATABASE}" \ + -q "SELECT count() FROM system.asynchronous_inserts WHERE table = 'async_insert_test' AND database = '${CLICKHOUSE_DATABASE}'" + +# secret_user must see their own row. +echo "secret_user sees:" +${CLICKHOUSE_CLIENT} \ + --user "secret_user_${CLICKHOUSE_DATABASE}" \ + -q "SELECT count() FROM system.asynchronous_inserts WHERE table = 'async_insert_test' AND database = '${CLICKHOUSE_DATABASE}'" + +# Admin (current session) must see all rows. +echo "admin sees:" +${CLICKHOUSE_CLIENT} \ + -q "SELECT count() FROM system.asynchronous_inserts WHERE table = 'async_insert_test' AND database = '${CLICKHOUSE_DATABASE}'" + +${CLICKHOUSE_CLIENT} -q " + DROP USER IF EXISTS secret_user_${CLICKHOUSE_DATABASE}; + DROP USER IF EXISTS restricted_user_${CLICKHOUSE_DATABASE}; + DROP TABLE IF EXISTS ${CLICKHOUSE_DATABASE}.async_insert_test; +" diff --git a/tests/queries/0_stateless/04049_aggregate_function_numeric_indexed_vector_self_merge.reference b/tests/queries/0_stateless/04049_aggregate_function_numeric_indexed_vector_self_merge.reference new file mode 100644 index 000000000000..156baf3abc90 --- /dev/null +++ b/tests/queries/0_stateless/04049_aggregate_function_numeric_indexed_vector_self_merge.reference @@ -0,0 +1,2 @@ +{100:2} +{100:4} diff --git a/tests/queries/0_stateless/04049_aggregate_function_numeric_indexed_vector_self_merge.sql b/tests/queries/0_stateless/04049_aggregate_function_numeric_indexed_vector_self_merge.sql new file mode 100644 index 000000000000..15eb95fe70bd --- /dev/null +++ b/tests/queries/0_stateless/04049_aggregate_function_numeric_indexed_vector_self_merge.sql @@ -0,0 +1,11 @@ +-- Test that self-merge of NumericIndexedVector aggregate states does not trigger +-- assertion failure in CRoaring (x1 != x2 in `roaring_bitmap_xor_inplace`). +-- https://github.com/ClickHouse/ClickHouse/issues/99704 + +-- `multiply` triggers self-merge via exponentiation by squaring (even branch). +SELECT arrayJoin([numericIndexedVectorToMap( + multiply(2, groupNumericIndexedVectorState(100, 1)))]); + +-- Power of 2 forces multiple self-merge iterations. +SELECT arrayJoin([numericIndexedVectorToMap( + multiply(4, groupNumericIndexedVectorState(100, 1)))]); diff --git a/tests/queries/0_stateless/04054_json_nested_shared_data_buckets_missing_stream_bug.reference b/tests/queries/0_stateless/04054_json_nested_shared_data_buckets_missing_stream_bug.reference new file mode 100644 index 000000000000..2fdc1deb963d --- /dev/null +++ b/tests/queries/0_stateless/04054_json_nested_shared_data_buckets_missing_stream_bug.reference @@ -0,0 +1,3 @@ +1 {"images":[{"url":"c","width":300}]} +2 {"images":[{"url":"d","width":400}]} +3 {"images":[{"url":"a","width":100}]} diff --git a/tests/queries/0_stateless/04054_json_nested_shared_data_buckets_missing_stream_bug.sql b/tests/queries/0_stateless/04054_json_nested_shared_data_buckets_missing_stream_bug.sql new file mode 100644 index 000000000000..9373a9fbaa57 --- /dev/null +++ b/tests/queries/0_stateless/04054_json_nested_shared_data_buckets_missing_stream_bug.sql @@ -0,0 +1,42 @@ +-- Tags: long + +SET allow_experimental_json_type = 1; + +-- Regression test for a bug where ColumnObject::index (and filter/replicate/scatter) +-- did not propagate statistics, causing a mismatch between the number of shared data +-- buckets chosen during stream creation vs serialization state creation for nested JSON +-- columns inside Array(JSON). This resulted in: +-- "Stream ... object_shared_data.1.size1 not found" (LOGICAL_ERROR) +-- +-- The bug requires: Wide parts, nested JSON with empty shared data, a non-trivial +-- permutation applied during INSERT, and optimize_on_insert=0 to prevent mergeBlock +-- from pre-sorting the block and nullifying the permutation. + +DROP TABLE IF EXISTS src; +DROP TABLE IF EXISTS dst; + +CREATE TABLE src (id UInt64, data JSON(max_dynamic_paths=256)) +ENGINE = MergeTree ORDER BY tuple() +SETTINGS min_bytes_for_wide_part=0; + +INSERT INTO src VALUES + (3, '{"images": [{"url": "a", "width": 100}]}'), + (2, '{"images": [{"url": "d", "width": 400}]}'), + (1, '{"images": [{"url": "c", "width": 300}]}'); + +CREATE TABLE dst (id UInt64, data JSON(max_dynamic_paths=256)) +ENGINE = MergeTree ORDER BY id +SETTINGS min_bytes_for_wide_part=0; + +-- Data arrives at the MergeTree sink with statistics intact from reading the source part. +-- The sink applies a permutation to sort by id. With optimize_on_insert=0, the raw +-- permutation is passed to the writer. The inner JSON (inside Array) goes through +-- ColumnArray::permute -> ColumnArray::indexImpl -> ColumnObject::index. +-- Before the fix, ColumnObject::index dropped statistics, causing a bucket count mismatch. +INSERT INTO dst SELECT * FROM src +SETTINGS max_insert_threads=1, optimize_on_insert=0; + +SELECT id, data FROM dst ORDER BY id; + +DROP TABLE src; +DROP TABLE dst;