diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..335ec95 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +*.tar.gz diff --git a/clickhouse/.gitignore b/clickhouse/.gitignore new file mode 100644 index 0000000..035a2ff --- /dev/null +++ b/clickhouse/.gitignore @@ -0,0 +1 @@ +clickhouse* diff --git a/clickhouse/README.md b/clickhouse/README.md new file mode 100644 index 0000000..94debc1 --- /dev/null +++ b/clickhouse/README.md @@ -0,0 +1,41 @@ +# clickhouse + +Patches and notes for building ClickHouse on illumos. + +## Building + +```bash +$ ./build.sh +``` + +## Patches + +Most patches have been upstreamed into their various homes. The remaining are +mostly related to some C++ standard library weirdness. The files in +`patches/direct` are applied to the source (after cloning submodules), and +those in `patches/cmake` are applied after running `cmake` as they apply to +some of the generated build files. + +## Upstreaming + +In general, ClickHouse was very responsive to PRs, so additional work to +upstream things should be straightforward. The only bit to record is how to +handle updates to any of the repos cloned as submodules. + +After things have been upstreamed, say to `contrib/project-a`, run + +```bash +$ git submodule update --checkout --remote contrib/project-a +``` + +This records the latest commit of the remote submodule into the superproject. +Once all submodules have been updated like this, make a commit and put up +a PR against ClickHouse as usual. + +## Errors + +If you see errors complaining about a submodule not having a particular +commit or branch, you may need to specify the `branch` in the `.gitmodules` +file. It should be whatever the remote uses as the default branch. Git +defaults to using `master`, but many remotes don't use that name, hence +the errors. 
diff --git a/clickhouse/build.sh b/clickhouse/build.sh
new file mode 100755
index 0000000..1fc5a78
--- /dev/null
+++ b/clickhouse/build.sh
@@ -0,0 +1,190 @@
+#!/bin/bash
+#
+# Build ClickHouse and package the stripped binary into a tarball.
+#
+# Usage: ./build.sh [PLATFORM]
+#
+# PLATFORM defaults to $OSTYPE. It selects the compilers, the build tool,
+# the strip flags and the platform directory holding extra patches/files.
+# Supported values: linux*, darwin*, solaris*, illumos*.
+#
+# Output: $ROOT/clickhouse-v$VER.$PLATFORM.tar.gz, where $ROOT is the
+# directory containing this script.
+
+set -o errexit
+set -o pipefail
+
+# Print an informational message on stdout.
+function info {
+    printf 'INFO: %s\n' "$*"
+}
+
+# Print a banner announcing the next stage of the build.
+function header {
+    printf -- '\n'
+    printf -- '----------------------------------------------------------\n'
+    printf -- 'INFO: %s\n' "$*"
+    printf -- '----------------------------------------------------------\n'
+    printf -- '\n'
+}
+
+# Print an error message on stderr and abort with status 1.
+function fatal {
+    printf 'ERROR: %s\n' "$*" >&2
+    exit 1
+}
+
+ROOT=$(cd "$(dirname "$0")" && pwd)
+ARTEFACT="$ROOT/clickhouse"
+WORK="$ARTEFACT/build"
+VER="21.7"
+BRANCH="master"
+REPO="https://github.com/oxidecomputer/clickhouse"
+
+# Get platform specific options/tools/paths
+if [ $# -eq 0 ]; then
+    PLATFORM="$OSTYPE"
+else
+    PLATFORM="$1"
+fi
+case "$PLATFORM" in
+    linux*)
+        PLATFORM="linux"
+        BUILD_COMMAND="make"
+        CC=gcc-10
+        CXX=g++-10
+        STRIP_ARGS="--strip-debug"
+        NPROC="$(nproc)"
+        ;;
+    darwin*)
+        PLATFORM="macos"
+        BUILD_COMMAND="make"
+        CC=clang
+        # The C++ driver is needed so the C++ runtime gets linked in.
+        CXX=clang++
+        STRIP_ARGS="-S"
+        NPROC="$(sysctl -n hw.ncpu)"
+        ;;
+    solaris*|illumos*)
+        PLATFORM="illumos"
+        BUILD_COMMAND="ninja"
+        CC=gcc-10
+        CXX=g++-10
+        STRIP_ARGS="-x"
+        NPROC="$(nproc)"
+        ;;
+    *)
+        fatal "Unsupported platform $PLATFORM"
+        ;;
+esac
+COMMON_PATCH_DIR="$ROOT/common/patches"
+PATCH_DIR="$ROOT/$PLATFORM/patches"
+FILES_DIR="$ROOT/$PLATFORM/files"
+
+# Names (relative to $FILES_DIR) of extra files to ship in the tarball. An
+# array keeps every name a separate tar argument, even with odd characters.
+EXTRA_FILES=()
+if [ -d "$FILES_DIR" ]; then
+    for path in "$FILES_DIR"/*; do
+        # An empty directory leaves the glob unexpanded; skip that literal.
+        [ -e "$path" ] || continue
+        EXTRA_FILES+=("${path##*/}")
+    done
+fi
+header "Building clickhouse for $PLATFORM"
+
+#
+# Download ClickHouse sources
+#
+if [ -d "$ARTEFACT" ]; then
+    info "ClickHouse repo exists, resetting to HEAD"
+    cd "$ARTEFACT"
+    git fetch origin
+    git switch "$BRANCH"
+    git reset --hard "origin/$BRANCH"
+    git submodule update --checkout --recursive --force
+else
+    info "Cloning ClickHouse sources"
+    # Clone into $ARTEFACT explicitly so the script works from any directory.
+    git clone "$REPO" "$ARTEFACT"
+    cd "$ARTEFACT"
+    git switch "$BRANCH"
+    git submodule update --init --recursive
+fi
+
+# Apply common patches, independent of platform
+header "Applying shared ClickHouse patches"
+git apply --verbose "$COMMON_PATCH_DIR"/*
+
+# Patches to the actual sources. Below we apply those to CMake-generated files.
+if [ -d "$PATCH_DIR/direct" ]; then
+    header "Applying $PLATFORM-specific patches"
+    git apply --verbose "$PATCH_DIR"/direct/*
+fi
+
+header "Building ClickHouse"
+# Separate commands: in `mkdir && cd`, errexit would ignore a failed mkdir.
+mkdir -p "$WORK"
+cd "$WORK"
+FLAGS="-D_REENTRANT -D_POSIX_PTHREAD_SEMANTICS -D__EXTENSIONS__ -m64 -I$ARTEFACT/contrib/hyperscan-cmake/x86_64/"
+# NOTE(review): no -G is passed to cmake, yet illumos builds with ninja;
+# presumably the generator comes from the environment -- confirm.
+CC=$CC CXX=$CXX CFLAGS="$FLAGS" CXXFLAGS="$FLAGS" \
+cmake \
+    -DABSL_CXX_STANDARD="17" \
+    -DENABLE_LDAP=off \
+    -DUSE_INTERNAL_LDAP_LIBRARY=off \
+    -DENABLE_HDFS=off \
+    -DUSE_INTERNAL_HDFS3_LIBRARY=off \
+    -DENABLE_AMQPCPP=off \
+    -DENABLE_AVRO=off \
+    -DUSE_INTERNAL_AVRO_LIBRARY=off \
+    -DENABLE_CAPNP=off \
+    -DUSE_INTERNAL_CAPNP_LIBRARY=off \
+    -DENABLE_MSGPACK=off \
+    -DUSE_INTERNAL_MSGPACK_LIBRARY=off \
+    -DENABLE_MYSQL=off \
+    -DENABLE_S3=off \
+    -DUSE_INTERNAL_AWS_S3_LIBRARY=off \
+    -DENABLE_PARQUET=off \
+    -DUSE_INTERNAL_PARQUET_LIBRARY=off \
+    -DENABLE_ORC=off \
+    -DUSE_INTERNAL_ORC_LIBRARY=off \
+    -DUSE_SENTRY=off \
+    -DENABLE_CLICKHOUSE_ODBC_BRIDGE=off \
+    -DENABLE_CLICKHOUSE_BENCHMARK=off \
+    -DENABLE_TESTS=off \
+    "$ARTEFACT"
+
+header "Patching CMake-generated files"
+cd "$ARTEFACT"
+if [ -d "$PATCH_DIR/cmake" ]; then
+    git apply --verbose "$PATCH_DIR"/cmake/*
+fi
+cd "$WORK"
+
+# The build is massive. Try to parallelize until we error out, usually due to space constraints while
+# linking. At that point, continue serially
+if ! "$BUILD_COMMAND" -j "$NPROC"; then
+    header "Parallel build failed, continuing serially"
+    "$BUILD_COMMAND" -j 1
+fi
+
+# Strip the resulting binary. This part is crucial. ClickHouse's binary is 3+GiB unstripped.
+strip "$STRIP_ARGS" "$WORK/programs/clickhouse"
+CONFIG_FILE_DIR="$ARTEFACT/programs/server"
+CONFIG_FILE_NAME="config.xml"
+
+# Each -C switches tar's directory before the members that follow it, so the
+# archive holds bare file names rather than full build paths.
+TAR_ARGS=(
+    -C "$WORK/programs" clickhouse
+    -C "$CONFIG_FILE_DIR" "$CONFIG_FILE_NAME"
+)
+if [ "${#EXTRA_FILES[@]}" -gt 0 ]; then
+    TAR_ARGS+=(-C "$FILES_DIR" "${EXTRA_FILES[@]}")
+fi
+/usr/bin/tar cvfz "$ROOT/clickhouse-v$VER.$PLATFORM.tar.gz" "${TAR_ARGS[@]}"
+
+header "Build output:"
+find "$WORK" -type f -ls
diff --git a/clickhouse/common/patches/config.patch b/clickhouse/common/patches/config.patch new file mode 100644 index 0000000..0f5ef62 --- /dev/null +++ b/clickhouse/common/patches/config.patch @@ -0,0 +1,64 @@ +diff --git a/programs/server/config.xml b/programs/server/config.xml +index df8a5266c3..e58318e970 100644 +--- a/programs/server/config.xml ++++ b/programs/server/config.xml +@@ -22,8 +22,8 @@ + [1]: https://github.com/pocoproject/poco/blob/poco-1.9.4-release/Foundation/include/Poco/Logger.h#L105-L114 + --> + trace +- /var/log/clickhouse-server/clickhouse-server.log +- /var/log/clickhouse-server/clickhouse-server.err.log ++ /tmp/clickhouse/21.7/clickhouse-server.log ++ /tmp/clickhouse/21.7/clickhouse-server.err.log + +@@ -333,10 +333,10 @@ + 1073741824 + + +- /var/lib/clickhouse/ ++ /tmp/clickhouse/21.7/ + + +- /var/lib/clickhouse/tmp/ ++ /tmp/clickhouse/21.7/tmp/ + + + + +- /var/lib/clickhouse/user_files/ ++ /tmp/clickhouse/21.7/user_files/ + + + +@@ -425,7 +425,7 @@ + + + +- /var/lib/clickhouse/access/ ++ /tmp/clickhouse/21.7/access/ + + + + + +- ++ + +- /var/lib/clickhouse/format_schemas/ ++ /tmp/clickhouse/21.7/format_schemas/ + +