From 699ad52f2ec2a4bf1a30cb1a49b6c5b9279675f8 Mon Sep 17 00:00:00 2001 From: Subham Sangwan Date: Fri, 14 Nov 2025 16:28:48 +0530 Subject: [PATCH 1/5] Add China mirror support for dependencies Fixes #239 --- BUILD_CHINA.md | 60 ++++++++++ README.md | 2 + .../IcebergThirdpartyToolchain.cmake | 110 +++++++++++++++--- 3 files changed, 156 insertions(+), 16 deletions(-) create mode 100644 BUILD_CHINA.md diff --git a/BUILD_CHINA.md b/BUILD_CHINA.md new file mode 100644 index 000000000..b24e21fc8 --- /dev/null +++ b/BUILD_CHINA.md @@ -0,0 +1,60 @@ +# Building in China + +This guide helps developers in China build iceberg-cpp when network access to GitHub and other international sites is limited. + +## Mirror Support + +The build system automatically tries alternative download mirrors when the primary URL fails. All third-party dependencies have been configured with China-based mirrors. + +### Available Mirrors + +Dependencies are automatically downloaded from these mirror sites: + +**Apache Projects (Arrow, Nanoarrow):** +- Tsinghua University: https://mirrors.tuna.tsinghua.edu.cn/apache/ +- USTC: https://mirrors.ustc.edu.cn/apache/ + +**GitHub Projects (CRoaring, nlohmann-json, spdlog, cpr):** +- Gitee: https://gitee.com/mirrors/ +- FastGit: https://hub.fastgit.xyz/ + +**Note**: Avro requires a git repository (unreleased version). Automatic mirror fallback is not available for git repositories, but you can specify a custom git mirror using the `ICEBERG_AVRO_GIT_URL` environment variable. 
+ +### Custom Mirror URLs + +To override the default mirrors, set environment variables before running CMake: + +```bash +export ICEBERG_ARROW_URL="https://mirrors.tuna.tsinghua.edu.cn/apache/arrow/arrow-22.0.0/apache-arrow-22.0.0.tar.gz" +export ICEBERG_NANOARROW_URL="https://mirrors.tuna.tsinghua.edu.cn/apache/arrow/apache-arrow-nanoarrow-0.7.0/apache-arrow-nanoarrow-0.7.0.tar.gz" +export ICEBERG_CROARING_URL="https://gitee.com/mirrors/CRoaring/repository/archive/v4.3.11.tar.gz" +export ICEBERG_NLOHMANN_JSON_URL="https://gitee.com/mirrors/JSON-for-Modern-CPP/releases/download/v3.11.3/json.tar.xz" +export ICEBERG_SPDLOG_URL="https://gitee.com/mirrors/spdlog/repository/archive/v1.15.3.tar.gz" +export ICEBERG_CPR_URL="https://gitee.com/mirrors/cpr/repository/archive/1.12.0.tar.gz" + +# For Avro, you can use either a tarball URL or a git repository URL: +export ICEBERG_AVRO_URL="https://example.com/avro.tar.gz" # if you have a tarball +# OR +export ICEBERG_AVRO_GIT_URL="https://gitee.com/mirrors/avro.git" # for git mirror +``` + +Then build as usual: + +```bash +cmake -S . -B build +cmake --build build +``` + +## Troubleshooting + +**Download failures:** +- Try setting a specific mirror using environment variables +- Use a VPN or proxy: `export https_proxy=http://proxy:port` +- Pre-download tarballs to `~/.cmake/Downloads/` + +**Slow downloads:** +- The build will automatically retry with different mirrors +- Consider using Meson build system as an alternative + +**Still having issues?** +Open an issue at https://github.com/apache/iceberg-cpp/issues with details about which dependency failed and the error message. diff --git a/README.md b/README.md index 26bcf5a2b..affe87816 100644 --- a/README.md +++ b/README.md @@ -31,6 +31,8 @@ C++ implementation of [Apache Iceberg™](https://iceberg.apache.org/). 
- CMake 3.25 or higher - C++23 compliant compiler +> **Note**: For developers in China experiencing network issues when downloading dependencies, see [BUILD_CHINA.md](BUILD_CHINA.md) for mirror configuration. + ## Build ### Build, Run Test and Install Core Libraries diff --git a/cmake_modules/IcebergThirdpartyToolchain.cmake b/cmake_modules/IcebergThirdpartyToolchain.cmake index 8dce58fc3..f24af2b4e 100644 --- a/cmake_modules/IcebergThirdpartyToolchain.cmake +++ b/cmake_modules/IcebergThirdpartyToolchain.cmake @@ -33,6 +33,8 @@ else() set(ARROW_SOURCE_URL "https://www.apache.org/dyn/closer.lua?action=download&filename=/arrow/arrow-${ICEBERG_ARROW_BUILD_VERSION}/apache-arrow-${ICEBERG_ARROW_BUILD_VERSION}.tar.gz" "https://downloads.apache.org/arrow/arrow-${ICEBERG_ARROW_BUILD_VERSION}/apache-arrow-${ICEBERG_ARROW_BUILD_VERSION}.tar.gz" + "https://mirrors.tuna.tsinghua.edu.cn/apache/arrow/arrow-${ICEBERG_ARROW_BUILD_VERSION}/apache-arrow-${ICEBERG_ARROW_BUILD_VERSION}.tar.gz" + "https://mirrors.ustc.edu.cn/apache/arrow/arrow-${ICEBERG_ARROW_BUILD_VERSION}/apache-arrow-${ICEBERG_ARROW_BUILD_VERSION}.tar.gz" ) endif() @@ -164,17 +166,42 @@ function(resolve_avro_dependency) OFF CACHE BOOL "" FORCE) - fetchcontent_declare(avro-cpp - ${FC_DECLARE_COMMON_OPTIONS} - # TODO: switch to Apache Avro 1.13.0 once released. 
- GIT_REPOSITORY https://github.com/apache/avro.git - GIT_TAG e6c308780e876b4c11a470b9900995947f7b0fb5 - SOURCE_SUBDIR - lang/c++ - FIND_PACKAGE_ARGS - NAMES - avro-cpp - CONFIG) + if(DEFINED ENV{ICEBERG_AVRO_URL}) + # Support custom tarball URL + fetchcontent_declare(avro-cpp + ${FC_DECLARE_COMMON_OPTIONS} + URL $ENV{ICEBERG_AVRO_URL} + SOURCE_SUBDIR + lang/c++ + FIND_PACKAGE_ARGS + NAMES + avro-cpp + CONFIG) + elseif(DEFINED ENV{ICEBERG_AVRO_GIT_URL}) + # Support custom git URL for mirrors + fetchcontent_declare(avro-cpp + ${FC_DECLARE_COMMON_OPTIONS} + GIT_REPOSITORY $ENV{ICEBERG_AVRO_GIT_URL} + GIT_TAG e6c308780e876b4c11a470b9900995947f7b0fb5 + SOURCE_SUBDIR + lang/c++ + FIND_PACKAGE_ARGS + NAMES + avro-cpp + CONFIG) + else() + # Default to GitHub - uses unreleased version + fetchcontent_declare(avro-cpp + ${FC_DECLARE_COMMON_OPTIONS} + GIT_REPOSITORY https://github.com/apache/avro.git + GIT_TAG e6c308780e876b4c11a470b9900995947f7b0fb5 + SOURCE_SUBDIR + lang/c++ + FIND_PACKAGE_ARGS + NAMES + avro-cpp + CONFIG) + endif() fetchcontent_makeavailable(avro-cpp) @@ -221,9 +248,20 @@ endfunction() function(resolve_nanoarrow_dependency) prepare_fetchcontent() + if(DEFINED ENV{ICEBERG_NANOARROW_URL}) + set(NANOARROW_URL "$ENV{ICEBERG_NANOARROW_URL}") + else() + set(NANOARROW_URL + "https://dlcdn.apache.org/arrow/apache-arrow-nanoarrow-0.7.0/apache-arrow-nanoarrow-0.7.0.tar.gz" + "https://downloads.apache.org/arrow/apache-arrow-nanoarrow-0.7.0/apache-arrow-nanoarrow-0.7.0.tar.gz" + "https://mirrors.tuna.tsinghua.edu.cn/apache/arrow/apache-arrow-nanoarrow-0.7.0/apache-arrow-nanoarrow-0.7.0.tar.gz" + "https://mirrors.ustc.edu.cn/apache/arrow/apache-arrow-nanoarrow-0.7.0/apache-arrow-nanoarrow-0.7.0.tar.gz" + ) + endif() + fetchcontent_declare(nanoarrow ${FC_DECLARE_COMMON_OPTIONS} - URL "https://dlcdn.apache.org/arrow/apache-arrow-nanoarrow-0.7.0/apache-arrow-nanoarrow-0.7.0.tar.gz" + URL ${NANOARROW_URL} FIND_PACKAGE_ARGS NAMES nanoarrow @@ -270,9 +308,19 @@ 
function(resolve_croaring_dependency) set(ENABLE_ROARING_TESTS OFF) set(ENABLE_ROARING_MICROBENCHMARKS OFF) + if(DEFINED ENV{ICEBERG_CROARING_URL}) + set(CROARING_URL "$ENV{ICEBERG_CROARING_URL}") + else() + set(CROARING_URL + "https://github.com/RoaringBitmap/CRoaring/archive/refs/tags/v4.3.11.tar.gz" + "https://gitee.com/mirrors/CRoaring/repository/archive/v4.3.11.tar.gz" + "https://hub.fastgit.xyz/RoaringBitmap/CRoaring/archive/refs/tags/v4.3.11.tar.gz" + ) + endif() + fetchcontent_declare(croaring ${FC_DECLARE_COMMON_OPTIONS} - URL "https://github.com/RoaringBitmap/CRoaring/archive/refs/tags/v4.3.11.tar.gz" + URL ${CROARING_URL} FIND_PACKAGE_ARGS NAMES roaring @@ -318,9 +366,19 @@ function(resolve_nlohmann_json_dependency) OFF CACHE BOOL "" FORCE) + if(DEFINED ENV{ICEBERG_NLOHMANN_JSON_URL}) + set(NLOHMANN_JSON_URL "$ENV{ICEBERG_NLOHMANN_JSON_URL}") + else() + set(NLOHMANN_JSON_URL + "https://github.com/nlohmann/json/releases/download/v3.11.3/json.tar.xz" + "https://gitee.com/mirrors/JSON-for-Modern-CPP/releases/download/v3.11.3/json.tar.xz" + "https://hub.fastgit.xyz/nlohmann/json/releases/download/v3.11.3/json.tar.xz" + ) + endif() + fetchcontent_declare(nlohmann_json ${FC_DECLARE_COMMON_OPTIONS} - URL "https://github.com/nlohmann/json/releases/download/v3.11.3/json.tar.xz" + URL ${NLOHMANN_JSON_URL} FIND_PACKAGE_ARGS NAMES nlohmann_json @@ -378,9 +436,19 @@ function(resolve_spdlog_dependency) ON CACHE BOOL "" FORCE) + if(DEFINED ENV{ICEBERG_SPDLOG_URL}) + set(SPDLOG_URL "$ENV{ICEBERG_SPDLOG_URL}") + else() + set(SPDLOG_URL + "https://github.com/gabime/spdlog/archive/refs/tags/v1.15.3.tar.gz" + "https://gitee.com/mirrors/spdlog/repository/archive/v1.15.3.tar.gz" + "https://hub.fastgit.xyz/gabime/spdlog/archive/refs/tags/v1.15.3.tar.gz" + ) + endif() + fetchcontent_declare(spdlog ${FC_DECLARE_COMMON_OPTIONS} - URL "https://github.com/gabime/spdlog/archive/refs/tags/v1.15.3.tar.gz" + URL ${SPDLOG_URL} FIND_PACKAGE_ARGS NAMES spdlog @@ -440,9 +508,19 @@ 
function(resolve_cpr_dependency) set(CPR_ENABLE_SSL ON) set(CPR_USE_SYSTEM_CURL ON) + if(DEFINED ENV{ICEBERG_CPR_URL}) + set(CPR_URL "$ENV{ICEBERG_CPR_URL}") + else() + set(CPR_URL + "https://github.com/libcpr/cpr/archive/refs/tags/1.12.0.tar.gz" + "https://gitee.com/mirrors/cpr/repository/archive/1.12.0.tar.gz" + "https://hub.fastgit.xyz/libcpr/cpr/archive/refs/tags/1.12.0.tar.gz" + ) + endif() + fetchcontent_declare(cpr ${FC_DECLARE_COMMON_OPTIONS} - URL https://github.com/libcpr/cpr/archive/refs/tags/1.12.0.tar.gz + URL ${CPR_URL} FIND_PACKAGE_ARGS NAMES cpr From a565133bc246214441aa4fb81c25c9a067e5bc0c Mon Sep 17 00:00:00 2001 From: Subham Sangwan Date: Fri, 14 Nov 2025 16:36:21 +0530 Subject: [PATCH 2/5] Fix formatting and license header --- BUILD_CHINA.md | 19 +++++++++++++++ .../IcebergThirdpartyToolchain.cmake | 24 ++++++++----------- 2 files changed, 29 insertions(+), 14 deletions(-) diff --git a/BUILD_CHINA.md b/BUILD_CHINA.md index b24e21fc8..aa61e179c 100644 --- a/BUILD_CHINA.md +++ b/BUILD_CHINA.md @@ -1,3 +1,22 @@ + + # Building in China This guide helps developers in China build iceberg-cpp when network access to GitHub and other international sites is limited. 
diff --git a/cmake_modules/IcebergThirdpartyToolchain.cmake b/cmake_modules/IcebergThirdpartyToolchain.cmake index f24af2b4e..fec1e0e0e 100644 --- a/cmake_modules/IcebergThirdpartyToolchain.cmake +++ b/cmake_modules/IcebergThirdpartyToolchain.cmake @@ -171,12 +171,12 @@ function(resolve_avro_dependency) fetchcontent_declare(avro-cpp ${FC_DECLARE_COMMON_OPTIONS} URL $ENV{ICEBERG_AVRO_URL} - SOURCE_SUBDIR - lang/c++ - FIND_PACKAGE_ARGS - NAMES - avro-cpp - CONFIG) + SOURCE_SUBDIR + lang/c++ + FIND_PACKAGE_ARGS + NAMES + avro-cpp + CONFIG) elseif(DEFINED ENV{ICEBERG_AVRO_GIT_URL}) # Support custom git URL for mirrors fetchcontent_declare(avro-cpp @@ -314,8 +314,7 @@ function(resolve_croaring_dependency) set(CROARING_URL "https://github.com/RoaringBitmap/CRoaring/archive/refs/tags/v4.3.11.tar.gz" "https://gitee.com/mirrors/CRoaring/repository/archive/v4.3.11.tar.gz" - "https://hub.fastgit.xyz/RoaringBitmap/CRoaring/archive/refs/tags/v4.3.11.tar.gz" - ) + "https://hub.fastgit.xyz/RoaringBitmap/CRoaring/archive/refs/tags/v4.3.11.tar.gz") endif() fetchcontent_declare(croaring @@ -372,8 +371,7 @@ function(resolve_nlohmann_json_dependency) set(NLOHMANN_JSON_URL "https://github.com/nlohmann/json/releases/download/v3.11.3/json.tar.xz" "https://gitee.com/mirrors/JSON-for-Modern-CPP/releases/download/v3.11.3/json.tar.xz" - "https://hub.fastgit.xyz/nlohmann/json/releases/download/v3.11.3/json.tar.xz" - ) + "https://hub.fastgit.xyz/nlohmann/json/releases/download/v3.11.3/json.tar.xz") endif() fetchcontent_declare(nlohmann_json @@ -442,8 +440,7 @@ function(resolve_spdlog_dependency) set(SPDLOG_URL "https://github.com/gabime/spdlog/archive/refs/tags/v1.15.3.tar.gz" "https://gitee.com/mirrors/spdlog/repository/archive/v1.15.3.tar.gz" - "https://hub.fastgit.xyz/gabime/spdlog/archive/refs/tags/v1.15.3.tar.gz" - ) + "https://hub.fastgit.xyz/gabime/spdlog/archive/refs/tags/v1.15.3.tar.gz") endif() fetchcontent_declare(spdlog @@ -514,8 +511,7 @@ function(resolve_cpr_dependency) 
set(CPR_URL "https://github.com/libcpr/cpr/archive/refs/tags/1.12.0.tar.gz" "https://gitee.com/mirrors/cpr/repository/archive/1.12.0.tar.gz" - "https://hub.fastgit.xyz/libcpr/cpr/archive/refs/tags/1.12.0.tar.gz" - ) + "https://hub.fastgit.xyz/libcpr/cpr/archive/refs/tags/1.12.0.tar.gz") endif() fetchcontent_declare(cpr From c436250ca6547a569c9dc7aaaf68b9c7e4f43bc7 Mon Sep 17 00:00:00 2001 From: Subham Sangwan Date: Sat, 15 Nov 2025 08:50:05 +0530 Subject: [PATCH 3/5] Remove hardcoded mirrors, keep env var support --- BUILD_CHINA.md | 39 +++++++++++++++++++ .../IcebergThirdpartyToolchain.cmake | 23 ++--------- 2 files changed, 43 insertions(+), 19 deletions(-) diff --git a/BUILD_CHINA.md b/BUILD_CHINA.md index aa61e179c..27a4c3e7c 100644 --- a/BUILD_CHINA.md +++ b/BUILD_CHINA.md @@ -19,6 +19,45 @@ # Building in China +This guide helps developers in China who may experience network issues when downloading dependencies from GitHub or international mirrors. + +## Using Custom Mirror URLs + +If you experience download timeouts, you can override the default dependency URLs using environment variables: + +```bash +export ICEBERG_ARROW_URL="/apache-arrow-22.0.0.tar.gz" +export ICEBERG_NANOARROW_URL="/apache-arrow-nanoarrow-0.7.0.tar.gz" +export ICEBERG_CROARING_URL="/CRoaring-v4.3.11.tar.gz" +export ICEBERG_NLOHMANN_JSON_URL="/json-v3.11.3.tar.xz" +export ICEBERG_SPDLOG_URL="/spdlog-v1.15.3.tar.gz" +export ICEBERG_CPR_URL="/cpr-1.12.0.tar.gz" + +# For Avro (git repository): +export ICEBERG_AVRO_GIT_URL="/avro.git" +# Or if you have a tarball: +export ICEBERG_AVRO_URL="/avro.tar.gz" +``` + +Then build as usual: + +```bash +cmake -S . -B build +cmake --build build +``` + +## Alternative Solutions + +1. **Use system packages**: Install dependencies via your system package manager +2. **Use a proxy**: Set `https_proxy` environment variable +3. 
**Pre-download**: Manually download tarballs to `~/.cmake/Downloads/` + +## Getting Help + +If you continue experiencing build issues, please open an issue at https://github.com/apache/iceberg-cpp/issues with details about which dependency failed. + +# Building in China + This guide helps developers in China build iceberg-cpp when network access to GitHub and other international sites is limited. ## Mirror Support diff --git a/cmake_modules/IcebergThirdpartyToolchain.cmake b/cmake_modules/IcebergThirdpartyToolchain.cmake index fec1e0e0e..9d69a38f5 100644 --- a/cmake_modules/IcebergThirdpartyToolchain.cmake +++ b/cmake_modules/IcebergThirdpartyToolchain.cmake @@ -33,8 +33,6 @@ else() set(ARROW_SOURCE_URL "https://www.apache.org/dyn/closer.lua?action=download&filename=/arrow/arrow-${ICEBERG_ARROW_BUILD_VERSION}/apache-arrow-${ICEBERG_ARROW_BUILD_VERSION}.tar.gz" "https://downloads.apache.org/arrow/arrow-${ICEBERG_ARROW_BUILD_VERSION}/apache-arrow-${ICEBERG_ARROW_BUILD_VERSION}.tar.gz" - "https://mirrors.tuna.tsinghua.edu.cn/apache/arrow/arrow-${ICEBERG_ARROW_BUILD_VERSION}/apache-arrow-${ICEBERG_ARROW_BUILD_VERSION}.tar.gz" - "https://mirrors.ustc.edu.cn/apache/arrow/arrow-${ICEBERG_ARROW_BUILD_VERSION}/apache-arrow-${ICEBERG_ARROW_BUILD_VERSION}.tar.gz" ) endif() @@ -253,9 +251,6 @@ function(resolve_nanoarrow_dependency) else() set(NANOARROW_URL "https://dlcdn.apache.org/arrow/apache-arrow-nanoarrow-0.7.0/apache-arrow-nanoarrow-0.7.0.tar.gz" - "https://downloads.apache.org/arrow/apache-arrow-nanoarrow-0.7.0/apache-arrow-nanoarrow-0.7.0.tar.gz" - "https://mirrors.tuna.tsinghua.edu.cn/apache/arrow/apache-arrow-nanoarrow-0.7.0/apache-arrow-nanoarrow-0.7.0.tar.gz" - "https://mirrors.ustc.edu.cn/apache/arrow/apache-arrow-nanoarrow-0.7.0/apache-arrow-nanoarrow-0.7.0.tar.gz" ) endif() @@ -312,9 +307,7 @@ function(resolve_croaring_dependency) set(CROARING_URL "$ENV{ICEBERG_CROARING_URL}") else() set(CROARING_URL - 
"https://github.com/RoaringBitmap/CRoaring/archive/refs/tags/v4.3.11.tar.gz" - "https://gitee.com/mirrors/CRoaring/repository/archive/v4.3.11.tar.gz" - "https://hub.fastgit.xyz/RoaringBitmap/CRoaring/archive/refs/tags/v4.3.11.tar.gz") + "https://github.com/RoaringBitmap/CRoaring/archive/refs/tags/v4.3.11.tar.gz") endif() fetchcontent_declare(croaring @@ -369,9 +362,7 @@ function(resolve_nlohmann_json_dependency) set(NLOHMANN_JSON_URL "$ENV{ICEBERG_NLOHMANN_JSON_URL}") else() set(NLOHMANN_JSON_URL - "https://github.com/nlohmann/json/releases/download/v3.11.3/json.tar.xz" - "https://gitee.com/mirrors/JSON-for-Modern-CPP/releases/download/v3.11.3/json.tar.xz" - "https://hub.fastgit.xyz/nlohmann/json/releases/download/v3.11.3/json.tar.xz") + "https://github.com/nlohmann/json/releases/download/v3.11.3/json.tar.xz") endif() fetchcontent_declare(nlohmann_json @@ -437,10 +428,7 @@ function(resolve_spdlog_dependency) if(DEFINED ENV{ICEBERG_SPDLOG_URL}) set(SPDLOG_URL "$ENV{ICEBERG_SPDLOG_URL}") else() - set(SPDLOG_URL - "https://github.com/gabime/spdlog/archive/refs/tags/v1.15.3.tar.gz" - "https://gitee.com/mirrors/spdlog/repository/archive/v1.15.3.tar.gz" - "https://hub.fastgit.xyz/gabime/spdlog/archive/refs/tags/v1.15.3.tar.gz") + set(SPDLOG_URL "https://github.com/gabime/spdlog/archive/refs/tags/v1.15.3.tar.gz") endif() fetchcontent_declare(spdlog @@ -508,10 +496,7 @@ function(resolve_cpr_dependency) if(DEFINED ENV{ICEBERG_CPR_URL}) set(CPR_URL "$ENV{ICEBERG_CPR_URL}") else() - set(CPR_URL - "https://github.com/libcpr/cpr/archive/refs/tags/1.12.0.tar.gz" - "https://gitee.com/mirrors/cpr/repository/archive/1.12.0.tar.gz" - "https://hub.fastgit.xyz/libcpr/cpr/archive/refs/tags/1.12.0.tar.gz") + set(CPR_URL "https://github.com/libcpr/cpr/archive/refs/tags/1.12.0.tar.gz") endif() fetchcontent_declare(cpr From 7b8a65cc892dc6ed6c2aae34441d12f4b81357cd Mon Sep 17 00:00:00 2001 From: Subham Sangwan Date: Fri, 21 Nov 2025 17:09:23 +0530 Subject: [PATCH 4/5] Document env vars at 
top, remove BUILD_CHINA.md --- BUILD_CHINA.md | 118 ------------------ README.md | 4 +- .../IcebergThirdpartyToolchain.cmake | 16 +++ 3 files changed, 19 insertions(+), 119 deletions(-) delete mode 100644 BUILD_CHINA.md diff --git a/BUILD_CHINA.md b/BUILD_CHINA.md deleted file mode 100644 index 27a4c3e7c..000000000 --- a/BUILD_CHINA.md +++ /dev/null @@ -1,118 +0,0 @@ - - -# Building in China - -This guide helps developers in China who may experience network issues when downloading dependencies from GitHub or international mirrors. - -## Using Custom Mirror URLs - -If you experience download timeouts, you can override the default dependency URLs using environment variables: - -```bash -export ICEBERG_ARROW_URL="/apache-arrow-22.0.0.tar.gz" -export ICEBERG_NANOARROW_URL="/apache-arrow-nanoarrow-0.7.0.tar.gz" -export ICEBERG_CROARING_URL="/CRoaring-v4.3.11.tar.gz" -export ICEBERG_NLOHMANN_JSON_URL="/json-v3.11.3.tar.xz" -export ICEBERG_SPDLOG_URL="/spdlog-v1.15.3.tar.gz" -export ICEBERG_CPR_URL="/cpr-1.12.0.tar.gz" - -# For Avro (git repository): -export ICEBERG_AVRO_GIT_URL="/avro.git" -# Or if you have a tarball: -export ICEBERG_AVRO_URL="/avro.tar.gz" -``` - -Then build as usual: - -```bash -cmake -S . -B build -cmake --build build -``` - -## Alternative Solutions - -1. **Use system packages**: Install dependencies via your system package manager -2. **Use a proxy**: Set `https_proxy` environment variable -3. **Pre-download**: Manually download tarballs to `~/.cmake/Downloads/` - -## Getting Help - -If you continue experiencing build issues, please open an issue at https://github.com/apache/iceberg-cpp/issues with details about which dependency failed. - -# Building in China - -This guide helps developers in China build iceberg-cpp when network access to GitHub and other international sites is limited. - -## Mirror Support - -The build system automatically tries alternative download mirrors when the primary URL fails. 
All third-party dependencies have been configured with China-based mirrors. - -### Available Mirrors - -Dependencies are automatically downloaded from these mirror sites: - -**Apache Projects (Arrow, Nanoarrow):** -- Tsinghua University: https://mirrors.tuna.tsinghua.edu.cn/apache/ -- USTC: https://mirrors.ustc.edu.cn/apache/ - -**GitHub Projects (CRoaring, nlohmann-json, spdlog, cpr):** -- Gitee: https://gitee.com/mirrors/ -- FastGit: https://hub.fastgit.xyz/ - -**Note**: Avro requires a git repository (unreleased version). Automatic mirror fallback is not available for git repositories, but you can specify a custom git mirror using the `ICEBERG_AVRO_GIT_URL` environment variable. - -### Custom Mirror URLs - -To override the default mirrors, set environment variables before running CMake: - -```bash -export ICEBERG_ARROW_URL="https://mirrors.tuna.tsinghua.edu.cn/apache/arrow/arrow-22.0.0/apache-arrow-22.0.0.tar.gz" -export ICEBERG_NANOARROW_URL="https://mirrors.tuna.tsinghua.edu.cn/apache/arrow/apache-arrow-nanoarrow-0.7.0/apache-arrow-nanoarrow-0.7.0.tar.gz" -export ICEBERG_CROARING_URL="https://gitee.com/mirrors/CRoaring/repository/archive/v4.3.11.tar.gz" -export ICEBERG_NLOHMANN_JSON_URL="https://gitee.com/mirrors/JSON-for-Modern-CPP/releases/download/v3.11.3/json.tar.xz" -export ICEBERG_SPDLOG_URL="https://gitee.com/mirrors/spdlog/repository/archive/v1.15.3.tar.gz" -export ICEBERG_CPR_URL="https://gitee.com/mirrors/cpr/repository/archive/1.12.0.tar.gz" - -# For Avro, you can use either a tarball URL or a git repository URL: -export ICEBERG_AVRO_URL="https://example.com/avro.tar.gz" # if you have a tarball -# OR -export ICEBERG_AVRO_GIT_URL="https://gitee.com/mirrors/avro.git" # for git mirror -``` - -Then build as usual: - -```bash -cmake -S . 
-B build -cmake --build build -``` - -## Troubleshooting - -**Download failures:** -- Try setting a specific mirror using environment variables -- Use a VPN or proxy: `export https_proxy=http://proxy:port` -- Pre-download tarballs to `~/.cmake/Downloads/` - -**Slow downloads:** -- The build will automatically retry with different mirrors -- Consider using Meson build system as an alternative - -**Still having issues?** -Open an issue at https://github.com/apache/iceberg-cpp/issues with details about which dependency failed and the error message. diff --git a/README.md b/README.md index affe87816..c247ecda3 100644 --- a/README.md +++ b/README.md @@ -31,7 +31,9 @@ C++ implementation of [Apache Iceberg™](https://iceberg.apache.org/). - CMake 3.25 or higher - C++23 compliant compiler -> **Note**: For developers in China experiencing network issues when downloading dependencies, see [BUILD_CHINA.md](BUILD_CHINA.md) for mirror configuration. +## Customizing Dependency URLs + +If you experience network issues when downloading dependencies, you can customize the download URLs using environment variables. See the available options documented in [`cmake_modules/IcebergThirdpartyToolchain.cmake`](cmake_modules/IcebergThirdpartyToolchain.cmake#L24-L40). 
## Build diff --git a/cmake_modules/IcebergThirdpartyToolchain.cmake b/cmake_modules/IcebergThirdpartyToolchain.cmake index 9d69a38f5..04e01ed5a 100644 --- a/cmake_modules/IcebergThirdpartyToolchain.cmake +++ b/cmake_modules/IcebergThirdpartyToolchain.cmake @@ -22,6 +22,22 @@ set(ICEBERG_ARROW_INSTALL_INTERFACE_LIBS) # ---------------------------------------------------------------------- # Versions and URLs for toolchain builds +# +# The following environment variables can be set to customize dependency URLs: +# +# ICEBERG_ARROW_URL - Apache Arrow tarball URL +# ICEBERG_AVRO_URL - Apache Avro tarball URL (takes precedence over ICEBERG_AVRO_GIT_URL) +# ICEBERG_AVRO_GIT_URL - Apache Avro git repository URL (the pinned commit is still checked out) +# ICEBERG_NANOARROW_URL - Nanoarrow tarball URL +# ICEBERG_CROARING_URL - CRoaring tarball URL +# ICEBERG_NLOHMANN_JSON_URL - nlohmann-json tarball URL +# ICEBERG_SPDLOG_URL - spdlog tarball URL +# ICEBERG_CPR_URL - cpr tarball URL +# +# Example usage: +# export ICEBERG_ARROW_URL="https://your-mirror.com/apache-arrow-22.0.0.tar.gz" +# cmake -S . -B build +# set(ICEBERG_ARROW_BUILD_VERSION "22.0.0") set(ICEBERG_ARROW_BUILD_SHA256_CHECKSUM From 24ad0411d5bc46df1d388b4603acb312b1ea590c Mon Sep 17 00:00:00 2001 From: Subham Sangwan Date: Fri, 21 Nov 2025 22:45:56 +0530 Subject: [PATCH 5/5] Address PR feedback: Inline dependency URL documentation --- README.md | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index c247ecda3..883e6566a 100644 --- a/README.md +++ b/README.md @@ -33,7 +33,25 @@ C++ implementation of [Apache Iceberg™](https://iceberg.apache.org/). ## Customizing Dependency URLs -If you experience network issues when downloading dependencies, you can customize the download URLs using environment variables. See the available options documented in [`cmake_modules/IcebergThirdpartyToolchain.cmake`](cmake_modules/IcebergThirdpartyToolchain.cmake#L24-L40). 
+If you experience network issues when downloading dependencies, you can customize the download URLs using environment variables. + +The following environment variables are read when CMake configures the project, so set them before running `cmake`: + +- `ICEBERG_ARROW_URL`: Apache Arrow tarball URL +- `ICEBERG_AVRO_URL`: Apache Avro tarball URL (takes precedence over `ICEBERG_AVRO_GIT_URL` when both are set) +- `ICEBERG_AVRO_GIT_URL`: Apache Avro git repository URL (the build still checks out the pinned Avro commit from it) +- `ICEBERG_NANOARROW_URL`: Nanoarrow tarball URL +- `ICEBERG_CROARING_URL`: CRoaring tarball URL +- `ICEBERG_NLOHMANN_JSON_URL`: nlohmann-json tarball URL +- `ICEBERG_SPDLOG_URL`: spdlog tarball URL +- `ICEBERG_CPR_URL`: cpr tarball URL + +Example usage: + +```bash +export ICEBERG_ARROW_URL="https://your-mirror.com/apache-arrow-22.0.0.tar.gz" +cmake -S . -B build +``` ## Build