Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,10 @@ test-integration-rebuild: ## Rebuild integration Docker services from scratch
docker compose -f dev/docker-compose-integration.yml rm -f
docker compose -f dev/docker-compose-integration.yml build --no-cache

test-integration-trino: ## Run tests marked with @pytest.mark.integration_trino
sh ./dev/run-trino.sh
$(TEST_RUNNER) pytest tests/ -m integration_trino $(PYTEST_ARGS)

test-s3: ## Run tests marked with @pytest.mark.s3
sh ./dev/run-minio.sh
$(TEST_RUNNER) pytest tests/ -m s3 $(PYTEST_ARGS)
Expand All @@ -130,7 +134,7 @@ test-gcs: ## Run tests marked with @pytest.mark.gcs
$(TEST_RUNNER) pytest tests/ -m gcs $(PYTEST_ARGS)

test-coverage: COVERAGE=1
test-coverage: test test-integration test-s3 test-adls test-gcs coverage-report ## Run all tests with coverage and report
test-coverage: test test-integration test-integration-trino test-s3 test-adls test-gcs coverage-report ## Run all tests with coverage and report

coverage-report: ## Combine and report coverage
uv run $(PYTHON_ARG) coverage combine
Expand Down
15 changes: 15 additions & 0 deletions dev/docker-compose-integration.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,21 @@ services:
- CATALOG_IO__IMPL=org.apache.iceberg.aws.s3.S3FileIO
- CATALOG_S3_ENDPOINT=http://minio:9000
- CATALOG_JDBC_STRICT__MODE=true

trino:
image: trinodb/trino:478
container_name: pyiceberg-trino
networks:
iceberg_net:
ports:
- 8082:8080
environment:
- CATALOG_MANAGEMENT=dynamic
Comment on lines +68 to +69
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This environment variable looks redundant since we're adding catalogs with properties files instead of using the CREATE CATALOG statement.

depends_on:
- rest
- hive
volumes:
- ./trino/catalog:/etc/trino/catalog
Copy link
Contributor

@ebyhr ebyhr Dec 3, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: How about specifying the full paths of the properties files? That way we can keep using existing catalogs such as the memory and TPCH catalogs — they are helpful during development.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sounds great!

minio:
image: minio/minio
container_name: pyiceberg-minio
Expand Down
96 changes: 96 additions & 0 deletions dev/docker-compose-trino.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
services:
  rest:
    image: apache/iceberg-rest-fixture
    container_name: pyiceberg-rest
    networks:
      iceberg_net:
    ports:
      # Port mappings are quoted: unquoted "host:container" strings are
      # sexagesimal-typing traps in YAML 1.1 parsers.
      - "8181:8181"
    environment:
      - AWS_ACCESS_KEY_ID=admin
      - AWS_SECRET_ACCESS_KEY=password
      - AWS_REGION=us-east-1
      - CATALOG_WAREHOUSE=s3://warehouse/
      - CATALOG_IO__IMPL=org.apache.iceberg.aws.s3.S3FileIO
      - CATALOG_S3_ENDPOINT=http://minio:9000

  trino:
    image: trinodb/trino:478
    container_name: pyiceberg-trino
    networks:
      iceberg_net:
    ports:
      - "8082:8080"
    environment:
      # Consumed by dev/trino/config.properties via ${ENV:CATALOG_MANAGEMENT};
      # "dynamic" allows adding catalogs at runtime without a container restart.
      - CATALOG_MANAGEMENT=dynamic
    depends_on:
      - rest
      - hive
    volumes:
      - ./trino/catalog:/etc/trino/catalog
      - ./trino/config.properties:/etc/trino/config.properties

  minio:
    image: minio/minio
    container_name: pyiceberg-minio
    environment:
      - MINIO_ROOT_USER=admin
      - MINIO_ROOT_PASSWORD=password
      - MINIO_DOMAIN=minio
    networks:
      iceberg_net:
        aliases:
          # Virtual-host-style S3 addressing for the "warehouse" bucket.
          - warehouse.minio
    ports:
      - "9001:9001"
      - "9000:9000"
    command: ["server", "/data", "--console-address", ":9001"]

  # One-shot helper that creates and opens up the warehouse bucket.
  # NOTE(review): recent minio/mc releases renamed "mc policy" to
  # "mc anonymous" — confirm the unpinned minio/mc image still accepts
  # "policy set", or pin the image tag.
  mc:
    depends_on:
      - minio
    image: minio/mc
    container_name: pyiceberg-mc
    networks:
      iceberg_net:
    environment:
      - AWS_ACCESS_KEY_ID=admin
      - AWS_SECRET_ACCESS_KEY=password
      - AWS_REGION=us-east-1
    entrypoint: >
      /bin/sh -c "
      until (/usr/bin/mc alias set minio http://minio:9000 admin password) do echo '...waiting...' && sleep 1; done;
      /usr/bin/mc mb minio/warehouse;
      /usr/bin/mc policy set public minio/warehouse;
      tail -f /dev/null
      "

  hive:
    build: hive/
    container_name: hive
    hostname: hive
    networks:
      iceberg_net:
    ports:
      - "9083:9083"
    environment:
      SERVICE_NAME: "metastore"
      SERVICE_OPTS: "-Dmetastore.warehouse.dir=s3a://warehouse/hive/"

networks:
  iceberg_net:
33 changes: 33 additions & 0 deletions dev/run-trino.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#!/bin/bash
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
#

# Start (or reuse) the local Trino docker-compose stack used by the
# integration_trino test suite.
set -ex

# Command substitutions are quoted: an unquoted $(docker ps -q ...) would be
# word-split and break the [ ] test if more than one container ID matched,
# and `[ -z $(...) ]` only worked by accident when the output was empty.
if [ -n "$(docker ps -q --filter "name=pyiceberg-trino" --filter "status=running")" ]; then
  echo "Trino service running"
else
  docker compose -f dev/docker-compose-trino.yml kill
  docker compose -f dev/docker-compose-trino.yml up -d
  # Poll until Docker reports the Trino container as running.
  while [ -z "$(docker ps -q --filter "name=pyiceberg-trino" --filter "status=running")" ]
  do
    echo "Waiting for Trino"
    sleep 1
  done
fi
29 changes: 29 additions & 0 deletions dev/trino/catalog/warehouse_hive.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Trino catalog "warehouse_hive": Iceberg tables managed through the Hive
# metastore service defined in dev/docker-compose-trino.yml.
connector.name=iceberg
iceberg.catalog.type=hive_metastore
# Zero retention floors so tests can expire snapshots / remove orphan files
# immediately after creating them.
iceberg.expire-snapshots.min-retention=0d
iceberg.remove-orphan-files.min-retention=0d
# Enables the register_table procedure (disabled by default in Trino).
iceberg.register-table-procedure.enabled=true
# "hive" is the container hostname of the metastore service.
hive.metastore.uri=thrift://hive:9083
iceberg.hive-catalog-name=hive
# MinIO-backed S3 access; credentials match the docker-compose environment.
fs.native-s3.enabled=true
s3.region=us-east-1
s3.aws-access-key=admin
s3.aws-secret-key=password
s3.endpoint=http://minio:9000
# false is Trino's default; kept explicit. Virtual-host-style addressing is
# presumably viable because MinIO runs with MINIO_DOMAIN=minio — confirm.
s3.path-style-access=false
31 changes: 31 additions & 0 deletions dev/trino/catalog/warehouse_rest.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Trino catalog "warehouse_rest": Iceberg tables via the REST catalog fixture
# ("rest" service in dev/docker-compose-trino.yml).
connector.name=iceberg
iceberg.catalog.type=rest
iceberg.rest-catalog.uri=http://rest:8181
iceberg.rest-catalog.warehouse=s3://warehouse/default
iceberg.rest-catalog.nested-namespace-enabled=true
# Trino lower-cases identifiers; case-insensitive matching lets it resolve
# mixed-case Iceberg namespaces/tables.
iceberg.rest-catalog.case-insensitive-name-matching=true
# Zero retention floors so tests can expire snapshots / remove orphan files
# immediately after creating them.
iceberg.expire-snapshots.min-retention=0d
iceberg.remove-orphan-files.min-retention=0d
# Enables the register_table procedure (disabled by default in Trino).
iceberg.register-table-procedure.enabled=true
# MinIO-backed S3 access; credentials match the docker-compose environment.
fs.native-s3.enabled=true
s3.region=us-east-1
s3.aws-access-key=admin
s3.aws-secret-key=password
s3.endpoint=http://minio:9000
# false is Trino's default; reviewer suggested removing it — kept explicit here.
s3.path-style-access=false
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: We could remove this s3.path-style-access=false. It's disabled by default.

23 changes: 23 additions & 0 deletions dev/trino/config.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Licensed to the Apache Software Foundation (ASF) under one
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we want to add config.properties‎ file?

Copy link
Contributor Author

@dingo4dev dingo4dev Dec 3, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi @ebyhr , Thanks for your review.
My initial thought was to set catalog.management to dynamic, which makes it possible to add and test the Glue and DynamoDB catalogs without restarting the container. I then figured that shipping the file would also improve the development experience: you can adjust the server configuration for your own resources without editing the Dockerfile yourself.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you, I understand the motivation now. The next question is why enabling CATALOG_MANAGEMENT environment variable is insufficient.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added catalog.management=${ENV:CATALOG_MANAGEMENT} to keep it consistent if someone updates config.properties explicitly. Alternatively, we could just use catalog.management=dynamic and remove the env var.

# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Single-node Trino: the coordinator also schedules work on itself.
coordinator=true
node-scheduler.include-coordinator=true
http-server.http.port=8080
discovery.uri=http://localhost:8080
# Trust X-Forwarded-* headers; the container is reached through a published
# docker-compose port (host 8082 -> container 8080).
http-server.process-forwarded=true
http-server.https.enabled=false
# Resolved from the CATALOG_MANAGEMENT env var set in docker-compose
# ("dynamic" there), enabling catalog changes at runtime.
catalog.management=${ENV:CATALOG_MANAGEMENT}
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ dev = [
"docutils!=0.21.post1",
"mypy-boto3-glue>=1.28.18",
"mypy-boto3-dynamodb>=1.28.18",
"trino[sqlalchemy]>=0.336.0",
]
# for mkdocs
docs = [
Expand Down Expand Up @@ -153,6 +154,7 @@ markers = [
"s3: marks a test as requiring access to s3 compliant storage (use with --aws-access-key-id, --aws-secret-access-key, and --endpoint args)",
"adls: marks a test as requiring access to adls compliant storage (use with --adls.account-name, --adls.account-key, and --adls.endpoint args)",
"integration: marks integration tests against Apache Spark",
"integration_trino: marks integration tests against Trino",
"gcs: marks a test as requiring access to gcs compliant storage (use with --gs.token, --gs.project, and --gs.endpoint)",
"benchmark: collection of tests to validate read/write performance before and after a change",
]
Expand Down
35 changes: 35 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
import pytest
from moto import mock_aws
from pydantic_core import to_json
from sqlalchemy import Connection

from pyiceberg.catalog import Catalog, load_catalog
from pyiceberg.catalog.noop import NoopCatalog
Expand Down Expand Up @@ -143,6 +144,18 @@ def pytest_addoption(parser: pytest.Parser) -> None:
"--gcs.oauth2.token", action="store", default="anon", help="The GCS authentication method for tests marked gcs"
)
parser.addoption("--gcs.project-id", action="store", default="test", help="The GCP project for tests marked gcs")
parser.addoption(
"--trino.rest.endpoint",
action="store",
default="trino://test@localhost:8082/warehouse_rest",
help="The Trino REST endpoint URL for tests marked as integration_trino",
)
parser.addoption(
"--trino.hive.endpoint",
action="store",
default="trino://test@localhost:8082/warehouse_hive",
help="The Trino Hive endpoint URL for tests marked as integration_trino",
)


@pytest.fixture(scope="session")
Expand Down Expand Up @@ -2574,6 +2587,28 @@ def bound_reference_uuid() -> BoundReference[str]:
return BoundReference(field=NestedField(1, "field", UUIDType(), required=False), accessor=Accessor(position=0, inner=None))


@pytest.fixture(scope="session")
def trino_hive_conn(request: pytest.FixtureRequest) -> Generator[Connection, None, None]:
    """Session-scoped SQLAlchemy connection to the Trino Hive catalog.

    The endpoint comes from the ``--trino.hive.endpoint`` option (defaulting
    to the docker-compose Trino service). Both the connection and its engine
    are torn down at session end, even if closing the connection raises.
    """
    from sqlalchemy import create_engine

    trino_endpoint = request.config.getoption("--trino.hive.endpoint")
    engine = create_engine(trino_endpoint)
    connection = engine.connect()
    try:
        yield connection
    finally:
        connection.close()
        # Release the whole pool, not just this one checked-out connection.
        engine.dispose()


@pytest.fixture(scope="session")
def trino_rest_conn(request: pytest.FixtureRequest) -> Generator[Connection, None, None]:
    """Session-scoped SQLAlchemy connection to the Trino REST catalog.

    The endpoint comes from the ``--trino.rest.endpoint`` option (defaulting
    to the docker-compose Trino service). Both the connection and its engine
    are torn down at session end, even if closing the connection raises.
    """
    from sqlalchemy import create_engine

    trino_endpoint = request.config.getoption("--trino.rest.endpoint")
    engine = create_engine(trino_endpoint)
    connection = engine.connect()
    try:
        yield connection
    finally:
        connection.close()
        # Release the whole pool, not just this one checked-out connection.
        engine.dispose()


@pytest.fixture(scope="session")
def session_catalog() -> Catalog:
return load_catalog(
Expand Down
20 changes: 20 additions & 0 deletions tests/integration/test_rest_catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
# pylint:disable=redefined-outer-name

import pytest
from sqlalchemy import Connection, inspect

from pyiceberg.catalog.rest import RestCatalog

Expand Down Expand Up @@ -61,3 +62,22 @@ def test_create_namespace_if_already_existing(catalog: RestCatalog) -> None:
catalog.create_namespace_if_not_exists(TEST_NAMESPACE_IDENTIFIER)

assert catalog.namespace_exists(TEST_NAMESPACE_IDENTIFIER)


@pytest.mark.integration
@pytest.mark.integration_trino
@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog")])
def test_schema_exists_in_trino(trino_rest_conn: Connection, catalog: RestCatalog) -> None:
    """Verify that an Iceberg namespace shows up as a schema in Trino.

    Creating a namespace through the Iceberg REST catalog should make it
    visible as a schema to a Trino cluster configured against the same
    catalog, confirming namespace/schema synchronization.
    """
    # Start from a clean slate: drop the namespace if a prior run left it behind.
    if catalog.namespace_exists(TEST_NAMESPACE_IDENTIFIER):
        catalog.drop_namespace(TEST_NAMESPACE_IDENTIFIER)
    catalog.create_namespace_if_not_exists(TEST_NAMESPACE_IDENTIFIER)

    assert catalog.namespace_exists(TEST_NAMESPACE_IDENTIFIER)
    # Trino lower-cases schema names, so compare against the lowered identifier.
    trino_schemas = inspect(trino_rest_conn).get_schema_names()
    assert TEST_NAMESPACE_IDENTIFIER.lower() in trino_schemas
Loading