Skip to content

Commit 84f4a41

Browse files
authored
Merge pull request #1528 from Altinity/google_big_lake_catalog
Added backport for support of Google BigLake Metastore
2 parents fdd27ed + 497edb7 commit 84f4a41

6 files changed

Lines changed: 583 additions & 45 deletions

File tree

src/Core/SettingsEnums.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -345,6 +345,7 @@ IMPLEMENT_SETTING_ENUM(
345345
{"glue", DatabaseDataLakeCatalogType::GLUE},
346346
{"hive", DatabaseDataLakeCatalogType::ICEBERG_HIVE},
347347
{"onelake", DatabaseDataLakeCatalogType::ICEBERG_ONELAKE},
348+
{"biglake", DatabaseDataLakeCatalogType::ICEBERG_BIGLAKE},
348349
{"paimon_rest", DatabaseDataLakeCatalogType::PAIMON_REST}})
349350

350351
IMPLEMENT_SETTING_ENUM(

src/Core/SettingsEnums.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -437,6 +437,7 @@ enum class DatabaseDataLakeCatalogType : uint8_t
437437
GLUE,
438438
ICEBERG_HIVE,
439439
ICEBERG_ONELAKE,
440+
ICEBERG_BIGLAKE,
440441
PAIMON_REST,
441442
};
442443

src/Databases/DataLake/DatabaseDataLake.cpp

Lines changed: 88 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,11 @@
1010
#include <Databases/DataLake/ICatalog.h>
1111
#include <Common/Exception.h>
1212
#include <Disks/DiskObjectStorage/ObjectStorages/IObjectStorage.h>
13+
#include <IO/ReadBufferFromFile.h>
14+
#include <IO/ReadBuffer.h>
15+
#include <IO/ReadHelpers.h>
16+
#include <Poco/JSON/Parser.h>
17+
#include <Poco/JSON/Object.h>
1318

1419

1520
#if USE_AVRO && USE_PARQUET
@@ -67,6 +72,14 @@ namespace DatabaseDataLakeSetting
6772
extern const DatabaseDataLakeSettingsString onelake_client_secret;
6873
extern const DatabaseDataLakeSettingsString dlf_access_key_id;
6974
extern const DatabaseDataLakeSettingsString dlf_access_key_secret;
75+
extern const DatabaseDataLakeSettingsString google_project_id;
76+
extern const DatabaseDataLakeSettingsString google_service_account;
77+
extern const DatabaseDataLakeSettingsString google_metadata_service;
78+
extern const DatabaseDataLakeSettingsString google_adc_client_id;
79+
extern const DatabaseDataLakeSettingsString google_adc_client_secret;
80+
extern const DatabaseDataLakeSettingsString google_adc_refresh_token;
81+
extern const DatabaseDataLakeSettingsString google_adc_quota_project_id;
82+
extern const DatabaseDataLakeSettingsString google_adc_credentials_file;
7083
}
7184

7285
namespace Setting
@@ -172,7 +185,7 @@ std::shared_ptr<DataLake::ICatalog> DatabaseDataLake::getCatalog() const
172185
}
173186
case DB::DatabaseDataLakeCatalogType::ICEBERG_ONELAKE:
174187
{
175-
catalog_impl = std::make_shared<DataLake::RestCatalog>(
188+
catalog_impl = std::make_shared<DataLake::OneLakeCatalog>(
176189
settings[DatabaseDataLakeSetting::warehouse].value,
177190
url,
178191
settings[DatabaseDataLakeSetting::onelake_tenant_id].value,
@@ -184,6 +197,75 @@ std::shared_ptr<DataLake::ICatalog> DatabaseDataLake::getCatalog() const
184197
Context::getGlobalContextInstance());
185198
break;
186199
}
200+
case DB::DatabaseDataLakeCatalogType::ICEBERG_BIGLAKE:
201+
{
202+
std::string google_project_id = settings[DatabaseDataLakeSetting::google_project_id].value;
203+
std::string google_service_account = settings[DatabaseDataLakeSetting::google_service_account].value;
204+
std::string google_metadata_service = settings[DatabaseDataLakeSetting::google_metadata_service].value;
205+
std::string google_adc_client_id = settings[DatabaseDataLakeSetting::google_adc_client_id].value;
206+
std::string google_adc_client_secret = settings[DatabaseDataLakeSetting::google_adc_client_secret].value;
207+
std::string google_adc_refresh_token = settings[DatabaseDataLakeSetting::google_adc_refresh_token].value;
208+
std::string google_adc_quota_project_id = settings[DatabaseDataLakeSetting::google_adc_quota_project_id].value;
209+
210+
if (settings[DatabaseDataLakeSetting::google_adc_credentials_file].changed)
211+
{
212+
try
213+
{
214+
const std::string & credentials_file_path = settings[DatabaseDataLakeSetting::google_adc_credentials_file].value;
215+
DB::ReadBufferFromFile file_buf(credentials_file_path);
216+
std::string json_str;
217+
DB::readStringUntilEOF(json_str, file_buf);
218+
219+
Poco::JSON::Parser parser;
220+
Poco::Dynamic::Var json = parser.parse(json_str);
221+
const Poco::JSON::Object::Ptr & object = json.extract<Poco::JSON::Object::Ptr>();
222+
223+
if (object->has("type"))
224+
{
225+
String type = object->get("type").extract<String>();
226+
if (type != "authorized_user")
227+
{
228+
throw DB::Exception(
229+
DB::ErrorCodes::BAD_ARGUMENTS,
230+
"Unsupported credentials type '{}' in Google ADC credentials file. Expected 'authorized_user'",
231+
type);
232+
}
233+
}
234+
235+
if (google_adc_client_id.empty() && object->has("client_id"))
236+
google_adc_client_id = object->get("client_id").extract<String>();
237+
if (google_adc_client_secret.empty() && object->has("client_secret"))
238+
google_adc_client_secret = object->get("client_secret").extract<String>();
239+
if (google_adc_refresh_token.empty() && object->has("refresh_token"))
240+
google_adc_refresh_token = object->get("refresh_token").extract<String>();
241+
if (google_adc_quota_project_id.empty() && object->has("quota_project_id"))
242+
google_adc_quota_project_id = object->get("quota_project_id").extract<String>();
243+
if (google_project_id.empty() && object->has("project_id"))
244+
google_project_id = object->get("project_id").extract<String>();
245+
}
246+
catch (const DB::Exception & e)
247+
{
248+
throw DB::Exception(
249+
DB::ErrorCodes::BAD_ARGUMENTS,
250+
"Failed to load Google ADC credentials from file '{}': {}",
251+
settings[DatabaseDataLakeSetting::google_adc_credentials_file].value,
252+
e.message());
253+
}
254+
}
255+
256+
catalog_impl = std::make_shared<DataLake::BigLakeCatalog>(
257+
settings[DatabaseDataLakeSetting::warehouse].value,
258+
url,
259+
google_project_id,
260+
google_service_account,
261+
google_metadata_service,
262+
google_adc_client_id,
263+
google_adc_client_secret,
264+
google_adc_refresh_token,
265+
google_adc_quota_project_id,
266+
Context::getGlobalContextInstance());
267+
break;
268+
}
187269
case DB::DatabaseDataLakeCatalogType::UNITY:
188270
{
189271
catalog_impl = std::make_shared<DataLake::UnityCatalog>(
@@ -279,6 +361,7 @@ StorageObjectStorageConfigurationPtr DatabaseDataLake::getConfiguration(
279361
}
280362
case DatabaseDataLakeCatalogType::ICEBERG_HIVE:
281363
case DatabaseDataLakeCatalogType::ICEBERG_REST:
364+
case DatabaseDataLakeCatalogType::ICEBERG_BIGLAKE:
282365
{
283366
switch (type)
284367
{
@@ -461,7 +544,7 @@ StoragePtr DatabaseDataLake::tryGetTableImpl(const String & name, ContextPtr con
461544
});
462545

463546
const bool with_vended_credentials = settings[DatabaseDataLakeSetting::vended_credentials].value;
464-
if (!lightweight && with_vended_credentials)
547+
if (with_vended_credentials)
465548
table_metadata = table_metadata.withStorageCredentials();
466549

467550
auto [namespace_name, table_name] = DataLake::parseTableName(name);
@@ -500,7 +583,7 @@ StoragePtr DatabaseDataLake::tryGetTableImpl(const String & name, ContextPtr con
500583
/// so we have a separate setting to know whether we should even try to fetch them.
501584
if (args.size() == 1)
502585
{
503-
std::array<DatabaseDataLakeCatalogType, 2> vended_credentials_catalogs = {DatabaseDataLakeCatalogType::ICEBERG_ONELAKE, DatabaseDataLakeCatalogType::PAIMON_REST};
586+
std::array<DatabaseDataLakeCatalogType, 3> vended_credentials_catalogs = {DatabaseDataLakeCatalogType::ICEBERG_ONELAKE, DatabaseDataLakeCatalogType::ICEBERG_BIGLAKE, DatabaseDataLakeCatalogType::PAIMON_REST};
504587
if (table_metadata.hasStorageCredentials())
505588
{
506589
LOG_DEBUG(log, "Getting credentials");
@@ -571,7 +654,7 @@ StoragePtr DatabaseDataLake::tryGetTableImpl(const String & name, ContextPtr con
571654
auto azure_configuration = std::static_pointer_cast<StorageAzureIcebergConfiguration>(configuration);
572655
if (!azure_configuration)
573656
throw Exception(ErrorCodes::LOGICAL_ERROR, "Configuration is not azure type for one lake catalog");
574-
auto rest_catalog = std::static_pointer_cast<DataLake::RestCatalog>(catalog);
657+
auto rest_catalog = std::static_pointer_cast<DataLake::OneLakeCatalog>(catalog);
575658
if (!rest_catalog)
576659
throw Exception(ErrorCodes::LOGICAL_ERROR, "Catalog is not equals to one lake");
577660
azure_configuration->setInitializationAsOneLake(
@@ -896,6 +979,7 @@ void registerDatabaseDataLake(DatabaseFactory & factory)
896979
{
897980
case DatabaseDataLakeCatalogType::ICEBERG_ONELAKE:
898981
case DatabaseDataLakeCatalogType::ICEBERG_REST:
982+
case DatabaseDataLakeCatalogType::ICEBERG_BIGLAKE:
899983
{
900984
if (!args.create_query.attach
901985
&& !args.context->getSettingsRef()[Setting::allow_experimental_database_iceberg])

src/Databases/DataLake/DatabaseDataLakeSettings.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,14 @@ namespace ErrorCodes
3434
DECLARE(String, onelake_tenant_id, "", "Tenant id from azure", 0) \
3535
DECLARE(String, onelake_client_id, "", "Client id from azure", 0) \
3636
DECLARE(String, onelake_client_secret, "", "Client secret from azure", 0) \
37+
DECLARE(String, google_project_id, "", "Google Cloud project ID for BigLake. Required for BigLake catalog. Used in x-goog-user-project header. If not set and google_adc_quota_project_id is provided, it latter will be used", 0) \
38+
DECLARE(String, google_service_account, "", "Google Cloud service account email for metadata service authentication. Default: 'default'. Only used when ADC credentials are not provided", 0) \
39+
DECLARE(String, google_metadata_service, "", "Google Cloud metadata service endpoint for token retrieval. Default: 'metadata.google.internal'. Only used when ADC credentials are not provided", 0) \
40+
DECLARE(String, google_adc_client_id, "", "Google Application Default Credentials client_id for BigLake. Required if using ADC authentication instead of metadata service", 0) \
41+
DECLARE(String, google_adc_client_secret, "", "Google Application Default Credentials client_secret for BigLake. Required if using ADC authentication instead of metadata service", 0) \
42+
DECLARE(String, google_adc_refresh_token, "", "Google Application Default Credentials refresh_token for BigLake. Required if using ADC authentication instead of metadata service", 0) \
43+
DECLARE(String, google_adc_quota_project_id, "", "Google Application Default Credentials quota_project_id for BigLake. Optional, used if google_project_id is not set", 0) \
44+
DECLARE(String, google_adc_credentials_file, "", "Path to JSON file containing Google Application Default Credentials. If set, credentials will be loaded from this file. File should contain: type, client_id, client_secret, refresh_token, and optionally quota_project_id", 0) \
3745
DECLARE(String, dlf_access_key_id, "", "Access id of DLF token for Paimon REST Catalog", 0) \
3846
DECLARE(String, dlf_access_key_secret, "", "Access secret of DLF token for Paimon REST Catalog", 0) \
3947

0 commit comments

Comments
 (0)