diff --git a/sdk/cosmos/azure-cosmos-tests/THINCLIENT_TEST_MATRIX.md b/sdk/cosmos/azure-cosmos-tests/THINCLIENT_TEST_MATRIX.md new file mode 100644 index 000000000000..80d8f7af40a5 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-tests/THINCLIENT_TEST_MATRIX.md @@ -0,0 +1,212 @@ +# Thin Client E2E Test Matrix — Gateway V2 QueryPlan Support + +--- + +## 1. Query Tests (`ThinClientQueryE2ETest`) — 80 tests + +### Filtering (WHERE clause) +| Test | SQL | Query Feature | +|------|-----|---------------| +| `testSelectAll` | `SELECT * FROM c` | Full scan | +| `testWhereEquality` | `SELECT * FROM c WHERE c.category = 'electronics'` | Equality filter | +| `testWhereEqualityParameterized` | `SELECT * FROM c WHERE c.category = @cat` | Parameterized query | +| `testWhereRangeGreaterThan` | `SELECT * FROM c WHERE c.age > 30` | Range (>) | +| `testWhereRangeLessThanOrEqual` | `SELECT * FROM c WHERE c.price <= 25.00` | Range (<=) | +| `testWhereRangeBetween` | `SELECT * FROM c WHERE c.age >= 18 AND c.age <= 40` | Range (between) | +| `testWhereIn` | `SELECT * FROM c WHERE c.category IN ('electronics', 'toys')` | IN operator | +| `testWhereCompoundAndOr` | `SELECT * FROM c WHERE c.status = 'active' AND (...)` | Compound AND/OR | +| `testWhereNotEqual` | `SELECT * FROM c WHERE c.status != 'inactive'` | Not equal | +| `testWhereBooleanField` | `SELECT * FROM c WHERE c.isActive = true` | Boolean filter | +| `testWhereIsDefined` | `SELECT * FROM c WHERE IS_DEFINED(c.address)` | IS_DEFINED | +| `testWhereStartsWith` | `SELECT * FROM c WHERE STARTSWITH(c.category, 'elec')` | STARTSWITH | +| `testWhereContains` | `SELECT * FROM c WHERE CONTAINS(c.category, 'ook')` | CONTAINS | +| `testWhereArrayContains` | `SELECT * FROM c WHERE ARRAY_CONTAINS(c.scores, 50)` | ARRAY_CONTAINS | +| `testWhereNestedProperty` | `SELECT * FROM c WHERE c.address.city = 'Seattle'` | Nested property | +| `testBetween` | `SELECT * FROM c WHERE c.age BETWEEN 18 AND 40` | BETWEEN keyword | + +### Projection +| Test | 
SQL | Query Feature | +|------|-----|---------------| +| `testSelectSpecificFields` | `SELECT c.id, c.category, c.price FROM c` | Field projection | +| `testSelectComputedAlias` | `SELECT c.id, c.price * 1.1 AS taxedPrice FROM c` | Computed alias | +| `testSelectValueObject` | `SELECT VALUE { name: c.category, loc: c.address.city } FROM c` | VALUE with JSON object | +| `testSelectValueScalar` | `SELECT VALUE c.category FROM c` | VALUE scalar | + +### ORDER BY +| Test | SQL | Query Feature | +|------|-----|---------------| +| `testOrderByAsc` | `SELECT * FROM c ORDER BY c.age` | ORDER BY ASC | +| `testOrderByDesc` | `SELECT * FROM c ORDER BY c.price DESC` | ORDER BY DESC | + +### DISTINCT +| Test | SQL | Query Feature | +|------|-----|---------------| +| `testDistinctValue` | `SELECT DISTINCT VALUE c.category FROM c` | DISTINCT VALUE (string) | +| `testDistinctValueBoolean` | `SELECT DISTINCT VALUE c.isActive FROM c` | DISTINCT VALUE (boolean) | + +### TOP +| Test | SQL | Query Feature | +|------|-----|---------------| +| `testTop` | `SELECT TOP 3 * FROM c` | TOP | +| `testTopWithOrderBy` | `SELECT TOP 5 * FROM c ORDER BY c.price DESC` | TOP + ORDER BY | + +### Aggregates +| Test | SQL | Query Feature | +|------|-----|---------------| +| `testCount` | `SELECT VALUE COUNT(1) FROM c` | COUNT | +| `testSum` | `SELECT VALUE SUM(c.price) FROM c` | SUM | +| `testAvg` | `SELECT VALUE AVG(c.age) FROM c` | AVG | +| `testMin` | `SELECT VALUE MIN(c.price) FROM c` | MIN | +| `testMax` | `SELECT VALUE MAX(c.age) FROM c` | MAX | + +### GROUP BY +| Test | SQL | Query Feature | +|------|-----|---------------| +| `testGroupByCount` | `SELECT c.category, COUNT(1) as cnt FROM c GROUP BY c.category` | GROUP BY + COUNT | +| `testGroupBySumAvg` | `SELECT c.category, SUM(c.price) as total, AVG(c.price) as avg FROM c GROUP BY c.category` | GROUP BY + SUM + AVG | + +### OFFSET / LIMIT +| Test | SQL | Query Feature | +|------|-----|---------------| +| `testOffsetLimit` | `SELECT * FROM c 
ORDER BY c.idx OFFSET 3 LIMIT 4` | OFFSET + LIMIT | + +### JOIN (self-join on arrays) +| Test | SQL | Query Feature | +|------|-----|---------------| +| `testJoinScoresArray` | `SELECT c.id, s AS score FROM c JOIN s IN c.scores` | JOIN (int array) | +| `testJoinWithFilter` | `SELECT c.id, s AS score FROM c JOIN s IN c.scores WHERE s >= 50` | JOIN + WHERE | +| `testJoinTagsArray` | `SELECT c.id, t AS tag FROM c JOIN t IN c.tags` | JOIN (string array) | + +### EXISTS subquery +| Test | SQL | Query Feature | +|------|-----|---------------| +| `testExistsSubquery` | `SELECT * FROM c WHERE EXISTS (SELECT VALUE s FROM s IN c.scores WHERE s > 60)` | EXISTS | +| `testExistsSubqueryWithStringMatch` | `SELECT * FROM c WHERE EXISTS (SELECT VALUE t FROM t IN c.tags WHERE t = 'on-sale')` | EXISTS + string match | +| `testExistsAliasInProjection` | `SELECT c.id, EXISTS (...) AS hasHighScore FROM c` | EXISTS in projection | + +### LIKE +| Test | SQL | Query Feature | +|------|-----|---------------| +| `testLikePrefix` | `SELECT * FROM c WHERE c.category LIKE 'elec%'` | LIKE prefix | +| `testLikeSuffix` | `SELECT * FROM c WHERE c.category LIKE '%ing'` | LIKE suffix | +| `testLikeContains` | `SELECT * FROM c WHERE c.category LIKE '%ook%'` | LIKE contains | + +### String Functions +| Test | SQL | Function | +|------|-----|----------| +| `testStringConcat` | `SELECT CONCAT(c.category, '-', c.status) AS label FROM c` | CONCAT | +| `testStringEndsWith` | `SELECT * FROM c WHERE ENDSWITH(c.category, 'ics')` | ENDSWITH | +| `testStringLower` | `SELECT LOWER(c.category) AS lowerCat FROM c` | LOWER | +| `testStringUpper` | `SELECT UPPER(c.status) AS upperStatus FROM c` | UPPER | +| `testStringLength` | `SELECT c.category, LENGTH(c.category) AS len FROM c` | LENGTH | +| `testStringSubstring` | `SELECT SUBSTRING(c.category, 0, 4) AS prefix FROM c` | SUBSTRING | +| `testStringReplace` | `SELECT REPLACE(c.category, 'o', '0') AS replaced FROM c` | REPLACE | +| `testStringIndexOf` | `SELECT 
INDEX_OF(c.category, 'o') AS pos FROM c` | INDEX_OF | +| `testStringLeft` | `SELECT LEFT(c.category, 3) AS l FROM c` | LEFT | +| `testStringReverse` | `SELECT REVERSE(c.category) AS rev FROM c` | REVERSE | +| `testStringTrim` | `SELECT TRIM(c.status) AS trimmed FROM c` | TRIM | +| `testRegexMatch` | `SELECT * FROM c WHERE RegexMatch(c.category, '^elec.*')` | RegexMatch | + +### Type Checking Functions +| Test | SQL | Function | +|------|-----|----------| +| `testIsArray` | `SELECT c.id, IS_ARRAY(c.scores) AS isArr FROM c` | IS_ARRAY | +| `testIsBool` | `SELECT c.id, IS_BOOL(c.isActive) AS isBool FROM c` | IS_BOOL | +| `testIsNull` | `SELECT * FROM c WHERE IS_NULL(c.nonExistentField)` | IS_NULL | +| `testIsNumber` | `SELECT c.id, IS_NUMBER(c.age) AS isNum FROM c` | IS_NUMBER | +| `testIsString` | `SELECT c.id, IS_STRING(c.category) AS isStr FROM c` | IS_STRING | +| `testIsObject` | `SELECT c.id, IS_OBJECT(c.address) AS isObj FROM c` | IS_OBJECT | + +### Math Functions +| Test | SQL | Function | +|------|-----|----------| +| `testMathAbs` | `SELECT ABS(c.age - 30) AS diff FROM c` | ABS | +| `testMathCeilingFloor` | `SELECT CEILING(c.price) AS ceil, FLOOR(c.price) AS flr FROM c` | CEILING, FLOOR | +| `testMathRound` | `SELECT ROUND(c.price) AS rounded FROM c` | ROUND | +| `testMathPower` | `SELECT POWER(c.age, 2) AS ageSq FROM c` | POWER | +| `testMathSqrt` | `SELECT SQRT(c.price) AS sqrtPrice FROM c` | SQRT | + +### Array Functions +| Test | SQL | Function | +|------|-----|----------| +| `testArrayLength` | `SELECT c.id, ARRAY_LENGTH(c.scores) AS len FROM c` | ARRAY_LENGTH | +| `testArraySlice` | `SELECT c.id, ARRAY_SLICE(c.tags, 0, 1) AS firstTag FROM c` | ARRAY_SLICE | + +### Conditional Functions +| Test | SQL | Function | +|------|-----|----------| +| `testIif` | `SELECT c.id, IIF(c.age >= 18, 'adult', 'minor') AS ageGroup FROM c` | IIF | + +### Date/Time Functions +| Test | SQL | Function | +|------|-----|----------| +| `testGetCurrentDateTime` | `SELECT VALUE 
GetCurrentDateTime()` | GetCurrentDateTime | + +### Cross-Partition +| Test | SQL | Query Feature | +|------|-----|---------------| +| `testCrossPartitionSelectAll` | `SELECT * FROM c ORDER BY c.idx` | Cross-partition (no PK filter) | +| `testCrossPartitionWhereFilter` | `SELECT * FROM c WHERE c.category = 'electronics' ORDER BY c.idx` | Cross-partition + filter | + +### Multi-Range (creates dedicated container with multiple PKs) +| Test | SQL | Query Feature | +|------|-----|---------------| +| `testMultiRangePartitionKeyInClause` | `SELECT * FROM c WHERE c.mypk IN (pk1, pk3, pk5)` | Multi-range IN | +| `testMultiRangePartitionKeyOrClause` | `SELECT * FROM c WHERE c.mypk = 'pk-or-1' OR c.mypk = 'pk-or-3'` | Multi-range OR | +| `testMultiRangeManyPartitionKeys` | `SELECT * FROM c WHERE c.mypk IN (pk1..pk10)` | Multi-range (10 PKs) | + +### Continuation Token +| Test | SQL | Query Feature | +|------|-----|---------------| +| `testContinuationTokenDraining` | `SELECT * FROM c` (page size 2) | Pagination / continuation tokens | + +### Error Handling +| Test | SQL | Query Feature | +|------|-----|---------------| +| `testInvalidQueryReturnsBadRequest` | `SELEC * FORM c` (invalid) | 400 BadRequest validation | + +### Vector, Full-Text, and Hybrid Search (requires `EnableNoSQLVectorSearch` capability) +| Test | SQL | Query Feature | +|------|-----|---------------| +| `testVectorSearchGatewayVsThinClient` | `SELECT TOP 5 c.id, VectorDistance(c.embedding, [...]) AS score FROM c ORDER BY VectorDistance(...)` | VectorDistance + FLAT index | +| `testFullTextSearchGatewayVsThinClient` | `SELECT TOP 10 * FROM c WHERE FullTextContains(c.text, 'mountain')` | FullTextContains | +| `testHybridSearchGatewayVsThinClient` | `SELECT TOP 3 * FROM c ORDER BY RANK RRF(VectorDistance(...), FullTextScore(...))` | Hybrid RRF (vector + full-text) | + +--- + +## 2. 
Point Operations (`ThinClientPointOperationE2ETest`) — 3 tests + +| Test | Operation | Coverage | +|------|-----------|----------| +| `testThinClientDocumentPointOperations` | Create, Read, Replace, Upsert, Patch, Delete | Full CRUD + Patch lifecycle | +| `testThinClientBulk` | Bulk create + bulk read | Bulk operations | +| `testThinClientBatch` | Transactional batch (create + read) | CosmosBatch | + +--- + +## 3. Change Feed (`ThinClientChangeFeedE2ETest`) — 3 tests + +| Test | FeedRange | Coverage | +|------|-----------|----------| +| `testThinClientIncrementalChangeFeed` | `FeedRange.forLogicalPartition(pk)` | Incremental change feed (via batch insert) | +| `testThinClientChangeFeedFullRange` | `FeedRange.forFullRange()` | Cross-partition change feed | +| `testThinClientChangeFeedPartitionKey` | `FeedRange.forLogicalPartition(pk)` | Single-PK feed with exact count + PK validation | + +--- + +## 4. Stored Procedures (`ThinClientStoredProcedureE2ETest`) — 3 tests + +| Test | Operation | Coverage | +|------|-----------|----------| +| `testThinClientStoredProcedure` | Create + execute sproc | Sproc creates a document, verifies execution | +| `testStoredProcedureExecutionWithoutPartitionKeyThrows` | Execute without PK | Validates 400 error | +| `testThinClientStoredProcedureWithPartitionKeyNone` | Execute with `PartitionKey.NONE` | Non-partitioned sproc execution | + +--- + +## Test Infrastructure + +- **Test data**: 10 diverse documents seeded per partition (categories, prices, ages, nested objects, arrays, tags, booleans) +- **Shared container**: `/mypk` partition key, reused across query tests +- **Comparison method**: Direct TCP vs Thin Client (HTTP/2 → Proxy), assert identical results +- **Endpoint validation**: Every test asserts thin client used `:10250` endpoint, gateway used `:443` diff --git a/sdk/cosmos/azure-cosmos-tests/pom.xml b/sdk/cosmos/azure-cosmos-tests/pom.xml index 6d019db1dabb..b42e6ed08ed3 100644 --- a/sdk/cosmos/azure-cosmos-tests/pom.xml +++ 
b/sdk/cosmos/azure-cosmos-tests/pom.xml @@ -848,6 +848,9 @@ Licensed under the MIT License. true + ${cosmos.use.aad.auth} + true + true 1 256 paranoid diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/CosmosDiagnosticsTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/CosmosDiagnosticsTest.java index 43bebbe52b52..af8404655116 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/CosmosDiagnosticsTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/CosmosDiagnosticsTest.java @@ -1981,14 +1981,21 @@ private void validateChannelAcquisitionContext(CosmosDiagnostics diagnostics, bo private String generateHttp2OptedInUserAgentIfRequired(String userAgent) { // Mirrors RxDocumentClientImpl.addUserAgentSuffix + UserAgentContainer.setFeatureEnabledFlagsAsSuffix: // when HTTP/2 is enabled, the Http2 bit is set; when PING keepalive is also effectively enabled - // (kill-switch on AND positive interval), the Http2PingHealth bit is OR'd in. + // (kill-switch on AND positive interval), the Http2PingHealth bit is OR'd in. ThinClient is set when + // COSMOS.THINCLIENT_ENABLED is true (default true after Gateway V2 default enablement). // Tests here do not override Http2ConnectionConfig.setEnabled(...) so the per-client override branch // in addUserAgentSuffix is a no-op for this helper. 
+ int featureValue = 0; + if (Configs.isThinClientEnabled()) { + featureValue |= UserAgentFeatureFlags.ThinClient.getValue(); + } if (Configs.isHttp2Enabled()) { - int featureValue = UserAgentFeatureFlags.Http2.getValue(); + featureValue |= UserAgentFeatureFlags.Http2.getValue(); if (Configs.isHttp2PingHealthEnabled() && Configs.getHttp2PingIntervalInSeconds() > 0) { featureValue |= UserAgentFeatureFlags.Http2PingHealth.getValue(); } + } + if (featureValue != 0) { userAgent = userAgent + "|F" + Integer.toHexString(featureValue).toUpperCase(Locale.ROOT); } diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/CosmosNotFoundTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/CosmosNotFoundTests.java index 1abf1a589200..d1ac19fba94e 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/CosmosNotFoundTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/CosmosNotFoundTests.java @@ -52,6 +52,16 @@ public CosmosNotFoundTests(CosmosClientBuilder clientBuilder) { @BeforeClass(groups = {"fast", "thinclient"}, timeOut = SETUP_TIMEOUT) public void before_CosmosNotFoundTests() { + // Thin-client routing tests in this class (the "thinclient" group) assert that + // requests actually went through the proxy on port 10250 via assertThinClientEndpointUsed + // and rely on proxy-specific substatus codes (e.g. OWNER_RESOURCE_NOT_EXISTS = 1003). + // The connectivity probe is enabled by default in production, but the proxy-side + // /connectivity-probe endpoint is not deployed in every CI test account yet. With the + // default failure threshold of 1, a single failed probe cycle flips routing from the + // proxy to Gateway V1, breaking these assertions. Disable the probe here so the routing + // path under test is exercised deterministically; production callers still get the + // probe ON by default. Cleared in @AfterClass to avoid leaking into other test classes. 
+ System.setProperty("COSMOS.THINCLIENT_PROBE_ENABLED", "false"); executeWithRetry(() -> { safeClose(this.commonAsyncClient); this.commonAsyncClient = getClientBuilder().buildAsyncClient(); @@ -88,6 +98,7 @@ public static Object[][] operationTypeProvider() { @AfterClass(groups = {"fast", "thinclient"}, timeOut = SHUTDOWN_TIMEOUT, alwaysRun = true) public void afterClass() { safeClose(this.commonAsyncClient); + System.clearProperty("COSMOS.THINCLIENT_PROBE_ENABLED"); } @Test(groups = {"fast"}, dataProvider = "operationTypeProvider", timeOut = TIMEOUT) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/PerPartitionCircuitBreakerE2ETests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/PerPartitionCircuitBreakerE2ETests.java index 14c98fcd0896..c874a2e26aab 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/PerPartitionCircuitBreakerE2ETests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/PerPartitionCircuitBreakerE2ETests.java @@ -240,6 +240,13 @@ public PerPartitionCircuitBreakerE2ETests(CosmosClientBuilder cosmosClientBuilde @BeforeClass(groups = {"circuit-breaker-misc-gateway", "circuit-breaker-misc-direct", "circuit-breaker-read-all-read-many", "multi-region", "fi-thinclient-multi-master"}) public void beforeClass() { + // The "fi-thinclient-multi-master" tests assert thinclient routing via + // assertThinClientEndpointUsed. The connectivity probe is ON by default in production, but the + // proxy-side /connectivity-probe endpoint is not deployed in every CI account yet, so with the + // default failure threshold of 1 it would flip routing to Gateway V1 and break those assertions. + // Disable here so the routing path under test is exercised deterministically; other groups in + // this class (circuit-breaker-* / multi-region) are unaffected. Cleared in @AfterClass. 
+ System.setProperty("COSMOS.THINCLIENT_PROBE_ENABLED", "false"); try (CosmosAsyncClient testClient = getClientBuilder().buildAsyncClient()) { RxDocumentClientImpl documentClient = (RxDocumentClientImpl) ReflectionUtils.getAsyncDocumentClient(testClient); GlobalEndpointManager globalEndpointManager = documentClient.getGlobalEndpointManager(); @@ -4597,6 +4604,7 @@ public void afterClass() { safeClose(dummyClient); } } + System.clearProperty("COSMOS.THINCLIENT_PROBE_ENABLED"); } private static class ResponseWrapper { diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/UserAgentSuffixTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/UserAgentSuffixTest.java index ceb5a9044682..84c19f2723f4 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/UserAgentSuffixTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/UserAgentSuffixTest.java @@ -120,12 +120,19 @@ private void validateUserAgentSuffix(String actualUserAgent, String expectedUser // Mirrors RxDocumentClientImpl.addUserAgentSuffix + UserAgentContainer.setFeatureEnabledFlagsAsSuffix: // when HTTP/2 is enabled, the Http2 bit is set; when PING keepalive is also effectively enabled - // (kill-switch on AND positive interval), the Http2PingHealth bit is OR'd in. + // (kill-switch on AND positive interval), the Http2PingHealth bit is OR'd in. ThinClient is set when + // COSMOS.THINCLIENT_ENABLED is true (default true after Gateway V2 default enablement). 
+ int featureValue = 0; + if (Configs.isThinClientEnabled()) { + featureValue |= UserAgentFeatureFlags.ThinClient.getValue(); + } if (Configs.isHttp2Enabled()) { - int featureValue = UserAgentFeatureFlags.Http2.getValue(); + featureValue |= UserAgentFeatureFlags.Http2.getValue(); if (Configs.isHttp2PingHealthEnabled() && Configs.getHttp2PingIntervalInSeconds() > 0) { featureValue |= UserAgentFeatureFlags.Http2PingHealth.getValue(); } + } + if (featureValue != 0) { expectedUserAgentSuffix = expectedUserAgentSuffix + "|F" + Integer.toHexString(featureValue).toUpperCase(Locale.ROOT); } diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/faultinjection/Http2PingKeepaliveTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/faultinjection/Http2PingKeepaliveTest.java index 99a5f25a793d..1076619b6f03 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/faultinjection/Http2PingKeepaliveTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/faultinjection/Http2PingKeepaliveTest.java @@ -86,6 +86,15 @@ public void beforeClass() { // set externally (Maven profile or -D) so a single test class covers both transports // across two pipeline runs. System.setProperty("COSMOS.HTTP2_ENABLED", "true"); + // Disable the thin-client connectivity probe for the duration of this test. + // The probe fires HTTP/2 POSTs to thin-client port 10250 on every account refresh; + // when this test installs an iptables DROP on port 10250 (THIN_CLIENT_ENABLED=true + // branch) those probe requests time out, the probe trips + // isProxyProbeHealthy()->false, and routing falls back to Gateway V1 on port 443 + // -- bypassing the very PING handler this test is exercising. Disable the probe + // so EndpointProbeClient.proxyHealthy stays optimistically true and the data plane + // request actually flows over port 10250 where the iptables DROP can take effect. 
+ System.setProperty("COSMOS.THINCLIENT_PROBE_ENABLED", "false"); logger.info("Transport selected: thinClient={}, h2Port={}", THIN_CLIENT_ENABLED, H2_PORT); this.client = getClientBuilder().buildAsyncClient(); @@ -101,6 +110,7 @@ public void beforeClass() { public void afterClass() { safeClose(this.client); System.clearProperty("COSMOS.HTTP2_ENABLED"); + System.clearProperty("COSMOS.THINCLIENT_PROBE_ENABLED"); } @BeforeMethod(groups = {"manual-http-network-fault"}) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ClientConfigDiagnosticsTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ClientConfigDiagnosticsTest.java index f1ea462ca638..f9a61997a9ad 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ClientConfigDiagnosticsTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ClientConfigDiagnosticsTest.java @@ -207,7 +207,10 @@ public void rntbd() throws Exception { assertThat(objectNode.get("connCfg").get("rntbd").asText()).isEqualTo("(cto:PT5S, nrto:PT5S, icto:PT0S, ieto:PT1H, mcpe:130, mrpc:30, cer:true)"); String http2Enabled = Configs.isHttp2Enabled() ? "true" : "false"; - assertThat(objectNode.get("connCfg").get("gw").asText()).isEqualTo("(cps:1000, nrto:PT1M, icto:PT1M, cto:PT45S, gwV2Cto:n/a, p:false, http2:(enabled:"+ http2Enabled + ", maxc:1000, minc:" + Math.max(8, Runtime.getRuntime().availableProcessors()) + ", maxs:30))"); + String gwV2Cto = Configs.isThinClientEnabled() + ? 
Duration.ofMillis(Configs.getThinClientConnectionTimeoutInMs()).toString() + : "n/a"; + assertThat(objectNode.get("connCfg").get("gw").asText()).isEqualTo("(cps:1000, nrto:PT1M, icto:PT1M, cto:PT45S, gwV2Cto:" + gwV2Cto + ", p:false, http2:(enabled:"+ http2Enabled + ", maxc:1000, minc:" + Math.max(8, Runtime.getRuntime().availableProcessors()) + ", maxs:30))"); assertThat(objectNode.get("connCfg").get("other").asText()).isEqualTo("(ed: false, cs: false, rv: true)"); } @@ -244,7 +247,10 @@ public void gw() throws Exception { assertThat(objectNode.get("connCfg").get("rntbd").asText()).isEqualTo("null"); String http2Enabled = Configs.isHttp2Enabled() ? "true" : "false"; - assertThat(objectNode.get("connCfg").get("gw").asText()).isEqualTo("(cps:500, nrto:PT18S, icto:PT17S, cto:PT45S, gwV2Cto:n/a, p:false, http2:(enabled:" + http2Enabled + ", maxc:1000, minc:" + Math.max(8, Runtime.getRuntime().availableProcessors()) + ", maxs:30))"); + String gwV2Cto = Configs.isThinClientEnabled() + ? Duration.ofMillis(Configs.getThinClientConnectionTimeoutInMs()).toString() + : "n/a"; + assertThat(objectNode.get("connCfg").get("gw").asText()).isEqualTo("(cps:500, nrto:PT18S, icto:PT17S, cto:PT45S, gwV2Cto:" + gwV2Cto + ", p:false, http2:(enabled:" + http2Enabled + ", maxc:1000, minc:" + Math.max(8, Runtime.getRuntime().availableProcessors()) + ", maxs:30))"); assertThat(objectNode.get("connCfg").get("other").asText()).isEqualTo("(ed: false, cs: false, rv: true)"); } @@ -318,7 +324,10 @@ public void full( assertThat(objectNode.get("connCfg").get("rntbd").asText()).isEqualTo("null"); String http2Enabled = Configs.isHttp2Enabled() ? "true" : "false"; - assertThat(objectNode.get("connCfg").get("gw").asText()).isEqualTo("(cps:500, nrto:PT18S, icto:PT17S, cto:PT45S, gwV2Cto:n/a, p:false, http2:(enabled:" + http2Enabled + ", maxc:1000, minc:" + Math.max(8, Runtime.getRuntime().availableProcessors()) + ", maxs:30))"); + String gwV2Cto = Configs.isThinClientEnabled() + ? 
Duration.ofMillis(Configs.getThinClientConnectionTimeoutInMs()).toString() + : "n/a"; + assertThat(objectNode.get("connCfg").get("gw").asText()).isEqualTo("(cps:500, nrto:PT18S, icto:PT17S, cto:PT45S, gwV2Cto:" + gwV2Cto + ", p:false, http2:(enabled:" + http2Enabled + ", maxc:1000, minc:" + Math.max(8, Runtime.getRuntime().availableProcessors()) + ", maxs:30))"); assertThat(objectNode.get("connCfg").get("other").asText()).isEqualTo("(ed: true, cs: true, rv: false)"); assertThat(objectNode.get("excrgns").asText()).isEqualTo("[westus2]"); diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ConfigsTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ConfigsTests.java index c814714ce2ee..d0d28a33b5ee 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ConfigsTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ConfigsTests.java @@ -232,8 +232,19 @@ public void http2MaxFrameSizeInBytes() { @Test(groups = { "emulator" }) public void thinClientEnabledTest() { - assertThat(isThinClientEnabled()).isFalse(); + // Default flipped to true in the probe-flow work; explicit "false" must still opt out. 
System.clearProperty("COSMOS.THINCLIENT_ENABLED"); + try { + assertThat(isThinClientEnabled()).isTrue(); + } finally { + System.clearProperty("COSMOS.THINCLIENT_ENABLED"); + } + System.setProperty("COSMOS.THINCLIENT_ENABLED", "false"); + try { + assertThat(isThinClientEnabled()).isFalse(); + } finally { + System.clearProperty("COSMOS.THINCLIENT_ENABLED"); + } System.setProperty("COSMOS.THINCLIENT_ENABLED", "true"); try { assertThat(isThinClientEnabled()).isTrue(); @@ -242,6 +253,92 @@ public void thinClientEnabledTest() { } } + @Test(groups = { "unit" }) + public void thinClientProbeEnabledDefaultTest() { + System.clearProperty("COSMOS.THINCLIENT_PROBE_ENABLED"); + try { + assertThat(Configs.isThinClientProbeEnabled()).isTrue(); + } finally { + System.clearProperty("COSMOS.THINCLIENT_PROBE_ENABLED"); + } + } + + @Test(groups = { "unit" }) + public void thinClientProbeEnabledOverrideTest() { + System.setProperty("COSMOS.THINCLIENT_PROBE_ENABLED", "false"); + try { + assertThat(Configs.isThinClientProbeEnabled()).isFalse(); + } finally { + System.clearProperty("COSMOS.THINCLIENT_PROBE_ENABLED"); + } + } + + @Test(groups = { "unit" }) + public void thinClientProbeFailureThresholdDefaultTest() { + System.clearProperty("COSMOS.THINCLIENT_PROBE_FAILURE_THRESHOLD"); + try { + assertThat(Configs.getThinClientProbeFailureThreshold()).isEqualTo(1); + } finally { + System.clearProperty("COSMOS.THINCLIENT_PROBE_FAILURE_THRESHOLD"); + } + } + + @Test(groups = { "unit" }) + public void thinClientProbeFailureThresholdOverrideTest() { + System.setProperty("COSMOS.THINCLIENT_PROBE_FAILURE_THRESHOLD", "5"); + try { + assertThat(Configs.getThinClientProbeFailureThreshold()).isEqualTo(5); + } finally { + System.clearProperty("COSMOS.THINCLIENT_PROBE_FAILURE_THRESHOLD"); + } + } + + @Test(groups = { "unit" }) + public void thinClientProbeFailureThresholdCoercionTest() { + System.setProperty("COSMOS.THINCLIENT_PROBE_FAILURE_THRESHOLD", "0"); + try { + 
assertThat(Configs.getThinClientProbeFailureThreshold()).isEqualTo(1); + } finally { + System.clearProperty("COSMOS.THINCLIENT_PROBE_FAILURE_THRESHOLD"); + } + System.setProperty("COSMOS.THINCLIENT_PROBE_FAILURE_THRESHOLD", "-3"); + try { + assertThat(Configs.getThinClientProbeFailureThreshold()).isEqualTo(1); + } finally { + System.clearProperty("COSMOS.THINCLIENT_PROBE_FAILURE_THRESHOLD"); + } + } + + @Test(groups = { "unit" }) + public void thinClientProbeFailureThresholdInvalidFallsBackToDefaultTest() { + System.setProperty("COSMOS.THINCLIENT_PROBE_FAILURE_THRESHOLD", "not-a-number"); + try { + assertThat(Configs.getThinClientProbeFailureThreshold()).isEqualTo(1); + } finally { + System.clearProperty("COSMOS.THINCLIENT_PROBE_FAILURE_THRESHOLD"); + } + } + + @Test(groups = { "unit" }) + public void thinClientProbePathDefaultTest() { + System.clearProperty("COSMOS.THINCLIENT_PROBE_PATH"); + try { + assertThat(Configs.getThinClientProbePath()).isEqualTo("/connectivity-probe"); + } finally { + System.clearProperty("COSMOS.THINCLIENT_PROBE_PATH"); + } + } + + @Test(groups = { "unit" }) + public void thinClientProbePathOverrideTest() { + System.setProperty("COSMOS.THINCLIENT_PROBE_PATH", "/custom-probe"); + try { + assertThat(Configs.getThinClientProbePath()).isEqualTo("/custom-probe"); + } finally { + System.clearProperty("COSMOS.THINCLIENT_PROBE_PATH"); + } + } + @Test(groups = { "unit" }) public void thinClientConnectionTimeoutDefaultTest() { // Default thin client connection timeout should be 5000 milliseconds diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/EndpointProbeClientTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/EndpointProbeClientTests.java new file mode 100644 index 000000000000..c208a6fb1343 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/EndpointProbeClientTests.java @@ -0,0 +1,321 @@ +// Copyright (c) Microsoft 
Corporation. All rights reserved. +// Licensed under the MIT License. +package com.azure.cosmos.implementation; + +import com.azure.cosmos.implementation.http.HttpClient; +import com.azure.cosmos.implementation.http.HttpHeaders; +import com.azure.cosmos.implementation.http.HttpRequest; +import com.azure.cosmos.implementation.http.HttpResponse; +import io.netty.buffer.ByteBuf; +import org.mockito.Mockito; +import org.testng.annotations.AfterMethod; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; +import reactor.core.publisher.Mono; + +import java.net.ConnectException; +import java.net.URI; +import java.time.Duration; +import java.time.Instant; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import java.util.concurrent.atomic.AtomicInteger; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.mockito.ArgumentMatchers.any; + +public class EndpointProbeClientTests { + + private static final URI REGION_EAST = URI.create("https://probe-east.example.com:10250"); + private static final URI REGION_WEST = URI.create("https://probe-west.example.com:10250"); + + @BeforeMethod(groups = { "unit" }) + public void resetSystemProperties() { + System.clearProperty("COSMOS.THINCLIENT_PROBE_ENABLED"); + System.clearProperty("COSMOS.THINCLIENT_PROBE_FAILURE_THRESHOLD"); + System.clearProperty("COSMOS.THINCLIENT_PROBE_RECOVERY_THRESHOLD"); + System.clearProperty("COSMOS.THINCLIENT_PROBE_PATH"); + } + + @AfterMethod(groups = { "unit" }) + public void clearSystemProperties() { + System.clearProperty("COSMOS.THINCLIENT_PROBE_ENABLED"); + System.clearProperty("COSMOS.THINCLIENT_PROBE_FAILURE_THRESHOLD"); + System.clearProperty("COSMOS.THINCLIENT_PROBE_RECOVERY_THRESHOLD"); + System.clearProperty("COSMOS.THINCLIENT_PROBE_PATH"); + } + + @Test(groups = { "unit" }) + public void allGreen_cycleIsGreen_andHealthStaysTrue() { + Map statusByEndpoint = new HashMap<>(); + 
statusByEndpoint.put(REGION_EAST, 200); + statusByEndpoint.put(REGION_WEST, 200); + AtomicInteger sendCount = new AtomicInteger(0); + HttpClient client = mockClient(statusByEndpoint, sendCount, false); + + EndpointProbeClient probeClient = new EndpointProbeClient(client); + + Instant before = Instant.now(); + Boolean healthy = probeClient.runProbeCycle(Arrays.asList(REGION_EAST, REGION_WEST)).block(); + + assertThat(healthy).isTrue(); + assertThat(probeClient.isProxyHealthy()).isTrue(); + assertThat(sendCount.get()).isEqualTo(2); + EndpointProbeClient.DiagnosticsSnapshot snap = probeClient.getDiagnosticsSnapshot(); + assertThat(snap.getLastCycleSuccess()).isEqualTo(Boolean.TRUE); + assertThat(snap.getLastStateUpdatedAt()).isNotNull(); + assertThat(snap.getLastStateUpdatedAt()).isAfterOrEqualTo(before); + } + + @Test(groups = { "unit" }) + public void any503_failsTheCycle_andHysteresisDelaysFlip() { + // Threshold = 2: one RED cycle should NOT flip; two RED cycles SHOULD flip. + System.setProperty("COSMOS.THINCLIENT_PROBE_FAILURE_THRESHOLD", "2"); + + Map statusByEndpoint = new HashMap<>(); + statusByEndpoint.put(REGION_EAST, 200); + statusByEndpoint.put(REGION_WEST, 503); + HttpClient client = mockClient(statusByEndpoint, new AtomicInteger(), false); + + EndpointProbeClient probeClient = new EndpointProbeClient(client); + + // Cycle 1: RED but below threshold — gate stays HEALTHY. + assertThat(probeClient.runProbeCycle(Arrays.asList(REGION_EAST, REGION_WEST)).block()).isTrue(); + assertThat(probeClient.isProxyHealthy()).isTrue(); + assertThat(probeClient.getDiagnosticsSnapshot().getLastCycleSuccess()).isEqualTo(Boolean.FALSE); + + // Cycle 2: RED at threshold -> flip. 
+ assertThat(probeClient.runProbeCycle(Arrays.asList(REGION_EAST, REGION_WEST)).block()).isFalse(); + assertThat(probeClient.isProxyHealthy()).isFalse(); + assertThat(probeClient.getDiagnosticsSnapshot().getLastCycleSuccess()).isEqualTo(Boolean.FALSE); + } + + @Test(groups = { "unit" }) + public void singleGreenCycleRestoresHealthAndResetsCounter() { + System.setProperty("COSMOS.THINCLIENT_PROBE_FAILURE_THRESHOLD", "1"); + + Map redByEndpoint = new HashMap<>(); + redByEndpoint.put(REGION_EAST, 503); + EndpointProbeClient probeClient = new EndpointProbeClient(mockClient(redByEndpoint, new AtomicInteger(), false)); + + assertThat(probeClient.runProbeCycle(Collections.singletonList(REGION_EAST)).block()).isFalse(); + assertThat(probeClient.isProxyHealthy()).isFalse(); + + // Toggling client returns red on first call and green on subsequent calls; drive the + // probe client to RED on cycle 1 then GREEN on cycle 2 and assert hysteresis recovery. + EndpointProbeClient combo = new EndpointProbeClient(toggleClient(REGION_EAST, 503, 200)); + + assertThat(combo.runProbeCycle(Collections.singletonList(REGION_EAST)).block()).isFalse(); + assertThat(combo.isProxyHealthy()).isFalse(); + + assertThat(combo.runProbeCycle(Collections.singletonList(REGION_EAST)).block()).isTrue(); + assertThat(combo.isProxyHealthy()).isTrue(); + assertThat(combo.getDiagnosticsSnapshot().getLastCycleSuccess()).isEqualTo(Boolean.TRUE); + } + + @Test(groups = { "unit" }) + public void transportErrorIsRed() { + System.setProperty("COSMOS.THINCLIENT_PROBE_FAILURE_THRESHOLD", "1"); + + HttpClient client = Mockito.mock(HttpClient.class); + Mockito.doAnswer(inv -> Mono.error(new ConnectException("refused"))) + .when(client).send(any(HttpRequest.class), any(Duration.class)); + Mockito.doAnswer(inv -> Mono.error(new ConnectException("refused"))) + .when(client).send(any(HttpRequest.class)); + + EndpointProbeClient probeClient = new EndpointProbeClient(client); + + 
assertThat(probeClient.runProbeCycle(Collections.singletonList(REGION_EAST)).block()).isFalse(); + assertThat(probeClient.isProxyHealthy()).isFalse(); + } + + @Test(groups = { "unit" }) + public void featureFlagOff_isNoOp() { + System.setProperty("COSMOS.THINCLIENT_PROBE_ENABLED", "false"); + + HttpClient client = Mockito.mock(HttpClient.class); + EndpointProbeClient probeClient = new EndpointProbeClient(client); + + Boolean healthy = probeClient.runProbeCycle(Arrays.asList(REGION_EAST, REGION_WEST)).block(); + assertThat(healthy).isTrue(); + assertThat(probeClient.isProxyHealthy()).isTrue(); + Mockito.verify(client, Mockito.never()).send(any(HttpRequest.class), any(Duration.class)); + Mockito.verify(client, Mockito.never()).send(any(HttpRequest.class)); + } + + @Test(groups = { "unit" }) + public void emptyOrNullEndpointSet_isNoOp() { + HttpClient client = Mockito.mock(HttpClient.class); + EndpointProbeClient probeClient = new EndpointProbeClient(client); + + assertThat(probeClient.runProbeCycle(null).block()).isTrue(); + assertThat(probeClient.runProbeCycle(Collections.emptyList()).block()).isTrue(); + Mockito.verify(client, Mockito.never()).send(any(HttpRequest.class), any(Duration.class)); + Mockito.verify(client, Mockito.never()).send(any(HttpRequest.class)); + } + + @Test(groups = { "unit" }) + public void wrongPath400_isRed() { + System.setProperty("COSMOS.THINCLIENT_PROBE_FAILURE_THRESHOLD", "1"); + Map statusByEndpoint = new HashMap<>(); + statusByEndpoint.put(REGION_EAST, 400); + EndpointProbeClient probeClient = new EndpointProbeClient(mockClient(statusByEndpoint, new AtomicInteger(), false)); + assertThat(probeClient.runProbeCycle(Collections.singletonList(REGION_EAST)).block()).isFalse(); + } + + @Test(groups = { "unit" }) + public void probeRequestTargetsConfiguredPath() { + Map statusByEndpoint = new HashMap<>(); + statusByEndpoint.put(REGION_EAST, 200); + AtomicInteger sendCount = new AtomicInteger(0); + HttpClient client = 
mockClient(statusByEndpoint, sendCount, true); + + EndpointProbeClient probeClient = new EndpointProbeClient(client); + probeClient.runProbeCycle(Collections.singletonList(REGION_EAST)).block(); + + assertThat(sendCount.get()).isEqualTo(1); + } + + @Test(groups = { "unit" }) + public void recoveryThresholdRequiresMultipleGreenCycles() { + // Operator opts into more conservative recovery: require two consecutive GREEN cycles + // before flipping back to healthy. With default failureThreshold=1 the probe client + // becomes UNHEALTHY after one RED. A single GREEN must NOT restore traffic. + System.setProperty("COSMOS.THINCLIENT_PROBE_FAILURE_THRESHOLD", "1"); + System.setProperty("COSMOS.THINCLIENT_PROBE_RECOVERY_THRESHOLD", "2"); + + // Sequenced client returns RED, GREEN, GREEN across successive probe calls + // against the single regional endpoint. + HttpClient sequencedClient = sequencedClient(REGION_EAST, 503, 200, 200); + EndpointProbeClient e = new EndpointProbeClient(sequencedClient); + + // RED #1 — hits failure threshold of 1, gate flips UNHEALTHY. + e.runProbeCycle(Collections.singletonList(REGION_EAST)).block(); + assertThat(e.isProxyHealthy()).isFalse(); + + // GREEN #1 — under recovery threshold of 2, gate STAYS UNHEALTHY. + e.runProbeCycle(Collections.singletonList(REGION_EAST)).block(); + assertThat(e.isProxyHealthy()).isFalse(); + + // GREEN #2 — second consecutive GREEN restores healthy. + e.runProbeCycle(Collections.singletonList(REGION_EAST)).block(); + assertThat(e.isProxyHealthy()).isTrue(); + } + + @Test(groups = { "unit" }) + public void forceUnhealthy_flipsGateToRedWithoutRunningProbe() { + // Safeguard path used by GlobalEndpointManager when account topology says thin-client + // is eligible but LocationCache cannot resolve a single thin-client regional endpoint. + // Without a fan-out, this method must still flip the gate so the optimistic-startup + // default does not pin traffic to an unreachable thin-client store model. 
+ Map greenByEndpoint = new HashMap<>(); + greenByEndpoint.put(REGION_EAST, 200); + EndpointProbeClient probeClient = new EndpointProbeClient(mockClient(greenByEndpoint, new AtomicInteger(), false)); + assertThat(probeClient.isProxyHealthy()).isTrue(); + + probeClient.forceUnhealthy("test: endpoint resolution mismatch"); + assertThat(probeClient.isProxyHealthy()).isFalse(); + assertThat(probeClient.getDiagnosticsSnapshot().getLastCycleSuccess()).isEqualTo(Boolean.FALSE); + assertThat(probeClient.getDiagnosticsSnapshot().getLastStateUpdatedAt()).isNotNull(); + } + + @Test(groups = { "unit" }) + public void forceUnhealthy_onClosedProbeClient_isNoOp() { + Map greenByEndpoint = new HashMap<>(); + greenByEndpoint.put(REGION_EAST, 200); + EndpointProbeClient probeClient = new EndpointProbeClient(mockClient(greenByEndpoint, new AtomicInteger(), false)); + probeClient.close(); + + // Closed probe clients must not mutate any state — otherwise diagnostics from a + // shutting-down client would show spurious failures. + probeClient.forceUnhealthy("test"); + // Snapshot should remain at its pre-close state (no cycle ever ran). + assertThat(probeClient.getDiagnosticsSnapshot().getLastCycleSuccess()).isNull(); + } + + private static HttpClient sequencedClient(URI endpoint, int... 
statuses) { + HttpClient client = Mockito.mock(HttpClient.class); + AtomicInteger callCount = new AtomicInteger(0); + Mockito.doAnswer(inv -> { + int n = callCount.getAndIncrement(); + int status = statuses[Math.min(n, statuses.length - 1)]; + return Mono.just(stubResponse(status)); + }).when(client).send(any(HttpRequest.class), any(Duration.class)); + Mockito.doAnswer(inv -> { + int n = callCount.getAndIncrement(); + int status = statuses[Math.min(n, statuses.length - 1)]; + return Mono.just(stubResponse(status)); + }).when(client).send(any(HttpRequest.class)); + return client; + } + + // --- Mock helpers --- + + private static HttpClient mockClient( + Map statusByHost, + AtomicInteger sendCount, + boolean assertPathAndMethod) { + + HttpClient client = Mockito.mock(HttpClient.class); + Mockito.doAnswer(inv -> { + HttpRequest req = inv.getArgument(0); + sendCount.incrementAndGet(); + if (assertPathAndMethod) { + assertThat(req.httpMethod().name()).isEqualToIgnoringCase("POST"); + assertThat(req.uri().getPath()).isEqualTo("/connectivity-probe"); + } + int status = lookupStatus(statusByHost, req.uri()); + return Mono.just(stubResponse(status)); + }).when(client).send(any(HttpRequest.class), any(Duration.class)); + + Mockito.doAnswer(inv -> { + HttpRequest req = inv.getArgument(0); + sendCount.incrementAndGet(); + int status = lookupStatus(statusByHost, req.uri()); + return Mono.just(stubResponse(status)); + }).when(client).send(any(HttpRequest.class)); + return client; + } + + private static HttpClient toggleClient(URI endpoint, int firstStatus, int subsequentStatus) { + HttpClient client = Mockito.mock(HttpClient.class); + AtomicInteger callCount = new AtomicInteger(0); + Mockito.doAnswer(inv -> { + int n = callCount.incrementAndGet(); + int status = n == 1 ? 
firstStatus : subsequentStatus; + return Mono.just(stubResponse(status)); + }).when(client).send(any(HttpRequest.class), any(Duration.class)); + Mockito.doAnswer(inv -> { + int n = callCount.incrementAndGet(); + int status = n == 1 ? firstStatus : subsequentStatus; + return Mono.just(stubResponse(status)); + }).when(client).send(any(HttpRequest.class)); + return client; + } + + private static int lookupStatus(Map statusByHost, URI requestUri) { + for (Map.Entry e : statusByHost.entrySet()) { + if (requestUri.getHost() != null && requestUri.getHost().equalsIgnoreCase(e.getKey().getHost())) { + return e.getValue(); + } + } + return 500; + } + + private static HttpResponse stubResponse(int status) { + // Use Mono.empty() so the production body-drain path is not exercised with a singleton + // ByteBuf whose refCnt would underflow across multiple probe calls and silently throw + // IllegalReferenceCountException into the swallowed error handler. This mirrors real + // ReactorNettyHttpResponse.body() behavior on an empty HTTP/2 response. + return new HttpResponse() { + @Override public int statusCode() { return status; } + @Override public String headerValue(String name) { return null; } + @Override public HttpHeaders headers() { return new HttpHeaders(); } + @Override public Mono body() { return Mono.empty(); } + @Override public Mono bodyAsString() { return Mono.just(""); } + @Override public void close() { } + }; + } +} diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ThinClientE2ETest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ThinClientE2ETest.java deleted file mode 100644 index 5589d147783b..000000000000 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ThinClientE2ETest.java +++ /dev/null @@ -1,378 +0,0 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. 
-package com.azure.cosmos.implementation; - -import com.azure.cosmos.ConsistencyLevel; -import com.azure.cosmos.CosmosAsyncClient; -import com.azure.cosmos.CosmosAsyncContainer; -import com.azure.cosmos.CosmosClientBuilder; -import com.azure.cosmos.CosmosDiagnostics; -import com.azure.cosmos.CosmosDiagnosticsContext; -import com.azure.cosmos.CosmosDiagnosticsRequestInfo; -import com.azure.cosmos.FlakyTestRetryAnalyzer; -import com.azure.cosmos.models.CosmosBatch; -import com.azure.cosmos.models.CosmosBatchResponse; -import com.azure.cosmos.models.CosmosBulkItemResponse; -import com.azure.cosmos.models.CosmosBulkOperationResponse; -import com.azure.cosmos.models.CosmosBulkOperations; -import com.azure.cosmos.models.CosmosChangeFeedRequestOptions; -import com.azure.cosmos.models.CosmosQueryRequestOptions; -import com.azure.cosmos.models.FeedRange; -import com.azure.cosmos.models.PartitionKey; -import com.azure.cosmos.models.SqlQuerySpec; -import com.azure.cosmos.models.SqlParameter; -import com.azure.cosmos.models.FeedResponse; -import com.azure.cosmos.models.CosmosContainerProperties; -import com.azure.cosmos.models.ThroughputProperties; -import com.azure.cosmos.models.CosmosItemResponse; -import com.azure.cosmos.models.CosmosItemRequestOptions; -import com.azure.cosmos.models.CosmosPatchOperations; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.fasterxml.jackson.databind.node.ObjectNode; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.testng.annotations.Test; -import reactor.core.publisher.Flux; - -import java.util.Arrays; -import java.util.Collection; -import java.util.List; -import java.util.UUID; - -import static org.assertj.core.api.AssertionsForClassTypes.assertThat; -import static org.assertj.core.api.Fail.fail; - -// End to end sanity tests for basic thin client functionality. 
-public class ThinClientE2ETest { - private static final Logger logger = LoggerFactory.getLogger(ThinClientE2ETest.class); - private static final String thinClientEndpointIndicator = ":10250/"; - - @Test(groups = {"thinclient"}, retryAnalyzer = FlakyTestRetryAnalyzer.class) - public void testThinClientQuery() { - CosmosAsyncClient client = null; - try { - // If running locally, uncomment these lines - // System.setProperty("COSMOS.THINCLIENT_ENABLED", "true"); - // System.setProperty("COSMOS.HTTP2_ENABLED", "true"); - - client = new CosmosClientBuilder() - .endpoint(TestConfigurations.HOST) - .key(TestConfigurations.MASTER_KEY) - .gatewayMode() - .consistencyLevel(ConsistencyLevel.SESSION) - .buildAsyncClient(); - - CosmosAsyncContainer container = client.getDatabase("db1").getContainer("c2"); - String idName = "id"; - String partitionKeyName = "partitionKey"; - ObjectMapper mapper = new ObjectMapper(); - ObjectNode doc = mapper.createObjectNode(); - String idValue = UUID.randomUUID().toString(); - doc.put(idName, idValue); - doc.put(partitionKeyName, idValue); - - container.createItem(doc, new PartitionKey(idValue), null).block(); - - String query = "select * from c WHERE c." 
+ partitionKeyName + "=@id"; - SqlQuerySpec querySpec = new SqlQuerySpec(query); - querySpec.setParameters(Arrays.asList(new SqlParameter("@id", idValue))); - CosmosQueryRequestOptions requestOptions = - new CosmosQueryRequestOptions().setPartitionKey(new PartitionKey(idValue)); - FeedResponse response = container - .queryItems(querySpec, requestOptions, ObjectNode.class) - .byPage() - .blockFirst(); - - ObjectNode docFromResponse = response.getResults().get(0); - assertThat(docFromResponse.get(partitionKeyName).textValue()).isEqualTo(idValue); - assertThat(docFromResponse.get(idName).textValue()).isEqualTo(idValue); - assertThinClientEndpointUsed(response.getCosmosDiagnostics()); - - } finally { - if (client != null) { - client.close(); - } - } - } - - @Test(groups = {"thinclient"}, retryAnalyzer = FlakyTestRetryAnalyzer.class) - public void testThinClientBulk() { - CosmosAsyncClient client = null; - try { - // If running locally, uncomment these lines - // System.setProperty("COSMOS.THINCLIENT_ENABLED", "true"); - // System.setProperty("COSMOS.HTTP2_ENABLED", "true"); - - client = new CosmosClientBuilder() - .endpoint(TestConfigurations.HOST) - .key(TestConfigurations.MASTER_KEY) - .gatewayMode() - .consistencyLevel(ConsistencyLevel.EVENTUAL) - .buildAsyncClient(); - - CosmosAsyncContainer container = client.getDatabase("db1").getContainer("c2"); - String idName = "id"; - String partitionKeyName = "partitionKey"; - ObjectMapper mapper = new ObjectMapper(); - ObjectNode doc = mapper.createObjectNode(); - String idValue = UUID.randomUUID().toString(); - doc.put(idName, idValue); - doc.put(partitionKeyName, idValue); - - Flux> responsesFlux = container.executeBulkOperations(Flux.just( - CosmosBulkOperations.getCreateItemOperation(doc, new PartitionKey(idValue)) - )); - - List> responses = responsesFlux.collectList().block(); - - assertThat(responses.size()).isEqualTo(1); - assertThat(responses.get(0).getException()).isNull(); - CosmosBulkItemResponse bulkResponse = 
responses.get(0).getResponse(); - assertThat(bulkResponse.isSuccessStatusCode()).isEqualTo(true); - assertThinClientEndpointUsed(bulkResponse.getCosmosDiagnostics()); - } finally { - if (client != null) { - client.close(); - } - } - } - - @Test(groups = {"thinclient"}, retryAnalyzer = FlakyTestRetryAnalyzer.class) - public void testThinClientBatch() { - CosmosAsyncClient client = null; - try { - // If running locally, uncomment these lines - // System.setProperty("COSMOS.THINCLIENT_ENABLED", "true"); - // System.setProperty("COSMOS.HTTP2_ENABLED", "true"); - - client = new CosmosClientBuilder() - .endpoint(TestConfigurations.HOST) - .key(TestConfigurations.MASTER_KEY) - .gatewayMode() - .consistencyLevel(ConsistencyLevel.SESSION) - .buildAsyncClient(); - - CosmosAsyncContainer container = client.getDatabase("db1").getContainer("c2"); - String idName = "id"; - String partitionKeyName = "partitionKey"; - ObjectMapper mapper = new ObjectMapper(); - String pkValue = UUID.randomUUID().toString(); - ObjectNode doc1 = mapper.createObjectNode(); - String idValue1 = UUID.randomUUID().toString(); - doc1.put(idName, idValue1); - doc1.put(partitionKeyName, pkValue); - - ObjectNode doc2 = mapper.createObjectNode(); - String idValue2 = UUID.randomUUID().toString(); - doc2.put(idName, idValue2); - doc2.put(partitionKeyName, pkValue); - - CosmosBatch batch = CosmosBatch.createCosmosBatch(new PartitionKey(pkValue)); - batch.createItemOperation(doc1); - batch.createItemOperation(doc2); - - CosmosBatchResponse response = container - .executeCosmosBatch(batch) - .block(); - - assertThat(response.getStatusCode()).isEqualTo(200); - assertThinClientEndpointUsed(response.getDiagnostics()); - } finally { - if (client != null) { - client.close(); - } - } - } - - @Test(groups = {"thinclient"}, retryAnalyzer = FlakyTestRetryAnalyzer.class) - public void testThinClientIncrementalChangeFeed() { - CosmosAsyncClient client = null; - try { - // If running locally, uncomment these lines -// 
System.setProperty("COSMOS.THINCLIENT_ENABLED", "true"); -// System.setProperty("COSMOS.HTTP2_ENABLED", "true"); - - client = new CosmosClientBuilder() - .endpoint(TestConfigurations.HOST) - .key(TestConfigurations.MASTER_KEY) - .gatewayMode() - .consistencyLevel(ConsistencyLevel.SESSION) - .buildAsyncClient(); - - CosmosAsyncContainer container = client.getDatabase("db1").getContainer("c2"); - String idName = "id"; - String partitionKeyName = "partitionKey"; - ObjectMapper mapper = new ObjectMapper(); - String pkValue = UUID.randomUUID().toString(); - ObjectNode doc1 = mapper.createObjectNode(); - String idValue1 = UUID.randomUUID().toString(); - doc1.put(idName, idValue1); - doc1.put(partitionKeyName, pkValue); - - ObjectNode doc2 = mapper.createObjectNode(); - String idValue2 = UUID.randomUUID().toString(); - doc2.put(idName, idValue2); - doc2.put(partitionKeyName, pkValue); - - CosmosBatch batch = CosmosBatch.createCosmosBatch(new PartitionKey(pkValue)); - batch.createItemOperation(doc1); - batch.createItemOperation(doc2); - - CosmosBatchResponse response = container - .executeCosmosBatch(batch) - .block(); - - FeedResponse changeFeedResponse = container - .queryChangeFeed(CosmosChangeFeedRequestOptions.createForProcessingFromBeginning(FeedRange.forFullRange()), ObjectNode.class) - .byPage() - .blockFirst(); - - assertThat(changeFeedResponse).isNotNull(); - assertThat(changeFeedResponse.getResults()).isNotNull(); - assertThat(changeFeedResponse.getResults().size()).isGreaterThanOrEqualTo(1); - assertThinClientEndpointUsed(changeFeedResponse.getCosmosDiagnostics()); - } finally { - if (client != null) { - client.close(); - } - } - } - - private static void assertThinClientEndpointUsed(CosmosDiagnostics diagnostics) { - assertThat(diagnostics).isNotNull(); - - CosmosDiagnosticsContext ctx = diagnostics.getDiagnosticsContext(); - assertThat(ctx).isNotNull(); - - Collection requests = ctx.getRequestInfo(); - assertThat(requests).isNotNull(); - 
assertThat(requests.size()).isPositive(); - - for (CosmosDiagnosticsRequestInfo requestInfo : requests) { - logger.info( - "Endpoint: {}, RequestType: {}, Partition: {}/{}, ActivityId: {}", - requestInfo.getEndpoint(), - requestInfo.getRequestType(), - requestInfo.getPartitionId(), - requestInfo.getPartitionKeyRangeId(), - requestInfo.getActivityId()); - if (requestInfo.getEndpoint().contains(thinClientEndpointIndicator)) { - return; - } - } - - fail("No request targeting thin client proxy endpoint."); - } - - - @Test(groups = {"thinclient"}, retryAnalyzer = FlakyTestRetryAnalyzer.class) - public void testThinClientDocumentPointOperations() { - CosmosAsyncClient client = null; - try { - // if running locally, uncomment these lines - // System.setProperty("COSMOS.THINCLIENT_ENABLED", "true"); - // System.setProperty("COSMOS.HTTP2_ENABLED", "true"); - - client = new CosmosClientBuilder() - .endpoint(TestConfigurations.HOST) - .key(TestConfigurations.MASTER_KEY) - .gatewayMode() - .consistencyLevel(ConsistencyLevel.SESSION) - .buildAsyncClient(); - - String idName = "id"; - String partitionKeyName = "partitionKey"; - - client.createDatabaseIfNotExists("db1").block(); - - CosmosContainerProperties containerDef = - new CosmosContainerProperties("c2", "/" + partitionKeyName); - ThroughputProperties ruCfg = ThroughputProperties.createManualThroughput(35_000); - - client.getDatabase("db1").createContainerIfNotExists(containerDef, ruCfg).block(); - - CosmosAsyncContainer container = client.getDatabase("db1").getContainer("c2"); - - ObjectMapper mapper = new ObjectMapper(); - ObjectNode doc = mapper.createObjectNode(); - String idValue = UUID.randomUUID().toString(); - doc.put(idName, idValue); - doc.put(partitionKeyName, idValue); - - // create - CosmosItemResponse createResponse = container.createItem(doc).block(); - assertThat(createResponse.getStatusCode()).isEqualTo(201); - assertThat(createResponse.getRequestCharge()).isGreaterThan(0.0); - 
assertThinClientEndpointUsed(createResponse.getDiagnostics()); - - // read - CosmosItemResponse readResponse = container.readItem(idValue, new PartitionKey(idValue), ObjectNode.class).block(); - assertThat(readResponse.getStatusCode()).isEqualTo(200); - assertThat(readResponse.getRequestCharge()).isGreaterThan(0.0); - assertThinClientEndpointUsed(readResponse.getDiagnostics()); - - ObjectNode doc2 = mapper.createObjectNode(); - String idValue2 = UUID.randomUUID().toString(); - doc2.put(idName, idValue2); - doc2.put(partitionKeyName, idValue); - - // replace - CosmosItemResponse replaceResponse = container.replaceItem(doc2, idValue, new PartitionKey(idValue)).block(); - assertThat(replaceResponse.getStatusCode()).isEqualTo(200); - assertThat(replaceResponse.getRequestCharge()).isGreaterThan(0.0); - assertThinClientEndpointUsed(replaceResponse.getDiagnostics()); - - CosmosItemResponse readAfterReplaceResponse = container.readItem(idValue2, new PartitionKey(idValue), ObjectNode.class).block(); - assertThat(readAfterReplaceResponse.getStatusCode()).isEqualTo(200); - ObjectNode replacedItemFromRead = readAfterReplaceResponse.getItem(); - assertThat(replacedItemFromRead.get(idName).asText()).isEqualTo(idValue2); - assertThat(replacedItemFromRead.get(partitionKeyName).asText()).isEqualTo(idValue); - assertThinClientEndpointUsed(readAfterReplaceResponse.getDiagnostics()); - - ObjectNode doc3 = mapper.createObjectNode(); - doc3.put(idName, idValue2); - doc3.put(partitionKeyName, idValue); - doc3.put("newField", "newValue"); - - // upsert - CosmosItemResponse upsertResponse = container.upsertItem(doc3, new PartitionKey(idValue), new CosmosItemRequestOptions()).block(); - assertThat(upsertResponse.getStatusCode()).isEqualTo(200); - assertThat(upsertResponse.getRequestCharge()).isGreaterThan(0.0); - assertThinClientEndpointUsed(upsertResponse.getDiagnostics()); - - CosmosItemResponse readAfterUpsertResponse = container.readItem(idValue2, new PartitionKey(idValue), 
ObjectNode.class).block(); - ObjectNode upsertedItemFromRead = readAfterUpsertResponse.getItem(); - assertThat(upsertedItemFromRead.get(idName).asText()).isEqualTo(idValue2); - assertThat(upsertedItemFromRead.get(partitionKeyName).asText()).isEqualTo(idValue); - assertThat(upsertedItemFromRead.get("newField").asText()).isEqualTo("newValue"); - assertThinClientEndpointUsed(readAfterUpsertResponse.getDiagnostics()); - - // patch - CosmosPatchOperations patchOperations = CosmosPatchOperations.create(); - patchOperations.add("/anotherNewField", "anotherNewValue"); - patchOperations.replace("/newField", "patchedNewField"); - CosmosItemResponse patchResponse = container.patchItem(idValue2, new PartitionKey(idValue), patchOperations, ObjectNode.class).block(); - assertThat(patchResponse.getStatusCode()).isEqualTo(200); - assertThat(patchResponse.getRequestCharge()).isGreaterThan(0.0); - assertThinClientEndpointUsed(patchResponse.getDiagnostics()); - - CosmosItemResponse readAfterPatchResponse = container.readItem(idValue2, new PartitionKey(idValue), ObjectNode.class).block(); - ObjectNode patchedItemFromRead = readAfterPatchResponse.getItem(); - assertThat(patchedItemFromRead.get(idName).asText()).isEqualTo(idValue2); - assertThat(patchedItemFromRead.get(partitionKeyName).asText()).isEqualTo(idValue); - assertThat(patchedItemFromRead.get("newField").asText()).isEqualTo("patchedNewField"); - assertThat(patchedItemFromRead.get("anotherNewField").asText()).isEqualTo("anotherNewValue"); - assertThinClientEndpointUsed(readAfterPatchResponse.getDiagnostics()); - - // delete - CosmosItemResponse deleteResponse = container.deleteItem(idValue2, new PartitionKey(idValue)).block(); - assertThat(deleteResponse.getStatusCode()).isEqualTo(204); - assertThat(deleteResponse.getRequestCharge()).isGreaterThan(0.0); - assertThinClientEndpointUsed(deleteResponse.getDiagnostics()); - } finally { - if (client != null) { - client.close(); - } - } - } -} diff --git 
a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ThinClientProbeWiringTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ThinClientProbeWiringTests.java new file mode 100644 index 000000000000..5c68d7a9d852 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ThinClientProbeWiringTests.java @@ -0,0 +1,296 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +package com.azure.cosmos.implementation; + +import com.azure.cosmos.DirectConnectionConfig; +import com.azure.cosmos.implementation.http.HttpClient; +import com.azure.cosmos.implementation.http.HttpHeaders; +import com.azure.cosmos.implementation.http.HttpRequest; +import com.azure.cosmos.implementation.http.HttpResponse; +import com.azure.cosmos.implementation.routing.LocationCache; +import io.netty.buffer.ByteBuf; +import io.netty.buffer.Unpooled; +import org.mockito.ArgumentMatchers; +import org.mockito.Mockito; +import org.testng.annotations.AfterMethod; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; +import reactor.core.publisher.Flux; +import reactor.core.publisher.Mono; + +import java.lang.reflect.Field; +import java.net.URI; +import java.time.Duration; +import java.util.HashMap; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.atomic.AtomicInteger; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.mockito.ArgumentMatchers.any; + +/** + * Verifies the connectivity-probe orchestrator is correctly wired into + * {@link GlobalEndpointManager} and that {@link com.azure.cosmos.implementation.routing.LocationCache} + * exposes thin-client regional endpoints discovered during topology refresh. + * + *

These tests cover the integration boundary that the routing gate + * {@code RxDocumentClientImpl.useThinClientStoreModel} relies on: + *

    + *
  • Default health is optimistic ({@code true}) before any probes complete.
  • + *
  • {@code isProxyProbeHealthy()} remains optimistic when no orchestrator is wired + * (preserves pre-existing GW v1 / direct-mode behavior).
  • + *
  • Once an HttpClient is wired, the orchestrator probes the regional endpoints + * discovered by {@code LocationCache.getThinClientRegionalEndpoints()}.
  • + *
  • An empty thin-client region set does not trigger probe traffic.
  • + *
+ */ +public class ThinClientProbeWiringTests { + + private static final int TIMEOUT = 60_000; + + private static final String DB_ACCOUNT_WITH_THINCLIENT_LOCATIONS = + "{\"_self\":\"\",\"id\":\"testaccount\",\"_rid\":\"testaccount.documents.azure.com\"," + + "\"writableLocations\":[{\"name\":\"East US\",\"databaseAccountEndpoint\":\"https://testaccount-eastus.documents.azure.com:443/\"}]," + + "\"readableLocations\":[{\"name\":\"East US\",\"databaseAccountEndpoint\":\"https://testaccount-eastus.documents.azure.com:443/\"}," + + "{\"name\":\"East Asia\",\"databaseAccountEndpoint\":\"https://testaccount-eastasia.documents.azure.com:443/\"}]," + + "\"thinClientWritableLocations\":[{\"name\":\"East US\",\"databaseAccountEndpoint\":\"https://testaccount-eastus.documents.azure.com:10250/\"}]," + + "\"thinClientReadableLocations\":[{\"name\":\"East US\",\"databaseAccountEndpoint\":\"https://testaccount-eastus.documents.azure.com:10250/\"}," + + "{\"name\":\"East Asia\",\"databaseAccountEndpoint\":\"https://testaccount-eastasia.documents.azure.com:10250/\"}]," + + "\"enableMultipleWriteLocations\":false,\"userReplicationPolicy\":{\"asyncReplication\":false,\"minReplicaSetSize\":3,\"maxReplicasetSize\":4}," + + "\"userConsistencyPolicy\":{\"defaultConsistencyLevel\":\"Session\"},\"systemReplicationPolicy\":{\"minReplicaSetSize\":3,\"maxReplicasetSize\":4}," + + "\"readPolicy\":{\"primaryReadCoefficient\":1,\"secondaryReadCoefficient\":1}}"; + + private static final String DB_ACCOUNT_NO_THINCLIENT_LOCATIONS = + "{\"_self\":\"\",\"id\":\"testaccount\",\"_rid\":\"testaccount.documents.azure.com\"," + + "\"writableLocations\":[{\"name\":\"East US\",\"databaseAccountEndpoint\":\"https://testaccount-eastus.documents.azure.com:443/\"}]," + + "\"readableLocations\":[{\"name\":\"East US\",\"databaseAccountEndpoint\":\"https://testaccount-eastus.documents.azure.com:443/\"}]," + + 
"\"enableMultipleWriteLocations\":false,\"userReplicationPolicy\":{\"asyncReplication\":false,\"minReplicaSetSize\":3,\"maxReplicasetSize\":4}," + + "\"userConsistencyPolicy\":{\"defaultConsistencyLevel\":\"Session\"},\"systemReplicationPolicy\":{\"minReplicaSetSize\":3,\"maxReplicasetSize\":4}," + + "\"readPolicy\":{\"primaryReadCoefficient\":1,\"secondaryReadCoefficient\":1}}"; + + private DatabaseAccountManagerInternal databaseAccountManagerInternal; + + @BeforeClass(groups = "unit") + public void setup() { + databaseAccountManagerInternal = Mockito.mock(DatabaseAccountManagerInternal.class); + } + + @BeforeMethod(groups = { "unit" }) + public void resetSystemProperties() { + System.clearProperty("COSMOS.THINCLIENT_PROBE_ENABLED"); + System.clearProperty("COSMOS.THINCLIENT_PROBE_FAILURE_THRESHOLD"); + } + + @AfterMethod(groups = { "unit" }) + public void clearSystemProperties() { + System.clearProperty("COSMOS.THINCLIENT_PROBE_ENABLED"); + System.clearProperty("COSMOS.THINCLIENT_PROBE_FAILURE_THRESHOLD"); + } + + @Test(groups = { "unit" }, timeOut = TIMEOUT) + public void isProxyProbeHealthy_returnsTrueWhenNoOrchestratorWired() throws Exception { + GlobalEndpointManager gem = newGemWithAccount(DB_ACCOUNT_WITH_THINCLIENT_LOCATIONS); + try { + // No setThinClientHttpClient() called -> optimistic default keeps existing routing decisions intact. 
+ assertThat(gem.isProxyProbeHealthy()).isTrue(); + assertThat(gem.getThinClientProbeDiagnostics()).isNull(); + } finally { + LifeCycleUtils.closeQuietly(gem); + } + } + + @Test(groups = { "unit" }, timeOut = TIMEOUT) + public void locationCache_exposesThinClientRegionalEndpoints() throws Exception { + GlobalEndpointManager gem = newGemWithAccount(DB_ACCOUNT_WITH_THINCLIENT_LOCATIONS); + try { + LocationCache locationCache = getLocationCache(gem); + Set thinclientEndpoints = locationCache.getThinClientRegionalEndpoints(); + + assertThat(thinclientEndpoints).hasSize(2); + assertThat(thinclientEndpoints).contains( + URI.create("https://testaccount-eastus.documents.azure.com:10250/"), + URI.create("https://testaccount-eastasia.documents.azure.com:10250/")); + } finally { + LifeCycleUtils.closeQuietly(gem); + } + } + + @Test(groups = { "unit" }, timeOut = TIMEOUT) + public void locationCache_returnsEmptySetWhenNoThinClientLocations() throws Exception { + GlobalEndpointManager gem = newGemWithAccount(DB_ACCOUNT_NO_THINCLIENT_LOCATIONS); + try { + LocationCache locationCache = getLocationCache(gem); + Set thinclientEndpoints = locationCache.getThinClientRegionalEndpoints(); + + assertThat(thinclientEndpoints).isEmpty(); + assertThat(gem.hasThinClientReadLocations()).isFalse(); + } finally { + LifeCycleUtils.closeQuietly(gem); + } + } + + @Test(groups = { "unit" }, timeOut = TIMEOUT) + public void setThinClientHttpClient_triggersProbeOnRefresh() throws Exception { + AtomicInteger probeCallCount = new AtomicInteger(0); + Map statusByEndpoint = new HashMap<>(); + statusByEndpoint.put(URI.create("https://testaccount-eastus.documents.azure.com:10250/connectivity-probe"), 200); + statusByEndpoint.put(URI.create("https://testaccount-eastasia.documents.azure.com:10250/connectivity-probe"), 200); + HttpClient httpClient = stubHttpClient(statusByEndpoint, probeCallCount); + + DatabaseAccount databaseAccount = new DatabaseAccount(DB_ACCOUNT_WITH_THINCLIENT_LOCATIONS); + 
Mockito.when(databaseAccountManagerInternal.getDatabaseAccountFromEndpoint(any())).thenReturn(Flux.just(databaseAccount)); + Mockito.when(databaseAccountManagerInternal.getServiceEndpoint()).thenReturn(new URI("https://testaccount.documents.azure.com:443")); + + ConnectionPolicy connectionPolicy = new ConnectionPolicy(DirectConnectionConfig.getDefaultConfig()); + connectionPolicy.setEndpointDiscoveryEnabled(true); + connectionPolicy.setMultipleWriteRegionsEnabled(true); + + GlobalEndpointManager gem = new GlobalEndpointManager(databaseAccountManagerInternal, connectionPolicy, new Configs()); + try { + // Wire BEFORE init so the first refresh probes (mirrors RxDocumentClientImpl.init() sequence). + gem.setThinClientHttpClient(httpClient); + gem.init(); + + // Probe is fire-and-forget on a scheduler -> wait briefly for it to run. + waitForProbeCallCount(probeCallCount, 2, Duration.ofSeconds(5)); + + assertThat(probeCallCount.get()).as("probe was issued for each thin-client region").isGreaterThanOrEqualTo(2); + assertThat(gem.isProxyProbeHealthy()).as("after all-200 cycle, proxy is healthy").isTrue(); + assertThat(gem.getThinClientProbeDiagnostics()).isNotNull(); + assertThat(gem.getThinClientProbeDiagnostics().getLastCycleSuccess()).isEqualTo(Boolean.TRUE); + assertThat(gem.getThinClientProbeDiagnostics().getLastStateUpdatedAt()).isNotNull(); + } finally { + LifeCycleUtils.closeQuietly(gem); + } + } + + @Test(groups = { "unit" }, timeOut = TIMEOUT) + public void setThinClientHttpClient_redProbesFlipHealthyAfterThreshold() throws Exception { + System.setProperty("COSMOS.THINCLIENT_PROBE_FAILURE_THRESHOLD", "1"); + + AtomicInteger probeCallCount = new AtomicInteger(0); + Map statusByEndpoint = new HashMap<>(); + statusByEndpoint.put(URI.create("https://testaccount-eastus.documents.azure.com:10250/connectivity-probe"), 503); + statusByEndpoint.put(URI.create("https://testaccount-eastasia.documents.azure.com:10250/connectivity-probe"), 503); + HttpClient httpClient = 
stubHttpClient(statusByEndpoint, probeCallCount); + + DatabaseAccount databaseAccount = new DatabaseAccount(DB_ACCOUNT_WITH_THINCLIENT_LOCATIONS); + Mockito.when(databaseAccountManagerInternal.getDatabaseAccountFromEndpoint(any())).thenReturn(Flux.just(databaseAccount)); + Mockito.when(databaseAccountManagerInternal.getServiceEndpoint()).thenReturn(new URI("https://testaccount.documents.azure.com:443")); + + ConnectionPolicy connectionPolicy = new ConnectionPolicy(DirectConnectionConfig.getDefaultConfig()); + connectionPolicy.setEndpointDiscoveryEnabled(true); + connectionPolicy.setMultipleWriteRegionsEnabled(true); + + GlobalEndpointManager gem = new GlobalEndpointManager(databaseAccountManagerInternal, connectionPolicy, new Configs()); + try { + gem.setThinClientHttpClient(httpClient); + gem.init(); + + // Wait for the first cycle to complete and flip health (threshold=1 -> flips immediately on first RED). + long deadline = System.currentTimeMillis() + 5_000; + while (System.currentTimeMillis() < deadline) { + if (!gem.isProxyProbeHealthy()) { + break; + } + Thread.sleep(50); + } + + assertThat(gem.isProxyProbeHealthy()) + .as("after all-RED cycle with threshold=1, proxy should be marked unhealthy") + .isFalse(); + } finally { + LifeCycleUtils.closeQuietly(gem); + } + } + + // ---- helpers ---- + + private GlobalEndpointManager newGemWithAccount(String accountJson) throws Exception { + DatabaseAccount databaseAccount = new DatabaseAccount(accountJson); + Mockito.when(databaseAccountManagerInternal.getDatabaseAccountFromEndpoint(ArgumentMatchers.any())) + .thenReturn(Flux.just(databaseAccount)); + Mockito.when(databaseAccountManagerInternal.getServiceEndpoint()) + .thenReturn(new URI("https://testaccount.documents.azure.com:443")); + + ConnectionPolicy connectionPolicy = new ConnectionPolicy(DirectConnectionConfig.getDefaultConfig()); + connectionPolicy.setEndpointDiscoveryEnabled(true); + connectionPolicy.setMultipleWriteRegionsEnabled(true); + + 
GlobalEndpointManager gem = new GlobalEndpointManager(databaseAccountManagerInternal, connectionPolicy, new Configs()); + gem.init(); + return gem; + } + + private static LocationCache getLocationCache(GlobalEndpointManager gem) throws Exception { + Field f = GlobalEndpointManager.class.getDeclaredField("locationCache"); + f.setAccessible(true); + return (LocationCache) f.get(gem); + } + + private static HttpClient stubHttpClient(Map statusByEndpoint, AtomicInteger callCount) { + HttpClient mock = Mockito.mock(HttpClient.class); + Mockito.when(mock.send(any(HttpRequest.class), any(Duration.class))) + .thenAnswer(invocation -> { + HttpRequest req = invocation.getArgument(0); + callCount.incrementAndGet(); + Integer status = statusByEndpoint.get(req.uri()); + if (status == null) { + return Mono.error(new RuntimeException("Unexpected probe URI: " + req.uri())); + } + return Mono.just(stubResponse(req, status)); + }); + Mockito.when(mock.send(any(HttpRequest.class))) + .thenAnswer(invocation -> { + HttpRequest req = invocation.getArgument(0); + callCount.incrementAndGet(); + Integer status = statusByEndpoint.get(req.uri()); + if (status == null) { + return Mono.error(new RuntimeException("Unexpected probe URI: " + req.uri())); + } + return Mono.just(stubResponse(req, status)); + }); + return mock; + } + + private static HttpResponse stubResponse(HttpRequest req, int status) { + return new HttpResponse() { + @Override + public int statusCode() { + return status; + } + + @Override + public String headerValue(String name) { + return null; + } + + @Override + public HttpHeaders headers() { + return new HttpHeaders(); + } + + @Override + public Mono body() { + return Mono.just(Unpooled.EMPTY_BUFFER); + } + + @Override + public Mono bodyAsString() { + return Mono.just(""); + } + + @Override + public void close() { } + }; + } + + private static void waitForProbeCallCount(AtomicInteger counter, int expected, Duration timeout) throws InterruptedException { + long deadline = 
System.currentTimeMillis() + timeout.toMillis(); + while (System.currentTimeMillis() < deadline && counter.get() < expected) { + Thread.sleep(50); + } + } +} diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ThinClientRoutingGateTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ThinClientRoutingGateTests.java new file mode 100644 index 000000000000..6dd9d6985858 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ThinClientRoutingGateTests.java @@ -0,0 +1,155 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +package com.azure.cosmos.implementation; + +import org.mockito.Mockito; +import org.testng.annotations.Test; + +import static org.assertj.core.api.Assertions.assertThat; + +/** + * Unit tests for {@link RxDocumentClientImpl#shouldUseThinClientStoreModel(boolean, boolean, boolean, RxDocumentServiceRequest)}. + * + *

These tests pin the exact wiring of the routing gate so that the probe-health bit + * actually flips traffic between the thin-client store model and the gateway-V1 store model. + * Prior to extracting the helper, this gate was buried inside {@code RxDocumentClientImpl} + * and exercised only via end-to-end tests, which made probe-fallback regressions hard to + * catch in CI. + */ +public class ThinClientRoutingGateTests { + + private static RxDocumentServiceRequest mockDocumentRequest(OperationType op) { + RxDocumentServiceRequest request = Mockito.mock(RxDocumentServiceRequest.class); + Mockito.when(request.getResourceType()).thenReturn(ResourceType.Document); + Mockito.when(request.getOperationType()).thenReturn(op); + Mockito.when(request.isChangeFeedRequest()).thenReturn(false); + Mockito.when(request.isAllVersionsAndDeletesChangeFeedMode()).thenReturn(false); + return request; + } + + @Test(groups = "unit") + public void allConditionsTrue_routesToThinClient() { + RxDocumentServiceRequest request = mockDocumentRequest(OperationType.Read); + assertThat(RxDocumentClientImpl.shouldUseThinClientStoreModel(true, true, true, request)).isTrue(); + } + + @Test(groups = "unit") + public void probeUnhealthy_routesToGatewayV1() { + RxDocumentServiceRequest request = mockDocumentRequest(OperationType.Read); + // Probe says proxy is down — even with thin-client enabled and read locations present, + // the SDK must fall back to Gateway V1 until the next GREEN cycle restores routing. 
+ assertThat(RxDocumentClientImpl.shouldUseThinClientStoreModel(true, true, false, request)).isFalse(); + } + + @Test(groups = "unit") + public void thinClientDisabled_routesToGatewayV1() { + RxDocumentServiceRequest request = mockDocumentRequest(OperationType.Read); + assertThat(RxDocumentClientImpl.shouldUseThinClientStoreModel(false, true, true, request)).isFalse(); + } + + @Test(groups = "unit") + public void noThinClientReadLocations_routesToGatewayV1() { + RxDocumentServiceRequest request = mockDocumentRequest(OperationType.Read); + assertThat(RxDocumentClientImpl.shouldUseThinClientStoreModel(true, false, true, request)).isFalse(); + } + + @Test(groups = "unit") + public void nonDocumentResource_routesToGatewayV1() { + RxDocumentServiceRequest request = Mockito.mock(RxDocumentServiceRequest.class); + Mockito.when(request.getResourceType()).thenReturn(ResourceType.DocumentCollection); + Mockito.when(request.getOperationType()).thenReturn(OperationType.Read); + // Metadata-style reads (DocumentCollection, etc.) must continue through gateway V1 + // even when probe is GREEN and thin-client is enabled. 
+ assertThat(RxDocumentClientImpl.shouldUseThinClientStoreModel(true, true, true, request)).isFalse(); + } + + @Test(groups = "unit") + public void documentQuery_routesToThinClient() { + RxDocumentServiceRequest request = mockDocumentRequest(OperationType.Query); + assertThat(RxDocumentClientImpl.shouldUseThinClientStoreModel(true, true, true, request)).isTrue(); + } + + @Test(groups = "unit") + public void batchOperation_routesToThinClient() { + RxDocumentServiceRequest request = mockDocumentRequest(OperationType.Batch); + assertThat(RxDocumentClientImpl.shouldUseThinClientStoreModel(true, true, true, request)).isTrue(); + } + + @Test(groups = "unit") + public void allVersionsAndDeletesChangeFeed_routesToGatewayV1() { + RxDocumentServiceRequest request = Mockito.mock(RxDocumentServiceRequest.class); + Mockito.when(request.getResourceType()).thenReturn(ResourceType.Document); + Mockito.when(request.getOperationType()).thenReturn(OperationType.ReadFeed); + Mockito.when(request.isChangeFeedRequest()).thenReturn(true); + Mockito.when(request.isAllVersionsAndDeletesChangeFeedMode()).thenReturn(true); + // AllVersionsAndDeletes change feed must NOT go through the proxy. 
+ assertThat(RxDocumentClientImpl.shouldUseThinClientStoreModel(true, true, true, request)).isFalse(); + } + + @Test(groups = "unit") + public void incrementalChangeFeed_routesToThinClient() { + RxDocumentServiceRequest request = Mockito.mock(RxDocumentServiceRequest.class); + Mockito.when(request.getResourceType()).thenReturn(ResourceType.Document); + Mockito.when(request.getOperationType()).thenReturn(OperationType.ReadFeed); + Mockito.when(request.isChangeFeedRequest()).thenReturn(true); + Mockito.when(request.isAllVersionsAndDeletesChangeFeedMode()).thenReturn(false); + assertThat(RxDocumentClientImpl.shouldUseThinClientStoreModel(true, true, true, request)).isTrue(); + } + + // --- QueryPlan + Stored Procedure routing to Gateway V2 (PR #47759) --- + + @Test(groups = "unit") + public void executeStoredProcedure_onStoredProcedureResource_routesToThinClient() { + // Sproc execute lives on ResourceType.StoredProcedure, not Document. The gate must + // make a carve-out via isExecuteStoredProcedureBasedRequest() so the request still + // reaches the proxy and gets routed to Gateway V2. + RxDocumentServiceRequest request = Mockito.mock(RxDocumentServiceRequest.class); + Mockito.when(request.getResourceType()).thenReturn(ResourceType.StoredProcedure); + Mockito.when(request.getOperationType()).thenReturn(OperationType.ExecuteJavaScript); + Mockito.when(request.isExecuteStoredProcedureBasedRequest()).thenReturn(true); + assertThat(RxDocumentClientImpl.shouldUseThinClientStoreModel(true, true, true, request)).isTrue(); + } + + @Test(groups = "unit") + public void nonExecuteStoredProcedureResource_routesToGatewayV1() { + // CRUD on the StoredProcedure resource (create/replace/delete sproc definition) must + // continue to flow through Gateway V1 — only the execute path is proxied. 
+ RxDocumentServiceRequest request = Mockito.mock(RxDocumentServiceRequest.class); + Mockito.when(request.getResourceType()).thenReturn(ResourceType.StoredProcedure); + Mockito.when(request.getOperationType()).thenReturn(OperationType.Create); + Mockito.when(request.isExecuteStoredProcedureBasedRequest()).thenReturn(false); + assertThat(RxDocumentClientImpl.shouldUseThinClientStoreModel(true, true, true, request)).isFalse(); + } + + @Test(groups = "unit") + public void queryPlanOperation_routesToThinClient() { + // QueryPlan is fetched on the Document resource with a dedicated operation type. + // The gate explicitly enumerates OperationType.QueryPlan so plan retrieval is proxied. + RxDocumentServiceRequest request = Mockito.mock(RxDocumentServiceRequest.class); + Mockito.when(request.getResourceType()).thenReturn(ResourceType.Document); + Mockito.when(request.getOperationType()).thenReturn(OperationType.QueryPlan); + Mockito.when(request.isExecuteStoredProcedureBasedRequest()).thenReturn(false); + assertThat(RxDocumentClientImpl.shouldUseThinClientStoreModel(true, true, true, request)).isTrue(); + } + + @Test(groups = "unit") + public void queryPlanOperation_probeUnhealthy_routesToGatewayV1() { + // Probe fallback must also gate QueryPlan traffic — when the proxy is unhealthy, + // plan fetches must fall back to Gateway V1 just like document reads. + RxDocumentServiceRequest request = Mockito.mock(RxDocumentServiceRequest.class); + Mockito.when(request.getResourceType()).thenReturn(ResourceType.Document); + Mockito.when(request.getOperationType()).thenReturn(OperationType.QueryPlan); + assertThat(RxDocumentClientImpl.shouldUseThinClientStoreModel(true, true, false, request)).isFalse(); + } + + @Test(groups = "unit") + public void executeStoredProcedure_probeUnhealthy_routesToGatewayV1() { + // Sproc execute must also respect probe health — even with the resource-type carve-out, + // a RED probe forces fallback to Gateway V1. 
+ RxDocumentServiceRequest request = Mockito.mock(RxDocumentServiceRequest.class); + Mockito.when(request.getResourceType()).thenReturn(ResourceType.StoredProcedure); + Mockito.when(request.getOperationType()).thenReturn(OperationType.ExecuteJavaScript); + Mockito.when(request.isExecuteStoredProcedureBasedRequest()).thenReturn(true); + assertThat(RxDocumentClientImpl.shouldUseThinClientStoreModel(true, true, false, request)).isFalse(); + } +} diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/UserAgentContainerTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/UserAgentContainerTest.java index d5f8744fae0b..ee3c4ea7fc40 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/UserAgentContainerTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/UserAgentContainerTest.java @@ -108,7 +108,11 @@ public void UserAgentIntegration() { (RxDocumentClientImpl) ReflectionUtils.getAsyncDocumentClient(gatewayClient); UserAgentContainer userAgentContainer = ReflectionUtils.getUserAgentContainer(documentClient); String expectedString = getUserAgentFixedPart() + SPACE + userProvidedSuffix; - assertThat(userAgentContainer.getUserAgent()).isEqualTo(expectedString); + // ThinClient (1 << 2) is included in the user-agent feature flags by default + // because COSMOS.THINCLIENT_ENABLED defaults to true. The suffix is added by + // RxDocumentClientImpl.addUserAgentSuffix at client construction. 
+ String expectedStringWithFeatureFlags = expectedString + "|F4"; + assertThat(userAgentContainer.getUserAgent()).isEqualTo(expectedStringWithFeatureFlags); directClient = new CosmosClientBuilder() .endpoint(TestConfigurations.HOST) @@ -119,7 +123,7 @@ public void UserAgentIntegration() { .buildAsyncClient(); documentClient = (RxDocumentClientImpl) ReflectionUtils.getAsyncDocumentClient(directClient); userAgentContainer = ReflectionUtils.getUserAgentContainer(documentClient); - assertThat(userAgentContainer.getUserAgent()).isEqualTo(expectedString); + assertThat(userAgentContainer.getUserAgent()).isEqualTo(expectedStringWithFeatureFlags); } finally { if (gatewayClient != null) { gatewayClient.close(); diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/PartitionKeyInternalTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/PartitionKeyInternalTest.java index 427d8c763a7d..b565658f1533 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/PartitionKeyInternalTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/PartitionKeyInternalTest.java @@ -14,12 +14,18 @@ import com.azure.cosmos.implementation.routing.PartitionKeyInternal; import com.azure.cosmos.implementation.routing.PartitionKeyInternalHelper; import com.azure.cosmos.implementation.routing.PartitionKeyInternalUtils; +import com.azure.cosmos.implementation.routing.Range; import com.azure.cosmos.implementation.guava25.collect.ImmutableList; import com.azure.cosmos.implementation.guava25.collect.Lists; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.node.ObjectNode; +import com.fasterxml.jackson.databind.node.ArrayNode; + import java.util.ArrayList; +import 
java.util.List; import java.util.function.BiFunction; import static org.assertj.core.api.AssertionsForClassTypes.assertThat; @@ -27,6 +33,8 @@ public class PartitionKeyInternalTest { + private static final ObjectMapper MAPPER = new ObjectMapper(); + /** * Tests serialization of empty partition key. */ @@ -473,4 +481,117 @@ private static void verifyEffectivePartitionKeyEncoding(String buffer, int lengt PartitionKeyInternal pk = PartitionKeyInternalUtils.createPartitionKeyInternal(buffer.substring(0, length)); assertThat(PartitionKeyInternalHelper.getEffectivePartitionKeyString(pk, pkDefinition)).isEqualTo(expectedValue); } + + // ==================== convertToSortedEpkRanges Unit Tests ==================== + + private static PartitionKeyDefinition singleHashPkDef() { + PartitionKeyDefinition pkDef = new PartitionKeyDefinition(); + pkDef.setPaths(ImmutableList.of("/pk")); + pkDef.setVersion(PartitionKeyDefinitionVersion.V2); + pkDef.setKind(PartitionKind.HASH); + return pkDef; + } + + @Test(groups = "unit") + public void convertToSortedEpkRangesSingleValueRange() { + // Single PK value range: min=["testValue"], max=["testValue"] + // This is what ServiceInterop returns for WHERE pk = 'testValue' + ObjectNode json = MAPPER.createObjectNode(); + ArrayNode ranges = json.putArray("queryRanges"); + ObjectNode range = ranges.addObject(); + range.putArray("min").add("testValue"); + range.putArray("max").add("testValue"); + range.put("isMinInclusive", true); + range.put("isMaxInclusive", true); + + List> result = PartitionKeyInternalHelper.convertToSortedEpkRanges("queryRanges", json, singleHashPkDef()); + + assertThat(result.size()).isEqualTo(1); + // EPK for a string value is a non-empty hex hash + assertThat(result.get(0).getMin()).isNotNull(); + assertThat(result.get(0).getMin().length()).isGreaterThan(0); + assertThat(result.get(0).getMin()).isEqualTo(result.get(0).getMax()); + assertThat(result.get(0).isMinInclusive()).isTrue(); + 
assertThat(result.get(0).isMaxInclusive()).isTrue(); + } + + @Test(groups = "unit") + public void convertToSortedEpkRangesMultipleRangesSorted() { + // Multiple ranges that need sorting after EPK conversion + ObjectNode json = MAPPER.createObjectNode(); + ArrayNode ranges = json.putArray("queryRanges"); + + // Add two ranges with different PK values — EPK hash order may differ from insertion order + ObjectNode range1 = ranges.addObject(); + range1.putArray("min").add("zzzValue"); + range1.putArray("max").add("zzzValue"); + range1.put("isMinInclusive", true); + range1.put("isMaxInclusive", true); + + ObjectNode range2 = ranges.addObject(); + range2.putArray("min").add("aaaValue"); + range2.putArray("max").add("aaaValue"); + range2.put("isMinInclusive", true); + range2.put("isMaxInclusive", true); + + List> result = PartitionKeyInternalHelper.convertToSortedEpkRanges("queryRanges", json, singleHashPkDef()); + + assertThat(result.size()).isEqualTo(2); + // Verify sorted by min EPK (ascending) + assertThat(result.get(0).getMin().compareTo(result.get(1).getMin())).isLessThanOrEqualTo(0); + } + + @Test(groups = "unit", expectedExceptions = IllegalStateException.class) + public void convertToSortedEpkRangesMissingQueryRangesThrows() { + // Missing queryRanges property entirely + ObjectNode json = MAPPER.createObjectNode(); + json.put("queryInfo", "someValue"); + + PartitionKeyInternalHelper.convertToSortedEpkRanges("queryRanges", json, singleHashPkDef()); + } + + @Test(groups = "unit", expectedExceptions = IllegalStateException.class) + public void convertToSortedEpkRangesNonArrayQueryRangesThrows() { + // queryRanges is a string instead of array + ObjectNode json = MAPPER.createObjectNode(); + json.put("queryRanges", "notAnArray"); + + PartitionKeyInternalHelper.convertToSortedEpkRanges("queryRanges", json, singleHashPkDef()); + } + + @Test(groups = "unit", expectedExceptions = IllegalStateException.class) + public void convertToSortedEpkRangesNonObjectElementThrows() { 
+ // queryRanges array contains a string instead of object + ObjectNode json = MAPPER.createObjectNode(); + json.putArray("queryRanges").add("notAnObject"); + + PartitionKeyInternalHelper.convertToSortedEpkRanges("queryRanges", json, singleHashPkDef()); + } + + @Test(groups = "unit", expectedExceptions = IllegalStateException.class) + public void convertToSortedEpkRangesNullMinBoundaryThrows() { + // Range with null min boundary + ObjectNode json = MAPPER.createObjectNode(); + ArrayNode ranges = json.putArray("queryRanges"); + ObjectNode range = ranges.addObject(); + range.putNull("min"); + range.putArray("max").add("value"); + range.put("isMinInclusive", true); + range.put("isMaxInclusive", false); + + PartitionKeyInternalHelper.convertToSortedEpkRanges("queryRanges", json, singleHashPkDef()); + } + + @Test(groups = "unit", expectedExceptions = IllegalStateException.class) + public void convertToSortedEpkRangesMissingInclusiveFieldsThrows() { + // Range missing isMinInclusive and isMaxInclusive + ObjectNode json = MAPPER.createObjectNode(); + ArrayNode ranges = json.putArray("queryRanges"); + ObjectNode range = ranges.addObject(); + range.putArray("min").add("value"); + range.putArray("max").add("value"); + // intentionally no isMinInclusive or isMaxInclusive + + PartitionKeyInternalHelper.convertToSortedEpkRanges("queryRanges", json, singleHashPkDef()); + } } diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/query/QueryPlanRetrieverSupportedFeaturesTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/query/QueryPlanRetrieverSupportedFeaturesTest.java new file mode 100644 index 000000000000..8a90efa89d76 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/query/QueryPlanRetrieverSupportedFeaturesTest.java @@ -0,0 +1,30 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+package com.azure.cosmos.implementation.query; + +import org.testng.annotations.Test; + +import java.lang.reflect.Field; + +import static org.assertj.core.api.Assertions.assertThat; + +public class QueryPlanRetrieverSupportedFeaturesTest { + + @Test(groups = {"unit"}) + public void supportedFeaturesIncludeOptimizedCountIfFlag() throws Exception { + String supportedFeatures = getQueryPlanRetrieverString("SUPPORTED_QUERY_FEATURES"); + + assertThat(supportedFeatures) + .contains(QueryFeature.CountIf.name()) + .doesNotContain("HybridSearchSkipOrderByRewrite") + .doesNotContain("ListAndSetAggregate"); + } + + private static String getQueryPlanRetrieverString(String fieldName) throws Exception { + Class queryPlanRetrieverClass = + Class.forName("com.azure.cosmos.implementation.query.QueryPlanRetriever"); + Field field = queryPlanRetrieverClass.getDeclaredField(fieldName); + field.setAccessible(true); + return (String) field.get(null); + } +} diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/SinglePartitionDocumentQueryTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/SinglePartitionDocumentQueryTest.java index 0aebb62bfcee..19c5f878a4dc 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/SinglePartitionDocumentQueryTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/SinglePartitionDocumentQueryTest.java @@ -98,20 +98,25 @@ public void querySinglePartitionDocuments() throws Exception { .getContainer(createdCollection.getId()); RxDocumentClientImpl asyncDocumentClient = (RxDocumentClientImpl) ReflectionUtils.getAsyncDocumentClient(client); RxStoreModel serverStoreModel = ReflectionUtils.getRxServerStoreModel(asyncDocumentClient); - RxStoreModel proxy = asyncDocumentClient.useThinClient() ? 
- ReflectionUtils.getThinProxy(asyncDocumentClient) : - ReflectionUtils.getGatewayProxy(asyncDocumentClient); - RxStoreModel spyServerStoreModel = Mockito.spy(serverStoreModel); - RxStoreModel spyProxy = Mockito.spy(proxy); + // Spy on BOTH the gateway proxy and (when applicable) the thin-client proxy. At runtime the SDK + // routes through whichever proxy `useThinClientStoreModel(request)` selects; that gate depends on + // the probe-healthy state from GlobalEndpointManager, which races with this test. Spying on both + // makes the assertion robust regardless of which path is currently active. + RxStoreModel gatewayProxy = ReflectionUtils.getGatewayProxy(asyncDocumentClient); + RxStoreModel spyGatewayProxy = Mockito.spy(gatewayProxy); + ReflectionUtils.setGatewayProxy(asyncDocumentClient, spyGatewayProxy); - ReflectionUtils.setServerStoreModel(asyncDocumentClient, spyServerStoreModel); + RxStoreModel spyThinProxy = null; if (asyncDocumentClient.useThinClient()) { - ReflectionUtils.setThinProxy(asyncDocumentClient, spyProxy); - } else { - ReflectionUtils.setGatewayProxy(asyncDocumentClient, spyProxy); + RxStoreModel thinProxy = ReflectionUtils.getThinProxy(asyncDocumentClient); + spyThinProxy = Mockito.spy(thinProxy); + ReflectionUtils.setThinProxy(asyncDocumentClient, spyThinProxy); } + RxStoreModel spyServerStoreModel = Mockito.spy(serverStoreModel); + ReflectionUtils.setServerStoreModel(asyncDocumentClient, spyServerStoreModel); + CosmosPagedFlux queryFlux = container .queryItems("select * from root", options, InternalObjectNode.class); @@ -123,7 +128,14 @@ public void querySinglePartitionDocuments() throws Exception { // In gateway mode, serverstoremodel is GatewayStoreModel/ThinClientStoreModel so below passes // In direct mode, serverStoreModel is ServerStoreModel. 
So queryPlan goes through gatewayProxy and the query // goes through the serverStoreModel - Mockito.verify(spyProxy, Mockito.times(1)).processMessage(Mockito.any()); + int gatewayInvocations = Mockito.mockingDetails(spyGatewayProxy).getInvocations().stream() + .filter(inv -> "processMessage".equals(inv.getMethod().getName())).toArray().length; + int thinInvocations = spyThinProxy == null ? 0 + : Mockito.mockingDetails(spyThinProxy).getInvocations().stream() + .filter(inv -> "processMessage".equals(inv.getMethod().getName())).toArray().length; + assertThat(gatewayInvocations + thinInvocations) + .as("Exactly one of gateway/thin proxy should have processed the query") + .isEqualTo(1); if (asyncDocumentClient.getConnectionPolicy().getConnectionMode() == ConnectionMode.DIRECT) { Mockito.verify(spyServerStoreModel, Mockito.times(1)).processMessage(Mockito.any()); } diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/TestSuiteBase.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/TestSuiteBase.java index 66b6e6314ad0..ea9058ecd20d 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/TestSuiteBase.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/TestSuiteBase.java @@ -180,6 +180,7 @@ protected static void executeWithRetry(Runnable action, int maxRetries, String c protected static final ImmutableList protocols; protected static final AzureKeyCredential credential; + protected static final boolean useAadAuth; protected int subscriberValidationTimeout = TIMEOUT; @@ -267,12 +268,21 @@ protected static CosmosAsyncContainer getSharedSinglePartitionCosmosContainer(Co objectMapper.configure(JsonParser.Feature.STRICT_DUPLICATE_DETECTION, true); credential = new AzureKeyCredential(TestConfigurations.MASTER_KEY); + useAadAuth = Boolean.parseBoolean(System.getProperty("COSMOS.USE_AAD_AUTH", "false")) + || Boolean.parseBoolean(System.getenv("COSMOS_USE_AAD_AUTH")); } private static 
ImmutableList immutableListOrNull(List list) { return list != null ? ImmutableList.copyOf(list) : null; } + protected static CosmosClientBuilder applyCredential(CosmosClientBuilder builder) { + if (useAadAuth) { + return builder.credential(new com.azure.identity.DefaultAzureCredentialBuilder().build()); + } + return builder.credential(credential); + } + private static class DatabaseManagerImpl implements CosmosDatabaseForTest.DatabaseManager { public static DatabaseManagerImpl getInstance(CosmosAsyncClient client) { return new DatabaseManagerImpl(client); @@ -1320,6 +1330,22 @@ public static Object[][] clientBuildersWithGatewayAndHttp2() { }; } + /** + * Data provider for thin client tests. Returns a gateway + HTTP/2 builder. + * Tests using this provider should enable thin client mode in their @BeforeClass + * by calling {@code System.setProperty("COSMOS.THINCLIENT_ENABLED", "true")} and + * clean up in @AfterClass with {@code System.clearProperty("COSMOS.THINCLIENT_ENABLED")}. + * + *

This provider can be adopted by existing test classes (e.g., query, stored procedure tests) + * to gradually add thin client coverage using the same test logic.

+ */ + @DataProvider + public static Object[][] clientBuildersWithThinClient() { + return new Object[][]{ + {createGatewayRxDocumentClient(TestConfigurations.HOST, null, true, null, true, true, true)}, + }; + } + @DataProvider public static Object[][] clientBuildersWithSessionConsistency() { return new Object[][]{ @@ -1646,12 +1672,11 @@ static protected CosmosClientBuilder createGatewayHouseKeepingDocumentClient(boo ThrottlingRetryOptions options = new ThrottlingRetryOptions(); options.setMaxRetryWaitTime(Duration.ofSeconds(SUITE_SETUP_TIMEOUT)); GatewayConnectionConfig gatewayConnectionConfig = new GatewayConnectionConfig(); - return new CosmosClientBuilder().endpoint(TestConfigurations.HOST) - .credential(credential) + return applyCredential(new CosmosClientBuilder().endpoint(TestConfigurations.HOST) .gatewayMode(gatewayConnectionConfig) .throttlingRetryOptions(options) .contentResponseOnWriteEnabled(contentResponseOnWriteEnabled) - .consistencyLevel(ConsistencyLevel.SESSION); + .consistencyLevel(ConsistencyLevel.SESSION)); } static protected CosmosClientBuilder createGatewayRxDocumentClient( @@ -1690,13 +1715,12 @@ static protected CosmosClientBuilder createGatewayRxDocumentClient( gatewayConnectionConfig.setHttp2ConnectionConfig(http2ConnectionConfig); } - CosmosClientBuilder builder = new CosmosClientBuilder().endpoint(endpoint) - .credential(credential) + CosmosClientBuilder builder = applyCredential(new CosmosClientBuilder().endpoint(endpoint) .gatewayMode(gatewayConnectionConfig) .multipleWriteRegionsEnabled(multiMasterEnabled) .preferredRegions(preferredRegions) .contentResponseOnWriteEnabled(contentResponseOnWriteEnabled) - .consistencyLevel(consistencyLevel); + .consistencyLevel(consistencyLevel)); ImplementationBridgeHelpers .CosmosClientBuilderHelper .getCosmosClientBuilderAccessor() @@ -1719,11 +1743,12 @@ static protected CosmosClientBuilder createDirectRxDocumentClient(ConsistencyLev List preferredRegions, boolean contentResponseOnWriteEnabled, 
boolean retryOnThrottledRequests) { - CosmosClientBuilder builder = new CosmosClientBuilder().endpoint(TestConfigurations.HOST) - .credential(credential) - .directMode(DirectConnectionConfig.getDefaultConfig()) - .contentResponseOnWriteEnabled(contentResponseOnWriteEnabled) - .consistencyLevel(consistencyLevel); + CosmosClientBuilder builder = applyCredential( + new CosmosClientBuilder() + .endpoint(TestConfigurations.HOST) + .directMode(DirectConnectionConfig.getDefaultConfig()) + .contentResponseOnWriteEnabled(contentResponseOnWriteEnabled) + .consistencyLevel(consistencyLevel)); if (preferredRegions != null) { builder.preferredRegions(preferredRegions); diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/ThinClientChangeFeedE2ETest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/ThinClientChangeFeedE2ETest.java new file mode 100644 index 000000000000..8a24adb46121 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/ThinClientChangeFeedE2ETest.java @@ -0,0 +1,120 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +package com.azure.cosmos.rx; + +import com.azure.cosmos.CosmosClientBuilder; +import com.azure.cosmos.CosmosDiagnostics; +import com.azure.cosmos.models.CosmosBatch; +import com.azure.cosmos.models.CosmosChangeFeedRequestOptions; +import com.azure.cosmos.models.FeedRange; +import com.azure.cosmos.models.FeedResponse; +import com.azure.cosmos.models.PartitionKey; +import com.fasterxml.jackson.databind.node.ObjectNode; +import org.testng.annotations.Factory; +import org.testng.annotations.Test; + +import java.util.ArrayList; +import java.util.List; +import java.util.UUID; + +import static org.assertj.core.api.AssertionsForClassTypes.assertThat; + +/** + * Thin client E2E tests for change feed operations. + * Container is truncated in {@code @BeforeClass} — no per-test cleanup needed. 
+ */ +public class ThinClientChangeFeedE2ETest extends ThinClientTestBase { + + @Factory(dataProvider = "clientBuildersWithGatewayAndHttp2") + public ThinClientChangeFeedE2ETest(CosmosClientBuilder clientBuilder) { + super(clientBuilder); + } + + @Test(groups = {"thinclient"}, timeOut = TIMEOUT) + public void testThinClientIncrementalChangeFeed() { + String pkValue = UUID.randomUUID().toString(); + ObjectNode doc1 = createTestDocument(UUID.randomUUID().toString(), pkValue); + ObjectNode doc2 = createTestDocument(UUID.randomUUID().toString(), pkValue); + + CosmosBatch batch = CosmosBatch.createCosmosBatch(new PartitionKey(pkValue)); + batch.createItemOperation(doc1); + batch.createItemOperation(doc2); + container.executeCosmosBatch(batch).block(); + + // Scope change feed to the specific logical partition to avoid + // consuming changes from other tests or partitions. + CosmosChangeFeedRequestOptions options = CosmosChangeFeedRequestOptions + .createForProcessingFromBeginning(FeedRange.forLogicalPartition(new PartitionKey(pkValue))); + + List changeFeedResults = new ArrayList<>(); + List allDiag = new ArrayList<>(); + for (FeedResponse page : container.queryChangeFeed(options, ObjectNode.class).byPage().toIterable()) { + changeFeedResults.addAll(page.getResults()); + allDiag.add(page.getCosmosDiagnostics()); + if (page.getResults().isEmpty()) { + break; + } + } + + assertThat(changeFeedResults.size()).isGreaterThanOrEqualTo(2); + for (CosmosDiagnostics d : allDiag) { + assertThinClientEndpointUsed(d); + } + } + + @Test(groups = {"thinclient"}, timeOut = TIMEOUT) + public void testThinClientChangeFeedFullRange() { + // Insert docs across two different partition keys so the full-range feed spans multiple partitions. 
+ String pk1 = "cfFullRange1_" + UUID.randomUUID().toString().substring(0, 8); + String pk2 = "cfFullRange2_" + UUID.randomUUID().toString().substring(0, 8); + container.createItem(createTestDocument(UUID.randomUUID().toString(), pk1)).block(); + container.createItem(createTestDocument(UUID.randomUUID().toString(), pk2)).block(); + + CosmosChangeFeedRequestOptions options = CosmosChangeFeedRequestOptions + .createForProcessingFromBeginning(FeedRange.forFullRange()); + + List changeFeedResults = new ArrayList<>(); + List allDiag = new ArrayList<>(); + for (FeedResponse page : container.queryChangeFeed(options, ObjectNode.class).byPage().toIterable()) { + changeFeedResults.addAll(page.getResults()); + allDiag.add(page.getCosmosDiagnostics()); + if (page.getResults().isEmpty()) { + break; + } + } + + assertThat(changeFeedResults.size()).isGreaterThanOrEqualTo(2); + for (CosmosDiagnostics d : allDiag) { + assertThinClientEndpointUsed(d); + } + } + + @Test(groups = {"thinclient"}, timeOut = TIMEOUT) + public void testThinClientChangeFeedPartitionKey() { + String pkValue = "cfPk_" + UUID.randomUUID().toString().substring(0, 8); + container.createItem(createTestDocument(UUID.randomUUID().toString(), pkValue)).block(); + container.createItem(createTestDocument(UUID.randomUUID().toString(), pkValue)).block(); + + CosmosChangeFeedRequestOptions options = CosmosChangeFeedRequestOptions + .createForProcessingFromBeginning(FeedRange.forLogicalPartition(new PartitionKey(pkValue))); + + List changeFeedResults = new ArrayList<>(); + List allDiag = new ArrayList<>(); + for (FeedResponse page : container.queryChangeFeed(options, ObjectNode.class).byPage().toIterable()) { + changeFeedResults.addAll(page.getResults()); + allDiag.add(page.getCosmosDiagnostics()); + if (page.getResults().isEmpty()) { + break; + } + } + + // Should only see the 2 docs from this partition key + assertThat(changeFeedResults.size()).isEqualTo(2); + for (ObjectNode result : changeFeedResults) { + 
assertThat(result.get(PARTITION_KEY_FIELD).asText()).isEqualTo(pkValue); + } + for (CosmosDiagnostics d : allDiag) { + assertThinClientEndpointUsed(d); + } + } +} diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/ThinClientPointOperationE2ETest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/ThinClientPointOperationE2ETest.java new file mode 100644 index 000000000000..1233f72dacfb --- /dev/null +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/ThinClientPointOperationE2ETest.java @@ -0,0 +1,120 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +package com.azure.cosmos.rx; + +import com.azure.cosmos.CosmosClientBuilder; +import com.azure.cosmos.models.CosmosBatch; +import com.azure.cosmos.models.CosmosBatchResponse; +import com.azure.cosmos.models.CosmosBulkItemResponse; +import com.azure.cosmos.models.CosmosBulkOperationResponse; +import com.azure.cosmos.models.CosmosBulkOperations; +import com.azure.cosmos.models.CosmosItemRequestOptions; +import com.azure.cosmos.models.CosmosItemResponse; +import com.azure.cosmos.models.CosmosPatchOperations; +import com.azure.cosmos.models.PartitionKey; +import com.fasterxml.jackson.databind.node.ObjectNode; +import org.testng.annotations.Factory; +import org.testng.annotations.Test; +import reactor.core.publisher.Flux; + +import java.util.List; +import java.util.UUID; + +import static org.assertj.core.api.AssertionsForClassTypes.assertThat; + +/** + * Thin client E2E tests for point operations: Create, Read, Replace, Upsert, Patch, Delete, Bulk, Batch. + * Container is truncated in {@code @BeforeClass} — no per-test cleanup needed. 
+ */ +public class ThinClientPointOperationE2ETest extends ThinClientTestBase { + + @Factory(dataProvider = "clientBuildersWithGatewayAndHttp2") + public ThinClientPointOperationE2ETest(CosmosClientBuilder clientBuilder) { + super(clientBuilder); + } + + @Test(groups = {"thinclient"}, timeOut = TIMEOUT) + public void testThinClientDocumentPointOperations() { + String idValue = UUID.randomUUID().toString(); + ObjectNode doc = createTestDocument(idValue, idValue); + + // create + CosmosItemResponse createResponse = container.createItem(doc).block(); + assertThat(createResponse.getStatusCode()).isEqualTo(201); + assertThat(createResponse.getRequestCharge()).isGreaterThan(0.0); + assertThinClientEndpointUsed(createResponse.getDiagnostics()); + + // read + CosmosItemResponse readResponse = container.readItem(idValue, new PartitionKey(idValue), ObjectNode.class).block(); + assertThat(readResponse.getStatusCode()).isEqualTo(200); + assertThinClientEndpointUsed(readResponse.getDiagnostics()); + + String idValue2 = UUID.randomUUID().toString(); + ObjectNode doc2 = createTestDocument(idValue2, idValue); + + // replace + CosmosItemResponse replaceResponse = container.replaceItem(doc2, idValue, new PartitionKey(idValue)).block(); + assertThat(replaceResponse.getStatusCode()).isEqualTo(200); + assertThinClientEndpointUsed(replaceResponse.getDiagnostics()); + + // upsert + ObjectNode doc3 = createTestDocument(idValue2, idValue); + doc3.put("newField", "newValue"); + CosmosItemResponse upsertResponse = container.upsertItem(doc3, new PartitionKey(idValue), new CosmosItemRequestOptions()).block(); + assertThat(upsertResponse.getStatusCode()).isEqualTo(200); + assertThinClientEndpointUsed(upsertResponse.getDiagnostics()); + + CosmosItemResponse readAfterUpsertResponse = container.readItem(idValue2, new PartitionKey(idValue), ObjectNode.class).block(); + assertThat(readAfterUpsertResponse.getItem().get("newField").asText()).isEqualTo("newValue"); + + // patch + CosmosPatchOperations 
patchOperations = CosmosPatchOperations.create(); + patchOperations.add("/anotherNewField", "anotherNewValue"); + patchOperations.replace("/newField", "patchedNewField"); + CosmosItemResponse patchResponse = container.patchItem(idValue2, new PartitionKey(idValue), patchOperations, ObjectNode.class).block(); + assertThat(patchResponse.getStatusCode()).isEqualTo(200); + assertThinClientEndpointUsed(patchResponse.getDiagnostics()); + + CosmosItemResponse readAfterPatchResponse = container.readItem(idValue2, new PartitionKey(idValue), ObjectNode.class).block(); + assertThat(readAfterPatchResponse.getItem().get("newField").asText()).isEqualTo("patchedNewField"); + assertThat(readAfterPatchResponse.getItem().get("anotherNewField").asText()).isEqualTo("anotherNewValue"); + + // delete + CosmosItemResponse deleteResponse = container.deleteItem(idValue2, new PartitionKey(idValue)).block(); + assertThat(deleteResponse.getStatusCode()).isEqualTo(204); + assertThinClientEndpointUsed(deleteResponse.getDiagnostics()); + } + + @Test(groups = {"thinclient"}, timeOut = TIMEOUT) + public void testThinClientBulk() { + String idValue = UUID.randomUUID().toString(); + ObjectNode doc = createTestDocument(idValue, idValue); + + Flux> responsesFlux = container.executeBulkOperations(Flux.just( + CosmosBulkOperations.getCreateItemOperation(doc, new PartitionKey(idValue)) + )); + + List> responses = responsesFlux.collectList().block(); + assertThat(responses.size()).isEqualTo(1); + CosmosBulkItemResponse bulkResponse = responses.get(0).getResponse(); + assertThat(bulkResponse.isSuccessStatusCode()).isEqualTo(true); + assertThinClientEndpointUsed(bulkResponse.getCosmosDiagnostics()); + } + + @Test(groups = {"thinclient"}, timeOut = TIMEOUT) + public void testThinClientBatch() { + String pkValue = UUID.randomUUID().toString(); + String idValue1 = UUID.randomUUID().toString(); + String idValue2 = UUID.randomUUID().toString(); + ObjectNode doc1 = createTestDocument(idValue1, pkValue); + 
ObjectNode doc2 = createTestDocument(idValue2, pkValue); + + CosmosBatch batch = CosmosBatch.createCosmosBatch(new PartitionKey(pkValue)); + batch.createItemOperation(doc1); + batch.createItemOperation(doc2); + + CosmosBatchResponse response = container.executeCosmosBatch(batch).block(); + assertThat(response.getStatusCode()).isEqualTo(200); + assertThinClientEndpointUsed(response.getDiagnostics()); + } +} diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/ThinClientQueryE2ETest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/ThinClientQueryE2ETest.java new file mode 100644 index 000000000000..ffa1246ef368 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/ThinClientQueryE2ETest.java @@ -0,0 +1,1209 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +package com.azure.cosmos.rx; + +import com.azure.cosmos.ConsistencyLevel; +import com.azure.cosmos.CosmosAsyncClient; +import com.azure.cosmos.CosmosAsyncContainer; +import com.azure.cosmos.CosmosAsyncDatabase; +import com.azure.cosmos.CosmosClientBuilder; +import com.azure.cosmos.CosmosDiagnostics; +import com.azure.cosmos.CosmosException; +import com.azure.cosmos.models.CosmosBulkOperations; +import com.azure.cosmos.models.CosmosContainerProperties; +import com.azure.cosmos.models.CosmosFullTextIndex; +import com.azure.cosmos.models.CosmosFullTextPath; +import com.azure.cosmos.models.CosmosFullTextPolicy; +import com.azure.cosmos.models.CosmosItemOperation; +import com.azure.cosmos.models.CosmosQueryRequestOptions; +import com.azure.cosmos.models.CosmosVectorDataType; +import com.azure.cosmos.models.CosmosVectorDistanceFunction; +import com.azure.cosmos.models.CosmosVectorEmbedding; +import com.azure.cosmos.models.CosmosVectorEmbeddingPolicy; +import com.azure.cosmos.models.CosmosVectorIndexSpec; +import com.azure.cosmos.models.CosmosVectorIndexType; +import 
com.azure.cosmos.models.ExcludedPath; +import com.azure.cosmos.models.FeedResponse; +import com.azure.cosmos.models.IncludedPath; +import com.azure.cosmos.models.IndexingMode; +import com.azure.cosmos.models.IndexingPolicy; +import com.azure.cosmos.models.PartitionKey; +import com.azure.cosmos.models.PartitionKeyDefinition; +import com.azure.cosmos.models.SqlParameter; +import com.azure.cosmos.models.SqlQuerySpec; +import com.azure.cosmos.models.ThroughputProperties; +import com.azure.cosmos.implementation.TestConfigurations; +import com.azure.cosmos.implementation.directconnectivity.Protocol; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.node.ArrayNode; +import com.fasterxml.jackson.databind.node.ObjectNode; +import org.testng.annotations.AfterClass; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.Test; +import reactor.core.publisher.Flux; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.UUID; +import java.util.stream.Collectors; + +import static com.azure.cosmos.rx.ThinClientTestBase.assertThinClientEndpointUsed; +import static org.assertj.core.api.AssertionsForClassTypes.assertThat; +import static org.assertj.core.api.Fail.fail; + +/** + * Thin client query E2E tests comparing Direct TCP (baseline) vs Gateway V2 (thin client). + *

+ * Each query is executed through both connection modes and results are compared: + *

<ul>
+ *   <li>Direct TCP — baseline, runs against backend partition replicas.</li>
+ *   <li>Gateway V2 (thin client) — system under test, routes through proxy (:10250),
+ *       proxy returns raw PartitionKeyInternal arrays, SDK converts to EPK client-side.</li>
+ * </ul>
+ * Assertions: + *
<ol>
+ *   <li>Gateway V2 requests routed through the :10250 thin client endpoint.</li>
+ *   <li>Result set sizes match between Direct and Gateway V2.</li>
+ *   <li>Result set contents match (document IDs in order for ordered queries, set equality for unordered).</li>
+ * </ol>
+ */ +public class ThinClientQueryE2ETest extends TestSuiteBase { + + private CosmosAsyncClient directClient; // Baseline: Direct TCP + private CosmosAsyncClient thinClient; // SUT: Gateway V2 (thin client) + private CosmosAsyncContainer directContainer; + private CosmosAsyncContainer thinClientContainer; + + private final List seededDocs = new ArrayList<>(); + private final String commonPk = "tc-query-" + UUID.randomUUID().toString().substring(0, 8); + + // Use constants and helpers from ThinClientTestBase to avoid duplication. + private static final String ID_FIELD = ThinClientTestBase.ID_FIELD; + private static final String PK_FIELD = ThinClientTestBase.PARTITION_KEY_FIELD; + private static final ObjectMapper OBJECT_MAPPER = ThinClientTestBase.OBJECT_MAPPER; + + @BeforeClass(groups = {"thinclient"}, timeOut = SETUP_TIMEOUT * 2) + public void before_ThinClientQueryE2ETest() { + try { + // 1. Direct TCP client (baseline) + CosmosClientBuilder directBuilder = createDirectRxDocumentClient(ConsistencyLevel.SESSION, Protocol.TCP, false, null, true, true); + this.directClient = directBuilder.buildAsyncClient(); + this.directContainer = getSharedMultiPartitionCosmosContainer(this.directClient); + + // 2. Gateway V2 thin client (system under test) + ThinClientTestBase.enableThinClientForTest(); + CosmosClientBuilder thinBuilder = createGatewayRxDocumentClient( + TestConfigurations.HOST, null, true, null, true, true, true); + this.thinClient = thinBuilder.buildAsyncClient(); + this.thinClientContainer = this.thinClient.getDatabase( + directContainer.getDatabase().getId()).getContainer(directContainer.getId()); + + // 3. Clean up shared container to prevent cross-test-class pollution + cleanUpContainer(this.directContainer); + + // 4. 
Seed diverse test data for broad query coverage + seedTestData(); + } catch (Exception e) { + // Clean up any clients that were successfully created before the failure + if (this.thinClient != null) { this.thinClient.close(); this.thinClient = null; } + if (this.directClient != null) { this.directClient.close(); this.directClient = null; } + throw e; + } + } + + /** + * Seeds 10 documents into the shared container with the following schema: + *
+     * {
+     *   "id":        "tcdoc-{i}-{uuid}",  // unique document ID
+     *   "mypk":      "{commonPk}",         // partition key — same for all seeded docs
+     *   "category":  string,               // one of: electronics, books, clothing, toys
+     *   "status":    string,               // "active" or "inactive"
+     *   "age":       int,                  // range: 8–61
+     *   "price":     double,               // range: 7.50–549.99
+     *   "idx":       int,                  // sequential index 0–9
+     *   "isActive":  boolean,              // derived from status == "active"
+     *   "address":   { "city": string, "zip": int },  // nested object
+     *   "scores":    [int, int],           // two-element int array: [i*10, i*10+5]
+     *   "tags":      [string, ...]         // variable-length string array
+     * }
+     * 
+ */ + private void seedTestData() { + String[] categories = {"electronics", "books", "clothing", "electronics", "books", + "clothing", "electronics", "toys", "toys", "books"}; + String[] statuses = {"active", "inactive", "active", "active", "inactive", + "active", "inactive", "active", "active", "active"}; + int[] ages = {25, 30, 17, 42, 55, 19, 38, 12, 8, 61}; + double[] prices = {99.99, 14.50, 45.00, 299.99, 9.99, 25.00, 549.99, 19.99, 7.50, 22.00}; + + for (int i = 0; i < 10; i++) { + String docId = "tcdoc-" + i + "-" + UUID.randomUUID().toString().substring(0, 8); + ObjectNode doc = OBJECT_MAPPER.createObjectNode(); + doc.put(ID_FIELD, docId); + doc.put(PK_FIELD, commonPk); + doc.put("category", categories[i]); + doc.put("status", statuses[i]); + doc.put("age", ages[i]); + doc.put("price", prices[i]); + doc.put("idx", i); + doc.put("isActive", statuses[i].equals("active")); + + ObjectNode address = OBJECT_MAPPER.createObjectNode(); + address.put("city", i % 2 == 0 ? "Seattle" : "Portland"); + address.put("zip", 98100 + i); + doc.set("address", address); + + doc.putArray("scores").add(i * 10).add(i * 10 + 5); + + // Tags array for JOIN/EXISTS tests — varies per doc + ArrayNode tags = doc.putArray("tags"); + tags.add(categories[i]); // first tag matches category + if (i % 2 == 0) tags.add("on-sale"); + if (i % 3 == 0) tags.add("featured"); + + seededDocs.add(doc); + } + + bulkInsert(directContainer, seededDocs).blockLast(); + } + + @AfterClass(groups = {"thinclient"}, timeOut = SHUTDOWN_TIMEOUT, alwaysRun = true) + public void afterClass() { + if (directContainer != null && !seededDocs.isEmpty()) { + try { + List deleteOps = seededDocs.stream() + .map(doc -> CosmosBulkOperations.getDeleteItemOperation( + doc.get(ID_FIELD).asText(), new PartitionKey(commonPk))) + .collect(Collectors.toList()); + directContainer.executeBulkOperations(Flux.fromIterable(deleteOps)).blockLast(); + } catch (Exception e) { + logger.warn("Bulk delete of seeded docs failed: {}", 
e.getMessage()); + } + } + ThinClientTestBase.clearThinClientForTest(); + if (this.thinClient != null) { this.thinClient.close(); } + if (this.directClient != null) { this.directClient.close(); } + } + + // ==================== Equality & Filter Tests ==================== + + @Test(groups = {"thinclient"}, timeOut = TIMEOUT) + public void testSelectAll() { + assertDirectAndThinClientMatch("SELECT * FROM c"); + } + + @Test(groups = {"thinclient"}, timeOut = TIMEOUT) + public void testWhereEquality() { + assertDirectAndThinClientMatch("SELECT * FROM c WHERE c.category = 'electronics'"); + } + + @Test(groups = {"thinclient"}, timeOut = TIMEOUT) + public void testWhereEqualityParameterized() { + SqlQuerySpec qs = new SqlQuerySpec("SELECT * FROM c WHERE c.category = @cat"); + qs.setParameters(Arrays.asList(new SqlParameter("@cat", "books"))); + assertDirectAndThinClientMatch(qs, partitionedOptions()); + } + + @Test(groups = {"thinclient"}, timeOut = TIMEOUT) + public void testWhereRangeGreaterThan() { + assertDirectAndThinClientMatch("SELECT * FROM c WHERE c.age > 30"); + } + + @Test(groups = {"thinclient"}, timeOut = TIMEOUT) + public void testWhereRangeLessThanOrEqual() { + assertDirectAndThinClientMatch("SELECT * FROM c WHERE c.price <= 25.00"); + } + + @Test(groups = {"thinclient"}, timeOut = TIMEOUT) + public void testWhereRangeBetween() { + assertDirectAndThinClientMatch("SELECT * FROM c WHERE c.age >= 18 AND c.age <= 40"); + } + + @Test(groups = {"thinclient"}, timeOut = TIMEOUT) + public void testWhereIn() { + assertDirectAndThinClientMatch("SELECT * FROM c WHERE c.category IN ('electronics', 'toys')"); + } + + @Test(groups = {"thinclient"}, timeOut = TIMEOUT) + public void testWhereCompoundAndOr() { + assertDirectAndThinClientMatch("SELECT * FROM c WHERE c.status = 'active' AND (c.category = 'electronics' OR c.category = 'books')"); + } + + @Test(groups = {"thinclient"}, timeOut = TIMEOUT) + public void testWhereNotEqual() { + 
assertDirectAndThinClientMatch("SELECT * FROM c WHERE c.status != 'inactive'"); + } + + @Test(groups = {"thinclient"}, timeOut = TIMEOUT) + public void testWhereBooleanField() { + assertDirectAndThinClientMatch("SELECT * FROM c WHERE c.isActive = true"); + } + + @Test(groups = {"thinclient"}, timeOut = TIMEOUT) + public void testWhereIsDefined() { + assertDirectAndThinClientMatch("SELECT * FROM c WHERE IS_DEFINED(c.address)"); + } + + @Test(groups = {"thinclient"}, timeOut = TIMEOUT) + public void testWhereStartsWith() { + assertDirectAndThinClientMatch("SELECT * FROM c WHERE STARTSWITH(c.category, 'elec')"); + } + + @Test(groups = {"thinclient"}, timeOut = TIMEOUT) + public void testWhereContains() { + assertDirectAndThinClientMatch("SELECT * FROM c WHERE CONTAINS(c.category, 'ook')"); + } + + @Test(groups = {"thinclient"}, timeOut = TIMEOUT) + public void testWhereArrayContains() { + assertDirectAndThinClientMatch("SELECT * FROM c WHERE ARRAY_CONTAINS(c.scores, 50)"); + } + + @Test(groups = {"thinclient"}, timeOut = TIMEOUT) + public void testWhereNestedProperty() { + assertDirectAndThinClientMatch("SELECT * FROM c WHERE c.address.city = 'Seattle'"); + } + + // ==================== Projection Tests ==================== + + @Test(groups = {"thinclient"}, timeOut = TIMEOUT) + public void testSelectSpecificFields() { + String query = "SELECT c.id, c.category, c.price FROM c"; + QueryResult gwResult = drainQuery(directContainer, query, partitionedOptions(), ObjectNode.class); + QueryResult tcResult = drainQuery(thinClientContainer, query, partitionedOptions(), ObjectNode.class); + for (CosmosDiagnostics d : tcResult.diagnostics) { assertThinClientEndpointUsed(d); } + + assertThat(tcResult.results.size()).as("Count mismatch: " + query).isEqualTo(gwResult.results.size()); + for (int i = 0; i < gwResult.results.size(); i++) { + assertThat(tcResult.results.get(i).get("category").asText()).isEqualTo(gwResult.results.get(i).get("category").asText()); + 
assertThat(tcResult.results.get(i).get("price").asDouble()).isEqualTo(gwResult.results.get(i).get("price").asDouble()); + } + } + + @Test(groups = {"thinclient"}, timeOut = TIMEOUT) + public void testSelectComputedAlias() { + String query = "SELECT c.id, c.price * 1.1 AS taxedPrice FROM c"; + QueryResult gwResult = drainQuery(directContainer, query, partitionedOptions(), ObjectNode.class); + QueryResult tcResult = drainQuery(thinClientContainer, query, partitionedOptions(), ObjectNode.class); + for (CosmosDiagnostics d : tcResult.diagnostics) { assertThinClientEndpointUsed(d); } + + assertThat(tcResult.results.size()).as("Count mismatch: " + query).isEqualTo(gwResult.results.size()); + } + + // ==================== ORDER BY Tests ==================== + + @Test(groups = {"thinclient"}, timeOut = TIMEOUT) + public void testOrderByAsc() { + assertDirectAndThinClientMatch("SELECT * FROM c ORDER BY c.age"); + } + + @Test(groups = {"thinclient"}, timeOut = TIMEOUT) + public void testOrderByDesc() { + assertDirectAndThinClientMatch("SELECT * FROM c ORDER BY c.price DESC"); + } + + // ==================== DISTINCT Tests ==================== + + @Test(groups = {"thinclient"}, timeOut = TIMEOUT) + public void testDistinctValue() { + assertScalarDirectAndThinClientMatch("SELECT DISTINCT VALUE c.category FROM c", String.class); + } + + @Test(groups = {"thinclient"}, timeOut = TIMEOUT) + public void testDistinctValueBoolean() { + assertScalarDirectAndThinClientMatch("SELECT DISTINCT VALUE c.isActive FROM c", Boolean.class); + } + + // ==================== TOP Tests ==================== + + @Test(groups = {"thinclient"}, timeOut = TIMEOUT) + public void testTop() { + assertDirectAndThinClientMatch("SELECT TOP 3 * FROM c"); + } + + @Test(groups = {"thinclient"}, timeOut = TIMEOUT) + public void testTopWithOrderBy() { + assertDirectAndThinClientMatch("SELECT TOP 5 * FROM c ORDER BY c.price DESC"); + } + + // ==================== Aggregate Tests ==================== + + 
@Test(groups = {"thinclient"}, timeOut = TIMEOUT) + public void testCount() { + assertScalarDirectAndThinClientMatch("SELECT VALUE COUNT(1) FROM c", Integer.class); + } + + @Test(groups = {"thinclient"}, timeOut = TIMEOUT) + public void testSum() { + assertScalarDirectAndThinClientMatch("SELECT VALUE SUM(c.price) FROM c", Double.class); + } + + @Test(groups = {"thinclient"}, timeOut = TIMEOUT) + public void testAvg() { + assertScalarDirectAndThinClientMatch("SELECT VALUE AVG(c.age) FROM c", Double.class); + } + + @Test(groups = {"thinclient"}, timeOut = TIMEOUT) + public void testMin() { + assertScalarDirectAndThinClientMatch("SELECT VALUE MIN(c.price) FROM c", Double.class); + } + + @Test(groups = {"thinclient"}, timeOut = TIMEOUT) + public void testMax() { + assertScalarDirectAndThinClientMatch("SELECT VALUE MAX(c.age) FROM c", Integer.class); + } + + // ==================== GROUP BY Tests ==================== + + @Test(groups = {"thinclient"}, timeOut = TIMEOUT) + public void testGroupByCount() { + assertGroupByDirectAndThinClientMatch("SELECT c.category, COUNT(1) as cnt FROM c GROUP BY c.category", "category"); + } + + @Test(groups = {"thinclient"}, timeOut = TIMEOUT) + public void testGroupBySumAvg() { + assertGroupByDirectAndThinClientMatch("SELECT c.category, SUM(c.price) as total, AVG(c.price) as avg FROM c GROUP BY c.category", "category"); + } + + // ==================== OFFSET / LIMIT Tests ==================== + + @Test(groups = {"thinclient"}, timeOut = TIMEOUT) + public void testOffsetLimit() { + assertDirectAndThinClientMatch("SELECT * FROM c ORDER BY c.idx OFFSET 3 LIMIT 4"); + } + + // ==================== JOIN Tests ==================== + + @Test(groups = {"thinclient"}, timeOut = TIMEOUT) + public void testJoinScoresArray() { + // Self-join on scores array — produces one row per array element + assertDirectAndThinClientMatch("SELECT c.id, s AS score FROM c JOIN s IN c.scores"); + } + + @Test(groups = {"thinclient"}, timeOut = TIMEOUT) + public 
void testJoinWithFilter() { + // Self-join with WHERE filter on the joined element + assertDirectAndThinClientMatch("SELECT c.id, s AS score FROM c JOIN s IN c.scores WHERE s >= 50"); + } + + @Test(groups = {"thinclient"}, timeOut = TIMEOUT) + public void testJoinTagsArray() { + // Self-join on tags string array + assertDirectAndThinClientMatch("SELECT c.id, t AS tag FROM c JOIN t IN c.tags"); + } + + // ==================== EXISTS Subquery Tests ==================== + + @Test(groups = {"thinclient"}, timeOut = TIMEOUT) + public void testExistsSubquery() { + // Docs pattern: use EXISTS to check if any array element matches + assertDirectAndThinClientMatch( + "SELECT * FROM c WHERE EXISTS (SELECT VALUE s FROM s IN c.scores WHERE s > 60)"); + } + + @Test(groups = {"thinclient"}, timeOut = TIMEOUT) + public void testExistsSubqueryWithStringMatch() { + // EXISTS on tags array with string match + assertDirectAndThinClientMatch( + "SELECT * FROM c WHERE EXISTS (SELECT VALUE t FROM t IN c.tags WHERE t = 'on-sale')"); + } + + @Test(groups = {"thinclient"}, timeOut = TIMEOUT) + public void testExistsAliasInProjection() { + // EXISTS aliased in SELECT — returns boolean column + assertDirectAndThinClientMatch( + "SELECT c.id, EXISTS (SELECT VALUE s FROM s IN c.scores WHERE s > 60) AS hasHighScore FROM c"); + } + + // ==================== LIKE Tests ==================== + + @Test(groups = {"thinclient"}, timeOut = TIMEOUT) + public void testLikePrefix() { + // LIKE with prefix pattern + assertDirectAndThinClientMatch("SELECT * FROM c WHERE c.category LIKE 'elec%'"); + } + + @Test(groups = {"thinclient"}, timeOut = TIMEOUT) + public void testLikeSuffix() { + // LIKE with suffix pattern + assertDirectAndThinClientMatch("SELECT * FROM c WHERE c.category LIKE '%ing'"); + } + + @Test(groups = {"thinclient"}, timeOut = TIMEOUT) + public void testLikeContains() { + // LIKE with contains pattern (substring match via wildcards) + assertDirectAndThinClientMatch("SELECT * FROM c WHERE 
c.category LIKE '%ook%'"); + } + + // ==================== BETWEEN Keyword ==================== + + @Test(groups = {"thinclient"}, timeOut = TIMEOUT) + public void testBetween() { + assertDirectAndThinClientMatch("SELECT * FROM c WHERE c.age BETWEEN 18 AND 40"); + } + + // ==================== String Function Tests ==================== + + @Test(groups = {"thinclient"}, timeOut = TIMEOUT) + public void testStringConcat() { + assertDirectAndThinClientMatch("SELECT CONCAT(c.category, '-', c.status) AS label FROM c"); + } + + @Test(groups = {"thinclient"}, timeOut = TIMEOUT) + public void testStringEndsWith() { + assertDirectAndThinClientMatch("SELECT * FROM c WHERE ENDSWITH(c.category, 'ics')"); + } + + @Test(groups = {"thinclient"}, timeOut = TIMEOUT) + public void testStringLower() { + assertDirectAndThinClientMatch("SELECT LOWER(c.category) AS lowerCat FROM c"); + } + + @Test(groups = {"thinclient"}, timeOut = TIMEOUT) + public void testStringUpper() { + assertDirectAndThinClientMatch("SELECT UPPER(c.status) AS upperStatus FROM c"); + } + + @Test(groups = {"thinclient"}, timeOut = TIMEOUT) + public void testStringLength() { + assertDirectAndThinClientMatch("SELECT c.category, LENGTH(c.category) AS len FROM c"); + } + + @Test(groups = {"thinclient"}, timeOut = TIMEOUT) + public void testStringSubstring() { + assertDirectAndThinClientMatch("SELECT SUBSTRING(c.category, 0, 4) AS prefix FROM c"); + } + + @Test(groups = {"thinclient"}, timeOut = TIMEOUT) + public void testStringReplace() { + assertDirectAndThinClientMatch("SELECT REPLACE(c.category, 'o', '0') AS replaced FROM c"); + } + + @Test(groups = {"thinclient"}, timeOut = TIMEOUT) + public void testStringIndexOf() { + assertDirectAndThinClientMatch("SELECT INDEX_OF(c.category, 'o') AS pos FROM c"); + } + + @Test(groups = {"thinclient"}, timeOut = TIMEOUT) + public void testStringLeft() { + assertDirectAndThinClientMatch("SELECT LEFT(c.category, 3) AS l FROM c"); + } + + @Test(groups = {"thinclient"}, timeOut = 
TIMEOUT) + public void testStringReverse() { + assertDirectAndThinClientMatch("SELECT REVERSE(c.category) AS rev FROM c"); + } + + @Test(groups = {"thinclient"}, timeOut = TIMEOUT) + public void testStringTrim() { + assertDirectAndThinClientMatch("SELECT TRIM(c.status) AS trimmed FROM c"); + } + + @Test(groups = {"thinclient"}, timeOut = TIMEOUT) + public void testRegexMatch() { + assertDirectAndThinClientMatch("SELECT * FROM c WHERE RegexMatch(c.category, '^elec.*')"); + } + + // ==================== Type Checking Function Tests ==================== + + @Test(groups = {"thinclient"}, timeOut = TIMEOUT) + public void testIsArray() { + assertDirectAndThinClientMatch("SELECT c.id, IS_ARRAY(c.scores) AS isArr FROM c"); + } + + @Test(groups = {"thinclient"}, timeOut = TIMEOUT) + public void testIsBool() { + assertDirectAndThinClientMatch("SELECT c.id, IS_BOOL(c.isActive) AS isBool FROM c"); + } + + @Test(groups = {"thinclient"}, timeOut = TIMEOUT) + public void testIsNull() { + assertDirectAndThinClientMatch("SELECT * FROM c WHERE IS_NULL(c.nonExistentField)"); + } + + @Test(groups = {"thinclient"}, timeOut = TIMEOUT) + public void testIsNumber() { + assertDirectAndThinClientMatch("SELECT c.id, IS_NUMBER(c.age) AS isNum FROM c"); + } + + @Test(groups = {"thinclient"}, timeOut = TIMEOUT) + public void testIsString() { + assertDirectAndThinClientMatch("SELECT c.id, IS_STRING(c.category) AS isStr FROM c"); + } + + @Test(groups = {"thinclient"}, timeOut = TIMEOUT) + public void testIsObject() { + assertDirectAndThinClientMatch("SELECT c.id, IS_OBJECT(c.address) AS isObj FROM c"); + } + + // ==================== Math Function Tests ==================== + + @Test(groups = {"thinclient"}, timeOut = TIMEOUT) + public void testMathAbs() { + assertDirectAndThinClientMatch("SELECT ABS(c.age - 30) AS diff FROM c"); + } + + @Test(groups = {"thinclient"}, timeOut = TIMEOUT) + public void testMathCeilingFloor() { + assertDirectAndThinClientMatch("SELECT CEILING(c.price) AS ceil, 
FLOOR(c.price) AS flr FROM c"); + } + + @Test(groups = {"thinclient"}, timeOut = TIMEOUT) + public void testMathRound() { + assertDirectAndThinClientMatch("SELECT ROUND(c.price) AS rounded FROM c"); + } + + @Test(groups = {"thinclient"}, timeOut = TIMEOUT) + public void testMathPower() { + assertDirectAndThinClientMatch("SELECT POWER(c.age, 2) AS ageSq FROM c"); + } + + @Test(groups = {"thinclient"}, timeOut = TIMEOUT) + public void testMathSqrt() { + assertDirectAndThinClientMatch("SELECT SQRT(c.price) AS sqrtPrice FROM c"); + } + + // ==================== Array Function Tests ==================== + + @Test(groups = {"thinclient"}, timeOut = TIMEOUT) + public void testArrayLength() { + assertDirectAndThinClientMatch("SELECT c.id, ARRAY_LENGTH(c.scores) AS len FROM c"); + } + + @Test(groups = {"thinclient"}, timeOut = TIMEOUT) + public void testArraySlice() { + assertDirectAndThinClientMatch("SELECT c.id, ARRAY_SLICE(c.tags, 0, 1) AS firstTag FROM c"); + } + + // ==================== Conditional Function Tests ==================== + + @Test(groups = {"thinclient"}, timeOut = TIMEOUT) + public void testIif() { + assertDirectAndThinClientMatch("SELECT c.id, IIF(c.age >= 18, 'adult', 'minor') AS ageGroup FROM c"); + } + + // ==================== Date/Time Function Tests ==================== + + @Test(groups = {"thinclient"}, timeOut = TIMEOUT) + public void testGetCurrentDateTime() { + // Only assert both paths return a non-empty ISO 8601 string — exact values + // will differ because gateway and proxy execute at slightly different times. 
+ QueryResult gwResult = drainQuery(directContainer, + "SELECT VALUE GetCurrentDateTime()", partitionedOptions(), String.class); + QueryResult tcResult = drainQuery(thinClientContainer, + "SELECT VALUE GetCurrentDateTime()", partitionedOptions(), String.class); + for (CosmosDiagnostics d : tcResult.diagnostics) { assertThinClientEndpointUsed(d); } + assertThat(gwResult.results.size()).isEqualTo(1); + assertThat(tcResult.results.size()).isEqualTo(1); + assertThat(gwResult.results.get(0)).matches("\\d{4}-\\d{2}-\\d{2}T.*Z"); + assertThat(tcResult.results.get(0)).matches("\\d{4}-\\d{2}-\\d{2}T.*Z"); + } + + // ==================== SELECT VALUE / Nested Projection Tests ==================== + + @Test(groups = {"thinclient"}, timeOut = TIMEOUT) + public void testSelectValueObject() { + assertDirectAndThinClientMatch( + "SELECT VALUE { name: c.category, loc: c.address.city } FROM c"); + } + + @Test(groups = {"thinclient"}, timeOut = TIMEOUT) + public void testSelectValueScalar() { + assertScalarDirectAndThinClientMatch("SELECT VALUE c.category FROM c", String.class); + } + + // ==================== Cross-Partition Tests ==================== + + @Test(groups = {"thinclient"}, timeOut = TIMEOUT) + public void testCrossPartitionSelectAll() { + assertDirectAndThinClientMatch("SELECT * FROM c ORDER BY c.idx", new CosmosQueryRequestOptions()); + } + + @Test(groups = {"thinclient"}, timeOut = TIMEOUT) + public void testCrossPartitionWhereFilter() { + assertDirectAndThinClientMatch("SELECT * FROM c WHERE c.category = 'electronics' ORDER BY c.idx", + new CosmosQueryRequestOptions()); + } + + // ==================== Multi-EPK-Range Tests (Sort Validation) ==================== + // These tests use a dedicated 24,000 RU/s container (3 physical partitions) to ensure + // documents with different partition keys land on different physical partitions. 
+ // After PartitionKeyInternal → EPK hash conversion, the sort in + // parseQueryRangesForThinClient() ensures RoutingMapProviderHelper.getOverlappingRanges() + // doesn't throw IllegalArgumentException for unsorted ranges. + + /** + * Helper: creates a 24K RU container, runs the test, deletes the container. + */ + private void runMultiRangeTest(String[] pkValues, String queryTemplate, int expectedCount) { + String containerId = "multiRange_" + UUID.randomUUID().toString().substring(0, 8); + CosmosAsyncDatabase db = directClient.getDatabase(directContainer.getDatabase().getId()); + CosmosAsyncContainer directTestContainer = db.getContainer(containerId); + + try { + PartitionKeyDefinition pkDef = new PartitionKeyDefinition(); + pkDef.setPaths(Collections.singletonList("/" + PK_FIELD)); + CosmosContainerProperties props = new CosmosContainerProperties(containerId, pkDef); + db.createContainer(props, ThroughputProperties.createManualThroughput(24000)).block(); + + CosmosAsyncContainer tcContainer = thinClient.getDatabase(db.getId()).getContainer(containerId); + + for (int i = 0; i < pkValues.length; i++) { + String docId = "mr-" + i + "-" + UUID.randomUUID().toString().substring(0, 8); + ObjectNode doc = OBJECT_MAPPER.createObjectNode(); + doc.put(ID_FIELD, docId); + doc.put(PK_FIELD, pkValues[i]); + doc.put("idx", i); + doc.put("val", i * 100); + directTestContainer.createItem(doc, new PartitionKey(pkValues[i]), null).block(); + } + + String query = queryTemplate; + + QueryResult directResult = drainQuery(directTestContainer, query, new CosmosQueryRequestOptions(), ObjectNode.class); + QueryResult tcResult = drainQuery(tcContainer, query, new CosmosQueryRequestOptions(), ObjectNode.class); + + for (CosmosDiagnostics d : tcResult.diagnostics) { assertThinClientEndpointUsed(d); } + + assertThat(tcResult.results.size()).as("Multi-range count mismatch for: " + query).isEqualTo(directResult.results.size()); + assertThat(tcResult.results.size()).isEqualTo(expectedCount); 
+ + List directIds = directResult.results.stream().map(d -> d.get(ID_FIELD).asText()).sorted().collect(Collectors.toList()); + List tcIds = tcResult.results.stream().map(d -> d.get(ID_FIELD).asText()).sorted().collect(Collectors.toList()); + assertThat(tcIds).isEqualTo(directIds); + + } finally { + safeDeleteContainer(directTestContainer); + } + } + + /** + * Test: IN clause on partition key with 3 values → 3 disjoint EPK ranges across 3 physical partitions. + */ + @Test(groups = {"thinclient"}, timeOut = TIMEOUT * 3) + public void testMultiRangePartitionKeyInClause() { + String[] pkValues = {"pk-alpha", "pk-beta", "pk-gamma", "pk-delta", "pk-epsilon"}; + runMultiRangeTest(pkValues, + "SELECT * FROM c WHERE c.mypk IN ('pk-alpha', 'pk-gamma', 'pk-epsilon')", + 3); + } + + /** + * Test: OR on partition key values → 2 disjoint EPK ranges. + */ + @Test(groups = {"thinclient"}, timeOut = TIMEOUT * 3) + public void testMultiRangePartitionKeyOrClause() { + String[] pkValues = {"pk-or-1", "pk-or-2", "pk-or-3"}; + runMultiRangeTest(pkValues, + "SELECT * FROM c WHERE c.mypk = 'pk-or-1' OR c.mypk = 'pk-or-3'", + 2); + } + + /** + * Test: IN clause with 10 PK values → 10 disjoint EPK ranges, stress test for sort correctness. + * Uses UUID-based PK values to maximize EPK hash spread. 
+ */ + @Test(groups = {"thinclient"}, timeOut = TIMEOUT * 3) + public void testMultiRangeManyPartitionKeys() { + String[] pkValues = new String[10]; + for (int i = 0; i < 10; i++) { + pkValues[i] = "pk-many-" + UUID.randomUUID().toString(); + } + + // Build IN clause dynamically from the random PK values + StringBuilder sb = new StringBuilder("SELECT * FROM c WHERE c.mypk IN ("); + for (int i = 0; i < pkValues.length; i++) { + if (i > 0) sb.append(", "); + sb.append("'").append(pkValues[i]).append("'"); + } + sb.append(")"); + runMultiRangeTest(pkValues, sb.toString(), 10); + } + + // ==================== Continuation Token Draining ==================== + + @Test(groups = {"thinclient"}, timeOut = TIMEOUT) + public void testContinuationTokenDraining() { + // Drain gateway fully for expected count + QueryResult gwResult = drainQuery(directContainer, "SELECT * FROM c", partitionedOptions(), ObjectNode.class); + + // Drain thin client with small page size to force multiple continuations + List tcAll = new ArrayList<>(); + List tcDiag = new ArrayList<>(); + String continuationToken = null; + int pageCount = 0; + int maxIterations = 100; + do { + Iterable> pages = thinClientContainer + .queryItems("SELECT * FROM c", partitionedOptions(), ObjectNode.class) + .byPage(continuationToken, 3) // small page size + .toIterable(); + for (FeedResponse page : pages) { + tcAll.addAll(page.getResults()); + tcDiag.add(page.getCosmosDiagnostics()); + continuationToken = page.getContinuationToken(); + pageCount++; + } + } while (continuationToken != null && --maxIterations > 0); + + for (CosmosDiagnostics d : tcDiag) { assertThinClientEndpointUsed(d); } + assertThat(pageCount).as("Should have multiple pages with page size 3").isGreaterThan(1); + assertThat(tcAll.size()).as("Continuation draining count mismatch").isEqualTo(gwResult.results.size()); + } + + // ==================== Invalid Query ==================== + + @Test(groups = {"thinclient"}, timeOut = TIMEOUT) + public void 
testInvalidQueryReturnsBadRequest() { + try { + thinClientContainer.queryItems("SELEC * FORM c", new CosmosQueryRequestOptions(), ObjectNode.class) + .byPage().blockFirst(); + fail("Expected exception for invalid query"); + } catch (CosmosException e) { + assertThat(e.getStatusCode() == 400 || e.getStatusCode() == 0) + .as("Invalid query should return 400 Bad Request or a thin-client transport rejection, got " + + e.getStatusCode()) + .isTrue(); + if (e.getStatusCode() == 0) { + assertThinClientEndpointUsed(e.getDiagnostics()); + } + logger.info("Expected error for invalid query: {} (status {})", e.getMessage(), e.getStatusCode()); + } + } + + // ==================== Vector Search ==================== + + /** + * Creates a vector-enabled container, runs VectorDistance query through both + * Direct and thin client, compares results. + */ + @Test(groups = {"thinclient"}, timeOut = TIMEOUT * 2) + public void testVectorSearchGatewayVsThinClient() { + String vectorContainerId = "vecCompare_" + UUID.randomUUID().toString().substring(0, 8); + CosmosAsyncDatabase db = directClient.getDatabase(directContainer.getDatabase().getId()); + CosmosAsyncContainer directVecContainer = db.getContainer(vectorContainerId); + + try { + PartitionKeyDefinition pkDef = new PartitionKeyDefinition(); + pkDef.setPaths(Collections.singletonList("/" + PK_FIELD)); + + CosmosContainerProperties props = new CosmosContainerProperties(vectorContainerId, pkDef); + + CosmosVectorEmbeddingPolicy policy = new CosmosVectorEmbeddingPolicy(); + CosmosVectorEmbedding emb = new CosmosVectorEmbedding(); + emb.setPath("/embedding"); + emb.setDataType(CosmosVectorDataType.FLOAT32); + emb.setEmbeddingDimensions(3); + emb.setDistanceFunction(CosmosVectorDistanceFunction.COSINE); + policy.setCosmosVectorEmbeddings(Collections.singletonList(emb)); + props.setVectorEmbeddingPolicy(policy); + + IndexingPolicy idxPolicy = new IndexingPolicy(); + idxPolicy.setIndexingMode(IndexingMode.CONSISTENT); + 
idxPolicy.setIncludedPaths(Collections.singletonList(new IncludedPath("/*"))); + idxPolicy.setExcludedPaths(Arrays.asList(new ExcludedPath("/embedding/*"), new ExcludedPath("/\"_etag\"/?"))); + CosmosVectorIndexSpec vecIdx = new CosmosVectorIndexSpec(); + vecIdx.setPath("/embedding"); + vecIdx.setType(CosmosVectorIndexType.FLAT.toString()); + idxPolicy.setVectorIndexes(Collections.singletonList(vecIdx)); + props.setIndexingPolicy(idxPolicy); + + db.createContainer(props).block(); + CosmosAsyncContainer tcVecContainer = thinClient.getDatabase(db.getId()).getContainer(vectorContainerId); + + double[][] embeddings = { + {1.0, 0.0, 0.0}, {0.0, 1.0, 0.0}, {0.0, 0.0, 1.0}, + {1.0, 1.0, 0.0}, {0.9, 0.1, 0.0}, + }; + + String vecPk = UUID.randomUUID().toString(); + List docIds = new ArrayList<>(); + for (int i = 0; i < embeddings.length; i++) { + String docId = "vec_" + i + "_" + UUID.randomUUID().toString().substring(0, 8); + docIds.add(docId); + ObjectNode doc = OBJECT_MAPPER.createObjectNode(); + doc.put(ID_FIELD, docId); + doc.put(PK_FIELD, vecPk); + doc.put("text", "document " + i); + ArrayNode arr = doc.putArray("embedding"); + for (double v : embeddings[i]) { arr.add(v); } + directVecContainer.createItem(doc, new PartitionKey(vecPk), null).block(); + } + + String query = "SELECT TOP 5 c.id, c.text, VectorDistance(c.embedding, [1.0, 0.0, 0.0]) AS score " + + "FROM c ORDER BY VectorDistance(c.embedding, [1.0, 0.0, 0.0])"; + + QueryResult directResult = drainQuery(directVecContainer, query, new CosmosQueryRequestOptions(), ObjectNode.class); + QueryResult tcResult = drainQuery(tcVecContainer, query, new CosmosQueryRequestOptions(), ObjectNode.class); + + for (CosmosDiagnostics d : tcResult.diagnostics) { assertThinClientEndpointUsed(d); } + + assertThat(tcResult.results.size()).isEqualTo(directResult.results.size()); + assertThat(tcResult.results.size()).isEqualTo(5); + + for (int i = 0; i < directResult.results.size(); i++) { + 
assertThat(tcResult.results.get(i).get("id").asText()).isEqualTo(directResult.results.get(i).get("id").asText()); + } + + assertThat(tcResult.results.get(0).get("id").asText()).isEqualTo(docIds.get(0)); + assertThat(tcResult.results.get(0).get("score").asDouble()).isGreaterThan(0.99); + + // Euclidean variant — validates ORDER BY score semantics with a different distance function + String euclideanQuery = "SELECT TOP 5 c.id, VectorDistance(c.embedding, [1.0, 0.0, 0.0], false, {'distanceFunction':'euclidean'}) AS score " + + "FROM c ORDER BY VectorDistance(c.embedding, [1.0, 0.0, 0.0], false, {'distanceFunction':'euclidean'})"; + + QueryResult directEuclidean = drainQuery(directVecContainer, euclideanQuery, new CosmosQueryRequestOptions(), ObjectNode.class); + QueryResult tcEuclidean = drainQuery(tcVecContainer, euclideanQuery, new CosmosQueryRequestOptions(), ObjectNode.class); + + for (CosmosDiagnostics d : tcEuclidean.diagnostics) { assertThinClientEndpointUsed(d); } + + assertThat(tcEuclidean.results.size()).isEqualTo(directEuclidean.results.size()); + assertThat(tcEuclidean.results.size()).isEqualTo(5); + + for (int i = 0; i < directEuclidean.results.size(); i++) { + assertThat(tcEuclidean.results.get(i).get("id").asText()) + .as("Euclidean vector search result mismatch at position " + i) + .isEqualTo(directEuclidean.results.get(i).get("id").asText()); + } + + } finally { + safeDeleteContainer(directVecContainer); + } + } + + // ==================== Full-Text Search ==================== + + /** + * Creates a container with full-text policy and index, runs FullTextContains query. 
+ */ + @Test(groups = {"thinclient"}, timeOut = TIMEOUT * 2) + public void testFullTextSearchGatewayVsThinClient() { + String containerId = "ftsCompare_" + UUID.randomUUID().toString().substring(0, 8); + CosmosAsyncDatabase db = directClient.getDatabase(directContainer.getDatabase().getId()); + CosmosAsyncContainer directFtsContainer = db.getContainer(containerId); + + try { + PartitionKeyDefinition pkDef = new PartitionKeyDefinition(); + pkDef.setPaths(Collections.singletonList("/" + PK_FIELD)); + + CosmosContainerProperties props = new CosmosContainerProperties(containerId, pkDef); + + CosmosFullTextPath ftPath = new CosmosFullTextPath(); + ftPath.setPath("/text"); + ftPath.setLanguage("en-US"); + CosmosFullTextPolicy ftPolicy = new CosmosFullTextPolicy(); + ftPolicy.setDefaultLanguage("en-US"); + ftPolicy.setPaths(Collections.singletonList(ftPath)); + props.setFullTextPolicy(ftPolicy); + + IndexingPolicy idxPolicy = new IndexingPolicy(); + idxPolicy.setIndexingMode(IndexingMode.CONSISTENT); + idxPolicy.setIncludedPaths(Collections.singletonList(new IncludedPath("/*"))); + idxPolicy.setExcludedPaths(Collections.singletonList(new ExcludedPath("/\"_etag\"/?"))); + CosmosFullTextIndex ftIndex = new CosmosFullTextIndex(); + ftIndex.setPath("/text"); + idxPolicy.setCosmosFullTextIndexes(Collections.singletonList(ftIndex)); + props.setIndexingPolicy(idxPolicy); + + db.createContainer(props).block(); + CosmosAsyncContainer tcFtsContainer = thinClient.getDatabase(db.getId()).getContainer(containerId); + + String ftsPk = UUID.randomUUID().toString(); + String[] texts = { + "The quick brown fox jumps over the lazy dog", + "A red bicycle parked near the mountain trail", + "Electronic devices on sale at the downtown store", + "Mountain biking trails with scenic views", + "The lazy cat sleeps on the warm brown couch" + }; + for (int i = 0; i < texts.length; i++) { + ObjectNode doc = OBJECT_MAPPER.createObjectNode(); + doc.put(ID_FIELD, "fts_" + i + "_" + 
UUID.randomUUID().toString().substring(0, 8)); + doc.put(PK_FIELD, ftsPk); + doc.put("text", texts[i]); + directFtsContainer.createItem(doc, new PartitionKey(ftsPk), null).block(); + } + + String query = "SELECT TOP 10 * FROM c WHERE FullTextContains(c.text, 'mountain')"; + + QueryResult directResult = drainQuery(directFtsContainer, query, new CosmosQueryRequestOptions(), ObjectNode.class); + QueryResult tcResult = drainQuery(tcFtsContainer, query, new CosmosQueryRequestOptions(), ObjectNode.class); + + for (CosmosDiagnostics d : tcResult.diagnostics) { assertThinClientEndpointUsed(d); } + assertThat(directResult.results.size()).as("Full-text query should return results").isPositive(); + assertThat(tcResult.results.size()).isEqualTo(directResult.results.size()); + + List directIds = directResult.results.stream().map(d -> d.get("id").asText()).sorted().collect(Collectors.toList()); + List tcIds = tcResult.results.stream().map(d -> d.get("id").asText()).sorted().collect(Collectors.toList()); + assertThat(tcIds).isEqualTo(directIds); + + } finally { + safeDeleteContainer(directFtsContainer); + } + } + + /** + * Creates a container with full-text policy and index, runs ORDER BY RANK FullTextScore query. + * Compares exact ordering between Direct and thin client. 
+ */ + @Test(groups = {"thinclient"}, timeOut = TIMEOUT * 2) + public void testFullTextScoreRanking() { + String containerId = "ftsRank_" + UUID.randomUUID().toString().substring(0, 8); + CosmosAsyncDatabase db = directClient.getDatabase(directContainer.getDatabase().getId()); + CosmosAsyncContainer directFtsContainer = db.getContainer(containerId); + + try { + PartitionKeyDefinition pkDef = new PartitionKeyDefinition(); + pkDef.setPaths(Collections.singletonList("/" + PK_FIELD)); + + CosmosContainerProperties props = new CosmosContainerProperties(containerId, pkDef); + + CosmosFullTextPath ftPath = new CosmosFullTextPath(); + ftPath.setPath("/text"); + ftPath.setLanguage("en-US"); + CosmosFullTextPolicy ftPolicy = new CosmosFullTextPolicy(); + ftPolicy.setDefaultLanguage("en-US"); + ftPolicy.setPaths(Collections.singletonList(ftPath)); + props.setFullTextPolicy(ftPolicy); + + IndexingPolicy idxPolicy = new IndexingPolicy(); + idxPolicy.setIndexingMode(IndexingMode.CONSISTENT); + idxPolicy.setIncludedPaths(Collections.singletonList(new IncludedPath("/*"))); + idxPolicy.setExcludedPaths(Collections.singletonList(new ExcludedPath("/\"_etag\"/?"))); + CosmosFullTextIndex ftIndex = new CosmosFullTextIndex(); + ftIndex.setPath("/text"); + idxPolicy.setCosmosFullTextIndexes(Collections.singletonList(ftIndex)); + props.setIndexingPolicy(idxPolicy); + + db.createContainer(props).block(); + CosmosAsyncContainer tcFtsContainer = thinClient.getDatabase(db.getId()).getContainer(containerId); + + String ftsPk = UUID.randomUUID().toString(); + String[] texts = { + "The quick brown fox jumps over the lazy dog", + "A red bicycle parked near the mountain trail", + "Electronic devices on sale at the downtown store", + "Mountain biking trails with scenic views", + "The lazy cat sleeps on the warm brown couch" + }; + for (int i = 0; i < texts.length; i++) { + ObjectNode doc = OBJECT_MAPPER.createObjectNode(); + doc.put(ID_FIELD, "ftsRank_" + i + "_" + 
UUID.randomUUID().toString().substring(0, 8)); + doc.put(PK_FIELD, ftsPk); + doc.put("text", texts[i]); + directFtsContainer.createItem(doc, new PartitionKey(ftsPk), null).block(); + } + + String query = "SELECT TOP 5 * FROM c ORDER BY RANK FullTextScore(c.text, 'mountain')"; + + QueryResult directResult = drainQuery(directFtsContainer, query, new CosmosQueryRequestOptions(), ObjectNode.class); + QueryResult tcResult = drainQuery(tcFtsContainer, query, new CosmosQueryRequestOptions(), ObjectNode.class); + + for (CosmosDiagnostics d : tcResult.diagnostics) { assertThinClientEndpointUsed(d); } + assertThat(directResult.results.size()).as("FullTextScore ranking query should return results").isPositive(); + assertThat(tcResult.results.size()).isEqualTo(directResult.results.size()); + + for (int i = 0; i < directResult.results.size(); i++) { + assertThat(tcResult.results.get(i).get("id").asText()) + .as("FullTextScore ranking result mismatch at position " + i) + .isEqualTo(directResult.results.get(i).get("id").asText()); + } + + } finally { + safeDeleteContainer(directFtsContainer); + } + } + + // ==================== Hybrid Search ==================== + + /** + * Creates a container with vector + full-text policies, runs hybrid RRF query. 
+ */ + @Test(groups = {"thinclient"}, timeOut = TIMEOUT * 2) + public void testHybridSearchGatewayVsThinClient() { + String containerId = "hybridCompare_" + UUID.randomUUID().toString().substring(0, 8); + CosmosAsyncDatabase db = directClient.getDatabase(directContainer.getDatabase().getId()); + CosmosAsyncContainer directHybridContainer = db.getContainer(containerId); + + try { + PartitionKeyDefinition pkDef = new PartitionKeyDefinition(); + pkDef.setPaths(Collections.singletonList("/" + PK_FIELD)); + + CosmosContainerProperties props = new CosmosContainerProperties(containerId, pkDef); + + CosmosVectorEmbeddingPolicy vecPolicy = new CosmosVectorEmbeddingPolicy(); + CosmosVectorEmbedding emb = new CosmosVectorEmbedding(); + emb.setPath("/vector"); + emb.setDataType(CosmosVectorDataType.FLOAT32); + emb.setEmbeddingDimensions(3); + emb.setDistanceFunction(CosmosVectorDistanceFunction.COSINE); + vecPolicy.setCosmosVectorEmbeddings(Collections.singletonList(emb)); + props.setVectorEmbeddingPolicy(vecPolicy); + + CosmosFullTextPath ftPath = new CosmosFullTextPath(); + ftPath.setPath("/text"); + ftPath.setLanguage("en-US"); + CosmosFullTextPolicy ftPolicy = new CosmosFullTextPolicy(); + ftPolicy.setDefaultLanguage("en-US"); + ftPolicy.setPaths(Collections.singletonList(ftPath)); + props.setFullTextPolicy(ftPolicy); + + IndexingPolicy idxPolicy = new IndexingPolicy(); + idxPolicy.setIndexingMode(IndexingMode.CONSISTENT); + idxPolicy.setIncludedPaths(Collections.singletonList(new IncludedPath("/*"))); + idxPolicy.setExcludedPaths(Arrays.asList(new ExcludedPath("/vector/*"), new ExcludedPath("/\"_etag\"/?"))); + CosmosVectorIndexSpec vecIdx = new CosmosVectorIndexSpec(); + vecIdx.setPath("/vector"); + vecIdx.setType(CosmosVectorIndexType.FLAT.toString()); + idxPolicy.setVectorIndexes(Collections.singletonList(vecIdx)); + CosmosFullTextIndex ftIndex = new CosmosFullTextIndex(); + ftIndex.setPath("/text"); + 
idxPolicy.setCosmosFullTextIndexes(Collections.singletonList(ftIndex)); + props.setIndexingPolicy(idxPolicy); + + db.createContainer(props).block(); + CosmosAsyncContainer tcHybridContainer = thinClient.getDatabase(db.getId()).getContainer(containerId); + + String hybridPk = UUID.randomUUID().toString(); + String[] texts = { + "Red bicycle on the mountain trail", + "Blue car parked in the city", + "Green bicycle near the lake" + }; + double[][] vectors = { + {1.0, 0.0, 0.0}, {0.0, 1.0, 0.0}, {0.0, 0.0, 1.0} + }; + for (int i = 0; i < texts.length; i++) { + ObjectNode doc = OBJECT_MAPPER.createObjectNode(); + doc.put(ID_FIELD, "hybrid_" + i + "_" + UUID.randomUUID().toString().substring(0, 8)); + doc.put(PK_FIELD, hybridPk); + doc.put("text", texts[i]); + ArrayNode arr = doc.putArray("vector"); + for (double v : vectors[i]) { arr.add(v); } + directHybridContainer.createItem(doc, new PartitionKey(hybridPk), null).block(); + } + + String query = "SELECT TOP 3 * FROM c " + + "ORDER BY RANK RRF(VectorDistance(c.vector, [1.0, 0.0, 0.0]), FullTextScore(c.text, 'bicycle'))"; + + QueryResult directResult = drainQuery(directHybridContainer, query, new CosmosQueryRequestOptions(), ObjectNode.class); + QueryResult tcResult = drainQuery(tcHybridContainer, query, new CosmosQueryRequestOptions(), ObjectNode.class); + + for (CosmosDiagnostics d : tcResult.diagnostics) { assertThinClientEndpointUsed(d); } + assertThat(tcResult.results.size()).isEqualTo(directResult.results.size()); + + for (int i = 0; i < directResult.results.size(); i++) { + assertThat(tcResult.results.get(i).get("id").asText()) + .as("Hybrid search result mismatch at position " + i) + .isEqualTo(directResult.results.get(i).get("id").asText()); + } + + } finally { + safeDeleteContainer(directHybridContainer); + } + } + + // ==================== Assertion & Drain Helpers ==================== + + private static void safeDeleteContainer(CosmosAsyncContainer container) { + if (container != null) { + try { 
container.delete().block(); } catch (Exception e) { logger.warn("Container cleanup failed: {}", e.getMessage()); } + } + } + + /** Holds query results and per-page diagnostics from a fully drained query. */ + private static class QueryResult { + final List results = new ArrayList<>(); + final List diagnostics = new ArrayList<>(); + } + + private CosmosQueryRequestOptions partitionedOptions() { + CosmosQueryRequestOptions opts = new CosmosQueryRequestOptions(); + opts.setPartitionKey(new PartitionKey(commonPk)); + return opts; + } + + private QueryResult drainQuery(CosmosAsyncContainer c, String query, CosmosQueryRequestOptions opts, Class type) { + QueryResult result = new QueryResult<>(); + for (FeedResponse page : c.queryItems(query, opts, type).byPage().toIterable()) { + result.results.addAll(page.getResults()); + result.diagnostics.add(page.getCosmosDiagnostics()); + } + return result; + } + + private QueryResult drainQuery(CosmosAsyncContainer c, SqlQuerySpec qs, CosmosQueryRequestOptions opts, Class type) { + QueryResult result = new QueryResult<>(); + for (FeedResponse page : c.queryItems(qs, opts, type).byPage().toIterable()) { + result.results.addAll(page.getResults()); + result.diagnostics.add(page.getCosmosDiagnostics()); + } + return result; + } + + /** + * Direct vs thin client comparison: run query via both Direct TCP and thin client. + * Assert: (1) thin client used :10250, (2) same count, (3) same document IDs in order. 
+ */ + private void assertDirectAndThinClientMatch(String query) { + assertDirectAndThinClientMatch(query, partitionedOptions()); + } + + private void assertDirectAndThinClientMatch(String query, CosmosQueryRequestOptions options) { + QueryResult gwResult = drainQuery(directContainer, query, options, ObjectNode.class); + QueryResult tcResult = drainQuery(thinClientContainer, query, options, ObjectNode.class); + for (CosmosDiagnostics d : tcResult.diagnostics) { assertThinClientEndpointUsed(d); } + + assertThat(tcResult.results.size()).as("Count mismatch: " + query).isEqualTo(gwResult.results.size()); + + List gwIds = gwResult.results.stream().filter(d -> d.has(ID_FIELD)).map(d -> d.get(ID_FIELD).asText()).collect(Collectors.toList()); + List tcIds = tcResult.results.stream().filter(d -> d.has(ID_FIELD)).map(d -> d.get(ID_FIELD).asText()).collect(Collectors.toList()); + assertThat(tcIds).as("IDs mismatch: " + query).isEqualTo(gwIds); + } + + private void assertDirectAndThinClientMatch(SqlQuerySpec querySpec, CosmosQueryRequestOptions options) { + QueryResult gwResult = drainQuery(directContainer, querySpec, options, ObjectNode.class); + QueryResult tcResult = drainQuery(thinClientContainer, querySpec, options, ObjectNode.class); + for (CosmosDiagnostics d : tcResult.diagnostics) { assertThinClientEndpointUsed(d); } + + assertThat(tcResult.results.size()).as("Count mismatch: " + querySpec.getQueryText()).isEqualTo(gwResult.results.size()); + + List gwIds = gwResult.results.stream().filter(d -> d.has(ID_FIELD)).map(d -> d.get(ID_FIELD).asText()).collect(Collectors.toList()); + List tcIds = tcResult.results.stream().filter(d -> d.has(ID_FIELD)).map(d -> d.get(ID_FIELD).asText()).collect(Collectors.toList()); + assertThat(tcIds).as("IDs mismatch: " + querySpec.getQueryText()).isEqualTo(gwIds); + } + + private void assertScalarDirectAndThinClientMatch(String query, Class resultType) { + assertScalarDirectAndThinClientMatch(query, partitionedOptions(), resultType); + } + 
+ private void assertScalarDirectAndThinClientMatch(String query, CosmosQueryRequestOptions options, Class resultType) { + QueryResult gwResult = drainQuery(directContainer, query, options, resultType); + QueryResult tcResult = drainQuery(thinClientContainer, query, options, resultType); + for (CosmosDiagnostics d : tcResult.diagnostics) { assertThinClientEndpointUsed(d); } + + assertThat(tcResult.results.size()).as("Scalar count mismatch: " + query).isEqualTo(gwResult.results.size()); + for (int i = 0; i < gwResult.results.size(); i++) { + assertThat(tcResult.results.get(i).toString()).as("Scalar value mismatch at " + i + ": " + query) + .isEqualTo(gwResult.results.get(i).toString()); + } + } + + /** Direct vs thin client comparison for GROUP BY where result order may vary — compare as sets. */ + private void assertGroupByDirectAndThinClientMatch(String query, String groupField) { + QueryResult gwResult = drainQuery(directContainer, query, partitionedOptions(), ObjectNode.class); + QueryResult tcResult = drainQuery(thinClientContainer, query, partitionedOptions(), ObjectNode.class); + for (CosmosDiagnostics d : tcResult.diagnostics) { assertThinClientEndpointUsed(d); } + + assertThat(tcResult.results.size()).as("GROUP BY count mismatch: " + query).isEqualTo(gwResult.results.size()); + for (ObjectNode gwRow : gwResult.results) { + String key = gwRow.get(groupField).asText(); + boolean found = tcResult.results.stream().anyMatch(tc -> tc.get(groupField).asText().equals(key) + && tc.toString().equals(gwRow.toString())); + assertThat(found).as("GROUP BY row not found in thin client results: " + key).isTrue(); + } + } +} diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/ThinClientStoredProcedureE2ETest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/ThinClientStoredProcedureE2ETest.java new file mode 100644 index 000000000000..6af552954082 --- /dev/null +++ 
b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/ThinClientStoredProcedureE2ETest.java @@ -0,0 +1,118 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +package com.azure.cosmos.rx; + +import com.azure.cosmos.CosmosClientBuilder; +import com.azure.cosmos.models.CosmosStoredProcedureProperties; +import com.azure.cosmos.models.CosmosStoredProcedureRequestOptions; +import com.azure.cosmos.models.CosmosStoredProcedureResponse; +import com.azure.cosmos.models.CosmosItemResponse; +import com.azure.cosmos.models.PartitionKey; +import com.fasterxml.jackson.databind.node.ObjectNode; +import org.testng.annotations.Factory; +import org.testng.annotations.Test; + +import java.util.Arrays; +import java.util.UUID; + +import static org.assertj.core.api.AssertionsForClassTypes.assertThat; +import static org.assertj.core.api.Fail.fail; + +/** + * Thin client E2E tests for stored procedure execution. + * Container is truncated in {@code @BeforeClass} — no per-test cleanup needed. 
+ */ +public class ThinClientStoredProcedureE2ETest extends ThinClientTestBase { + + @Factory(dataProvider = "clientBuildersWithGatewayAndHttp2") + public ThinClientStoredProcedureE2ETest(CosmosClientBuilder clientBuilder) { + super(clientBuilder); + } + + @Test(groups = {"thinclient"}, timeOut = TIMEOUT) + public void testThinClientStoredProcedure() { + String sprocId = "createDocSproc_" + UUID.randomUUID(); + String pkValue = UUID.randomUUID().toString(); + String docId = UUID.randomUUID().toString(); + + CosmosStoredProcedureProperties storedProcedureDef = new CosmosStoredProcedureProperties( + sprocId, + "function createDocument(docToCreate) {" + + "var context = getContext();" + + "var container = context.getCollection();" + + "var response = context.getResponse();" + + "var accepted = container.createDocument(" + + " container.getSelfLink()," + + " docToCreate," + + " function(err, docCreated) {" + + " if (err) throw new Error('Error creating document: ' + err.message);" + + " response.setBody(docCreated);" + + " });" + + "if (!accepted) throw new Error('Document creation was not accepted');" + + "}" + ); + + CosmosStoredProcedureResponse createResponse = container.getScripts() + .createStoredProcedure(storedProcedureDef).block(); + assertThat(createResponse).isNotNull(); + assertThat(createResponse.getStatusCode()).isEqualTo(201); + + CosmosStoredProcedureRequestOptions options = new CosmosStoredProcedureRequestOptions(); + options.setPartitionKey(new PartitionKey(pkValue)); + + ObjectNode docToCreate = createTestDocument(docId, pkValue); + + CosmosStoredProcedureResponse executeResponse = container.getScripts() + .getStoredProcedure(sprocId) + .execute(Arrays.asList(docToCreate), options).block(); + + assertThat(executeResponse).isNotNull(); + assertThat(executeResponse.getStatusCode()).isEqualTo(200); + assertThat(executeResponse.getRequestCharge()).isGreaterThan(0.0); + assertThinClientEndpointUsed(executeResponse.getDiagnostics()); + + CosmosItemResponse 
readResponse = container.readItem(docId, new PartitionKey(pkValue), ObjectNode.class).block(); + assertThat(readResponse).isNotNull(); + assertThat(readResponse.getItem().get(ID_FIELD).asText()).isEqualTo(docId); + } + + @Test(groups = {"thinclient"}, timeOut = TIMEOUT) + public void testStoredProcedureExecutionWithoutPartitionKeyThrows() { + String sprocId = "noPartitionKeySproc_" + UUID.randomUUID(); + + CosmosStoredProcedureProperties storedProcedureDef = new CosmosStoredProcedureProperties( + sprocId, "function() { getContext().getResponse().setBody('Hello'); }"); + + container.getScripts().createStoredProcedure(storedProcedureDef).block(); + + CosmosStoredProcedureRequestOptions options = new CosmosStoredProcedureRequestOptions(); + + try { + container.getScripts().getStoredProcedure(sprocId).execute(null, options).block(); + fail("Expected UnsupportedOperationException for sproc execution without partition key"); + } catch (UnsupportedOperationException e) { + assertThat(e.getMessage()).contains("PartitionKey value must be supplied"); + } + } + + @Test(groups = {"thinclient"}, timeOut = TIMEOUT) + public void testThinClientStoredProcedureWithPartitionKeyNone() { + String sprocId = "pkNoneSproc_" + UUID.randomUUID(); + + CosmosStoredProcedureProperties storedProcedureDef = new CosmosStoredProcedureProperties( + sprocId, "function() { getContext().getResponse().setBody('Hello from PK.NONE'); }"); + + container.getScripts().createStoredProcedure(storedProcedureDef).block(); + + CosmosStoredProcedureRequestOptions options = new CosmosStoredProcedureRequestOptions(); + options.setPartitionKey(PartitionKey.NONE); + + CosmosStoredProcedureResponse executeResponse = container.getScripts() + .getStoredProcedure(sprocId).execute(null, options).block(); + + assertThat(executeResponse).isNotNull(); + assertThat(executeResponse.getStatusCode()).isEqualTo(200); + assertThat(executeResponse.getRequestCharge()).isGreaterThan(0.0); + 
assertThinClientEndpointUsed(executeResponse.getDiagnostics()); + } +} diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/ThinClientTestBase.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/ThinClientTestBase.java new file mode 100644 index 000000000000..379d12bd3720 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/ThinClientTestBase.java @@ -0,0 +1,111 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +package com.azure.cosmos.rx; + +import com.azure.cosmos.CosmosAsyncClient; +import com.azure.cosmos.CosmosAsyncContainer; +import com.azure.cosmos.CosmosDiagnostics; +import com.azure.cosmos.CosmosDiagnosticsContext; +import com.azure.cosmos.CosmosDiagnosticsRequestInfo; +import com.azure.cosmos.CosmosClientBuilder; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.node.ObjectNode; +import org.testng.annotations.AfterClass; +import org.testng.annotations.BeforeClass; + +import java.util.Collection; + +import static org.assertj.core.api.AssertionsForClassTypes.assertThat; + +/** + * Base class for thin client E2E tests. Provides shared setup/teardown, + * constants, and helper methods common to all thin client test classes. 
+ */ +public abstract class ThinClientTestBase extends TestSuiteBase { + + protected static final String THIN_CLIENT_ENDPOINT_INDICATOR = ":10250/"; + protected static final String ID_FIELD = "id"; + protected static final String PARTITION_KEY_FIELD = "mypk"; + protected static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + protected CosmosAsyncClient client; + protected CosmosAsyncContainer container; + + protected ThinClientTestBase(CosmosClientBuilder clientBuilder) { + super(clientBuilder); + } + + @BeforeClass(groups = {"thinclient"}, timeOut = SETUP_TIMEOUT) + public void before_ThinClientTest() { + assertThat(this.client).isNull(); + enableThinClientForTest(); + this.client = getClientBuilder().buildAsyncClient(); + this.container = getSharedMultiPartitionCosmosContainer(this.client); + + // Clean up shared container to prevent cross-test-class pollution. + cleanUpContainer(this.container); + } + + @AfterClass(groups = {"thinclient"}, timeOut = SHUTDOWN_TIMEOUT, alwaysRun = true) + public void afterClass() { + clearThinClientForTest(); + if (this.client != null) { + this.client.close(); + } + } + + protected static void enableThinClientForTest() { + System.setProperty("COSMOS.THINCLIENT_ENABLED", "true"); + } + + protected static void clearThinClientForTest() { + System.clearProperty("COSMOS.THINCLIENT_ENABLED"); + } + + /** + * Creates a test document with id and mypk fields (matching shared container partition key). + */ + protected ObjectNode createTestDocument(String id, String mypk) { + ObjectNode doc = OBJECT_MAPPER.createObjectNode(); + doc.put(ID_FIELD, id); + doc.put(PARTITION_KEY_FIELD, mypk); + return doc; + } + + /** + * Asserts that all requests in the diagnostics were routed through the thin client endpoint. 
+ */ + protected static void assertThinClientEndpointUsed(CosmosDiagnostics diagnostics) { + assertThat(diagnostics).isNotNull(); + CosmosDiagnosticsContext ctx = diagnostics.getDiagnosticsContext(); + assertThat(ctx).isNotNull(); + Collection requests = ctx.getRequestInfo(); + assertThat(requests).isNotNull(); + assertThat(requests.size()).isPositive(); + int requestCountAgainstThinClientEndpoint = 0; + for (CosmosDiagnosticsRequestInfo requestInfo : requests) { + if (requestInfo.getEndpoint().contains(THIN_CLIENT_ENDPOINT_INDICATOR)) { + requestCountAgainstThinClientEndpoint++; + } + } + assertThat(requestCountAgainstThinClientEndpoint).isEqualTo(requests.size()); + } + + /** + * Asserts that NO requests in the diagnostics were routed through the thin client endpoint, + * confirming the gateway client used the standard :443 path. + */ + protected static void assertGatewayEndpointUsed(CosmosDiagnostics diagnostics) { + assertThat(diagnostics).isNotNull(); + CosmosDiagnosticsContext ctx = diagnostics.getDiagnosticsContext(); + assertThat(ctx).isNotNull(); + Collection requests = ctx.getRequestInfo(); + assertThat(requests).isNotNull(); + assertThat(requests.size()).isPositive(); + for (CosmosDiagnosticsRequestInfo requestInfo : requests) { + assertThat(requestInfo.getEndpoint()) + .as("Gateway client must not route through thin client endpoint, but found: " + requestInfo.getEndpoint()) + .doesNotContain(THIN_CLIENT_ENDPOINT_INDICATOR); + } + } +} diff --git a/sdk/cosmos/azure-cosmos/CHANGELOG.md b/sdk/cosmos/azure-cosmos/CHANGELOG.md index 5146e18db2e8..9730a18dac8e 100644 --- a/sdk/cosmos/azure-cosmos/CHANGELOG.md +++ b/sdk/cosmos/azure-cosmos/CHANGELOG.md @@ -9,6 +9,7 @@ #### Bugs Fixed #### Other Changes +* Enabled Gateway V2 (thin-client) data-plane routing by default for eligible Gateway-mode clients (`COSMOS.THINCLIENT_ENABLED=true`); gated by an HTTP/2 connectivity probe with automatic fallback to Gateway V1. 
### 4.81.0 (2026-06-08) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Configs.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Configs.java index cf1a812e10f2..1b2539e7681b 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Configs.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Configs.java @@ -51,10 +51,34 @@ public class Configs { private static final String DEFAULT_THINCLIENT_ENDPOINT = ""; private static final String THINCLIENT_ENDPOINT = "COSMOS.THINCLIENT_ENDPOINT"; private static final String THINCLIENT_ENDPOINT_VARIABLE = "COSMOS_THINCLIENT_ENDPOINT"; - private static final boolean DEFAULT_THINCLIENT_ENABLED = false; + private static final boolean DEFAULT_THINCLIENT_ENABLED = true; private static final String THINCLIENT_ENABLED = "COSMOS.THINCLIENT_ENABLED"; private static final String THINCLIENT_ENABLED_VARIABLE = "COSMOS_THINCLIENT_ENABLED"; + // Thin-client connectivity probe (POST /connectivity-probe over HTTP/2 to each thin-client regional endpoint). + // The probe runs after every successful account-topology refresh. When the cycle is RED for + // THINCLIENT_PROBE_FAILURE_THRESHOLD consecutive cycles, the SDK falls back to Gateway V1 for data-plane + // requests until a subsequent cycle is GREEN. See proxy contract: only HTTP 200 counts as GREEN. 
+ private static final boolean DEFAULT_THINCLIENT_PROBE_ENABLED = true; + private static final String THINCLIENT_PROBE_ENABLED = "COSMOS.THINCLIENT_PROBE_ENABLED"; + private static final String THINCLIENT_PROBE_ENABLED_VARIABLE = "COSMOS_THINCLIENT_PROBE_ENABLED"; + + private static final int DEFAULT_THINCLIENT_PROBE_FAILURE_THRESHOLD = 1; + private static final String THINCLIENT_PROBE_FAILURE_THRESHOLD = "COSMOS.THINCLIENT_PROBE_FAILURE_THRESHOLD"; + private static final String THINCLIENT_PROBE_FAILURE_THRESHOLD_VARIABLE = "COSMOS_THINCLIENT_PROBE_FAILURE_THRESHOLD"; + + // Number of consecutive GREEN probe cycles required to restore proxy-healthy after + // a RED-flip. Default 1 preserves the optimistic-recovery behavior shipped in 4.82.0-beta.1. + // Raise this (e.g. to match THINCLIENT_PROBE_FAILURE_THRESHOLD) to reduce routing oscillation + // when a region is intermittently flapping. + private static final int DEFAULT_THINCLIENT_PROBE_RECOVERY_THRESHOLD = 1; + private static final String THINCLIENT_PROBE_RECOVERY_THRESHOLD = "COSMOS.THINCLIENT_PROBE_RECOVERY_THRESHOLD"; + private static final String THINCLIENT_PROBE_RECOVERY_THRESHOLD_VARIABLE = "COSMOS_THINCLIENT_PROBE_RECOVERY_THRESHOLD"; + + private static final String DEFAULT_THINCLIENT_PROBE_PATH = "/connectivity-probe"; + private static final String THINCLIENT_PROBE_PATH = "COSMOS.THINCLIENT_PROBE_PATH"; + private static final String THINCLIENT_PROBE_PATH_VARIABLE = "COSMOS_THINCLIENT_PROBE_PATH"; + private static final boolean DEFAULT_NETTY_HTTP_CLIENT_METRICS_ENABLED = false; private static final String NETTY_HTTP_CLIENT_METRICS_ENABLED = "COSMOS.NETTY_HTTP_CLIENT_METRICS_ENABLED"; private static final String NETTY_HTTP_CLIENT_METRICS_ENABLED_VARIABLE = "COSMOS_NETTY_HTTP_CLIENT_METRICS_ENABLED"; @@ -585,6 +609,130 @@ public static boolean isThinClientEnabled() { return DEFAULT_THINCLIENT_ENABLED; } + /** + * Returns whether the thin-client connectivity probe is enabled. 
When true, the SDK + * issues {@code POST /connectivity-probe} against every thin-client regional endpoint + * after each topology refresh and gates data-plane routing on the result. + * Default: true. Override with {@code COSMOS.THINCLIENT_PROBE_ENABLED} or + * {@code COSMOS_THINCLIENT_PROBE_ENABLED}. + */ + public static boolean isThinClientProbeEnabled() { + String valueFromSystemProperty = System.getProperty(THINCLIENT_PROBE_ENABLED); + if (valueFromSystemProperty != null && !valueFromSystemProperty.isEmpty()) { + return Boolean.parseBoolean(valueFromSystemProperty); + } + + String valueFromEnvVariable = System.getenv(THINCLIENT_PROBE_ENABLED_VARIABLE); + if (valueFromEnvVariable != null && !valueFromEnvVariable.isEmpty()) { + return Boolean.parseBoolean(valueFromEnvVariable); + } + + return DEFAULT_THINCLIENT_PROBE_ENABLED; + } + + /** + * Number of consecutive probe cycles that must be RED before the SDK flips data-plane + * routing from the thin-client proxy back to Gateway V1. A single GREEN cycle resets + * the counter. Default: 1. Override with {@code COSMOS.THINCLIENT_PROBE_FAILURE_THRESHOLD} + * or {@code COSMOS_THINCLIENT_PROBE_FAILURE_THRESHOLD}. Values less than 1 are coerced to 1. + */ + public static int getThinClientProbeFailureThreshold() { + int value = DEFAULT_THINCLIENT_PROBE_FAILURE_THRESHOLD; + + String valueFromSystemProperty = System.getProperty(THINCLIENT_PROBE_FAILURE_THRESHOLD); + if (valueFromSystemProperty != null && !valueFromSystemProperty.isEmpty()) { + try { + value = Integer.parseInt(valueFromSystemProperty); + } catch (NumberFormatException ignored) { + logger.warn( + "Invalid non-numeric value '{}' for system property {}. 
Falling back to environment variable or default.", + valueFromSystemProperty, + THINCLIENT_PROBE_FAILURE_THRESHOLD); + valueFromSystemProperty = null; + } + } + + if (valueFromSystemProperty == null || valueFromSystemProperty.isEmpty()) { + String valueFromEnvVariable = System.getenv(THINCLIENT_PROBE_FAILURE_THRESHOLD_VARIABLE); + if (valueFromEnvVariable != null && !valueFromEnvVariable.isEmpty()) { + try { + value = Integer.parseInt(valueFromEnvVariable); + } catch (NumberFormatException ignored) { + logger.warn( + "Invalid non-numeric value '{}' for environment variable {}. Falling back to default: {}.", + valueFromEnvVariable, + THINCLIENT_PROBE_FAILURE_THRESHOLD_VARIABLE, + DEFAULT_THINCLIENT_PROBE_FAILURE_THRESHOLD); + } + } + } + + return Math.max(1, value); + } + + /** + * Number of consecutive GREEN probe cycles required to restore data-plane routing + * back to the thin-client proxy after the SDK has flipped to Gateway V1. Default: 1 + * (a single GREEN cycle restores). Raise this (e.g. to match + * {@link #getThinClientProbeFailureThreshold()}) to reduce routing oscillation when a + * region is intermittently flapping. Override with + * {@code COSMOS.THINCLIENT_PROBE_RECOVERY_THRESHOLD} or + * {@code COSMOS_THINCLIENT_PROBE_RECOVERY_THRESHOLD}. Values less than 1 are coerced to 1. + */ + public static int getThinClientProbeRecoveryThreshold() { + int value = DEFAULT_THINCLIENT_PROBE_RECOVERY_THRESHOLD; + + String valueFromSystemProperty = System.getProperty(THINCLIENT_PROBE_RECOVERY_THRESHOLD); + if (valueFromSystemProperty != null && !valueFromSystemProperty.isEmpty()) { + try { + value = Integer.parseInt(valueFromSystemProperty); + } catch (NumberFormatException ignored) { + logger.warn( + "Invalid non-numeric value '{}' for system property {}. 
Falling back to environment variable or default.", + valueFromSystemProperty, + THINCLIENT_PROBE_RECOVERY_THRESHOLD); + valueFromSystemProperty = null; + } + } + + if (valueFromSystemProperty == null || valueFromSystemProperty.isEmpty()) { + String valueFromEnvVariable = System.getenv(THINCLIENT_PROBE_RECOVERY_THRESHOLD_VARIABLE); + if (valueFromEnvVariable != null && !valueFromEnvVariable.isEmpty()) { + try { + value = Integer.parseInt(valueFromEnvVariable); + } catch (NumberFormatException ignored) { + logger.warn( + "Invalid non-numeric value '{}' for environment variable {}. Falling back to default: {}.", + valueFromEnvVariable, + THINCLIENT_PROBE_RECOVERY_THRESHOLD_VARIABLE, + DEFAULT_THINCLIENT_PROBE_RECOVERY_THRESHOLD); + } + } + } + + return Math.max(1, value); + } + + /** + * URL path for the thin-client connectivity probe. Default: {@code /connectivity-probe} + * (matches proxy contract from CosmosDB PR 2107592). Override with + * {@code COSMOS.THINCLIENT_PROBE_PATH} or {@code COSMOS_THINCLIENT_PROBE_PATH}; intended + * primarily for testing. 
+ */ + public static String getThinClientProbePath() { + String valueFromSystemProperty = System.getProperty(THINCLIENT_PROBE_PATH); + if (valueFromSystemProperty != null && !valueFromSystemProperty.isEmpty()) { + return valueFromSystemProperty; + } + + String valueFromEnvVariable = System.getenv(THINCLIENT_PROBE_PATH_VARIABLE); + if (valueFromEnvVariable != null && !valueFromEnvVariable.isEmpty()) { + return valueFromEnvVariable; + } + + return DEFAULT_THINCLIENT_PROBE_PATH; + } + public static boolean isNettyHttpClientMetricsEnabled() { return Boolean.parseBoolean( System.getProperty(NETTY_HTTP_CLIENT_METRICS_ENABLED, diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/EndpointProbeClient.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/EndpointProbeClient.java new file mode 100644 index 000000000000..31febd8224cf --- /dev/null +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/EndpointProbeClient.java @@ -0,0 +1,424 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+package com.azure.cosmos.implementation; + +import com.azure.cosmos.implementation.http.HttpClient; +import com.azure.cosmos.implementation.http.HttpHeaders; +import com.azure.cosmos.implementation.http.HttpRequest; +import com.azure.cosmos.implementation.http.HttpResponse; +import io.netty.handler.codec.http.HttpMethod; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import reactor.core.publisher.Flux; +import reactor.core.publisher.Mono; + +import java.io.Closeable; +import java.net.URI; +import java.net.URISyntaxException; +import java.time.Duration; +import java.time.Instant; +import java.util.Collection; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Objects; +import java.util.Set; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicLong; +import java.util.concurrent.atomic.AtomicReference; + +/** + * Drives the thin-client HTTP/2 connectivity probe lifecycle. + * + *

For every thin-client regional endpoint discovered via {@code GlobalEndpointManager} + * topology refresh, this client issues a {@code POST /connectivity-probe} (path + * configurable via {@link Configs#getThinClientProbePath()}) over the thin-client HTTP/2 + * {@link HttpClient}. The probe contract (confirmed via CosmosDB PR 2107592) is strict: + *

<ul>
+ *   <li>HTTP 200 → region is green.</li>
+ *   <li>Any other status (notably 503 when {@code enableConnectivityProbe} is OFF, 400 + * for wrong path, anything else) → region is red.</li>
+ *   <li>Connection error / TLS failure / HTTP/2 negotiation failure / timeout → red.</li>
+ * </ul>
+ * + *

A cycle is GREEN only if every supplied regional endpoint returns 200 within the + * per-probe budget; otherwise the cycle is RED. The client applies a configurable + * consecutive-failure threshold ({@link Configs#getThinClientProbeFailureThreshold()}) + * before flipping {@link #isProxyHealthy()} from {@code true} to {@code false}. A + * GREEN cycle resets the failure counter, and health is restored once + * {@link Configs#getThinClientProbeRecoveryThreshold()} consecutive GREEN cycles have + * completed (default 1, i.e. a single GREEN cycle restores health). + * + *

Routing decisions are made strictly at refresh boundaries; this class does not + * implement any per-request circuit-breaker. The data-plane routing site is expected to + * AND its existing thin-client-eligibility check with {@link #isProxyHealthy()}. + * + *

Optimistic startup: {@link #isProxyHealthy()} returns {@code true} until enough + * consecutive probe cycles complete RED to cross the failure threshold, or until + * {@link #forceUnhealthy(String)} is called. + * + *

This class is internal; it is not part of the published public API. + */ +public class EndpointProbeClient implements Closeable { + + private static final Logger logger = LoggerFactory.getLogger(EndpointProbeClient.class); + private static final byte[] EMPTY_BODY = new byte[0]; + + private final HttpClient httpClient; + private final int failureThreshold; + private final int recoveryThreshold; + private final Duration perProbeTimeout; + private final String probePath; + + private final AtomicBoolean proxyHealthy = new AtomicBoolean(true); + private final AtomicBoolean closed = new AtomicBoolean(false); + private final AtomicBoolean cycleInProgress = new AtomicBoolean(false); + private final AtomicLong cycleIdSeq = new AtomicLong(0); + private final AtomicInteger consecutiveFailures = new AtomicInteger(0); + private final AtomicInteger consecutiveSuccesses = new AtomicInteger(0); + // Lean diagnostic surface: only the last cycle outcome (true=GREEN/success, false=RED/failed) + // and the wall-clock instant at which it was recorded. Anything beyond this is best + // observed via logs to keep the diagnostic shape stable across releases. + private final AtomicReference lastCycleSuccess = new AtomicReference<>(null); + private final AtomicReference lastStateUpdatedAt = new AtomicReference<>(null); + + public EndpointProbeClient(HttpClient httpClient) { + this.httpClient = Objects.requireNonNull(httpClient, "httpClient"); + this.failureThreshold = Configs.getThinClientProbeFailureThreshold(); + this.recoveryThreshold = Configs.getThinClientProbeRecoveryThreshold(); + this.perProbeTimeout = Duration.ofMillis(Configs.getThinClientConnectionTimeoutInMs()); + this.probePath = Configs.getThinClientProbePath(); + } + + /** + * Runs one probe cycle against the supplied set of thin-client regional endpoints, + * updates internal health state with hysteresis, and emits the post-cycle value of + * {@link #isProxyHealthy()}. + * + *

When the feature flag {@link Configs#isThinClientProbeEnabled()} is {@code false}, + * this is a no-op that emits the current health value without issuing any HTTP traffic. + * + *

When the endpoint collection is {@code null} or empty (no thin-client regions + * discovered), the cycle is treated as a no-op and health state is left unchanged. + * + *

The returned Mono never errors; internal exceptions are absorbed and counted as a + * RED cycle so that probe failures do not propagate out and fail topology refresh. + */ + public Mono runProbeCycle(Collection regionalEndpoints) { + // All preconditions are re-evaluated at subscription time so an upstream + // cancellation (e.g. GlobalEndpointManager.close() disposing the swap-disposable) + // is honored before any HTTP I/O is initiated. + return Mono.defer(() -> { + if (this.closed.get()) { + return Mono.fromSupplier(this.proxyHealthy::get); + } + + if (!Configs.isThinClientProbeEnabled()) { + return Mono.fromSupplier(this.proxyHealthy::get); + } + + if (regionalEndpoints == null || regionalEndpoints.isEmpty()) { + // No thin-client regions in topology -> probe is moot. Leave state unchanged. + return Mono.fromSupplier(this.proxyHealthy::get); + } + + Set endpoints = new HashSet<>(regionalEndpoints); + endpoints.removeIf(Objects::isNull); + if (endpoints.isEmpty()) { + return Mono.fromSupplier(this.proxyHealthy::get); + } + + // Single-flight: if a cycle is already running, skip this trigger. Combined + // with the monotonic cycleId below, this guarantees that overlapping refresh + // calls cannot increment consecutiveFailures faster than once per *completed* + // cycle (addresses eager failover under refresh storms) and cannot let a stale + // older cycle clobber a newer one (addresses missed/flapping failover). 
+ if (!this.cycleInProgress.compareAndSet(false, true)) { + logger.debug("Thin-client probe cycle already in progress; skipping overlapping trigger."); + return Mono.fromSupplier(this.proxyHealthy::get); + } + + final long cycleId = this.cycleIdSeq.incrementAndGet(); + final Instant cycleStart = Instant.now(); + + return Flux + .fromIterable(endpoints) + .flatMap(this::probeEndpoint) + .collectList() + .map(results -> applyCycleResult(results, endpoints, cycleStart, cycleId)) + .onErrorResume(t -> { + logger.warn( + "Thin-client probe cycle threw an unexpected error; counting as RED cycle.", t); + return Mono.just(applyCycleResult( + Collections.singletonList(new ProbeResult(null, false, "exception:" + t.getClass().getSimpleName())), + endpoints, + cycleStart, + cycleId)); + }) + .doFinally(s -> this.cycleInProgress.set(false)); + }); + } + + /** @return current proxy-health flag; {@code true} means SDK may route data plane to proxy. */ + public boolean isProxyHealthy() { + return this.proxyHealthy.get(); + } + + /** + * Forces the proxy-health flag to {@code false} without running a probe cycle. Used by + * {@code GlobalEndpointManager} as a safeguard when account topology reports thin-client + * read locations but {@code LocationCache} cannot resolve a single thin-client regional + * endpoint (e.g. name-normalization mismatch between the gateway and thin-client region + * lists). In that scenario no probe can fire, so the optimistic-startup default would + * silently bypass the safety net and pin traffic to thin-client even when the proxy is + * effectively unreachable. Calling this method flips the gate to RED and increments + * {@code consecutiveFailures} so the next genuine cycle's hysteresis behavior is honored. + * + * @param reason short human-readable reason captured in the log. 
+ */ + public void forceUnhealthy(String reason) { + if (this.closed.get()) { + return; + } + this.consecutiveSuccesses.set(0); + int now = this.consecutiveFailures.incrementAndGet(); + this.lastCycleSuccess.set(Boolean.FALSE); + this.lastStateUpdatedAt.set(Instant.now()); + if (this.proxyHealthy.compareAndSet(true, false)) { + logger.warn( + "Thin-client probe gate flipped UNHEALTHY without an HTTP cycle (consecutiveFailures={}, reason='{}'). " + + "SDK will route data plane to Gateway V1 until {} consecutive GREEN cycle(s).", + now, reason, this.recoveryThreshold); + } + } + + /** @return read-only snapshot of probe state suitable for diagnostics. */ + public DiagnosticsSnapshot getDiagnosticsSnapshot() { + return new DiagnosticsSnapshot(this.lastCycleSuccess.get(), this.lastStateUpdatedAt.get()); + } + + /** + * Marks the probe client as closed. Subsequent {@link #runProbeCycle(Collection)} + * invocations short-circuit and issue no further HTTP/2 probes. The shared + * thin-client {@link HttpClient} is owned by {@code RxDocumentClientImpl} and is NOT + * closed here — its lifetime is bound to the {@code CosmosClient} itself. + * + *
<p>
In-flight probe Monos are not actively cancelled; they will self-terminate via + * the per-probe timeout. Once the client is closed, their results are dropped rather + * than applied: {@code applyCycleResult} returns early when {@code closed} is set, so + * a cycle that completes after close does not mutate health state. + */ + @Override + public void close() { + if (this.closed.compareAndSet(false, true)) { + logger.debug("EndpointProbeClient closed; no further thin-client probes will be issued."); + } + } + + private Mono probeEndpoint(URI regionalEndpoint) { + URI probeUri; + try { + probeUri = buildProbeUri(regionalEndpoint, this.probePath); + } catch (URISyntaxException e) { + logger.warn("Failed to build probe URI for {}: {}", regionalEndpoint, e.getMessage()); + return Mono.just(new ProbeResult(regionalEndpoint, false, "bad-uri")); + } + + HttpHeaders headers = new HttpHeaders(); + // Mirror thin-client traffic so any proxy-side routing/diagnostics treat this + // request the same way as a real data-plane request. + headers.set(HttpConstants.HttpHeaders.THINCLIENT_PROXY_OPERATION_TYPE, "ConnectivityProbe"); + + HttpRequest request = new HttpRequest( + HttpMethod.POST, + probeUri, + probeUri.getPort(), + headers); + request.withThinClientRequest(true); + request.withBody(EMPTY_BODY); + + return this.httpClient + .send(request, this.perProbeTimeout) + .flatMap(response -> { + int status = response.statusCode(); + boolean ok = status == 200; + if (!ok) { + logger.debug("Thin-client probe to {} returned status {}", regionalEndpoint, status); + } + // Drain the body within the probe Mono lifecycle so reactor-netty releases + // the underlying buffer and a slow/trickling body cannot leak resources + // outside `perProbeTimeout`. doFinally + onErrorResume guarantee that + // status-based RED/GREEN classification still wins regardless of how the + // drain stream terminates.
+ final ProbeResult result = new ProbeResult(regionalEndpoint, ok, "status:" + status); + return response.body() + .doOnNext(buf -> { + if (buf != null) { + buf.release(); + } + }) + .then(Mono.just(result)) + .timeout(this.perProbeTimeout) + .doFinally(s -> safeClose(response)) + .onErrorResume(drainError -> { + logger.debug("Thin-client probe body drain to {} failed: {}", + regionalEndpoint, drainError.toString()); + return Mono.just(result); + }); + }) + .onErrorResume(t -> { + logger.debug( + "Thin-client probe to {} failed: {}", regionalEndpoint, t.toString()); + return Mono.just(new ProbeResult(regionalEndpoint, false, "transport:" + t.getClass().getSimpleName())); + }); + } + + private Boolean applyCycleResult( + List results, + Set attemptedEndpoints, + Instant cycleStart, + long cycleId) { + + // If the probe client was closed (e.g. CosmosClient.close()) while this cycle was + // in flight, drop the result so we don't mutate health state on a dead client. + if (this.closed.get()) { + logger.debug( + "Thin-client probe cycle {} completed after close; dropping result.", cycleId); + return this.proxyHealthy.get(); + } + + int successCount = 0; + Set failedEndpoints = new HashSet<>(); + for (ProbeResult r : results) { + if (r.success) { + successCount++; + } else if (r.endpoint != null) { + failedEndpoints.add(r.endpoint); + } + } + // Treat any endpoint that didn't produce a ProbeResult as failed (defensive). 
+ if (results.size() < attemptedEndpoints.size()) { + for (URI attempted : attemptedEndpoints) { + boolean covered = false; + for (ProbeResult r : results) { + if (attempted.equals(r.endpoint)) { + covered = true; + break; + } + } + if (!covered) { + failedEndpoints.add(attempted); + } + } + } + int failureCount = attemptedEndpoints.size() - successCount; + + boolean cycleGreen = (successCount == attemptedEndpoints.size()) && failedEndpoints.isEmpty(); + + this.lastCycleSuccess.set(cycleGreen); + this.lastStateUpdatedAt.set(cycleStart); + + if (cycleGreen) { + this.consecutiveFailures.set(0); + int greens = this.consecutiveSuccesses.incrementAndGet(); + if (this.proxyHealthy.get()) { + if (greens == 1) { + // already healthy; suppress noisy log + return this.proxyHealthy.get(); + } + logger.debug( + "Thin-client probe cycle GREEN ({} endpoints). Consecutive GREEN={}; proxy remains healthy.", + successCount, greens); + } else if (greens >= this.recoveryThreshold) { + if (this.proxyHealthy.compareAndSet(false, true)) { + logger.info( + "Thin-client probe cycle GREEN ({} endpoints). Consecutive GREEN={} (recovery threshold={}); " + + "proxy marked HEALTHY; SDK will resume routing data plane to thin-client.", + successCount, greens, this.recoveryThreshold); + } + } else { + logger.info( + "Thin-client probe cycle GREEN ({} endpoints). Consecutive GREEN={} (recovery threshold={}); " + + "proxy remains UNHEALTHY pending further GREEN cycles.", + successCount, greens, this.recoveryThreshold); + } + } else { + this.consecutiveSuccesses.set(0); + int now = this.consecutiveFailures.incrementAndGet(); + if (now >= this.failureThreshold) { + if (this.proxyHealthy.compareAndSet(true, false)) { + logger.warn( + "Thin-client probe cycle RED ({} succeeded / {} failed) for {} consecutive cycles (threshold={}). " + + "Marking proxy UNHEALTHY; SDK will route data plane to Gateway V1 until {} consecutive GREEN cycle(s). 
" + + "Failed endpoints: {}", + successCount, failureCount, now, this.failureThreshold, this.recoveryThreshold, failedEndpoints); + } else { + logger.warn( + "Thin-client probe cycle RED ({} succeeded / {} failed); consecutive failures={} (threshold={}); proxy remains UNHEALTHY. Failed endpoints: {}", + successCount, failureCount, now, this.failureThreshold, failedEndpoints); + } + } else { + logger.info( + "Thin-client probe cycle RED ({} succeeded / {} failed); consecutive failures={} (threshold={}); proxy currently healthy={}. Failed endpoints: {}", + successCount, failureCount, now, this.failureThreshold, this.proxyHealthy.get(), failedEndpoints); + } + } + + return this.proxyHealthy.get(); + } + + private static URI buildProbeUri(URI regionalEndpoint, String probePath) throws URISyntaxException { + String normalizedPath = probePath.startsWith("/") ? probePath : "/" + probePath; + return new URI( + regionalEndpoint.getScheme(), + null, + regionalEndpoint.getHost(), + regionalEndpoint.getPort(), + normalizedPath, + null, + null); + } + + private static void safeClose(HttpResponse response) { + try { + response.close(); + } catch (Exception ignored) { + // best-effort + } + } + + private static final class ProbeResult { + final URI endpoint; + final boolean success; + @SuppressWarnings("unused") + final String reason; + + ProbeResult(URI endpoint, boolean success, String reason) { + this.endpoint = endpoint; + this.success = success; + this.reason = reason; + } + } + + /** + * Immutable, intentionally-lean snapshot of probe state for client diagnostics. + * Exposes only the outcome of the most recent probe cycle (or {@code null} if no + * cycle has run yet) and the wall-clock time at which that state was last updated. + * Richer details (per-endpoint failures, hysteresis counters, thresholds) are + * intentionally not surfaced here to keep the diagnostic shape stable across + * releases — consult the logs for those details. 
+ */ + public static final class DiagnosticsSnapshot { + private final Boolean lastCycleSuccess; + private final Instant lastStateUpdatedAt; + + DiagnosticsSnapshot(Boolean lastCycleSuccess, Instant lastStateUpdatedAt) { + this.lastCycleSuccess = lastCycleSuccess; + this.lastStateUpdatedAt = lastStateUpdatedAt; + } + + /** @return {@code true} if the most recent cycle was GREEN, {@code false} if RED, {@code null} if no cycle has completed yet. */ + public Boolean getLastCycleSuccess() { return lastCycleSuccess; } + + /** @return wall-clock instant at which {@link #getLastCycleSuccess()} was last updated, or {@code null} if never. */ + public Instant getLastStateUpdatedAt() { return lastStateUpdatedAt; } + } +} diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalEndpointManager.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalEndpointManager.java index e313b5219713..a7176fd159e9 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalEndpointManager.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalEndpointManager.java @@ -4,6 +4,7 @@ package com.azure.cosmos.implementation; import com.azure.cosmos.implementation.apachecommons.collections.list.UnmodifiableList; +import com.azure.cosmos.implementation.http.HttpClient; import com.azure.cosmos.implementation.routing.LocationCache; import com.azure.cosmos.implementation.routing.LocationHelper; import com.azure.cosmos.implementation.routing.RegionalRoutingContext; @@ -20,6 +21,7 @@ import java.util.Collection; import java.util.Collections; import java.util.List; +import java.util.Set; import java.util.concurrent.ThreadLocalRandom; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicReference; @@ -48,6 +50,7 @@ public class GlobalEndpointManager implements AutoCloseable { private volatile DatabaseAccount latestDatabaseAccount; private final 
AtomicBoolean hasThinClientReadLocations = new AtomicBoolean(false); private final AtomicBoolean lastRecordedPerPartitionAutomaticFailoverEnabledOnClient = new AtomicBoolean(false); + private final AtomicReference thinClientProbeClient = new AtomicReference<>(null); private final ReentrantReadWriteLock.WriteLock databaseAccountWriteLock; @@ -196,6 +199,18 @@ public void close() { if (disposable != null && !disposable.isDisposed()) { disposable.dispose(); } + // Stop accepting new thin-client probe cycles. The shared HttpClient is owned by + // RxDocumentClientImpl and is closed there. In-flight probe cycles are chained into + // the topology-refresh reactor pipeline, so cancellation propagates through the + // outer subscription disposed above. + EndpointProbeClient probeClient = this.thinClientProbeClient.getAndSet(null); + if (probeClient != null) { + try { + probeClient.close(); + } catch (Throwable t) { + logger.debug("Ignoring error while closing thin-client probe client.", t); + } + } logger.debug("GlobalEndpointManager closed."); } @@ -209,7 +224,7 @@ public Mono refreshLocationAsync(DatabaseAccount databaseAccount, boolean new ArrayList<>(this.getEffectivePreferredRegions()), this::getDatabaseAccountAsync); - return databaseAccountObs.map(dbAccount -> { + return databaseAccountObs.flatMap(dbAccount -> { this.databaseAccountWriteLock.lock(); try { @@ -218,9 +233,7 @@ public Mono refreshLocationAsync(DatabaseAccount databaseAccount, boolean this.databaseAccountWriteLock.unlock(); } - return dbAccount; - }).flatMap(dbAccount -> { - return Mono.empty(); + return this.runThinClientProbeCycleMono(); }); } @@ -254,6 +267,7 @@ private Mono refreshLocationPrivateAsync(DatabaseAccount databaseAccount) return Mono.defer(() -> { logger.debug("refreshLocationPrivateAsync() refreshing locations"); + Mono probePrefix = Mono.empty(); if (databaseAccount != null) { this.databaseAccountWriteLock.lock(); @@ -262,63 +276,67 @@ private Mono 
refreshLocationPrivateAsync(DatabaseAccount databaseAccount) } finally { this.databaseAccountWriteLock.unlock(); } + + probePrefix = this.runThinClientProbeCycleMono(); } - Utils.ValueHolder canRefreshInBackground = new Utils.ValueHolder<>(); - if (this.locationCache.shouldRefreshEndpoints(canRefreshInBackground)) { - logger.debug("shouldRefreshEndpoints: true"); + return probePrefix.then(Mono.defer(() -> { + Utils.ValueHolder canRefreshInBackground = new Utils.ValueHolder<>(); + if (this.locationCache.shouldRefreshEndpoints(canRefreshInBackground)) { + logger.debug("shouldRefreshEndpoints: true"); - if (databaseAccount == null && !canRefreshInBackground.v) { - logger.debug("shouldRefreshEndpoints: can't be done in background"); + if (databaseAccount == null && !canRefreshInBackground.v) { + logger.debug("shouldRefreshEndpoints: can't be done in background"); - Mono databaseAccountObs = getDatabaseAccountFromAnyLocationsAsync( - this.defaultEndpoint, - new ArrayList<>(this.getEffectivePreferredRegions()), - this::getDatabaseAccountAsync); + Mono databaseAccountObs = getDatabaseAccountFromAnyLocationsAsync( + this.defaultEndpoint, + new ArrayList<>(this.getEffectivePreferredRegions()), + this::getDatabaseAccountAsync); - return databaseAccountObs.map(dbAccount -> { - this.databaseAccountWriteLock.lock(); + return databaseAccountObs.flatMap(dbAccount -> { + this.databaseAccountWriteLock.lock(); - try { - this.locationCache.onDatabaseAccountRead(dbAccount); - } finally { - this.databaseAccountWriteLock.unlock(); - } + try { + this.locationCache.onDatabaseAccountRead(dbAccount); + } finally { + this.databaseAccountWriteLock.unlock(); + } - this.isRefreshing.set(false); - return dbAccount; - }).flatMap(dbAccount -> { - // trigger a startRefreshLocationTimerAsync don't wait on it. 
- if (!this.refreshInBackground.get()) { - this.startRefreshLocationTimerAsync(); - } - return Mono.empty(); - }); - } + this.isRefreshing.set(false); + return this.runThinClientProbeCycleMono(); + }).then(Mono.defer(() -> { + // trigger a startRefreshLocationTimerAsync don't wait on it. + if (!this.refreshInBackground.get()) { + this.startRefreshLocationTimerAsync(); + } + return Mono.empty(); + })); + } - // trigger a startRefreshLocationTimerAsync don't wait on it. - if (!this.refreshInBackground.get()) { - this.startRefreshLocationTimerAsync(); - } + // trigger a startRefreshLocationTimerAsync don't wait on it. + if (!this.refreshInBackground.get()) { + this.startRefreshLocationTimerAsync(); + } - this.isRefreshing.set(false); - return Mono.empty(); - } else { - logger.debug("shouldRefreshEndpoints: false, nothing to do."); - - // Even when no endpoint refresh is needed right now, we must keep the - // background refresh timer running so that future database account - // topology changes are detected — e.g., multi-write <-> single-write - // transitions, failover priority changes, region add/remove. - // This aligns with the .NET SDK behavior where the background loop - // continues unconditionally as long as the client is alive. - if (!this.refreshInBackground.get()) { - this.startRefreshLocationTimerAsync(); - } + this.isRefreshing.set(false); + return Mono.empty(); + } else { + logger.debug("shouldRefreshEndpoints: false, nothing to do."); + + // Even when no endpoint refresh is needed right now, we must keep the + // background refresh timer running so that future database account + // topology changes are detected — e.g., multi-write <-> single-write + // transitions, failover priority changes, region add/remove. + // This aligns with the .NET SDK behavior where the background loop + // continues unconditionally as long as the client is alive. 
+ if (!this.refreshInBackground.get()) { + this.startRefreshLocationTimerAsync(); + } - this.isRefreshing.set(false); - return Mono.empty(); - } + this.isRefreshing.set(false); + return Mono.empty(); + } + })); }); } @@ -383,6 +401,93 @@ public boolean hasThinClientReadLocations() { return this.hasThinClientReadLocations.get(); } + /** + * Wires the thin-client HTTP/2 {@link HttpClient} used by the connectivity-probe + * probeClient. Must be invoked by the client bootstrap before {@link #init()} so + * that the very first topology refresh can issue probes. + * + *
<p>
If {@link Configs#isThinClientProbeEnabled()} is {@code false}, the probeClient + * is still instantiated but {@link EndpointProbeClient#runProbeCycle(Collection)} + * short-circuits to a no-op and {@link EndpointProbeClient#isProxyHealthy()} stays + * optimistically {@code true}, preserving today's behavior. + */ + public void setThinClientHttpClient(HttpClient httpClient) { + if (httpClient == null) { + return; + } + try { + this.thinClientProbeClient.compareAndSet(null, new EndpointProbeClient(httpClient)); + } catch (Throwable t) { + // Probe wiring must never trip CosmosClient initialization. If the probe client + // can't be constructed for any reason, leave it null — `isProxyProbeHealthy()` + // then returns true (optimistic) and routing behaves as if no probe were wired. + logger.warn("Failed to wire thin-client connectivity-probe client; thin-client routing will proceed without probe gating.", t); + } + } + + /** + * Returns {@code true} when the thin-client connectivity-probe client considers + * the proxy fleet healthy enough to receive data-plane traffic. Returns {@code true} + * by default (optimistic) when no probe client has been wired (e.g. tests, or + * non-thin-client clients) so existing routing decisions are unaffected. + */ + public boolean isProxyProbeHealthy() { + EndpointProbeClient probeClient = this.thinClientProbeClient.get(); + return probeClient == null || probeClient.isProxyHealthy(); + } + + /** + * @return a read-only diagnostics snapshot of the probe state, or {@code null} when + * no probe client has been wired. + */ + public EndpointProbeClient.DiagnosticsSnapshot getThinClientProbeDiagnostics() { + EndpointProbeClient probeClient = this.thinClientProbeClient.get(); + return probeClient == null ? 
null : probeClient.getDiagnosticsSnapshot(); + } + + private Mono runThinClientProbeCycleMono() { + return Mono.defer(() -> { + EndpointProbeClient probeClient = this.thinClientProbeClient.get(); + if (probeClient == null) { + return Mono.empty(); + } + if (!this.hasThinClientReadLocations.get()) { + return Mono.empty(); + } + Set endpoints = this.locationCache.getThinClientRegionalEndpoints(); + if (endpoints.isEmpty()) { + // Safeguard against eligibility/resolution disagreement: hasThinClientReadLocations is + // derived from the raw account-topology response, while getThinClientRegionalEndpoints + // is derived from the resolved LocationCache contexts (which can drop endpoints when + // gateway and thin-client region names fail to normalize-match). Without this branch + // the routing gate (`useThinClientStoreModel`) would still pass `hasThinClientReadLocations` + // and our optimistic `proxyHealthy=true` default — and pin data-plane traffic to a + // thin-client model that has no resolved endpoint to route to. Flip the probe gate + // to RED so the SDK falls back to Gateway V1 until the resolution mismatch clears. + probeClient.forceUnhealthy("hasThinClientReadLocations=true but resolved endpoint set is empty"); + return Mono.empty(); + } + // Chained into the topology-refresh reactor pipeline. Cancellation propagates + // through the outer subscription (disposed in close() via backgroundRefreshDisposable). + // The probe client's internal single-flight CAS guarantees only one cycle runs at + // a time; runProbeCycle absorbs all per-probe errors and never errors the Mono. 
+ return probeClient + .runProbeCycle(endpoints) + .subscribeOn(CosmosSchedulers.GLOBAL_ENDPOINT_MANAGER_BOUNDED_ELASTIC) + .doOnNext(healthy -> { + if (logger.isDebugEnabled()) { + logger.debug("Thin-client probe cycle completed; proxyHealthy={}", healthy); + } + }) + .then(); + }).onErrorResume(t -> { + // Defensive: probe issues must never bubble out and fail topology refresh or + // CosmosClient init. Log and move on — the gate stays at its current state. + logger.warn("Thin-client probe cycle threw; ignoring to protect topology refresh.", t); + return Mono.empty(); + }); + } + private Mono getDatabaseAccountAsync(URI serviceEndpoint) { return this.owner.getDatabaseAccountFromEndpoint(serviceEndpoint) .doOnNext(databaseAccount -> { diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java index d4f99c183c24..fc5e59189e94 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java @@ -882,6 +882,26 @@ public void init(CosmosClientMetadataCachesSnapshot metadataCachesSnapshot, Func this.reactorHttpClient, this.additionalHeaders); + // Wire thin-client HttpClient into GEM so the connectivity-probe orchestrator + // can fan out probes after every topology refresh. Must happen BEFORE + // globalEndpointManager.init() so the first refresh probes immediately. + // Caveats — probe is only wired when: + // 1. Connection mode is GATEWAY (skip for Direct mode), AND + // 2. HTTP/2 is configured and effectively enabled, AND + // 3. Thin-client is enabled via Configs. + // All three are encoded in `this.useThinClient`. If false, no probe overhead + // is incurred and `isProxyProbeHealthy()` returns true by default (no-op gate). 
+ // Wiring itself is guarded inside GEM so any failure cannot trip client init. + if (this.useThinClient) { + try { + this.globalEndpointManager.setThinClientHttpClient(this.reactorHttpClient); + } catch (Throwable t) { + // Defense in depth: GEM already swallows wiring failures, but if anything + // does escape we must not fail CosmosClient construction over a probe. + logger.warn("Failed to wire thin-client connectivity-probe HttpClient; continuing without probe gating.", t); + } + } + this.perPartitionFailoverConfigModifier = (databaseAccount -> { this.initializePerPartitionFailover(databaseAccount); @@ -5602,6 +5622,11 @@ public GlobalEndpointManager getGlobalEndpointManager() { public GlobalPartitionEndpointManagerForPerPartitionCircuitBreaker getGlobalPartitionEndpointManagerForCircuitBreaker() { return RxDocumentClientImpl.this.globalPartitionEndpointManagerForPerPartitionCircuitBreaker; } + + @Override + public boolean useThinClient(RxDocumentServiceRequest request) { + return RxDocumentClientImpl.this.useThinClientStoreModel(request); + } }; } @@ -7388,7 +7413,8 @@ private RxStoreModel getStoreProxy(RxDocumentServiceRequest request) { resourceType == ResourceType.ClientEncryptionKey || resourceType.isScript() && operationType != OperationType.ExecuteJavaScript || resourceType == ResourceType.PartitionKeyRange || - resourceType == ResourceType.PartitionKey && operationType == OperationType.Delete) { + resourceType == ResourceType.PartitionKey && operationType == OperationType.Delete || + operationType == OperationType.QueryPlan) { return this.gatewayProxy; } @@ -7426,7 +7452,7 @@ private RxStoreModel getStoreProxy(RxDocumentServiceRequest request) { if ((operationType == OperationType.Query || operationType == OperationType.SqlQuery || operationType == OperationType.ReadFeed) && - Utils.isCollectionChild(request.getResourceType())) { + Utils.isCollectionChild(request.getResourceType())) { // Go to gateway only when partition key range and partition key are 
not set. This should be very rare if (request.getPartitionKeyRangeIdentity() == null && request.getHeaders().get(HttpConstants.HttpHeaders.PARTITION_KEY) == null) { @@ -8981,9 +9007,30 @@ public boolean useThinClient() { } private boolean useThinClientStoreModel(RxDocumentServiceRequest request) { + return shouldUseThinClientStoreModel( + this.useThinClient, + this.globalEndpointManager.hasThinClientReadLocations(), + this.globalEndpointManager.isProxyProbeHealthy(), + request); + } + + /** + * Pure-function gate predicate exposed at package visibility so unit tests can verify + * routing-fallback behavior (probe-unhealthy => gateway V1) without standing up a full + * {@link RxDocumentClientImpl}. All four inputs must be supplied by the caller; this + * method does not touch any client state. + */ + static boolean shouldUseThinClientStoreModel( + boolean useThinClient, + boolean hasThinClientReadLocations, + boolean isProxyProbeHealthy, + RxDocumentServiceRequest request) { + if (!useThinClient - || !this.globalEndpointManager.hasThinClientReadLocations() - || request.getResourceType() != ResourceType.Document) { + || !hasThinClientReadLocations + || !isProxyProbeHealthy + || (request.getResourceType() != ResourceType.Document + && !request.isExecuteStoredProcedureBasedRequest())) { return false; } @@ -8993,7 +9040,10 @@ private boolean useThinClientStoreModel(RxDocumentServiceRequest request) { return operationType.isPointOperation() || operationType == OperationType.Query || operationType == OperationType.Batch - || request.isChangeFeedRequest() && !request.isAllVersionsAndDeletesChangeFeedMode(); + || (request.isChangeFeedRequest() + && !request.isAllVersionsAndDeletesChangeFeedMode()) + || request.isExecuteStoredProcedureBasedRequest() + || operationType == OperationType.QueryPlan; } private DocumentClientRetryPolicy getRetryPolicyForPointOperation( diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentServiceRequest.java 
b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentServiceRequest.java index 03f27156f62f..1e2958040978 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentServiceRequest.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentServiceRequest.java @@ -968,6 +968,10 @@ public boolean isChangeFeedRequest() { return this.headers.containsKey(HttpConstants.HttpHeaders.A_IM); } + public boolean isExecuteStoredProcedureBasedRequest() { + return this.resourceType == ResourceType.StoredProcedure && this.operationType == OperationType.ExecuteJavaScript; + } + public boolean isAllVersionsAndDeletesChangeFeedMode() { String aImHeader = this.headers.get(HttpConstants.HttpHeaders.A_IM); return this.headers.containsKey(HttpConstants.HttpHeaders.A_IM) && HttpConstants.A_IMHeaderValues.FULL_FIDELITY_FEED.equals(aImHeader); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ThinClientStoreModel.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ThinClientStoreModel.java index 3a849b4c5baa..0f9cc8ae18cf 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ThinClientStoreModel.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ThinClientStoreModel.java @@ -252,10 +252,13 @@ public HttpRequest wrapInHttpRequest(RxDocumentServiceRequest request, URI reque byte[] epk = partitionKey.getEffectivePartitionKeyBytes(request.getPartitionKeyInternal(), request.getPartitionKeyDefinition()); rntbdRequest.setHeaderValue(RntbdConstants.RntbdRequestHeader.EffectivePartitionKey, epk); } else if (request.requestContext.resolvedPartitionKeyRange == null) { - throw new IllegalStateException( - "Resolved partition key range should not be null at this point. 
ResourceType: " - + request.getResourceType() + ", OperationType: " - + request.getOperationType()); + + if (request.getOperationType() != OperationType.QueryPlan) { + throw new IllegalStateException( + "Resolved partition key range should not be null at this point. ResourceType: " + + request.getResourceType() + ", OperationType: " + + request.getOperationType()); + } } else { PartitionKeyRange pkRange = request.requestContext.resolvedPartitionKeyRange; rntbdRequest.setHeaderValue(RntbdConstants.RntbdRequestHeader.StartEpkHash, HexConvert.hexToBytes(pkRange.getMinInclusive())); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/rntbd/RntbdConstants.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/rntbd/RntbdConstants.java index 15b4b50df559..4313be8e8293 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/rntbd/RntbdConstants.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/rntbd/RntbdConstants.java @@ -283,7 +283,8 @@ public enum RntbdOperationType { PreReplaceValidation((short) 0x0020, OperationType.PreReplaceValidation), AddComputeGatewayRequestCharges((short) 0x0021, OperationType.AddComputeGatewayRequestCharges), MigratePartition((short) 0x0022, OperationType.MigratePartition), - Batch((short) 0x0025, OperationType.Batch); + Batch((short) 0x0025, OperationType.Batch), + QueryPlan((short) 0x0042, OperationType.QueryPlan); private final short id; private final OperationType type; @@ -367,6 +368,8 @@ public static RntbdOperationType fromId(final short id) { return RntbdOperationType.MigratePartition; case 0x0025: return RntbdOperationType.Batch; + case 0x0042: + return RntbdOperationType.QueryPlan; default: throw new DecoderException(String.format("expected byte value matching %s value, not %s", RntbdOperationType.class.getSimpleName(), @@ -444,6 +447,8 @@ public 
static RntbdOperationType fromType(OperationType type) { return RntbdOperationType.AddComputeGatewayRequestCharges; case Batch: return RntbdOperationType.Batch; + case QueryPlan: + return RntbdOperationType.QueryPlan; default: throw new IllegalArgumentException(String.format("unrecognized operation type: %s", type)); } @@ -619,7 +624,10 @@ public enum RntbdRequestHeader implements RntbdHeader { ThroughputBucket((short)0x00DB, RntbdTokenType.Byte, false), WorkloadId((short)0x00DC, RntbdTokenType.Byte, false), HubRegionProcessingOnly((short)0x00EF, RntbdTokenType.Byte , false), - ReadConsistencyStrategy((short)0x00FE, RntbdTokenType.Byte, false); + ReadConsistencyStrategy((short)0x00FE, RntbdTokenType.Byte, false), + // QueryPlan headers for proxy — IDs match server-side RntbdConstants.cs (ADO PR 1982503) + SupportedQueryFeatures((short) 0x00FF, RntbdTokenType.String, false), + QueryVersion((short) 0x0100, RntbdTokenType.SmallString, false); public static final List thinClientHeadersInOrderList = Arrays.asList( EffectivePartitionKey, diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/rntbd/RntbdRequestFrame.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/rntbd/RntbdRequestFrame.java index 8eb632c48c8c..294b7399d6f5 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/rntbd/RntbdRequestFrame.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/rntbd/RntbdRequestFrame.java @@ -204,6 +204,8 @@ private static RntbdOperationType map(final OperationType operationType) { return RntbdOperationType.AddComputeGatewayRequestCharges; case Batch: return RntbdOperationType.Batch; + case QueryPlan: + return RntbdOperationType.QueryPlan; default: final String reason = String.format("Unrecognized operation type: %s", operationType); throw new UnsupportedOperationException(reason); diff --git 
a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/rntbd/RntbdRequestHeaders.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/rntbd/RntbdRequestHeaders.java index 38de8021c44f..6652d3d94cc8 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/rntbd/RntbdRequestHeaders.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/rntbd/RntbdRequestHeaders.java @@ -195,6 +195,11 @@ private static ImplementationBridgeHelpers.PriorityLevelHelper.PriorityLevelAcce // and BE will respect the per-request value. this.fillTokenFromHeader(headers, this::getClientVersion, HttpHeaders.VERSION); + + // QueryPlan headers — needed for proxy to extract supported features and query version + // from the RNTBD body (IDs match server-side proxy: ADO PR 1982503) + this.fillTokenFromHeader(headers, this::getSupportedQueryFeatures, HttpHeaders.SUPPORTED_QUERY_FEATURES); + this.fillTokenFromHeader(headers, this::getQueryVersion, HttpHeaders.QUERY_VERSION); } private RntbdRequestHeaders(ByteBuf in) { @@ -653,6 +658,14 @@ private RntbdToken getChangeFeedWireFormatVersion() { return this.get(RntbdRequestHeader.ChangeFeedWireFormatVersion); } + private RntbdToken getSupportedQueryFeatures() { + return this.get(RntbdRequestHeader.SupportedQueryFeatures); + } + + private RntbdToken getQueryVersion() { + return this.get(RntbdRequestHeader.QueryVersion); + } + private void addAimHeader(final Map headers) { final String value = headers.get(HttpHeaders.A_IM); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DocumentQueryExecutionContextFactory.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DocumentQueryExecutionContextFactory.java index 11528ed4f7ea..5ac697dad68b 100644 --- 
a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DocumentQueryExecutionContextFactory.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DocumentQueryExecutionContextFactory.java @@ -127,6 +127,7 @@ private static Mono getPartitionKeyRangesAn query, resourceLink, cosmosQueryRequestOptions, + collection, queryPlanCachingEnabled, queryPlanCache) .flatMap( @@ -145,6 +146,7 @@ private static Mono fetchQueryPlan( SqlQuerySpec query, String resourceLink, CosmosQueryRequestOptions cosmosQueryRequestOptions, + DocumentCollection collection, boolean queryPlanCachingEnabled, Map queryPlanCache) { @@ -163,7 +165,8 @@ private static Mono fetchQueryPlan( client, query, resourceLink, - cosmosQueryRequestOptions) + cosmosQueryRequestOptions, + collection) .doOnNext(partitionedQueryExecutionInfo -> { if (queryPlanCachingEnabled && isScopedToSinglePartition(cosmosQueryRequestOptions)) { tryCacheQueryPlan(query, partitionedQueryExecutionInfo, queryPlanCache); @@ -355,6 +358,7 @@ public static Mono fetchQueryPlanForValidation( sqlQuerySpec, resourceLink, queryRequestOptions, + null, queryPlanCachingEnabled, queryPlanCache); } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/HybridSearchDocumentQueryExecutionContext.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/HybridSearchDocumentQueryExecutionContext.java index 8dd42f5aa32c..830a37a2dda4 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/HybridSearchDocumentQueryExecutionContext.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/HybridSearchDocumentQueryExecutionContext.java @@ -4,6 +4,7 @@ package com.azure.cosmos.implementation.query; import com.azure.cosmos.BridgeInternal; +import com.azure.cosmos.CosmosDiagnostics; import com.azure.cosmos.CosmosException; import 
com.azure.cosmos.implementation.ClientSideRequestStatistics; import com.azure.cosmos.implementation.DiagnosticsClientContext; @@ -173,6 +174,7 @@ private void initialize( aggregatedGlobalStatistics = Flux.fromIterable(globalStatsProducers) .flatMap(producer -> producer.produceAsync() .map(documentProducerFeedResponse -> { + this.captureClientSideRequestStatistics(documentProducerFeedResponse.pageResult); List results = documentProducerFeedResponse.pageResult.getResults(); return new GlobalFullTextSearchQueryStatistics(results.get(0)); })) @@ -443,10 +445,27 @@ private Flux getComponentQueryResults(List targetFee return Flux.fromIterable(componentProducers) .flatMap(DocumentProducer::produceAsync) + .doOnNext(response -> this.captureClientSideRequestStatistics(response.pageResult)) .flatMap(response -> Flux.fromIterable(response.pageResult.getResults())); }); } + private void captureClientSideRequestStatistics(FeedResponse response) { + if (response == null || response.getCosmosDiagnostics() == null) { + return; + } + + CosmosDiagnostics diagnostics = response.getCosmosDiagnostics(); + Collection requestStatistics = + diagAccessor().getFeedResponseDiagnostics(diagnostics) != null + ? 
diagAccessor().getClientSideRequestStatisticsForQueryPipelineAggregations(diagnostics) + : diagAccessor().getClientSideRequestStatistics(diagnostics); + + if (requestStatistics != null && !requestStatistics.isEmpty()) { + this.clientSideRequestStatistics.addAll(requestStatistics); + } + } + private Flux retrieveRewrittenQueryInfos(List componentQueryInfos) { return aggregatedGlobalStatistics.hasElement().flatMapMany(globalStatistics -> { if (globalStatistics != null) { diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/IDocumentQueryClient.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/IDocumentQueryClient.java index 91679d40490e..bfc2293f6aeb 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/IDocumentQueryClient.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/IDocumentQueryClient.java @@ -110,4 +110,6 @@ Mono addPartitionLevelUnavailableRegionsOnRequest( GlobalEndpointManager getGlobalEndpointManager(); GlobalPartitionEndpointManagerForPerPartitionCircuitBreaker getGlobalPartitionEndpointManagerForCircuitBreaker(); + + boolean useThinClient(RxDocumentServiceRequest request); } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/PartitionedQueryExecutionInfo.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/PartitionedQueryExecutionInfo.java index 4967d2b1e6be..41b33ae2846b 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/PartitionedQueryExecutionInfo.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/PartitionedQueryExecutionInfo.java @@ -5,10 +5,11 @@ import com.azure.cosmos.implementation.RequestTimeline; import com.azure.cosmos.implementation.query.hybridsearch.HybridSearchQueryInfo; +import com.azure.cosmos.implementation.routing.PartitionKeyInternalHelper; import 
com.azure.cosmos.implementation.routing.Range; import com.azure.cosmos.implementation.JsonSerializable; -import com.azure.cosmos.implementation.Constants; import com.fasterxml.jackson.databind.node.ObjectNode; +import com.azure.cosmos.models.PartitionKeyDefinition; import java.util.List; @@ -25,23 +26,32 @@ public final class PartitionedQueryExecutionInfo extends JsonSerializable { private RequestTimeline queryPlanRequestTimeline; private HybridSearchQueryInfo hybridSearchQueryInfo; - PartitionedQueryExecutionInfo(QueryInfo queryInfo, List> queryRanges) { - this.queryInfo = queryInfo; - this.queryRanges = queryRanges; - - this.set( - PartitionedQueryExecutionInfoInternal.PARTITIONED_QUERY_EXECUTION_INFO_VERSION_PROPERTY, - Constants.PartitionedQueryExecutionInfo.VERSION_1 - ); - } - + /** + * Constructs with EPK hex string format expected for queryRanges. + */ public PartitionedQueryExecutionInfo(ObjectNode content, RequestTimeline queryPlanRequestTimeline) { super(content); this.queryPlanRequestTimeline = queryPlanRequestTimeline; } - public PartitionedQueryExecutionInfo(String jsonString) { - super(jsonString); + /** + * Constructs with PartitionKeyInternal array format expected for queryRanges. + * The {@code partitionKeyDefinition} is required for converting PartitionKeyInternal + * values to EPK hex strings. + * + * @param partitionKeyDefinition the container's partition key definition, must not be null. 
+ */ + PartitionedQueryExecutionInfo(ObjectNode content, RequestTimeline queryPlanRequestTimeline, PartitionKeyDefinition partitionKeyDefinition) { + super(content); + if (partitionKeyDefinition == null) { + throw new IllegalArgumentException("partitionKeyDefinition must not be null"); + } + this.queryPlanRequestTimeline = queryPlanRequestTimeline; + this.queryRanges = PartitionKeyInternalHelper.convertToSortedEpkRanges( + PartitionedQueryExecutionInfoInternal.QUERY_RANGES_PROPERTY, + content, + partitionKeyDefinition); + this.getPropertyBag().remove(PartitionedQueryExecutionInfoInternal.QUERY_RANGES_PROPERTY); } public int getVersion() { @@ -54,10 +64,26 @@ public QueryInfo getQueryInfo() { PartitionedQueryExecutionInfoInternal.QUERY_INFO_PROPERTY, QueryInfo.class)); } + /** + * Returns the query ranges as sorted EPK hex string ranges. + *

<p>
+ * Two formats exist: + * <ul>
+ * <li>Gateway V1: {@code min}/{@code max} are EPK hex strings and are + * deserialized directly via {@code getList()}.</li>
+ * <li>Thin client proxy: {@code min}/{@code max} are PartitionKeyInternal + * JSON arrays and are converted to EPK hex by the thin-client constructor.</li>
+ * </ul>
+ */ public List> getQueryRanges() { - return this.queryRanges != null ? this.queryRanges - : (this.queryRanges = super.getList( - PartitionedQueryExecutionInfoInternal.QUERY_RANGES_PROPERTY, QUERY_RANGES_CLASS)); + if (this.queryRanges != null) { + return this.queryRanges; + } + + // EPK hex string format — direct deserialization + this.queryRanges = super.getList( + PartitionedQueryExecutionInfoInternal.QUERY_RANGES_PROPERTY, QUERY_RANGES_CLASS); + return this.queryRanges; } public RequestTimeline getQueryPlanRequestTimeline() { diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/QueryInfo.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/QueryInfo.java index b8ea415ebf2f..e65101b02d51 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/QueryInfo.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/QueryInfo.java @@ -16,6 +16,7 @@ import java.time.Duration; import java.time.Instant; import java.util.Collection; +import java.util.HashMap; import java.util.List; import java.util.Map; @@ -166,10 +167,23 @@ public boolean hasNonStreamingOrderBy() { return this.nonStreamingOrderBy; } - public Map getGroupByAliasToAggregateType(){ - Map groupByAliasToAggregateMap; - groupByAliasToAggregateMap = super.getMap("groupByAliasToAggregateType"); - return groupByAliasToAggregateMap; + public Map getGroupByAliasToAggregateType() { + Map rawMap = super.getMap("groupByAliasToAggregateType"); + if (rawMap == null) { + return null; + } + + Map groupByAliasToAggregateMap = new HashMap<>(rawMap.size()); + rawMap.forEach((key, value) -> { + if (value == null || (value instanceof String && ((String) value).isEmpty())) { + groupByAliasToAggregateMap.put(key, null); + } else if (value instanceof AggregateOperator) { + groupByAliasToAggregateMap.put(key, (AggregateOperator) value); + } else { + groupByAliasToAggregateMap.put(key, 
AggregateOperator.valueOf(String.valueOf(value))); + } + }); + return groupByAliasToAggregateMap; } public List getGroupByAliases() { @@ -267,4 +281,3 @@ public int hashCode() { return super.hashCode(); } } - diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/QueryPlanRetriever.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/QueryPlanRetriever.java index 591b601eb7c4..308dcefb1c3b 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/QueryPlanRetriever.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/QueryPlanRetriever.java @@ -8,13 +8,18 @@ import com.azure.cosmos.CosmosException; import com.azure.cosmos.implementation.Configs; import com.azure.cosmos.implementation.DiagnosticsClientContext; +import com.azure.cosmos.implementation.DocumentCollection; import com.azure.cosmos.implementation.ImplementationBridgeHelpers; import com.azure.cosmos.implementation.PathsHelper; +import com.azure.cosmos.implementation.RequestTimeline; import com.azure.cosmos.implementation.Utils; import com.azure.cosmos.implementation.routing.PartitionKeyInternal; +import com.azure.cosmos.implementation.routing.PartitionKeyInternalHelper; +import com.azure.cosmos.implementation.routing.Range; import com.azure.cosmos.models.CosmosQueryRequestOptions; import com.azure.cosmos.models.ModelBridgeInternal; import com.azure.cosmos.models.PartitionKey; +import com.azure.cosmos.models.PartitionKeyDefinition; import com.azure.cosmos.models.SqlParameter; import com.azure.cosmos.models.SqlQuerySpec; import com.azure.cosmos.implementation.BackoffRetryUtility; @@ -50,6 +55,9 @@ private static ImplementationBridgeHelpers.CosmosQueryRequestOptionsHelper.Cosmo // new NonStreamingOrderBy query feature the client might run into some issue of not being able to recognize this, // and throw a 400 exception. 
If the environment variable `AZURE_COSMOS_DISABLE_NON_STREAMING_ORDER_BY` is set to // True to opt out of this new query feature, we will return the OLD query features to operate correctly. + // TODO: Add ListAndSetAggregate after Java supports MAKELIST/MAKESET query aggregation. + // TODO: Add HybridSearchSkipOrderByRewrite after the Java hybrid query pipeline can consume the optimized plan. + // See PR #47759 review. private static final String SUPPORTED_QUERY_FEATURES = QueryFeature.Aggregate.name() + ", " + QueryFeature.CompositeAggregate.name() + ", " + QueryFeature.MultipleOrderBy.name() + ", " + @@ -63,6 +71,7 @@ private static ImplementationBridgeHelpers.CosmosQueryRequestOptionsHelper.Cosmo QueryFeature.NonValueAggregate.name() + ", " + QueryFeature.NonStreamingOrderBy.name() + ", " + QueryFeature.HybridSearch.name() + ", " + + QueryFeature.CountIf.name() + ", " + QueryFeature.WeightedRankFusion.name(); private static final String OLD_SUPPORTED_QUERY_FEATURES = QueryFeature.Aggregate.name() + ", " + @@ -81,7 +90,8 @@ static Mono getQueryPlanThroughGatewayAsync(Diagn IDocumentQueryClient queryClient, SqlQuerySpec sqlQuerySpec, String resourceLink, - CosmosQueryRequestOptions initialQueryRequestOptions) { + CosmosQueryRequestOptions initialQueryRequestOptions, + DocumentCollection collection) { CosmosQueryRequestOptions nonNullRequestOptions = initialQueryRequestOptions != null ? initialQueryRequestOptions @@ -89,6 +99,8 @@ static Mono getQueryPlanThroughGatewayAsync(Diagn PartitionKey partitionKey = nonNullRequestOptions.getPartitionKey(); + PartitionKeyDefinition partitionKeyDefinition = collection != null ? 
collection.getPartitionKey() : null; + final Map requestHeaders = new HashMap<>(); requestHeaders.put(HttpConstants.HttpHeaders.CONTENT_TYPE, RuntimeConstants.MediaTypes.JSON); requestHeaders.put(HttpConstants.HttpHeaders.IS_QUERY_PLAN_REQUEST, TRUE); @@ -106,7 +118,10 @@ static Mono getQueryPlanThroughGatewayAsync(Diagn ResourceType.Document, resourceLink, requestHeaders); - queryPlanRequest.useGatewayMode = true; + // Validation callers do not have collection metadata, so keep those query-plan + // requests on Gateway V1. Normal query execution can route through thin client, + // where PartitionKeyDefinition is available to convert proxy query ranges. + queryPlanRequest.useGatewayMode = partitionKeyDefinition == null; // Create a defensive copy to prevent concurrent modification of the shared // SqlQuerySpec's internal ObjectNode when multiple threads retrieve the query @@ -138,10 +153,29 @@ static Mono getQueryPlanThroughGatewayAsync(Diagn return BackoffRetryUtility.executeRetry(() -> { retryPolicyInstance.onBeforeSendRequest(req); return queryClient.executeQueryAsync(req).flatMap(rxDocumentServiceResponse -> { - PartitionedQueryExecutionInfo partitionedQueryExecutionInfo = - new PartitionedQueryExecutionInfo( - (ObjectNode) rxDocumentServiceResponse.getResponseBody(), - rxDocumentServiceResponse.getGatewayHttpRequestTimeline()); + ObjectNode responseBody = (ObjectNode) rxDocumentServiceResponse.getResponseBody(); + RequestTimeline timeline = rxDocumentServiceResponse.getGatewayHttpRequestTimeline(); + + PartitionedQueryExecutionInfo partitionedQueryExecutionInfo; + + // In thin client mode, the proxy returns queryRanges in PartitionKeyInternal + // format (e.g., {"min": ["value"], "max": ["Infinity"]}). Convert to sorted + // List> with EPK hex strings and pass directly to the DTO — + // avoiding a redundant JSON round-trip. 
+ if (req.useThinClientMode && partitionKeyDefinition == null) { + throw new IllegalStateException( + "PartitionKeyDefinition must not be null in thin client mode. " + + "Ensure DocumentCollection is resolved before calling getQueryPlanThroughGatewayAsync."); + } + + if (req.useThinClientMode) { + partitionedQueryExecutionInfo = new PartitionedQueryExecutionInfo( + responseBody, timeline, partitionKeyDefinition); + } else { + partitionedQueryExecutionInfo = new PartitionedQueryExecutionInfo( + responseBody, timeline); + } + return Mono.just(partitionedQueryExecutionInfo); }); }, retryPolicyInstance); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/routing/LocationCache.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/routing/LocationCache.java index fe09800f1773..f9e3f037a365 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/routing/LocationCache.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/routing/LocationCache.java @@ -137,6 +137,66 @@ public List getAvailableReadRegionalRoutingContexts() { return this.locationInfo.availableReadRegionalRoutingContexts; } + /** + * Returns the set of non-null thin-client regional endpoints discovered in the most + * recent topology refresh. Used by the thin-client connectivity probe to fan out + * HTTP/2 probes after each topology update. + * + *

All-or-nothing semantics. Probing requires every region in the resolved + * read/write topology to expose a thin-client endpoint. If any region is missing one, + * this method returns an empty set so the caller skips probing entirely (and the + * routing gate falls back to Gateway V1) rather than partially routing through a + * thin-client mesh that has gaps. + * + * @return immutable snapshot of thin-client regional endpoints, or an empty set when + * no thin-client locations are present or when the resolved topology has a + * region that does not expose a thin-client endpoint. + */ + public Set getThinClientRegionalEndpoints() { + Set endpoints = new HashSet<>(); + if (!collectThinClientEndpointsAllOrNothing( + this.locationInfo.availableReadRegionalRoutingContextsByRegionName, endpoints)) { + return Collections.emptySet(); + } + // Also walk write regions: useThinClientStoreModel() routes writes (point ops, batch) through + // thin-client too, so a write-only region's thin-client endpoint must be probed as well. + // Set semantics dedupe the common case where a region is both readable and writable. + if (!collectThinClientEndpointsAllOrNothing( + this.locationInfo.availableWriteRegionalRoutingContextsByRegionName, endpoints)) { + return Collections.emptySet(); + } + if (endpoints.isEmpty()) { + return Collections.emptySet(); + } + return Collections.unmodifiableSet(endpoints); + } + + /** + * Collects thin-client endpoints from {@code byRegion} into {@code sink}. + * + * @return {@code true} if every region in {@code byRegion} contributed a non-null + * thin-client endpoint (or {@code byRegion} was empty/null); {@code false} + * if any region was missing its thin-client endpoint, in which case the + * caller must treat the entire topology as ineligible for probing. 
+ */ + private static boolean collectThinClientEndpointsAllOrNothing( + UnmodifiableMap byRegion, Set sink) { + if (byRegion == null || byRegion.isEmpty()) { + return true; + } + for (RegionalRoutingContext ctx : byRegion.values()) { + if (ctx == null) { + continue; + } + URI thinclientEndpoint = ctx.getThinclientRegionalEndpoint(); + if (thinclientEndpoint == null) { + return false; + } + sink.add(thinclientEndpoint); + } + return true; + } + public List getAvailableWriteRegionalRoutingContexts() { return this.locationInfo.availableWriteRegionalRoutingContexts; } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/routing/PartitionKeyInternalHelper.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/routing/PartitionKeyInternalHelper.java index 2803536084be..5a1e3c0528e1 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/routing/PartitionKeyInternalHelper.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/routing/PartitionKeyInternalHelper.java @@ -9,13 +9,24 @@ import com.azure.cosmos.implementation.ByteBufferOutputStream; import com.azure.cosmos.implementation.Bytes; import com.azure.cosmos.implementation.RMResources; +import com.azure.cosmos.implementation.Utils; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.node.ArrayNode; +import com.fasterxml.jackson.databind.node.ObjectNode; import java.io.IOException; import java.nio.ByteBuffer; +import java.util.ArrayList; import java.util.List; +import java.util.Spliterators; +import java.util.stream.Collectors; +import java.util.stream.StreamSupport; public class PartitionKeyInternalHelper { + private static final Range.MinComparator MIN_COMPARATOR = new Range.MinComparator<>(); + public static final String MinimumInclusiveEffectivePartitionKey = 
toHexEncodedBinaryString(PartitionKeyInternal.EmptyPartitionKey.components); public static final byte[] MinimumInclusiveEffectivePartitionKeyBytes = toBinary(PartitionKeyInternal.EmptyPartitionKey.components); public static final String MaximumExclusiveEffectivePartitionKey = toHexEncodedBinaryString(PartitionKeyInternal.InfinityPartitionKey.components); @@ -294,4 +305,104 @@ static public Range getEPKRangeForPrefixPartitionKey( String maxEPK = minEPK + MaximumExclusiveEffectivePartitionKey; return new Range<>(minEPK, maxEPK, true, false); } + + /** + * Converts query ranges from PartitionKeyInternal JSON format to sorted EPK hex string ranges. + * + *

<p>The thin client proxy returns queryRanges as PartitionKeyInternal JSON arrays + * (e.g., {@code {"min": ["value"], "max": ["Infinity"]}}). This method parses each range, + * computes the EPK hex string via {@link #getEffectivePartitionKeyString}, and sorts the + * result using {@link Range.MinComparator} to satisfy + * {@link RoutingMapProviderHelper#getOverlappingRanges} which requires sorted, non-overlapping input. + * + * @param queryRangesProperty the name of the JSON property containing the ranges array + * @param queryPlanJson the raw query plan JSON containing PartitionKeyInternal ranges + * @param partitionKeyDefinition the container's partition key definition + * @return sorted list of EPK hex string ranges; empty only when the ranges array itself is empty + * @throws IllegalStateException if the property is absent or is not a JSON array, or if a range element is malformed + */ + public static List> convertToSortedEpkRanges( + String queryRangesProperty, + ObjectNode queryPlanJson, + PartitionKeyDefinition partitionKeyDefinition) { + + JsonNode queryRangesNode = queryPlanJson.get(queryRangesProperty); + if (queryRangesNode == null || !queryRangesNode.isArray()) { + String actualType = queryRangesNode == null ? "null (property absent)" : queryRangesNode.getNodeType().name(); + throw new IllegalStateException( + "Thin client proxy query plan response has missing or invalid '" + queryRangesProperty + "' property. " + + "Expected: JSON array of {min, max, isMinInclusive, isMaxInclusive} range objects. " + + "Actual node type: " + actualType + ". " + + "Response keys: " + StreamSupport.stream( + Spliterators.spliteratorUnknownSize(queryPlanJson.fieldNames(), 0), false) + .collect(Collectors.joining(", ")) + ". 
" + + "This indicates a protocol mismatch between the SDK and the thin client proxy."); + } + + ArrayNode rawRanges = (ArrayNode) queryRangesNode; + List> epkRanges = new ArrayList<>(rawRanges.size()); + + for (JsonNode rangeNode : rawRanges) { + if (!rangeNode.isObject()) { + throw new IllegalStateException( + "Thin client proxy query plan response contains a non-object element in queryRanges array. " + + "Expected: JSON object with {min, max, isMinInclusive, isMaxInclusive}. " + + "Array size: " + rawRanges.size() + ". " + + "Actual node type: " + rangeNode.getNodeType().name() + "."); + } + ObjectNode rangeObj = (ObjectNode) rangeNode; + + String minEpk = partitionKeyInternalToEpkString(rangeObj.get("min"), partitionKeyDefinition); + String maxEpk = partitionKeyInternalToEpkString(rangeObj.get("max"), partitionKeyDefinition); + + JsonNode minInclusiveNode = rangeObj.get("isMinInclusive"); + JsonNode maxInclusiveNode = rangeObj.get("isMaxInclusive"); + if (minInclusiveNode == null || maxInclusiveNode == null) { + throw new IllegalStateException( + "Thin client proxy query plan range missing required fields. " + + "Expected: isMinInclusive and isMaxInclusive. " + + "Range object fields: " + StreamSupport.stream( + Spliterators.spliteratorUnknownSize(rangeObj.fieldNames(), 0), false) + .collect(Collectors.joining(", ")) + "."); + } + boolean isMinInclusive = minInclusiveNode.asBoolean(); + boolean isMaxInclusive = maxInclusiveNode.asBoolean(); + + epkRanges.add(new Range<>(minEpk, maxEpk, isMinInclusive, isMaxInclusive)); + } + + epkRanges.sort(MIN_COMPARATOR); + return epkRanges; + } + + /** + * Converts a single PartitionKeyInternal JSON node to its EPK hex string representation. 
+ * + * @param rangeBoundaryNode the JSON node representing a range boundary (min or max) in PartitionKeyInternal format + * @param partitionKeyDefinition the container's partition key definition + * @return the EPK hex string + */ + private static String partitionKeyInternalToEpkString(JsonNode rangeBoundaryNode, PartitionKeyDefinition partitionKeyDefinition) { + if (rangeBoundaryNode == null || rangeBoundaryNode.isNull()) { + throw new IllegalStateException( + "Thin client proxy query plan range has null boundary value. " + + "Expected: PartitionKeyInternal JSON array (e.g., [\"value\"] or [{\"type\":\"Infinity\"}])."); + } + + PartitionKeyInternal partitionKey; + try { + partitionKey = Utils.getSimpleObjectMapper().treeToValue(rangeBoundaryNode, PartitionKeyInternal.class); + } catch (JsonProcessingException e) { + throw new IllegalStateException( + "Failed to parse PartitionKeyInternal from range boundary. " + + "Boundary node type: " + rangeBoundaryNode.getNodeType().name() + ".", e); + } + + if (partitionKey.getComponents() == null) { + throw new IllegalStateException( + "Thin client proxy query plan range boundary deserialized to NonePartitionKey (null components). " + + "Boundary node type: " + rangeBoundaryNode.getNodeType().name() + "."); + } + + return getEffectivePartitionKeyString(partitionKey, partitionKeyDefinition); + } }