diff --git a/changelog/unreleased/SOLR-18176-shardhandler-bottleneck.yml b/changelog/unreleased/SOLR-18176-shardhandler-bottleneck.yml new file mode 100644 index 000000000000..7ce9084e0cac --- /dev/null +++ b/changelog/unreleased/SOLR-18176-shardhandler-bottleneck.yml @@ -0,0 +1,9 @@ +title: Increased query throughput by removing a call to ZooKeeper for cluster state that should have been cached. Happens when Solr does distributed search over multiple collections, and when the coordinator has no local replica for some of them. +type: changed +authors: + - name: Matthew Biscocho +links: + - name: SOLR-18176 + url: https://issues.apache.org/jira/browse/SOLR-18176 + - name: SOLR-15352 + url: https://issues.apache.org/jira/browse/SOLR-15352 diff --git a/solr/core/src/java/org/apache/solr/handler/component/CloudReplicaSource.java b/solr/core/src/java/org/apache/solr/handler/component/CloudReplicaSource.java index 836d0951f062..5315d413f421 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/CloudReplicaSource.java +++ b/solr/core/src/java/org/apache/solr/handler/component/CloudReplicaSource.java @@ -109,12 +109,13 @@ private void withShardsParam(Builder builder, String shardsParam) { if (sliceOrUrl.indexOf('/') < 0) { // this is a logical shard this.slices[i] = sliceOrUrl; - replicas[i] = - findReplicas( - builder, - shardsParam, - clusterState, - clusterState.getCollection(builder.collection).getSlice(sliceOrUrl)); + DocCollection coll = clusterState.getCollectionOrNull(builder.collection, true); + if (coll == null) { + throw new SolrException( + SolrException.ErrorCode.BAD_REQUEST, + "Could not find collection to resolve replicas: " + builder.collection); + } + replicas[i] = findReplicas(builder, shardsParam, clusterState, coll.getSlice(sliceOrUrl)); } else { // this has urls this.replicas[i] = StrUtils.splitSmart(sliceOrUrl, "|", true); @@ -189,7 +190,12 @@ private void addSlices( String collectionName, String shardKeys, boolean multiCollection) { - DocCollection coll = state.getCollection(collectionName); + DocCollection coll = state.getCollectionOrNull(collectionName, true); + if (coll == null) { + throw new SolrException( + SolrException.ErrorCode.BAD_REQUEST, + "Could not find collection to add slices: " + collectionName); + } Collection slices = coll.getRouter().getSearchSlices(shardKeys, params, coll); ClientUtils.addSlices(target, collectionName, slices, multiCollection); } diff --git a/solr/core/src/test/org/apache/solr/handler/component/DistributedQueryComponentOptimizationTest.java b/solr/core/src/test/org/apache/solr/handler/component/DistributedQueryComponentOptimizationTest.java index 349d6dda7114..9387d8680b75 100644 --- a/solr/core/src/test/org/apache/solr/handler/component/DistributedQueryComponentOptimizationTest.java +++ b/solr/core/src/test/org/apache/solr/handler/component/DistributedQueryComponentOptimizationTest.java @@ -24,15 +24,19 @@ import java.util.Set; import java.util.concurrent.TimeUnit; import org.apache.solr.BaseDistributedSearchTestCase; +import org.apache.solr.client.solrj.SolrClient; +import org.apache.solr.client.solrj.jetty.HttpJettySolrClient; import org.apache.solr.client.solrj.request.CollectionAdminRequest; import org.apache.solr.client.solrj.request.SolrQuery; import org.apache.solr.client.solrj.request.UpdateRequest; import org.apache.solr.client.solrj.response.QueryResponse; import org.apache.solr.cloud.SolrCloudTestCase; +import org.apache.solr.common.cloud.SolrZKMetricsListener; import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.params.ShardParams; import org.apache.solr.common.util.SimpleOrderedMap; import org.apache.solr.common.util.StrUtils; +import org.apache.solr.embedded.JettySolrRunner; import org.junit.BeforeClass; import org.junit.Test; @@ -707,6 +711,57 @@ private QueryResponse queryWithAsserts(String... q) throws Exception { return response; } + /** + * When a node resolves collection state for a collection it doesn't host, queries should use + * cached state and not make ZK calls on every query. + */ + @Test + public void testDistributedQueryDoesNotReadFromZk() throws Exception { + final String secondColl = "secondColl"; + + // Create a collection on only 1 node so the other node uses LazyCollectionRef for state + List jettys = cluster.getJettySolrRunners(); + CollectionAdminRequest.createCollection(secondColl, "conf", 1, 1) + .setCreateNodeSet(jettys.get(0).getNodeName()) + .processAndWait(cluster.getSolrClient(), DEFAULT_TIMEOUT); + cluster + .getZkStateReader() + .waitForState( + secondColl, + DEFAULT_TIMEOUT, + TimeUnit.SECONDS, + (n, c) -> SolrCloudTestCase.replicasForCollectionAreFullyActive(n, c, 1, 1)); + + try { + // Node 1 hosts COLLECTION but not secondColl. + // Send a multi-collection query to trigger LazyCollectionRef get call + JettySolrRunner nodeWithoutSecondColl = jettys.get(1); + try (SolrClient client = + new HttpJettySolrClient.Builder(nodeWithoutSecondColl.getBaseUrl().toString()).build()) { + + String collectionsParameter = COLLECTION + "," + secondColl; + + // Warm up LazyCollectionRef state cache with query + client.query(COLLECTION, new SolrQuery("q", "*:*", "collection", collectionsParameter)); + + // Get ZK metrics from the coordinator node (the one we're querying) + SolrZKMetricsListener metrics = + nodeWithoutSecondColl.getCoreContainer().getZkController().getZkClient().getMetrics(); + long existsBefore = metrics.getExistsChecks(); + + // Query again and assert that exists call is not made + client.query(COLLECTION, new SolrQuery("q", "*:*", "collection", collectionsParameter)); + assertEquals( + "Query should not cause ZK exists checks as collection state should be cached", + existsBefore, + metrics.getExistsChecks()); + } + } finally { + CollectionAdminRequest.deleteCollection(secondColl) + .processAndWait(cluster.getSolrClient(), DEFAULT_TIMEOUT); + } + } + private int getNumRequests( Map> requests) { int beforeNumRequests = 0;