From cddb96f426608402bb6f762119f0d3de8e2920d3 Mon Sep 17 00:00:00 2001 From: April M <36110273+aimurphy@users.noreply.github.com> Date: Thu, 19 Feb 2026 18:26:59 -0800 Subject: [PATCH 1/5] more attributes --- antora.yml | 31 +-- local-preview-playbook.yml | 178 +++++++++++++++--- modules/ROOT/nav.adoc | 2 +- modules/ROOT/pages/astra-migration-paths.adoc | 2 +- .../ROOT/pages/cassandra-data-migrator.adoc | 40 ++-- modules/ROOT/pages/components.adoc | 14 +- .../ROOT/pages/connect-clients-to-proxy.adoc | 16 +- .../ROOT/pages/connect-clients-to-target.adoc | 28 +-- modules/ROOT/pages/create-target.adoc | 14 +- .../ROOT/pages/deployment-infrastructure.adoc | 10 +- modules/ROOT/pages/faqs.adoc | 20 +- .../ROOT/pages/feasibility-checklists.adoc | 36 ++-- modules/ROOT/pages/hcd-migration-paths.adoc | 2 +- modules/ROOT/pages/index.adoc | 8 +- modules/ROOT/pages/introduction.adoc | 4 +- .../ROOT/pages/manage-proxy-instances.adoc | 4 +- .../ROOT/pages/migrate-and-validate-data.adoc | 16 +- .../ROOT/pages/setup-ansible-playbooks.adoc | 4 +- modules/ROOT/pages/troubleshooting-tips.adoc | 22 +-- .../ROOT/pages/zdm-proxy-migration-paths.adoc | 2 +- .../partials/dsbulk-migrator-deprecation.adoc | 4 +- .../sideloader/pages/migrate-sideloader.adoc | 4 +- .../sideloader/pages/prepare-sideloader.adoc | 22 +-- .../sideloader/pages/sideloader-overview.adoc | 8 +- modules/sideloader/partials/no-return.adoc | 2 +- modules/sideloader/partials/validate.adoc | 4 +- 26 files changed, 321 insertions(+), 176 deletions(-) diff --git a/antora.yml b/antora.yml index 737e374b..75fabcea 100644 --- a/antora.yml +++ b/antora.yml @@ -8,19 +8,27 @@ nav: asciidoc: attributes: - #General attributes company: 'DataStax' support-url: 'https://www.ibm.com/mysupport/s/' - #Other product attributes + spark-reg: 'Apache Spark(TM)' + spark: 'Apache Spark' + spark-short: 'Spark' + hadoop-reg: 'Apache Hadoop(R)' cass-reg: 'Apache Cassandra(R)' cass: 'Apache Cassandra' cass-short: 'Cassandra' + cql: 'Cassandra Query 
Language (CQL)' + cql-short: 'CQL' + cql-shell: 'CQL shell' + cql-console: 'CQL console' + cql-proxy-url: 'https://github.com/datastax/cql-proxy' + java-driver-url: 'https://github.com/apache/cassandra-java-driver' dse: 'DataStax Enterprise (DSE)' dse-short: 'DSE' + metrics-collector: 'DSE Metrics Collector' hcd: 'Hyper-Converged Database (HCD)' hcd-short: 'HCD' mc: 'Mission Control' - #Astra DB attributes astra-db: 'Astra DB' astra: 'Astra' db-serverless: 'Serverless (non-vector)' @@ -31,14 +39,9 @@ asciidoc: scb: 'Secure Connect Bundle (SCB)' scb-short: 'SCB' scb-brief: 'Secure Connect Bundle' - #Sideloader has a specific name in this repo. It is not identical to the one in the Serverless repo. - sstable-sideloader: '{astra-db} Sideloader' - #devops api attributes devops-api: 'DevOps API' devops-api-ref-url: 'xref:astra-api-docs:ROOT:attachment$devops-api/index.html' - #data api attributes data-api: 'Data API' - #Migration docs attributes product: 'Zero Downtime Migration' product-short: 'ZDM' product-proxy: 'ZDM Proxy' @@ -48,9 +51,13 @@ asciidoc: product-automation-repo: 'https://github.com/datastax/zdm-proxy-automation' product-automation-shield: 'image:https://img.shields.io/github/v/release/datastax/zdm-proxy-automation?label=latest[alt="Latest zdm-proxy-automation release on GitHub",link="{product-automation-repo}/releases"]' product-demo: 'ZDM Demo Client' - dsbulk-loader: 'DSBulk Loader' - dsbulk-loader-repo: 'https://github.com/datastax/dsbulk' - cass-migrator: 'Cassandra Data Migrator' + dsbulk: 'DataStax Bulk Loader (DSBulk)' + dsbulk-short: 'DSBulk' + dsbulk-repo: 'https://github.com/datastax/dsbulk' + dsbulk-migrator: 'DSBulk Migrator' + cass-migrator: 'Cassandra Data Migrator (CDM)' cass-migrator-short: 'CDM' cass-migrator-repo: 'https://github.com/datastax/cassandra-data-migrator' - cass-migrator-shield: 'image:https://img.shields.io/github/v/release/datastax/cassandra-data-migrator?label=latest[alt="Latest cassandra-data-migrator release on 
GitHub",link="{cass-migrator-repo}/packages"]' \ No newline at end of file + cass-migrator-shield: 'image:https://img.shields.io/github/v/release/datastax/cassandra-data-migrator?label=latest[alt="Latest cassandra-data-migrator release on GitHub",link="{cass-migrator-repo}/packages"]' + # Sideloader has a specific name in this repo. It is not identical to the one in the Serverless repo. + sstable-sideloader: '{astra-db} Sideloader' \ No newline at end of file diff --git a/local-preview-playbook.yml b/local-preview-playbook.yml index c4c4fa93..8513ddfa 100644 --- a/local-preview-playbook.yml +++ b/local-preview-playbook.yml @@ -60,34 +60,170 @@ asciidoc: xrefstyle: short # CUSTOM ATTRIBUTES company: 'DataStax' - astra_db: 'Astra DB' - astra_stream: 'Astra Streaming' - astra-stream: 'Astra Streaming' # AIM: Once all instances of astra_stream are removed, keep only the astra-stream attribute. - astra_ui: 'Astra Portal' - astra_cli: 'Astra CLI' - astra-cli: 'Astra CLI' # AIM: Once all instances of astra_cli are removed, keep only the astra-cli attribute. 
+ trust-center: 'IBM Trust Center' + trust-center-url: 'https://www.ibm.com/trust' + trust-center-link: '{trust-center-url}[{trust-center}]' + support-url: 'https://www.ibm.com/mysupport/s/' + dsbulk: 'DataStax Bulk Loader (DSBulk)' + dsbulk-short: 'DSBulk' + dsbulk-repo: 'https://github.com/datastax/dsbulk' + astra: 'Astra' + astra-db: 'Astra DB' + astra-ui: 'Astra Portal' + astra-url: 'https://astra.datastax.com' + astra-ui-link: '{astra-url}[{astra-ui}^]' + db-classic: 'Managed Cluster' + db-serverless: 'Serverless (non-vector)' + db-serverless-vector: 'Serverless (vector)' scb: 'Secure Connect Bundle (SCB)' scb-short: 'SCB' scb-brief: 'Secure Connect Bundle' - astra-streaming-examples-repo: 'https://raw.githubusercontent.com/datastax/astra-streaming-examples/master' - luna-streaming-examples-repo: 'https://raw.githubusercontent.com/datastaxdevs/luna-streaming-examples/main' - support_url: 'https://www.ibm.com/mysupport/s/' - glossary-url: 'https://docs.datastax.com/en/glossary/docs/index.html#' - emoji-tada: "🎉" - emoji-rocket: "🚀" - emoji-smile: "😀" + devops-api: 'DevOps API' + devops-api-ref-url: 'xref:astra-api-docs:ROOT:attachment$devops-api/index.html' + astra-cli: 'Astra CLI' + astra-stream: 'Astra Streaming' + starlight-kafka: 'Starlight for Kafka' + starlight-rabbitmq: 'Starlight for RabbitMQ' + astra-streaming-examples-repo: 'https://github.com/datastax/astra-streaming-examples' + hcd: 'Hyper-Converged Database (HCD)' + hcd-short: 'HCD' dse: 'DataStax Enterprise (DSE)' - cassandra: 'Apache Cassandra(R)' - classic: 'classic' - classic_cap: 'Classic' - serverless: 'serverless' - serverless_cap: 'Serverless' - py-client-api-ref-url: 'xref:astra-api-docs:ROOT:attachment$python-client-1x/astrapy' - ts-client-api-ref-url: 'xref:astra-api-docs:ROOT:attachment$typescript-client-1x' - java-client-api-ref-url: 'xref:astra-api-docs:ROOT:attachment$java-client-1x' + dse-short: 'DSE' + metrics-collector: 'DSE Metrics Collector' + mc: 'Mission Control' + opscenter: 
'DSE OpsCenter' + studio: 'DataStax Studio' + cass-reg: 'Apache Cassandra(R)' + cass: 'Apache Cassandra' + cass-short: 'Cassandra' + cql: 'Cassandra Query Language (CQL)' + cql-short: 'CQL' + cql-shell: 'CQL shell' + cql-console: 'CQL console' + pulsar-reg: 'Apache Pulsar(TM)' + pulsar: 'Apache Pulsar' + pulsar-short: 'Pulsar' + spark-reg: 'Apache Spark(TM)' + spark: 'Apache Spark' + spark-short: 'Spark' + spark-connect: 'Spark Connect' + spark-connector: 'Apache Cassandra Spark Connector' + spark-connector-short: 'Spark Connector' + kafka-reg: 'Apache Kafka(R)' + kafka: 'Apache Kafka' + kafka-short: 'Kafka' + kafka-connect: 'Kafka Connect' + kafka-connector: 'DataStax Apache Kafka Connector' + kafka-connector-short: 'Kafka Connector' + solr-reg: 'Apache Solr(TM)' + solr: 'Apache Solr' + solr-short: 'Solr' + lucene-reg: 'Apache Lucene(TM)' + lucene: 'Apache Lucene' + lucene-short: 'Lucene' + hadoop-reg: 'Apache Hadoop(R)' + hadoop: 'Apache Hadoop' + hadoop-short: 'Hadoop' + airflow-reg: 'Apache Airflow(R)' + airflow: 'Apache Airflow' + airflow-short: 'Airflow' + maven-reg: 'Apache Maven(TM)' + maven: 'Apache Maven' + maven-short: 'Maven' + flink-reg: 'Apache Flink(R)' + flink: 'Apache Flink' + flink-short: 'Flink' + beam-reg: 'Apache Beam(R)' + beam: 'Apache Beam' + beam-short: 'Beam' + geode-reg: 'Apache Geode(TM)' + geode: 'Apache Geode' + geode-short: 'Geode' + hbase-reg: 'Apache HBase(R)' + hbase: 'Apache HBase' + hbase-short: 'HBase' + kudu-reg: 'Apache Kudu(TM)' + kudu: 'Apache Kudu' + kudu-short: 'Kudu' + phoenix-reg: 'Apache Phoenix(TM)' + phoenix: 'Apache Phoenix' + phoenix-short: 'Phoenix' + zookeeper-reg: 'Apache ZooKeeper(TM)' + zookeeper: 'Apache ZooKeeper' + zookeeper-short: 'ZooKeeper' + asf: 'Apache Software Foundation (ASF)' + asf-short: 'ASF' + tinkerpop-reg: 'Apache TinkerPop(TM)' + tinkerpop: 'Apache TinkerPop' + tinkerpop-short: 'TinkerPop' + cloudstack-reg: 'Apache CloudStack(R)' + cloudstack: 'Apache CloudStack' + cloudstack-short: 
'CloudStack' + tomcat-reg: 'Apache Tomcat(R)' + tomcat: 'Apache Tomcat' + tomcat-short: 'Tomcat' + ajp: 'Apache JServ Protocol (AJP)' + ajp-short: 'AJP' + activemq-reg: 'Apache ActiveMQ(R)' + activemq: 'Apache ActiveMQ' + activemq-short: 'ActiveMQ' + tomee-reg: 'Apache TomEE(TM)' + tomee: 'Apache TomEE' + tomee-short: 'TomEE' + bookkeeper-reg: 'Apache BookKeeper(TM)' + bookkeeper: 'Apache BookKeeper' + bookkeeper-short: 'BookKeeper' + groovy-reg: 'Apache Groovy(TM)' + groovy: 'Apache Groovy' + groovy-short: 'Groovy' + cpp-driver-url: 'https://github.com/datastax/cpp-driver' + csharp-driver-url: 'https://github.com/datastax/csharp-driver' + gocql-astra-url: 'https://github.com/datastax/gocql-astra' + go-driver-url: 'https://github.com/apache/cassandra-gocql-driver' + cql-proxy-url: 'https://github.com/datastax/cql-proxy' + java-driver-url: 'https://github.com/apache/cassandra-java-driver' + nodejs-driver-url: 'https://github.com/datastax/nodejs-driver' + python-driver-url: 'https://github.com/datastax/python-driver' + scala-driver-url: 'https://github.com/apache/cassandra-spark-connector' + cass-driver-cpp-shield: 'image:https://img.shields.io/github/v/tag/datastax/cpp-driver?label=latest[alt="Latest cpp-driver release on GitHub",link="{cpp-driver-url}/tags"]' + cass-driver-csharp-shield: 'image:https://img.shields.io/nuget/v/CassandraCSharpDriver?label=latest[alt="Latest CassandraCSharpDriver release on NuGet",link="https://www.nuget.org/packages/CassandraCSharpDriver"]' + cass-driver-go-shield: 'image:https://img.shields.io/github/v/tag/apache/cassandra-gocql-driver?label=latest%20gocql[alt="Latest gocql release on GitHub",link="{go-driver-url}/tags"]' + cass-driver-java-shield: 'image:https://img.shields.io/github/v/tag/apache/cassandra-java-driver?label=latest[alt="Latest cassandra-java-driver release on GitHub",link="{java-driver-url}/tags"]' + cass-driver-nodejs-shield: 'image:https://img.shields.io/github/v/tag/datastax/nodejs-driver?label=latest[alt="Latest 
nodejs-driver release on GitHub",link="{nodejs-driver-url}/tags"]' + cass-driver-python-shield: 'image:https://img.shields.io/github/v/tag/datastax/python-driver?label=latest[alt="Latest python-driver release on GitHub",link="{python-driver-url}/tags"]' + cass-driver-scala-shield: 'image:https://img.shields.io/github/v/tag/apache/cassandra-spark-connector?label=latest[alt="Latest cassandra-spark-connector release on GitHub",link="{scala-driver-url}/releases"]' + data-api: 'Data API' + csharp-client-api-ref-url: 'xref:astra-api-docs:ROOT:attachment$csharp-client' py-client-api-ref-url-2x: 'xref:astra-api-docs:ROOT:attachment$python-client/astrapy' ts-client-api-ref-url-2x: 'xref:astra-api-docs:ROOT:attachment$typescript-client' java-client-api-ref-url-2x: 'xref:astra-api-docs:ROOT:attachment$java-client' + python-client-repo-url: 'https://github.com/datastax/astrapy' + typescript-client-repo-url: 'https://github.com/datastax/astra-db-ts' + typescript-client-examples-url: '{typescript-client-repo-url}/blob/v2.x/examples' + java-client-repo-url: 'https://github.com/datastax/astra-db-java' + csharp-client-repo-url: 'https://github.com/datastax/astra-db-csharp' + python-client-python-version: '3.8' + dataapi-java-client-shield: 'image:https://img.shields.io/maven-central/v/com.datastax.astra/astra-db-java.svg?label=latest[alt="Latest astra-db-java release on Maven Central",link="https://search.maven.org/artifact/com.datastax.astra/astra-db-java"]' + dataapi-python-client-shield: 'image:https://img.shields.io/github/v/tag/datastax/astrapy?label=latest[alt="Latest astrapy release on GitHub",link="{python-client-repo-url}/releases"]' + dataapi-typescript-client-shield: 'image:https://img.shields.io/github/v/tag/datastax/astra-db-ts?label=latest[alt="Latest astra-db-ts release on GitHub",link="{typescript-client-repo-url}/releases"]' + dataapi-csharp-client-shield: 'image:https://img.shields.io/github/v/tag/datastax/astra-db-csharp?label=latest[alt="Latest astra-db-csharp 
release on GitHub",link="{csharp-client-repo-url}/releases"]' + agent: 'DataStax Agent' + repair-service: 'Repair Service' + backup-service: 'Backup Service' + performance-service: 'Performance Service' + monitoring-service: 'OpsCenter Monitoring' + nodesync-service: 'NodeSync Service' + bestpractice-service: 'Best Practice Service' + capacity-service: 'Capacity Service' + lcm: 'Lifecycle Manager (LCM)' + lcm-short: 'LCM' + cr: 'custom resource (CR)' + cr-short: 'CR' + crd: 'custom resource definition (CRD)' + crd-short: 'CRD' + # Custom attributes only used in ragstack-ai + astra_db: 'Astra DB' + astra_ui: 'Astra Portal' # Antora Atlas primary-site-url: https://docs.datastax.com/en primary-site-manifest-url: https://docs.datastax.com/en/site-manifest.json diff --git a/modules/ROOT/nav.adoc b/modules/ROOT/nav.adoc index 9cc4ed72..e576fbb3 100644 --- a/modules/ROOT/nav.adoc +++ b/modules/ROOT/nav.adoc @@ -42,5 +42,5 @@ * xref:ROOT:cassandra-data-migrator.adoc[] * {cass-migrator-repo}/releases[{cass-migrator-short} release notes] -.{dsbulk-loader} +.{dsbulk} * xref:dsbulk:overview:dsbulk-about.adoc[] \ No newline at end of file diff --git a/modules/ROOT/pages/astra-migration-paths.adoc b/modules/ROOT/pages/astra-migration-paths.adoc index 54a09736..9b4b8f9f 100644 --- a/modules/ROOT/pages/astra-migration-paths.adoc +++ b/modules/ROOT/pages/astra-migration-paths.adoc @@ -9,7 +9,7 @@ If you have questions about migrating from a specific source to {astra-db}, cont .Migration tool compatibility [cols="2,1,1,1,1"] |=== -|Origin |{sstable-sideloader} |{cass-migrator} |{product-proxy} |{dsbulk-loader} +|Origin |{sstable-sideloader} |{cass-migrator} |{product-proxy} |{dsbulk} |Aiven for {cass-short} |icon:check[role="text-success",alt="Supported"] diff --git a/modules/ROOT/pages/cassandra-data-migrator.adoc b/modules/ROOT/pages/cassandra-data-migrator.adoc index 501e6c36..e69cf14d 100644 --- a/modules/ROOT/pages/cassandra-data-migrator.adoc +++ 
b/modules/ROOT/pages/cassandra-data-migrator.adoc @@ -1,6 +1,6 @@ = Use {cass-migrator} with {product-proxy} -:navtitle: Use {cass-migrator} -:description: You can use {cass-migrator} ({cass-migrator-short}) for data migration and validation between {cass-reg}-based databases. +:navtitle: Use {cass-migrator-short} +:description: You can use {cass-migrator} for data migration and validation between {cass-reg}-based databases. :page-aliases: cdm-parameters.adoc, ROOT:cdm-steps.adoc, ROOT:cdm-overview.adoc {description} @@ -51,41 +51,41 @@ The container's `assets` directory includes all required migration tools: `cassa Install as a JAR file:: + -- -. Install Java 11 or later, which includes Spark binaries. +. Install Java 11 or later, which includes {spark-short} binaries. -. Install https://spark.apache.org/downloads.html[Apache Spark(TM)] version 3.5.x with Scala 2.13 and Hadoop 3.3 and later. +. Install https://spark.apache.org/downloads.html[{spark-reg}] version 3.5.x with Scala 2.13 and {hadoop-reg} 3.3 and later. + [tabs] ==== Single VM:: + -For one-off migrations, you can install the Spark binary on a single VM where you will run the {cass-migrator-short} job. +For one-off migrations, you can install the {spark-short} binary on a single VM where you will run the {cass-migrator-short} job. + -. Get the Spark tarball from the Apache Spark archive. +. Get the {spark-reg} tarball from the {spark} archive. + [source,bash,subs="+quotes"] ---- wget https://archive.apache.org/dist/spark/spark-3.5.**PATCH**/spark-3.5.**PATCH**-bin-hadoop3-scala2.13.tgz ---- + -Replace `**PATCH**` with your Spark patch version. +Replace `**PATCH**` with your {spark-short} patch version. + -. Change to the directory where you want install Spark, and then extract the tarball: +. 
Change to the directory where you want install {spark-short}, and then extract the tarball: + [source,bash,subs="+quotes"] ---- tar -xvzf spark-3.5.**PATCH**-bin-hadoop3-scala2.13.tgz ---- + -Replace `**PATCH**` with your Spark patch version. +Replace `**PATCH**` with your {spark-short} patch version. -Spark cluster:: +{spark-reg} cluster:: + -For large (several terabytes) migrations, complex migrations, and use of {cass-migrator-short} as a long-term data transfer utility, {company} recommends that you use a Spark cluster or Spark Serverless platform. +For large (several terabytes) migrations, complex migrations, and use of {cass-migrator-short} as a long-term data transfer utility, {company} recommends that you use a {spark} cluster or {spark-short} Serverless platform. + -If you deploy CDM on a Spark cluster, you must modify your `spark-submit` commands as follows: +If you deploy {cass-migrator-short} on a {spark-short} cluster, you must modify your `spark-submit` commands as follows: + -* Replace `--master "local[*]"` with the host and port for your Spark cluster, as in `--master "spark://**MASTER_HOST**:**PORT**"`. +* Replace `--master "local[*]"` with the host and port for your {spark-short} cluster, as in `--master "spark://**MASTER_HOST**:**PORT**"`. * Remove parameters related to single-VM installations, such as `--driver-memory` and `--executor-memory`. ==== @@ -167,7 +167,7 @@ You can also set `spark.cdm.schema.origin.keyspaceTable` in your properties file * `**VERSION**`: Specify the full {cass-migrator-short} version that you installed, such as `5.2.1`. -- -Spark cluster:: +{spark-reg} cluster:: + -- [source,bash,subs="+quotes"] @@ -188,7 +188,7 @@ Depending on where your properties file is stored, you might need to specify the + You can also set `spark.cdm.schema.origin.keyspaceTable` in your properties file using the same format of `**KEYSPACE_NAME**.**TABLE_NAME**`. -* `--master`: Provide the URL of your Spark cluster. 
+* `--master`: Provide the URL of your {spark-short} cluster. * `**VERSION**`: Specify the full {cass-migrator-short} version that you installed, such as `5.2.1`. -- @@ -236,7 +236,7 @@ You can also set `spark.cdm.schema.origin.keyspaceTable` in your properties file * `**VERSION**`: Specify the full {cass-migrator-short} version that you installed, such as `5.2.1`. -- -Spark cluster:: +{spark-reg} cluster:: + -- [source,bash,subs="+quotes"] @@ -257,7 +257,7 @@ Depending on where your properties file is stored, you might need to specify the + You can also set `spark.cdm.schema.origin.keyspaceTable` in your properties file using the same format of `**KEYSPACE_NAME**.**TABLE_NAME**`. -* `--master`: Provide the URL of your Spark cluster. +* `--master`: Provide the URL of your {spark-short} cluster. * `**VERSION**`: Specify the full {cass-migrator-short} version that you installed, such as `5.2.1`. -- @@ -331,15 +331,15 @@ Specifically, see the {cass-migrator-repo}/blob/main/src/resources/cdm-detailed. .Java NoSuchMethodError [%collapsible] ==== -If you installed Spark as a JAR file, and your Spark and Scala versions aren't compatible with your installed version of {cass-migrator-short}, {cass-migrator-short} jobs can throw exceptions such a the following: +If you installed {spark-short} as a JAR file, and your {spark-short} and Scala versions aren't compatible with your installed version of {cass-migrator-short}, {cass-migrator-short} jobs can throw exceptions such a the following: [source,console] ---- Exception in thread "main" java.lang.NoSuchMethodError: 'void scala.runtime.Statics.releaseFence()' ---- -Make sure that your Spark binary is compatible with your {cass-migrator-short} version. -If you installed an earlier version of {cass-migrator-short}, you might need to install an earlier Spark binary. +Make sure that your {spark-short} binary is compatible with your {cass-migrator-short} version. 
+If you installed an earlier version of {cass-migrator-short}, you might need to install an earlier {spark-short} binary. ==== .Rerun a failed or partially completed job diff --git a/modules/ROOT/pages/components.adoc b/modules/ROOT/pages/components.adoc index 7dcb5c25..408b5ff3 100644 --- a/modules/ROOT/pages/components.adoc +++ b/modules/ROOT/pages/components.adoc @@ -30,7 +30,7 @@ Typically, you only need to update the connection string. === How {product-proxy} handles reads and writes {company} created {product-proxy} to orchestrate requests between a client application and both the origin and target clusters. -These clusters can be any xref:cql:ROOT:index.adoc[CQL]-compatible data store, such as {cass-reg}, {dse}, and {astra-db}. +These clusters can be any data store that supports the xref:cql:ROOT:index.adoc[{cql}], such as {cass-reg}, {dse}, {hcd}, and {astra-db}. During the migration process, you designate one cluster as the _primary cluster_, which serves as the source of truth for reads. For the majority of the migration process, this is typically the origin cluster. @@ -145,21 +145,21 @@ For more information, see xref:sideloader:sideloader-overview.adoc[]. === {cass-migrator} -You can use {cass-migrator} ({cass-migrator-short}) for data migration and validation between {cass-reg}-based databases. +You can use {cass-migrator-short} for data migration and validation between {cass-short}-based databases. It offers extensive functionality and configuration options to support large and complex migrations as well as post-migration data validation. You can use {cass-migrator-short} by itself, with {product-proxy}, or for data validation after using another data migration tool. For more information, see xref:ROOT:cassandra-data-migrator.adoc[]. -=== {dsbulk-loader} +=== {dsbulk} -{dsbulk-loader} is a high-performance data loading and unloading tool for {cass-short}-based databases. 
+{dsbulk-short} is a high-performance data loading and unloading tool for {cass-short}-based databases. You can use it to load, unload, and count records. -Because {dsbulk-loader} doesn't have the same data validation capabilities as {cass-migrator-short}, it is best for migrations that don't require extensive data validation, aside from post-migration row counts. +Because {dsbulk-short} doesn't have the same data validation capabilities as {cass-migrator-short}, it is best for migrations that don't require extensive data validation, aside from post-migration row counts. -You can use {dsbulk-loader} alone or with {product-proxy}. +You can use {dsbulk-short} alone or with {product-proxy}. For more information, see xref:dsbulk:overview:dsbulk-about.adoc[]. @@ -171,7 +171,7 @@ include::ROOT:partial$dsbulk-migrator-deprecation.adoc[] === Other data migration processes Depending on your origin and target databases, there might be other data migration tools available for your migration. -For example, if you want to write your own custom data migration processes, you can use a tool like Apache Spark(TM). +For example, if you want to write your own custom data migration processes, you can use a tool like {spark-reg}. To use a data migration tool with {product-proxy}, it must meet the following requirements: diff --git a/modules/ROOT/pages/connect-clients-to-proxy.adoc b/modules/ROOT/pages/connect-clients-to-proxy.adoc index f4f31007..e9b20883 100644 --- a/modules/ROOT/pages/connect-clients-to-proxy.adoc +++ b/modules/ROOT/pages/connect-clients-to-proxy.adoc @@ -3,12 +3,12 @@ {product-proxy} is designed to mimic communication with a typical cluster based on {cass-reg}. This means that your client applications connect to {product-proxy} in the same way that they already connect to your existing {cass-short}-based clusters. -You can communicate with {product-proxy} using the same xref:cql:ROOT:index.adoc[CQL] statements used in your existing client applications. 
+You can communicate with {product-proxy} using the same xref:cql:ROOT:index.adoc[{cql}] statements used in your existing client applications. It understands the same messaging protocols used by {cass-short}, {dse-short}, {hcd-short}, and {astra-db}. As a result, most client applications cannot distinguish between connections to {product-proxy} and direct connections to a {cass-short}-based cluster. This page explains how to connect client applications to a {cass-short}-based cluster using {cass-short} drivers, and then describes the how you need to modify your client applications to connect to {product-proxy}. -It also explains how to connect `cqlsh` to {product-proxy}, which is often used in conjunction with {cass-short} drivers and during the migration process for certain validation tasks. +It also explains how to connect the {cql-shell} (`cqlsh`) to {product-proxy}, which is often used in conjunction with {cass-short} drivers and during the migration process for certain validation tasks. == Connect applications to {cass-short} clusters @@ -198,18 +198,20 @@ For more information, see the https://github.com/absurdfarce/themis/blob/main/RE In addition to being a validation tool, Themis also provides an example of a larger client application that uses the Java driver to connect to {product-proxy}, or directly to a {cass-short} cluster, and perform operations. The configuration logic, as well as the cluster and session management code, are separated into distinct packages to make them easy to understand. -== Connect cqlsh to {product-proxy} +== Connect the {cql-shell} to {product-proxy} -`cqlsh` is a command-line tool that you can use to send CQL statements and `cqlsh`-specific commands to your {cass-short}-based clusters, including {astra-db}, {dse-short}, {hcd-short}, and {cass} databases. 
+The {cql-shell} (`cqlsh`) is a command-line tool that you can use to send {cql-short} and `cqlsh` commands to your {cass-short}-based clusters, including {astra-db}, {dse-short}, {hcd-short}, and {cass} databases. -You can use your database's included version of `cqlsh`, or you can download and run a standalone `cqlsh`. +When issuing these commands directly to a cluster, you can use your database's included version of `cqlsh`, or you can download and run a standalone `cqlsh`. + +With {product-proxy}, you use a standalone `cqlsh` installation to issue commands to both clusters through {product-proxy}. Your origin and target clusters must have a common `cql_version` between them. -If there is no CQL version that is compatible with both clusters, `cqlsh` won't be able to connect to {product-proxy}. +If there is no {cql-short} version that is compatible with both clusters, `cqlsh` won't be able to connect to {product-proxy}. To connect `cqlsh` to a {product-proxy} instance, do the following: -. On a machine that can connect to your {product-proxy} instance, xref:cql:connect:start-cqlsh-linux-mac.adoc[download and install `cqlsh`]. +. On a machine that can connect to your {product-proxy} instance, xref:cql:connect:start-cqlsh-linux-mac.adoc[download and install the {cql-shell}]. + Any version of `cqlsh` can connect to {product-proxy}, but some clusters require a specific `cqlsh` version. 
diff --git a/modules/ROOT/pages/connect-clients-to-target.adoc b/modules/ROOT/pages/connect-clients-to-target.adoc index d4bd855e..913bbdba 100644 --- a/modules/ROOT/pages/connect-clients-to-target.adoc +++ b/modules/ROOT/pages/connect-clients-to-target.adoc @@ -8,7 +8,7 @@ This removes the dependency on {product-proxy} and the origin cluster, thereby c image::migration-phase5ra.png[In Phase 5, your applications no longer use the proxy and, instead, connect directly to the target cluster] -The minimum requirements for reconfiguring these connections depend on whether your target cluster is {astra-db} or a generic CQL cluster, such as {cass-reg}, {dse}, or {hcd}. +The minimum requirements for reconfiguring these connections depend on whether your target cluster is {astra-db} or a self-managed {cass-short} cluster, such as {cass-reg}, {dse}, or {hcd}. [WARNING] ==== @@ -18,9 +18,9 @@ From this point onward, the clusters will diverge, and the target cluster become Be sure that you have thoroughly xref:ROOT:migrate-and-validate-data.adoc[validated your data (Phase 2)], xref:ROOT:enable-async-dual-reads.adoc[tested your target cluster's performance (Phase 3)], and xref:ROOT:change-read-routing.adoc[routed all reads to the target (Phase 4)] before permanently switching the connection. ==== -== Connect a driver to a generic CQL cluster +== Connect a driver to a self-managed {cass-short} cluster -If your origin and target clusters are both generic CQL clusters, such as {cass-short}, {dse-short}, or {hcd-short}, then the driver connection strings can be extremely similar. +If your origin and target clusters are both self-managed {cass-short} clusters, such as {cass-short}, {dse-short}, or {hcd-short}, then the driver connection strings can be extremely similar. It's possible that your code requires only minor changes to connect to the target cluster. . At minimum, update your driver configuration to use the appropriate contact points for your target cluster. 
@@ -40,7 +40,7 @@ For more information on supported drivers, see xref:datastax-drivers:compatibili == Connect a driver to {astra-db} -Connections to {astra-db} are different from connections to generic CQL clusters. +Connections to {astra-db} are different from connections to self-managed {cass-short} clusters. === Get {astra-db} credentials @@ -199,7 +199,7 @@ If your driver version is fully compatible with {astra-db}, then it has built-in The changes required to connect your application to {astra-db} are minimal. If your client application uses an earlier driver version without built-in {scb-short} support, {company} strongly recommends upgrading to a compatible driver to simplify configuration and get the latest features and bug fixes. -If you prefer to make this change after the migration, or you must support a legacy application that relies on an earlier driver, you can connect to {astra-db} with https://github.com/datastax/cql-proxy[CQL Proxy], or you must extract the {scb-short} archive and provide the individual files to enable mTLS in your driver's configuration. +If you prefer to make this change after the migration, or you must support a legacy application that relies on an earlier driver, you can connect to {astra-db} with `{cql-proxy-url}[cql-proxy]`, or you must extract the {scb-short} archive and provide the individual files to enable mTLS in your driver's configuration. The following example demonstrates how the {cass-short} Python driver connects to {astra-db} using an application token and {scb-short}: @@ -231,7 +231,7 @@ In addition to updating connection strings, you might also need to make the foll * Feature compatibility between your previous and current database platform. + -For example, after migrating to {astra-db}, your drivers cannot create keyspaces because xref:astra-db-serverless:cql:develop-with-cql.adoc[CQL for {astra-db}] doesn't support `CREATE KEYSPACE`. 
+For example, after migrating to {astra-db}, your drivers cannot create keyspaces because xref:astra-db-serverless:cql:develop-with-cql.adoc[{cql-short} for {astra-db}] doesn't support `CREATE KEYSPACE`. + Similarly, {astra-db} doesn't support {dse-short}-specific features like {dse-short} Insights Monitoring. @@ -239,19 +239,19 @@ Similarly, {astra-db} doesn't support {dse-short}-specific features like {dse-sh Depending on your application's requirements, you might need to make these changes immediately, or you might make them after switching the connection. -== Switch to the Data API +== Switch to the {data-api} -If you migrated to {astra-db} or {hcd-short}, you have the option of using the Data API instead of, or in addition to, a {cass-short} driver. +If you migrated to {astra-db} or {hcd-short}, you have the option of using the {data-api} instead of, or in addition to, a {cass-short} driver. -Although the Data API can read and write to CQL tables, it is significantly different from driver code. -To use the Data API, you must rewrite your application code or create a new application. +Although the {data-api} can read and write to {cql-short} tables, it is significantly different from driver code. +To use the {data-api}, you must rewrite your application code or create a new application. 
For more information, see the following: -* xref:astra-db-serverless:api-reference:dataapiclient.adoc[Get started with the Data API in {astra-db}] -* xref:astra-db-serverless:api-reference:compare-dataapi-to-cql.adoc[Migrate to the Data API from CQL in {astra-db}] -* xref:hyper-converged-database:api-reference:dataapiclient.adoc[Get started with the Data API in {hcd-short}] -* xref:hyper-converged-database:api-reference:compare-dataapi-to-cql.adoc[Migrate to the Data API from CQL in {hcd-short}] +* xref:astra-db-serverless:api-reference:dataapiclient.adoc[Get started with the {data-api} in {astra-db}] +* xref:astra-db-serverless:api-reference:compare-dataapi-to-cql.adoc[Migrate to the {data-api} from {cql-short} in {astra-db}] +* xref:hyper-converged-database:api-reference:dataapiclient.adoc[Get started with the {data-api} in {hcd-short}] +* xref:hyper-converged-database:api-reference:compare-dataapi-to-cql.adoc[Migrate to the {data-api} from {cql-short} in {hcd-short}] == Migration complete diff --git a/modules/ROOT/pages/create-target.adoc b/modules/ROOT/pages/create-target.adoc index 7e8f2905..fe2b3dd8 100644 --- a/modules/ROOT/pages/create-target.adoc +++ b/modules/ROOT/pages/create-target.adoc @@ -62,13 +62,13 @@ On your new database, the keyspace names, table names, column names, data types, + Note the following limitations and exceptions for tables in {astra-db}: + -* In {astra-db}, you must xref:astra-db-serverless:databases:manage-keyspaces.adoc[create keyspaces in the {astra-ui} or with the {devops-api}] because xref:astra-db-serverless:cql:develop-with-cql.adoc[CQL for {astra-db}] doesn't support `CREATE KEYSPACE`. -* You can use `cqlsh`, drivers, or the {data-api} to xref:astra-db-serverless:databases:manage-collections.adoc#create-a-table[create tables in {astra-db}]. 
+* In {astra-db}, you must xref:astra-db-serverless:databases:manage-keyspaces.adoc[create keyspaces in the {astra-ui} or with the {devops-api}] because xref:astra-db-serverless:cql:develop-with-cql.adoc[{cql-short} for {astra-db}] doesn't support `CREATE KEYSPACE`. +* You can use the {cql-shell} (`cqlsh`), {cass-short} drivers, or the {data-api} to xref:astra-db-serverless:databases:manage-collections.adoc#create-a-table[create tables in {astra-db}]. However, the only optional table properties that {astra-db} supports are `default_time_to_live` and `comment`. As a best practice, omit xref:astra-db-serverless:cql:develop-with-cql.adoc#unsupported-values-are-ignored[unsupported DDL properties], such as compaction strategy and `gc_grace_seconds`, when creating tables in {astra-db}. * {astra-db} doesn't support Materialized Views (MVs) and certain types of indexes. You must adjust your data model and application logic to discard or replace these structures before beginning your migration. -For more information, see xref:astra-db-serverless:cql:develop-with-cql.adoc#limitations-on-cql-for-astra-db[Limitations on CQL for {astra-db}]. +For more information, see xref:astra-db-serverless:cql:develop-with-cql.adoc#limitations-on-cql-for-astra-db[Limitations on {cql-short} for {astra-db}]. * If you plan to use {sstable-sideloader} for xref:ROOT:migrate-and-validate-data.adoc[Phase 2], see the xref:sideloader:migrate-sideloader.adoc#record-schema[target database configuration requirements for migrating data with {sstable-sideloader}]. -- @@ -103,16 +103,16 @@ Store the authentication credentials securely for use by your client application On your new cluster, the keyspace names, table names, column names, data types, and primary keys must be identical to the schema on the origin cluster or the migration will fail. ==== + -To copy the schema, you can run CQL `DESCRIBE` on the origin cluster to get the schema that is being migrated, and then run the output on your new cluster. 
+To copy the schema, you can run {cql-short} `DESCRIBE` on the origin cluster to get the schema that is being migrated, and then run the output on your new cluster. + -If your origin cluster is running an earlier version, you might need to edit CQL clauses that are no longer supported in newer versions, such as `COMPACT STORAGE`. +If your origin cluster is running an earlier version, you might need to edit {cql-short} clauses that are no longer supported in newer versions, such as `COMPACT STORAGE`. For specific changes in each version, see the release notes for your database platform and {cass-short} driver. -- -Other CQL-compatible data stores:: +Other {cql-short}-compatible data stores:: + -- -Support for other CQL-compatible data stores isn't guaranteed for {product-proxy}. +Support for other {cql-short}-compatible data stores isn't guaranteed for {product-proxy}. If your origin and target clusters meet the xref:ROOT:feasibility-checklists.adoc[protocol version compatibility requirements], you might be able to use {product-proxy} for your migration. As with any migration, {company} recommends that you test this in isolation before attempting a full-scale production migration. diff --git a/modules/ROOT/pages/deployment-infrastructure.adoc b/modules/ROOT/pages/deployment-infrastructure.adoc index 01018ed9..0f39f4c1 100644 --- a/modules/ROOT/pages/deployment-infrastructure.adoc +++ b/modules/ROOT/pages/deployment-infrastructure.adoc @@ -49,7 +49,7 @@ In a sidecar deployment, each client application instance would be connecting to [[_machines]] == Hardware requirements -You need a number of machines to run your {product-proxy} instances, plus additional machines for the centralized jumphost, and for running {dsbulk-loader} or {cass-migrator} (which are recommended data migration and validation tools). 
+You need a number of machines to run your {product-proxy} instances, plus additional machines for the centralized jumphost, and for running {dsbulk} or {cass-migrator} (which are recommended data migration and validation tools). This section uses the term _machine_ broadly to refer to a cloud instance (on any cloud provider), a VM, or a physical server. @@ -83,10 +83,10 @@ The jumphost machine must meet the following specifications: * 200 to 500 GB of storage depending on the amount of metrics history that you want to retain * Equivalent to AWS **c5.2xlarge**, GCP **e2-standard-8**, or Azure **A8 v2** -Data migration tools ({dsbulk-loader} or {cass-migrator}):: -You need at least one machine to run {dsbulk-loader} or {cass-migrator} for the data migration and validation (xref:ROOT:migrate-and-validate-data.adoc[Phase 2]). +Data migration tools ({dsbulk-short} or {cass-migrator-short}):: +You need at least one machine to run {dsbulk-short} or {cass-migrator-short} for the data migration and validation (xref:ROOT:migrate-and-validate-data.adoc[Phase 2]). Even if you plan to use another data migration tool, you might need infrastructure for these tools or your chosen tool. -For example, {cass-migrator} is used for data validation after migrating data with {sstable-sideloader}. +For example, {cass-migrator-short} is used for data validation after migrating data with {sstable-sideloader}. + {company} recommends that you start with at least one VM that meets the following minimum specifications: + @@ -102,7 +102,7 @@ Whether you need additional machines depends on the total amount of data you nee All machines must meet the minimum specifications. For example, if you have 20 TBs of existing data to migrate, you could use 4 VMs to speed up the migration. -Then, you would run {dsbulk-loader} or {cass-migrator} in parallel on each VM with each one responsible for migrating specific tables or a portion of the data, such as 25% or 5 TB. 
+Then, you would run {dsbulk-short} or {cass-migrator-short} in parallel on each VM with each one responsible for migrating specific tables or a portion of the data, such as 25% or 5 TB. If you have one especially large table, such as 75% of the total data in one table, you can use multiple VMs to migrate that one table. For example, if you have 4 VMs, you can use three VMs in parallel for the large table by splitting the table's full token range into three groups. diff --git a/modules/ROOT/pages/faqs.adoc b/modules/ROOT/pages/faqs.adoc index e224f729..20b3035e 100644 --- a/modules/ROOT/pages/faqs.adoc +++ b/modules/ROOT/pages/faqs.adoc @@ -6,7 +6,7 @@ This page includes common questions about the {company} {product} ({product-shor == What is a zero-downtime migration? -A zero-downtime migration with the {company} {product-short} tools means you can reliably migrate your client applications and data between CQL clusters with no interruption of service. +A zero-downtime migration with the {company} {product-short} tools means you can reliably migrate your client applications and data between {cass-short}-based clusters with no interruption of service. == Which platforms and versions are supported by the {product-short} tools? @@ -41,7 +41,7 @@ Endless validation and testing time:: Because your client applications remain fully operational during the migration, and your clusters are kept in sync by {product-proxy}, you can take as much time as you need to validate and test the target cluster before switching over permanently. Migrate to a different platform or perform major version upgrades:: -The {product-short} tools support migrations between different CQL-based platforms, such as open-source {cass-reg} to {astra-db}, as well as major version upgrades of the same platform, such as {dse-short} 5.0 to {dse-short} 6.9. 
+The {product-short} tools support migrations between different {cass-short}-based platforms, such as open-source {cass-reg} to {astra-db}, as well as major version upgrades of the same platform, such as {dse-short} 5.0 to {dse-short} 6.9. == What are the requirements for true zero-downtime migrations? @@ -60,7 +60,7 @@ Yes, you can use the xref:ROOT:introduction.adoc#lab[{product-short} interactive The {product-short} tools are {product-proxy}, {product-utility}, and {product-automation}. These tools orchestrate the traffic between your client applications and the origin and target clusters during the migration process. -For the actual data migration, there are many tools you can use, such as {sstable-sideloader}, {cass-migrator}, {dsbulk-loader}, and custom data migration scripts. +For the actual data migration, there are many tools you can use, such as {sstable-sideloader}, {cass-migrator}, {dsbulk}, and custom data migration scripts. For more information, see xref:ROOT:components.adoc[]. @@ -92,12 +92,12 @@ At the end of the migration process, your client application connects exclusivel Before the {product-short} tools were available, migrating client applications between clusters involved granular and intrusive client application code changes, extensive migration preparation, and a window of downtime for the client application's end users. -With the {product-short} tools, you can migrate your client applications and data between CQL clusters with minimal code changes and no interruption of service. +With the {product-short} tools, you can migrate your client applications and data between {cass-short}-based clusters with minimal code changes and no interruption of service. You can have the confidence that you are using tools designed specifically to handle the complexities of live traffic during large enterprise migrations. == What is the pricing model? 
-{product-proxy}, {product-utility}, {product-automation}, {cass-migrator}, and {dsbulk-loader} are free and open-sourced. +{product-proxy}, {product-utility}, {product-automation}, {cass-migrator-short}, and {dsbulk-short} are free and open-sourced. {sstable-sideloader} is part of an {astra-db} *Enterprise* subscription plan, and it incurs costs based on usage. @@ -109,12 +109,12 @@ For any observed problems with {product-proxy} or the other open-source {product * {product-proxy-repo}[{product-proxy} repository] * {product-automation-repo}[{product-automation} repository] (includes {product-automation} and {product-utility}) -* {cass-migrator-repo}[{cass-migrator} repository] -* {dsbulk-loader-repo}[{dsbulk-loader} repository] +* {cass-migrator-repo}[{cass-migrator-short} repository] +* {dsbulk-repo}[{dsbulk-short} repository] == Can I contribute to {product-proxy}? -Yes, see `https://github.com/datastax/zdm-proxy/blob/main/CONTRIBUTING.md[CONTRIBUTING.md]`. +Yes, see `{product-proxy-repo}/blob/main/CONTRIBUTING.md[CONTRIBUTING.md]`. == Does {product-proxy} support Transport Layer Security (TLS)? @@ -158,10 +158,10 @@ For more information, see xref:components.adoc#how-zdm-proxy-handles-reads-and-w In the context of the {product-short} process, the terms _cluster_ and _database_ are used interchangeably to refer to the source and destination for the data that you are moving during your migration. -== What happened to DSBulk Migrator? +== What happened to {dsbulk-migrator}? 
include::ROOT:partial$dsbulk-migrator-deprecation.adoc[] == See also -* https://github.com/datastax/zdm-proxy/blob/main/faq.md[{product-proxy} FAQ on GitHub] \ No newline at end of file +* {product-proxy-repo}/blob/main/faq.md[{product-proxy} FAQ on GitHub] \ No newline at end of file diff --git a/modules/ROOT/pages/feasibility-checklists.adoc b/modules/ROOT/pages/feasibility-checklists.adoc index d09d8cdf..9c042b05 100644 --- a/modules/ROOT/pages/feasibility-checklists.adoc +++ b/modules/ROOT/pages/feasibility-checklists.adoc @@ -103,7 +103,7 @@ However, be aware that {product-proxy} itself can have a performance impact, reg === Thrift isn't supported by {product-proxy} -If you are using an earlier driver or cluster version that only supports Thrift, you must change your client application to use CQL, and, if needed, upgrade your cluster before starting the migration process. +If you are using an earlier driver or cluster version that only supports Thrift, you must change your client application to use {cql} statements, and, if needed, upgrade your cluster before starting the migration process. == Supported data store providers, versions, and migration paths @@ -114,26 +114,26 @@ include::ROOT:partial$migration-scenarios.adoc[] [IMPORTANT] ==== To successfully migrate with {product-proxy}, the origin and target clusters must have matching schemas, including keyspace names, table names, column names, and data types. -A CQL statement produced by your client application must be able to succeed _without modification_ on both clusters because {product-proxy} routes the exact same CQL statements to both clusters. +A {cql-short} statement produced by your client application must be able to succeed _without modification_ on both clusters because {product-proxy} routes the exact same {cql-short} statements to both clusters. -Even without {product-proxy}, matching schemas are required for most CQL-based data migration tools, such as {sstable-sideloader} and {cass-migrator}. 
+Even without {product-proxy}, matching schemas are required for most {cql-short}-based data migration tools, such as {sstable-sideloader} and {cass-migrator}. If it is impossible to match the schemas, your migration cannot use {product-proxy}. See xref:ROOT:components.adoc[] for alternative migration tools and strategies, such as extract, transform, and load (ETL) processes. ==== -{product-proxy} _doesn't_ modify or transform CQL statements with the exception of <>. +{product-proxy} _doesn't_ modify or transform {cql-short} statements with the exception of <>. -A CQL statement that your client application sends to {product-proxy} must be able to succeed on both clusters. +A {cql-short} statement that your client application sends to {product-proxy} must be able to succeed on both clusters. This means that any keyspace that your client application uses must exist on both the origin and target clusters with the same keyspace name, table names, column names, and compatible data types. However, the keyspaces can have different replication strategies and durable write settings. -At the column level, the schema doesn't need to be a one-to-one match as long as the CQL statements can be executed successfully on both clusters. +At the column level, the schema doesn't need to be a one-to-one match as long as the {cql-short} statements can be executed successfully on both clusters. For example, if a table has 10 columns, and your client application uses only five of those columns, then you can recreate the table on the target cluster with only the five required columns. However, the data from the other columns won't be migrated to the target cluster if those columns don't exist on the target cluster. Before you decide to omit a column from the target cluster, make sure that it is acceptable to permanently lose that data after the migration. In some cases, you can change the primary key on the target cluster. 
-For example, if your compound primary key is `PRIMARY KEY (A, B)`, and you always provide parameters for the `A` and `B` columns in your CQL statements, then you could change the key to `PRIMARY KEY (B, A)` when creating the schema on the target because your CQL statements will still run successfully. +For example, if your compound primary key is `PRIMARY KEY (A, B)`, and you always provide parameters for the `A` and `B` columns in your {cql-short} statements, then you could change the key to `PRIMARY KEY (B, A)` when creating the schema on the target because your {cql-short} statements will still run successfully. == Request and error handling with {product-proxy} @@ -142,7 +142,7 @@ See xref:ROOT:components.adoc#how-zdm-proxy-handles-reads-and-writes[How {produc [[non-idempotent-operations]] == Lightweight Transactions and other non-idempotent operations -Non-idempotent CQL operations don't produce the exact same result each time the request is executed. +Non-idempotent {cql-short} operations don't produce the exact same result each time the request is executed. Whether the request is executed and the actual value written to the database depend on the column's current value or other conditions at the time of the request. Examples of non-idempotent operations include the following: @@ -155,7 +155,7 @@ By design, a {product-short} migration involves two separate clusters, and it is If you use non-idempotent operations, {company} recommends adding a reconciliation phase to your migration before and after xref:ROOT:change-read-routing.adoc[Phase 4]. This allows you an additional opportunity to resolve any data inconsistencies that are produced by non-idempotent operations. -The xref:ROOT:cassandra-data-migrator.adoc[{cass-migrator}] is ideal for detecting and reconciling these types of inconsistencies. +The xref:ROOT:cassandra-data-migrator.adoc[{cass-migrator-short}] is ideal for detecting and reconciling these types of inconsistencies. 
However, if your application workloads can tolerate inconsistencies produced by LWTs and non-idempotent operations, you might not need to perform any additional validation or reconciliation steps. This depends entirely on your application business logic and requirements. @@ -253,8 +253,8 @@ LZ4 and Snappy compression algorithms require {product-proxy} version 2.4.0 or l The binary protocol used by {astra}, {dse-short}, {hcd-short}, and open-source {cass-short} supports optional compression of transport-level requests and responses that reduces network traffic at the cost of CPU overhead. When establishing connections from client applications, {product-proxy} responds with a list of compression algorithms supported by both clusters. -The compression algorithm configured in your {company}-compatible driver must match any item from the common list, or CQL request compression must be disabled completely. -{product-proxy} cannot decompress and recompress CQL requests using different compression algorithms. +The compression algorithm configured in your {company}-compatible driver must match any item from the common list, or {cql-short} request compression must be disabled completely. +{product-proxy} cannot decompress and recompress {cql-short} requests using different compression algorithms. This isn't related to storage compression, which you can configure on specific tables with the `compression` table property. Storage/table compression doesn't affect the client application or {product-proxy} in any way. @@ -296,25 +296,25 @@ There is no special handling for these requests, so you must consider the traver If the traversals are non-idempotent, then your must thoroughly validate and reconcile the data and the end of xref:ROOT:migrate-and-validate-data.adoc[Phase 2] to ensure that the target cluster is truly consistent with the origin cluster. For more information, see <>. 
+ -{company}'s recommended tools for data migration and reconciliation are CQL-based, so they only support migrations where the origin cluster is a database that uses the new {dse-short} Graph engine ({dse-short} 6.8 and later). +{company}'s recommended tools for data migration and reconciliation are {cql-short}-based, so they only support migrations where the origin cluster is a database that uses the new {dse-short} Graph engine ({dse-short} 6.8 and later). They _cannot_ be used with the earlier Graph engine in {dse-short} versions prior to 6.8. {dse-short} Search:: Read-only {dse-short} Search workloads can be moved directly from the origin to the target without {product-proxy} being involved. If your client application uses Search and also issues writes, or if you need the read routing capabilities from {product-proxy}, then you can connect your Search workloads to {product-proxy} as long as you are using xref:datastax-drivers:compatibility:driver-matrix.adoc[{company}-compatible drivers] to submit these queries. -This approach means the queries are regular CQL `SELECT` statements, so {product-proxy} handles them as regular read requests. +This approach means the queries are regular {cql-short} `SELECT` statements, so {product-proxy} handles them as regular read requests. + -If you use the HTTP API then you can either modify your applications to use the CQL API instead, or you must move those applications directly from the origin to the target when the migration is complete, if that is acceptable for your use case. +If you use the HTTP API then you can either modify your applications to use the {cql-short} API instead, or you must move those applications directly from the origin to the target when the migration is complete, if that is acceptable for your use case. == {astra} migrations This section describes specific considerations for {astra} migrations. 
If you need to make any changes to your data model or application logic for compatibility with {astra}, do so before starting the migration. -As mentioned previously, this is because {product-proxy} requires that the same CQL statements can be executed successfully on both clusters, and the data migration tools require matching schemas. +As mentioned previously, this is because {product-proxy} requires that the same {cql-short} statements can be executed successfully on both clusters, and the data migration tools require matching schemas. -{astra} guardrails, limits, and CQL compatibility:: -As a managed database-as-a-service (DBaaS) offering, {astra} implements xref:astra-db-serverless:databases:database-limits.adoc[guardrails and limits] on its databases, and xref:astra-db-serverless:cql:develop-with-cql.adoc[{astra} doesn't support all CQL functionality]. -Make sure your application workloads and CQL statements are compatible with these limitations. +{astra} guardrails, limits, and {cql-short} compatibility:: +As a managed database-as-a-service (DBaaS) offering, {astra} implements xref:astra-db-serverless:databases:database-limits.adoc[guardrails and limits] on its databases, and xref:astra-db-serverless:cql:develop-with-cql.adoc[{astra} doesn't support all {cql-short} functionality]. +Make sure your application workloads and {cql-short} statements are compatible with these limitations. + In self-managed clusters, such as {dse-short} and {cass-short}, you can configure the database limits in `cassandra.yml`. However, the xref:astra-db-serverless:databases:database-limits.adoc#cassandra-configuration-properties[{astra} `cassandra.yml`] cannot be changed unless explicitly approved and implemented by {company}. 
diff --git a/modules/ROOT/pages/hcd-migration-paths.adoc b/modules/ROOT/pages/hcd-migration-paths.adoc index b1183216..e1a1f147 100644 --- a/modules/ROOT/pages/hcd-migration-paths.adoc +++ b/modules/ROOT/pages/hcd-migration-paths.adoc @@ -25,7 +25,7 @@ During the {product-short} process, you use a xref:ROOT:migrate-and-validate-dat {company} recommends that you do the following: -* Choose a data migration tool that also includes strong validation capabilities, such as xref:ROOT:cassandra-data-migrator.adoc[{cass-migrator} ({cass-migrator-short})]. +* Choose a data migration tool that also includes strong validation capabilities, such as xref:ROOT:cassandra-data-migrator.adoc[{cass-migrator}]. * Be aware of incompatible data types that can fail to migrate from your old cluster. Data validation tools can identify inconsistencies as missing or mismatched data, but you still need to have a plan to resolve them. diff --git a/modules/ROOT/pages/index.adoc b/modules/ROOT/pages/index.adoc index 4b519302..65997e64 100644 --- a/modules/ROOT/pages/index.adoc +++ b/modules/ROOT/pages/index.adoc @@ -68,7 +68,7 @@ svg::sideloader:astra-migration-toolkit.svg[role="absolute bottom-1/2 translate- svg:common:ROOT:icons/datastax/insert-data.svg[role="mx-auto max-w-xs md:mx-0 lg:max-w-none"] -

{cass-migrator} ({cass-migrator-short})

+

{cass-migrator}

{cass-migrator-short} can migrate and validate data between {cass-short}-based clusters, with optional logging and reconciliation support.

@@ -81,12 +81,12 @@ svg::sideloader:astra-migration-toolkit.svg[role="absolute bottom-1/2 translate- svg:common:ROOT:icons/datastax/migrate.svg[role="mx-auto max-w-xs md:mx-0 lg:max-w-none"] -

{dsbulk-loader}

+

{dsbulk}

-

{dsbulk-loader} can load, unload, and count large volumes of data from your {cass-short}-based clusters.

+

{dsbulk-short} can load, unload, and count large volumes of data from your {cass-short}-based clusters.

- xref:dsbulk:overview:dsbulk-about.adoc[Get started with {dsbulk-loader}] + xref:dsbulk:overview:dsbulk-about.adoc[Get started with {dsbulk-short}]
diff --git a/modules/ROOT/pages/introduction.adoc b/modules/ROOT/pages/introduction.adoc index aa40b575..dc489523 100644 --- a/modules/ROOT/pages/introduction.adoc +++ b/modules/ROOT/pages/introduction.adoc @@ -20,7 +20,7 @@ For example, you might move from self-managed clusters to a cloud-based Database * You want to consolidate client applications running on separate clusters onto one shared cluster to minimize sprawl and maintenance. ==== -The {product-short} process uses {product-proxy}, {product-utility}, and {product-automation} to orchestrate live reads and writes on your databases while you move and validate data with a data migration tool, such as {sstable-sideloader}, {cass-migrator}, or {dsbulk-loader}. +The {product-short} process uses {product-proxy}, {product-utility}, and {product-automation} to orchestrate live reads and writes on your databases while you move and validate data with a data migration tool, such as {sstable-sideloader}, {cass-migrator}, or {dsbulk}. {product-proxy} keeps your databases in sync at all times through its dual-writes feature, which means you can seamlessly stop or abandon the migration at any point before the last phase of the migration (the final cutover to the new database). For more information about these tools, see xref:ROOT:components.adoc[]. @@ -45,7 +45,7 @@ The _target cluster_ is your new {cass-short}-based environment where you want t === Plan and prepare for your migration -From a functional standpoint, before you begin a migration, your client applications perform read/write operations directly with your existing xref:cql:ROOT:index.adoc[CQL]-compatible database, such as {cass}, {dse-short}, {hcd-short}, or {astra-db}. +From a functional standpoint, before you begin a migration, your client applications perform read/write operations directly with your existing xref:cql:ROOT:index.adoc[{cql-short}]-compatible database, such as {cass}, {dse-short}, {hcd-short}, or {astra-db}. 
Over the course of the migration, the contact points change to incorporate {product-proxy} and eventually switch to the target database. image:pre-migration0ra.png[Before the migration begins, your applications connect exclusively to your origin cluster] diff --git a/modules/ROOT/pages/manage-proxy-instances.adoc b/modules/ROOT/pages/manage-proxy-instances.adoc index 73fc440e..7c83c01d 100644 --- a/modules/ROOT/pages/manage-proxy-instances.adoc +++ b/modules/ROOT/pages/manage-proxy-instances.adoc @@ -136,7 +136,7 @@ Adjust this variable to limit the total number of connections on each instance. Default: `1000` `replace_cql_functions`:: -Whether {product-proxy} replaces standard `now()` CQL function calls in write requests with an explicit timeUUID value computed at proxy level. +Whether {product-proxy} replaces standard `now()` {cql-short} function calls in write requests with an explicit timeUUID value computed at proxy level. + * **`false` (default)**: Replacement of `now()` is disabled. * **`true`**: {product-proxy} replaces instances of `now()` in write requests with an explicit timeUUID value before sending the write to each cluster. @@ -201,7 +201,7 @@ This isn't recommended. Set the maximum pool size of available stream IDs managed by {product-proxy} per client connection. Use the same value as your driver's maximum stream IDs configuration. + -In the CQL protocol, every request has a unique stream ID. +In the {cql-short} protocol, every request has a unique stream ID. However, if there are a lot of requests in a given amount of time, errors can occur due to xref:datastax-drivers:developing:speculative-retry.adoc#stream-id-exhaustion[stream ID exhaustion]. + In the client application, the stream IDs are managed internally by the driver, and, in most drivers, the max number is 2048, which is the same default value used by {product-proxy}. 
diff --git a/modules/ROOT/pages/migrate-and-validate-data.adoc b/modules/ROOT/pages/migrate-and-validate-data.adoc
index d56b54ab..833862c9 100644
--- a/modules/ROOT/pages/migrate-and-validate-data.adoc
+++ b/modules/ROOT/pages/migrate-and-validate-data.adoc
@@ -7,7 +7,7 @@ In Phase 2 of {product}, you migrate data from the origin to the target, and the
 image::migration-phase2ra.png[In {product-short} Phase 2, you migrate data from the origin cluster to the target cluster]
 
-To move and validate data, you can use a dedicated data migration tool, such as {sstable-sideloader}, {cass-migrator}, or {dsbulk-loader}, or your can create your own custom data migration script.
+To move and validate data, you can use a dedicated data migration tool, such as {sstable-sideloader}, {cass-migrator}, or {dsbulk}, or you can create your own custom data migration script.
 
 //Migration tool summaries are also on ROOT:components.adoc.
 
@@ -16,7 +16,7 @@ To move and validate data, you can use a dedicated data migration tool, such as 
 This tool is exclusively for migrations that move data to {astra-db}.
 
 {sstable-sideloader} is a service running in {astra-db} that imports data from snapshots of your existing {cass-reg}-based cluster.
-Because it imports data directly, {sstable-sideloader} can offer several advantages over CQL-based tools like {dsbulk-loader} and {cass-migrator}, including faster, more cost-effective data loading, and minimal performance impacts on your origin cluster and target database.
+Because it imports data directly, {sstable-sideloader} can offer several advantages over {cql-short}-based tools like {dsbulk-short} and {cass-migrator-short}, including faster, more cost-effective data loading, and minimal performance impacts on your origin cluster and target database.
To migrate data with {sstable-sideloader}, you use `nodetool`, a cloud provider's CLI, and the {astra} {devops-api}: @@ -34,21 +34,21 @@ svg::sideloader:astra-migration-toolkit.svg[] == {cass-migrator} -You can use {cass-migrator} ({cass-migrator-short}) for data migration and validation between {cass-short}-based databases. +You can use {cass-migrator-short} for data migration and validation between {cass-short}-based databases. It offers extensive functionality and configuration options to support large and complex migrations as well as post-migration data validation. You can use {cass-migrator-short} alone, with {product-proxy}, or for data validation after using another data migration tool. For more information, see xref:ROOT:cassandra-data-migrator.adoc[]. -== {dsbulk-loader} +== {dsbulk} -{dsbulk-loader} is a high-performance data loading and unloading tool for {cass-short}-based databases. +{dsbulk-short} is a high-performance data loading and unloading tool for {cass-short}-based databases. You can use it to load, unload, and count records. -Because {dsbulk-loader} doesn't have the same data validation capabilities as {cass-migrator-short}, it is best for migrations that don't require extensive data validation, aside from post-migration row counts. +Because {dsbulk-short} doesn't have the same data validation capabilities as {cass-migrator-short}, it is best for migrations that don't require extensive data validation, aside from post-migration row counts. -You can use {dsbulk-loader} alone or with {product-proxy}. +You can use {dsbulk-short} alone or with {product-proxy}. For more information, see xref:dsbulk:overview:dsbulk-about.adoc[]. 
@@ -59,7 +59,7 @@ include::ROOT:partial$dsbulk-migrator-deprecation.adoc[] == Other data migration processes -Depending on your origin and target databases, there might be other {product-short}-compatible data migration tools available, or you can write your own custom data migration processes with a tool like Apache Spark(TM). +Depending on your origin and target databases, there might be other {product-short}-compatible data migration tools available, or you can write your own custom data migration processes with a tool like {spark-reg}. To use a data migration tool with {product-proxy}, it must meet the following requirements: diff --git a/modules/ROOT/pages/setup-ansible-playbooks.adoc b/modules/ROOT/pages/setup-ansible-playbooks.adoc index 7961af8b..ce882657 100644 --- a/modules/ROOT/pages/setup-ansible-playbooks.adoc +++ b/modules/ROOT/pages/setup-ansible-playbooks.adoc @@ -108,9 +108,9 @@ The package filename format is `zdm-util-**PLATFORM**-**VERSION**.tgz`. The following example downloads {product-utility} version 2.3.1 for Linux amd64. To download a different package, change the version and package filename accordingly. + -[source,bash] +[source,bash,subs="+attributes"] ---- -wget https://github.com/datastax/zdm-proxy-automation/releases/download/v2.3.1/zdm-util-linux-amd64-v2.3.1.tgz +wget {product-automation-repo}/releases/download/v2.3.1/zdm-util-linux-amd64-v2.3.1.tgz ---- . Extract the archive: diff --git a/modules/ROOT/pages/troubleshooting-tips.adoc b/modules/ROOT/pages/troubleshooting-tips.adoc index ba62e3ff..e2673714 100644 --- a/modules/ROOT/pages/troubleshooting-tips.adoc +++ b/modules/ROOT/pages/troubleshooting-tips.adoc @@ -228,7 +228,7 @@ This flag will prevent you from accessing the logs when {product-proxy} stops or Querying `system.peers` and `system.local` can help you investigate {product-proxy} configuration issues: -. xref:ROOT:connect-clients-to-proxy.adoc#connect-cqlsh-to-zdm-proxy[Connect cqlsh to a {product-proxy} instance]. +. 
xref:ROOT:connect-clients-to-proxy.adoc#connect-cqlsh-to-zdm-proxy[Connect the {cql-shell} to a {product-proxy} instance]. . Query `system.peers`: + @@ -409,7 +409,7 @@ This happens because the application specifies the custom port as part of the co For example, if the {product-proxy} instances were listening on port 14035, the contact points for the {cass-short} Java driver might be specified as `.addContactPoints("172.18.10.36:14035", "172.18.11.48:14035", "172.18.12.61:14035")`. -The contact point is the first point of contact to the cluster, but the driver discovers the rest of the nodes through CQL queries. +The contact point is the first point of contact to the cluster, but the driver discovers the rest of the nodes through {cql-short} queries. However, this discovery process finds the addresses only, not the ports. The driver uses the addresses it discovers with the port that is configured at startup. As a result, the custom port is used for the initial contact point only, and the default port is used with all other nodes. @@ -441,7 +441,7 @@ Most {company}-compatible drivers have {dse-short} Insights reporting enabled by The driver might also have it enabled for {astra-db} depending on what server version {astra-db} is returning for queries involving the `system.local` and `system.peers` tables. These log messages are harmless, but if you need to remove them, you can disable {dse-short} Insights in the driver configuration. -For example, in the Java driver, you can set `https://github.com/apache/cassandra-java-driver/blob/4.x/core/src/main/resources/reference.conf#L1365[advanced.monitor-reporting]` to `false`. +For example, in the Java driver, you can set `{java-driver-url}/blob/4.x/core/src/main/resources/reference.conf#L1365[advanced.monitor-reporting]` to `false`. 
=== Default Grafana credentials don't work @@ -625,7 +625,7 @@ If you choose to adjust the infrastructure, you must repeat your tests to determ === Permission errors related to InsightsRpc -If the {product-proxy} logs contain messages such as the following, it's likely that you have an origin {dse-short} cluster where Metrics Collector is enabled, and the user named in the logs doesn't have sufficient permissions to report Insights data: +If the {product-proxy} logs contain messages such as the following, it's likely that you have an origin {dse-short} cluster where {metrics-collector} is enabled, and the user named in the logs doesn't have sufficient permissions to report Insights data: [source,log] ---- @@ -639,10 +639,10 @@ There are two ways to resolve this issue: [tabs] ====== -Disable {dse-short} Metrics Collector (recommended):: +Disable {metrics-collector} (recommended):: + -- -. On the origin {dse-short} cluster, disable Metrics Collector: +. On the origin {dse-short} cluster, disable {metrics-collector}: + [source,bash] ---- @@ -660,7 +660,7 @@ dsetool insights_config --show_config Grant InsightsRpc permissions:: + -- -Only use this option if you cannot disable Metrics Collector. +Only use this option if you cannot disable {metrics-collector}. 
Using a superuser role, grant the appropriate permissions to the user named in the logs: @@ -680,8 +680,8 @@ To report an issue or get additional support, submit an issue in the {product-sh * {product-proxy-repo}/issues[{product-proxy} repository] * {product-automation-repo}/issues[{product-automation} repository] (includes {product-automation} and {product-utility}) -* {cass-migrator-repo}/issues[{cass-migrator} repository] -* {dsbulk-loader-repo}/issues[{dsbulk-loader} repository] +* {cass-migrator-repo}/issues[{cass-migrator-short} repository] +* {dsbulk-repo}/issues[{dsbulk-short} repository] [IMPORTANT] ==== @@ -715,11 +715,11 @@ For performance-related issues, provide the following additional information: * If you use batch statements: + ** Which driver API do you use to create these batches? -** Are you passing a `BEGIN BATCH` CQL query string to a simple/prepared statement, or do you use the actual batch statement objects that the drivers allow you to create? +** Are you passing a `BEGIN BATCH` {cql-short} query string to a simple/prepared statement, or do you use the actual batch statement objects that the drivers allow you to create? * How many parameters does each statement have? -* Is CQL function replacement enabled? +* Is {cql-short} function replacement enabled? This feature is disabled by default. To determine if this feature is enabled, check the following variables: + diff --git a/modules/ROOT/pages/zdm-proxy-migration-paths.adoc b/modules/ROOT/pages/zdm-proxy-migration-paths.adoc index 71c5b93c..6c13c5c0 100644 --- a/modules/ROOT/pages/zdm-proxy-migration-paths.adoc +++ b/modules/ROOT/pages/zdm-proxy-migration-paths.adoc @@ -12,7 +12,7 @@ include::ROOT:partial$migration-scenarios.adoc[] If you don't want to use {product-proxy} or your databases don't meet the zero-downtime requirements, you can still complete the migration, but some downtime might be necessary to finish the migration. 
-If your origin cluster is incompatible with {product-proxy}, {product-utility}, and {product-automation}, you might be able to use standalone xref:ROOT:components.adoc#data-migration-tools[data migration tools] such as {dsbulk-loader} or a custom data migration script. +If your origin cluster is incompatible with {product-proxy}, {product-utility}, and {product-automation}, you might be able to use standalone xref:ROOT:components.adoc#data-migration-tools[data migration tools] such as {dsbulk-short} or a custom data migration script. Make sure you transform or prepare the data to comply with the target cluster's schema. For more complex migrations, such as RDBMS-to-NoSQL migrations, it is likely that your migration will require downtime for additional processing, such as extract, transform, and load (ETL) operations. diff --git a/modules/ROOT/partials/dsbulk-migrator-deprecation.adoc b/modules/ROOT/partials/dsbulk-migrator-deprecation.adoc index 29a6396f..36aaedeb 100644 --- a/modules/ROOT/partials/dsbulk-migrator-deprecation.adoc +++ b/modules/ROOT/partials/dsbulk-migrator-deprecation.adoc @@ -1,3 +1,3 @@ -The DSBulk Migrator tool, which was an extension of {dsbulk-loader}, is deprecated. +The {dsbulk-migrator} tool, which was an extension of {dsbulk}, is deprecated. This tool is no longer recommended. -Instead, use {dsbulk-loader}'s unload, load, and count commands, or use another data migration tool, such as {cass-migrator-short}. \ No newline at end of file +Instead, use the unload, load, and count commands included with {dsbulk-short}, or use another data migration tool, such as {cass-migrator-short}. 
\ No newline at end of file diff --git a/modules/sideloader/pages/migrate-sideloader.adoc b/modules/sideloader/pages/migrate-sideloader.adoc index eca68e26..86bda946 100644 --- a/modules/sideloader/pages/migrate-sideloader.adoc +++ b/modules/sideloader/pages/migrate-sideloader.adoc @@ -11,7 +11,7 @@ Before you use {sstable-sideloader} for a migration, xref:sideloader:sideloader- [#create-snapshots] == Create snapshots -On _each node_ in your origin cluster, use `nodetool` to create a backup of the data that you want to migrate, including all keyspaces and CQL tables that you want to migrate. +On _each node_ in your origin cluster, use `nodetool` to create a backup of the data that you want to migrate, including all keyspaces and {cql-short} tables that you want to migrate. . Be aware of the {sstable-sideloader} limitations related to materialized views, secondary indexes, and encrypted data that are described in xref:sideloader:prepare-sideloader.adoc#origin-cluster-requirements[Origin cluster requirements]. If necessary, modify the data model on your origin cluster to prepare for the migration. @@ -233,7 +233,7 @@ CREATE TABLE smart_home.sensor_readings ( + .. In the {astra-ui-link} navigation menu, click *Databases*, and then click the name of your {astra-db} database. .. xref:astra-db-serverless:databases:manage-keyspaces.adoc#keyspaces[Create a keyspace] with the exact same name as your origin cluster's keyspace. -.. In your database's xref:astra-db-serverless:cql:develop-with-cql.adoc#connect-to-the-cql-shell[CQL console], create tables with the exact same names and schemas as your origin cluster. +.. In your database's xref:astra-db-serverless:cql:develop-with-cql.adoc#connect-to-the-cql-shell[{cql-console}], create tables with the exact same names and schemas as your origin cluster. 
+ image::sideloader:cql-console-create-identical-schema.png[] + diff --git a/modules/sideloader/pages/prepare-sideloader.adoc b/modules/sideloader/pages/prepare-sideloader.adoc index de7ebcb1..dc55c053 100644 --- a/modules/sideloader/pages/prepare-sideloader.adoc +++ b/modules/sideloader/pages/prepare-sideloader.adoc @@ -162,7 +162,7 @@ If you choose the alternative option, you must modify the commands accordingly f * *{sstable-sideloader} doesn't support encrypted data*: If your origin cluster uses xref:6.9@dse:securing:transparent-data-encryption.adoc[{dse-short} Transparent Data Encryption], be aware that {sstable-sideloader} cannot migrate these SSTables. + If you have a mix of encrypted and unencrypted data, you can use {sstable-sideloader} to migrate the unencrypted data. -After the initial migration, you can use another strategy to move the encrypted data, such as {cass-migrator-repo}[{cass-migrator} ({cass-migrator-short})] or a manual export and reupload. +After the initial migration, you can use another strategy to move the encrypted data, such as {cass-migrator-repo}[{cass-migrator}] or a manual export and reupload. * *{sstable-sideloader} doesn't support secondary indexes*: If you don't remove or replace these in your origin cluster, {sstable-sideloader} ignores these directories when importing the data to your {astra-db} database. @@ -174,7 +174,7 @@ Your administration server must have SSH access to each node in your origin clus {company} recommends that you install the following additional software on your administration server: -* {cass-migrator-repo}[{cass-migrator} ({cass-migrator-short})] to validate imported data and, with {product-proxy}, reconcile it with the origin cluster. +* {cass-migrator-repo}[{cass-migrator-short}] to validate imported data and, with {product-proxy}, reconcile it with the origin cluster. * https://jqlang.github.io/jq/[jq] to format JSON responses from the {astra} {devops-api}. 
The {devops-api} commands in this guide use this tool. @@ -186,18 +186,18 @@ The following information can help you prepare for specific migration scenarios, [#minimum-migration-scope] === Minimum migration scope -To minimize data reconciliation issues, the recommended minimum migration scope is one CQL table across all nodes. +To minimize data reconciliation issues, the recommended minimum migration scope is one {cql-short} table across all nodes. -This means that a single migration process, from start to finish, should encapsulate the data for one CQL table as it exists on all of your origin nodes. -For example, if you are migrating one table, you need to upload snapshots of all SSTables from all nodes for that CQL table. +This means that a single migration process, from start to finish, should encapsulate the data for one {cql-short} table as it exists on all of your origin nodes. +For example, if you are migrating one table, you need to upload snapshots of all SSTables from all nodes for that {cql-short} table. -Avoid breaking one table into multiple migrations because migrating a subset of SSTables for one CQL table will likely result in data loss, corruption, or resurrection of previously deleted data. +Avoid breaking one table into multiple migrations because migrating a subset of SSTables for one {cql-short} table will likely result in data loss, corruption, or resurrection of previously deleted data. Each migration is performed separately, and each migration has no awareness of prior migrations. This means that data from later migrations can be incorrectly applied to the table. For example, if your first migration includes tombstones, that data could be resurrected if it is present in a subsequent migration from another node. -In contrast, if you use a single large migration to migrate all SSTables for a CQL table across all nodes, {astra-db} can reconcile the data across all nodes, ensuring that your migration is accurate and complete. 
+In contrast, if you use a single large migration to migrate all SSTables for a {cql-short} table across all nodes, {astra-db} can reconcile the data across all nodes, ensuring that your migration is accurate and complete. === Multi-region migrations @@ -241,7 +241,7 @@ The number of node snapshots that you uploaded to the migration bucket doesn't d The success of the import depends primarily on the validity of the schemas and the data in the snapshots. . After the import, validate the migrated data to ensure that it matches the data in the origin cluster. -For example, you can xref:ROOT:cassandra-data-migrator.adoc#cdm-validation-steps[run {cass-migrator} ({cass-migrator-short}) in validation mode]. +For example, you can xref:ROOT:cassandra-data-migrator.adoc#cdm-validation-steps[run {cass-migrator-short} in validation mode]. -- Migrate multiple nodes to multiple databases:: @@ -284,7 +284,7 @@ The number of node snapshots that you uploaded to the migration bucket doesn't d The success of the import depends primarily on the validity of the schemas and the data in the snapshots.\ . After the import, validate the migrated data to ensure that it matches the data in the origin cluster. -For example, you can xref:ROOT:cassandra-data-migrator.adoc#cdm-validation-steps[run {cass-migrator} ({cass-migrator-short}) in validation mode]. +For example, you can xref:ROOT:cassandra-data-migrator.adoc#cdm-validation-steps[run {cass-migrator-short} in validation mode]. -- ====== @@ -297,7 +297,7 @@ If you initialize multiple migrations for the same database, you generate multip This can be useful for breaking large migrations into smaller batches. For example, if you have 100 snapshots, you could initialize 10 migrations, and then upload 10 different snapshots to each migration directory. -However, don't break one CQL table into multiple migrations, as explained in <>. +However, don't break one {cql-short} table into multiple migrations, as explained in <>. 
You can upload snapshots to multiple migration directories at once. However, when you reach the import phase of the migration, {sstable-sideloader} can import from only one migration directory at a time per database. @@ -305,7 +305,7 @@ For example, if you have 10 migration IDs for the same database, you must run 10 Each import must completely finish before starting the next import. After all of the imports are complete, validate the migrated data in your target database to ensure that it matches the data in the origin cluster. -For example, you can xref:ROOT:cassandra-data-migrator.adoc#cdm-validation-steps[run {cass-migrator} ({cass-migrator-short}) in validation mode]. +For example, you can xref:ROOT:cassandra-data-migrator.adoc#cdm-validation-steps[run {cass-migrator-short} in validation mode]. == Next steps diff --git a/modules/sideloader/pages/sideloader-overview.adoc b/modules/sideloader/pages/sideloader-overview.adoc index 5ea1f804..3d2d8650 100644 --- a/modules/sideloader/pages/sideloader-overview.adoc +++ b/modules/sideloader/pages/sideloader-overview.adoc @@ -4,7 +4,7 @@ {sstable-sideloader} is a service running in {astra-db} that directly imports data from snapshot backups that you've uploaded to {astra-db} from an existing {cass-reg}, {dse}, or {hcd} cluster. -Because it imports data directly, {sstable-sideloader} can offer several advantages over CQL-based tools like xref:dsbulk:overview:dsbulk-about.adoc[{company} Bulk Loader (DSBulk)] and xref:ROOT:cassandra-data-migrator.adoc[{cass-migrator} ({cass-migrator-short})], including faster, more cost-effective data loading, and minimal performance impacts on your origin cluster and target database. 
+Because it imports data directly, {sstable-sideloader} can offer several advantages over {cql-short}-based tools like xref:dsbulk:overview:dsbulk-about.adoc[{dsbulk}] and xref:ROOT:cassandra-data-migrator.adoc[{cass-migrator}], including faster, more cost-effective data loading, and minimal performance impacts on your origin cluster and target database. == {sstable-sideloader} concepts @@ -19,7 +19,7 @@ A server where you run the migration commands, including CLI commands and {astra It must have SSH access to each node in your origin cluster. Migration:: -A workflow that you initiate within {sstable-sideloader} that encompasses the lifecycle of uploading and importing snapshot backups of a specific set of keyspaces or CQL tables. +A workflow that you initiate within {sstable-sideloader} that encompasses the lifecycle of uploading and importing snapshot backups of a specific set of keyspaces or {cql-short} tables. + This process produces artifacts and parameters including migration buckets, migration IDs, migration directories, and upload credentials. You use these components throughout the migration workflow. @@ -40,7 +40,7 @@ For more information, see xref:sideloader:prepare-sideloader.adoc[]. === Create snapshot backups {sstable-sideloader} uses snapshot backup files to import SSTable data from your existing origin cluster. -Each snapshot for each node in the origin cluster must include all the keyspaces and individual CQL tables that you want to migrate. +Each snapshot for each node in the origin cluster must include all the keyspaces and individual {cql-short} tables that you want to migrate. These snapshots are ideal for database migrations because creating snapshots has a negligible performance impact on the origin cluster, and the snapshots preserve metadata like `writetime` and `ttl` values. 
@@ -115,7 +115,7 @@ include::sideloader:partial$validate.adoc[]
 
 == Use {sstable-sideloader} with {product-proxy}
 
-If you need to migrate a live database, you can use {sstable-sideloader} instead of {dsbulk-loader} or {cass-migrator} during of xref:ROOT:migrate-and-validate-data.adoc[Phase 2 of {product}].
+If you need to migrate a live database, you can use {sstable-sideloader} instead of {dsbulk-short} or {cass-migrator-short} during xref:ROOT:migrate-and-validate-data.adoc[Phase 2 of {product}].
 
 .Use {sstable-sideloader} with {product-proxy}
 svg::sideloader:astra-migration-toolkit.svg[]
diff --git a/modules/sideloader/partials/no-return.adoc b/modules/sideloader/partials/no-return.adoc
index 8561543b..77631bc0 100644
--- a/modules/sideloader/partials/no-return.adoc
+++ b/modules/sideloader/partials/no-return.adoc
@@ -1,2 +1,2 @@
 You can abort a migration up until the point at which {sstable-sideloader} starts importing SSTable metadata.
-After this point, you must wait for the migration to finish, and then you can use `cqlsh` to drop the keyspace/table in your target database before repeating the entire migration procedure.
\ No newline at end of file
+After this point, you must wait for the migration to finish, and then you can use the {cql-shell} (`cqlsh`) to drop the keyspace/table in your target database before repeating the entire migration procedure.
\ No newline at end of file
diff --git a/modules/sideloader/partials/validate.adoc b/modules/sideloader/partials/validate.adoc
index ac94e778..bac5da4d 100644
--- a/modules/sideloader/partials/validate.adoc
+++ b/modules/sideloader/partials/validate.adoc
@@ -1,4 +1,4 @@
-After the migration is complete, you can query the migrated data using the xref:astra-db-serverless:cql:develop-with-cql.adoc#connect-to-the-cql-shell[cqlsh] or xref:astra-db-serverless:api-reference:row-methods/find-many.adoc[{data-api}]. 
+After the migration is complete, you can query the migrated data using the xref:astra-db-serverless:cql:develop-with-cql.adoc#connect-to-the-cql-shell[{cql-shell}] (`cqlsh`) or xref:astra-db-serverless:api-reference:row-methods/find-many.adoc[{data-api}]. -You can xref:ROOT:cassandra-data-migrator.adoc#cdm-validation-steps[run {cass-migrator} ({cass-migrator-short}) in validation mode] for more thorough validation. +You can xref:ROOT:cassandra-data-migrator.adoc#cdm-validation-steps[run {cass-migrator-short} in validation mode] for more thorough validation. {cass-migrator-short} also offers an AutoCorrect mode to reconcile any differences that it detects. \ No newline at end of file From ca800870ed3e7d6b51397f511d73e78ed4c13a22 Mon Sep 17 00:00:00 2001 From: April M <36110273+aimurphy@users.noreply.github.com> Date: Fri, 20 Feb 2026 09:11:50 -0800 Subject: [PATCH 2/5] align sideloader attributes --- antora.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/antora.yml b/antora.yml index 75fabcea..bc38e43e 100644 --- a/antora.yml +++ b/antora.yml @@ -59,5 +59,4 @@ asciidoc: cass-migrator-short: 'CDM' cass-migrator-repo: 'https://github.com/datastax/cassandra-data-migrator' cass-migrator-shield: 'image:https://img.shields.io/github/v/release/datastax/cassandra-data-migrator?label=latest[alt="Latest cassandra-data-migrator release on GitHub",link="{cass-migrator-repo}/packages"]' - # Sideloader has a specific name in this repo. It is not identical to the one in the Serverless repo. 
sstable-sideloader: '{astra-db} Sideloader' \ No newline at end of file From 301ad7668d91b765896ed3c1c97836a2cb36fd75 Mon Sep 17 00:00:00 2001 From: April M <36110273+aimurphy@users.noreply.github.com> Date: Fri, 20 Feb 2026 09:15:49 -0800 Subject: [PATCH 3/5] update local preview playbook --- local-preview-playbook.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/local-preview-playbook.yml b/local-preview-playbook.yml index 8513ddfa..c12607f5 100644 --- a/local-preview-playbook.yml +++ b/local-preview-playbook.yml @@ -85,6 +85,12 @@ asciidoc: starlight-kafka: 'Starlight for Kafka' starlight-rabbitmq: 'Starlight for RabbitMQ' astra-streaming-examples-repo: 'https://github.com/datastax/astra-streaming-examples' + sstable-sideloader: '{astra-db} Sideloader' + zdm: 'Zero Downtime Migration' + zdm-short: 'ZDM' + zdm-proxy: 'ZDM Proxy' + cass-migrator: 'Cassandra Data Migrator (CDM)' + cass-migrator-short: 'CDM' hcd: 'Hyper-Converged Database (HCD)' hcd-short: 'HCD' dse: 'DataStax Enterprise (DSE)' From e56014cb659e999cd0165580d7e2183b9454467a Mon Sep 17 00:00:00 2001 From: April M <36110273+aimurphy@users.noreply.github.com> Date: Fri, 20 Feb 2026 09:22:34 -0800 Subject: [PATCH 4/5] Update local-preview-playbook.yml --- local-preview-playbook.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/local-preview-playbook.yml b/local-preview-playbook.yml index c12607f5..e6414c9f 100644 --- a/local-preview-playbook.yml +++ b/local-preview-playbook.yml @@ -106,6 +106,7 @@ asciidoc: cql-short: 'CQL' cql-shell: 'CQL shell' cql-console: 'CQL console' + cql-service: 'CQL Service' pulsar-reg: 'Apache Pulsar(TM)' pulsar: 'Apache Pulsar' pulsar-short: 'Pulsar' From d633440bc77dd0eda8ea15fafeba204f0649a9b7 Mon Sep 17 00:00:00 2001 From: April M <36110273+aimurphy@users.noreply.github.com> Date: Sun, 22 Feb 2026 07:00:51 -0800 Subject: [PATCH 5/5] remove cql-short --- antora.yml | 1 - local-preview-playbook.yml | 1 - .../ROOT/pages/connect-clients-to-proxy.adoc | 4 +-- 
.../ROOT/pages/connect-clients-to-target.adoc | 8 ++--- modules/ROOT/pages/create-target.adoc | 12 +++---- .../ROOT/pages/feasibility-checklists.adoc | 32 +++++++++---------- modules/ROOT/pages/introduction.adoc | 2 +- .../ROOT/pages/manage-proxy-instances.adoc | 4 +-- .../ROOT/pages/migrate-and-validate-data.adoc | 2 +- modules/ROOT/pages/troubleshooting-tips.adoc | 6 ++-- .../sideloader/pages/migrate-sideloader.adoc | 2 +- .../sideloader/pages/prepare-sideloader.adoc | 12 +++---- .../sideloader/pages/sideloader-overview.adoc | 6 ++-- 13 files changed, 45 insertions(+), 47 deletions(-) diff --git a/antora.yml b/antora.yml index bc38e43e..aaa001c9 100644 --- a/antora.yml +++ b/antora.yml @@ -18,7 +18,6 @@ asciidoc: cass: 'Apache Cassandra' cass-short: 'Cassandra' cql: 'Cassandra Query Language (CQL)' - cql-short: 'CQL' cql-shell: 'CQL shell' cql-console: 'CQL console' cql-proxy-url: 'https://github.com/datastax/cql-proxy' diff --git a/local-preview-playbook.yml b/local-preview-playbook.yml index e6414c9f..cf792e01 100644 --- a/local-preview-playbook.yml +++ b/local-preview-playbook.yml @@ -103,7 +103,6 @@ asciidoc: cass: 'Apache Cassandra' cass-short: 'Cassandra' cql: 'Cassandra Query Language (CQL)' - cql-short: 'CQL' cql-shell: 'CQL shell' cql-console: 'CQL console' cql-service: 'CQL Service' diff --git a/modules/ROOT/pages/connect-clients-to-proxy.adoc b/modules/ROOT/pages/connect-clients-to-proxy.adoc index 17a76251..da271394 100644 --- a/modules/ROOT/pages/connect-clients-to-proxy.adoc +++ b/modules/ROOT/pages/connect-clients-to-proxy.adoc @@ -200,14 +200,14 @@ The configuration logic, as well as the cluster and session management code, are == Connect the {cql-shell} to {product-proxy} -The {cql-shell} (`cqlsh`) is a command-line tool that you can use to send {cql-short} and `cqlsh` commands to your {cass-short}-based clusters, including {astra-db}, {dse-short}, {hcd-short}, and {cass} databases. 
+The {cql-shell} (`cqlsh`) is a command-line tool that you can use to send CQL and `cqlsh` commands to your {cass-short}-based clusters, including {astra-db}, {dse-short}, {hcd-short}, and {cass} databases. When issuing these commands directly to a cluster, you can use your database's included version of `cqlsh`, or you can download and run a standalone `cqlsh`. With {product-proxy}, you use a standalone `cqlsh` installation to issue commands to both clusters through {product-proxy}. Your origin and target clusters must have a common `cql_version` between them. -If there is no {cql-short} version that is compatible with both clusters, `cqlsh` won't be able to connect to {product-proxy}. +If there is no CQL version that is compatible with both clusters, `cqlsh` won't be able to connect to {product-proxy}. To connect `cqlsh` to a {product-proxy} instance, do the following: diff --git a/modules/ROOT/pages/connect-clients-to-target.adoc b/modules/ROOT/pages/connect-clients-to-target.adoc index 0053f22f..8cfa5f62 100644 --- a/modules/ROOT/pages/connect-clients-to-target.adoc +++ b/modules/ROOT/pages/connect-clients-to-target.adoc @@ -231,7 +231,7 @@ In addition to updating connection strings, you might also need to make the foll * Feature compatibility between your previous and current database platform. + -For example, after migrating to {astra-db}, your drivers cannot create keyspaces because xref:astra-db-serverless:cql:develop-with-cql.adoc[{cql-short} for {astra-db}] doesn't support `CREATE KEYSPACE`. +For example, after migrating to {astra-db}, your drivers cannot create keyspaces because xref:astra-db-serverless:cql:develop-with-cql.adoc[CQL for {astra-db}] doesn't support `CREATE KEYSPACE`. + Similarly, {astra-db} doesn't support {dse-short}-specific features like {dse-short} Insights Monitoring. 
@@ -243,15 +243,15 @@ Depending on your application's requirements, you might need to make these chang If you migrated to {astra-db} or {hcd-short}, you have the option of using the {data-api} instead of, or in addition to, a {cass-short} driver. -Although the {data-api} can read and write to {cql-short} tables, it is significantly different from driver code. +Although the {data-api} can read and write to CQL tables, it is significantly different from driver code. To use the {data-api}, you must rewrite your application code or create a new application. For more information, see the following: * xref:astra-db-serverless:api-reference:dataapiclient.adoc[Get started with the {data-api} in {astra-db}] -* xref:astra-db-serverless:api-reference:compare-dataapi-to-cql.adoc[Migrate to the {data-api} from {cql-short} in {astra-db}] +* xref:astra-db-serverless:api-reference:compare-dataapi-to-cql.adoc[Migrate to the {data-api} from CQL in {astra-db}] * xref:hyper-converged-database:api-reference:dataapiclient.adoc[Get started with the {data-api} in {hcd-short}] -* xref:hyper-converged-database:api-reference:compare-dataapi-to-cql.adoc[Migrate to the {data-api} from {cql-short} in {hcd-short}] +* xref:hyper-converged-database:api-reference:compare-dataapi-to-cql.adoc[Migrate to the {data-api} from CQL in {hcd-short}] == Migration complete diff --git a/modules/ROOT/pages/create-target.adoc b/modules/ROOT/pages/create-target.adoc index fe2b3dd8..9107357c 100644 --- a/modules/ROOT/pages/create-target.adoc +++ b/modules/ROOT/pages/create-target.adoc @@ -62,13 +62,13 @@ On your new database, the keyspace names, table names, column names, data types, + Note the following limitations and exceptions for tables in {astra-db}: + -* In {astra-db}, you must xref:astra-db-serverless:databases:manage-keyspaces.adoc[create keyspaces in the {astra-ui} or with the {devops-api}] because xref:astra-db-serverless:cql:develop-with-cql.adoc[{cql-short} for {astra-db}] doesn't support `CREATE 
KEYSPACE`. +* In {astra-db}, you must xref:astra-db-serverless:databases:manage-keyspaces.adoc[create keyspaces in the {astra-ui} or with the {devops-api}] because xref:astra-db-serverless:cql:develop-with-cql.adoc[CQL for {astra-db}] doesn't support `CREATE KEYSPACE`. * You can use the {cql-shell} (`cqlsh`), {cass-short} drivers, or the {data-api} to xref:astra-db-serverless:databases:manage-collections.adoc#create-a-table[create tables in {astra-db}]. However, the only optional table properties that {astra-db} supports are `default_time_to_live` and `comment`. As a best practice, omit xref:astra-db-serverless:cql:develop-with-cql.adoc#unsupported-values-are-ignored[unsupported DDL properties], such as compaction strategy and `gc_grace_seconds`, when creating tables in {astra-db}. * {astra-db} doesn't support Materialized Views (MVs) and certain types of indexes. You must adjust your data model and application logic to discard or replace these structures before beginning your migration. -For more information, see xref:astra-db-serverless:cql:develop-with-cql.adoc#limitations-on-cql-for-astra-db[Limitations on {cql-short} for {astra-db}]. +For more information, see xref:astra-db-serverless:cql:develop-with-cql.adoc#limitations-on-cql-for-astra-db[Limitations on CQL for {astra-db}]. * If you plan to use {sstable-sideloader} for xref:ROOT:migrate-and-validate-data.adoc[Phase 2], see the xref:sideloader:migrate-sideloader.adoc#record-schema[target database configuration requirements for migrating data with {sstable-sideloader}]. -- @@ -103,16 +103,16 @@ Store the authentication credentials securely for use by your client application On your new cluster, the keyspace names, table names, column names, data types, and primary keys must be identical to the schema on the origin cluster or the migration will fail. 
==== + -To copy the schema, you can run {cql-short} `DESCRIBE` on the origin cluster to get the schema that is being migrated, and then run the output on your new cluster. +To copy the schema, you can run CQL `DESCRIBE` on the origin cluster to get the schema that is being migrated, and then run the output on your new cluster. + -If your origin cluster is running an earlier version, you might need to edit {cql-short} clauses that are no longer supported in newer versions, such as `COMPACT STORAGE`. +If your origin cluster is running an earlier version, you might need to edit CQL clauses that are no longer supported in newer versions, such as `COMPACT STORAGE`. For specific changes in each version, see the release notes for your database platform and {cass-short} driver. -- -Other {cql-short}-compatible data stores:: +Other CQL-compatible data stores:: + -- -Support for other {cql-short}-compatible data stores isn't guaranteed for {product-proxy}. +Support for other CQL-compatible data stores isn't guaranteed for {product-proxy}. If your origin and target clusters meet the xref:ROOT:feasibility-checklists.adoc[protocol version compatibility requirements], you might be able to use {product-proxy} for your migration. As with any migration, {company} recommends that you test this in isolation before attempting a full-scale production migration. diff --git a/modules/ROOT/pages/feasibility-checklists.adoc b/modules/ROOT/pages/feasibility-checklists.adoc index 9c042b05..9754afb2 100644 --- a/modules/ROOT/pages/feasibility-checklists.adoc +++ b/modules/ROOT/pages/feasibility-checklists.adoc @@ -114,26 +114,26 @@ include::ROOT:partial$migration-scenarios.adoc[] [IMPORTANT] ==== To successfully migrate with {product-proxy}, the origin and target clusters must have matching schemas, including keyspace names, table names, column names, and data types. 
-A {cql-short} statement produced by your client application must be able to succeed _without modification_ on both clusters because {product-proxy} routes the exact same {cql-short} statements to both clusters. +A CQL statement produced by your client application must be able to succeed _without modification_ on both clusters because {product-proxy} routes the exact same CQL statements to both clusters. -Even without {product-proxy}, matching schemas are required for most {cql-short}-based data migration tools, such as {sstable-sideloader} and {cass-migrator}. +Even without {product-proxy}, matching schemas are required for most CQL-based data migration tools, such as {sstable-sideloader} and {cass-migrator}. If it is impossible to match the schemas, your migration cannot use {product-proxy}. See xref:ROOT:components.adoc[] for alternative migration tools and strategies, such as extract, transform, and load (ETL) processes. ==== -{product-proxy} _doesn't_ modify or transform {cql-short} statements with the exception of <>. +{product-proxy} _doesn't_ modify or transform CQL statements with the exception of <>. -A {cql-short} statement that your client application sends to {product-proxy} must be able to succeed on both clusters. +A CQL statement that your client application sends to {product-proxy} must be able to succeed on both clusters. This means that any keyspace that your client application uses must exist on both the origin and target clusters with the same keyspace name, table names, column names, and compatible data types. However, the keyspaces can have different replication strategies and durable write settings. -At the column level, the schema doesn't need to be a one-to-one match as long as the {cql-short} statements can be executed successfully on both clusters. +At the column level, the schema doesn't need to be a one-to-one match as long as the CQL statements can be executed successfully on both clusters. 
For example, if a table has 10 columns, and your client application uses only five of those columns, then you can recreate the table on the target cluster with only the five required columns. However, the data from the other columns won't be migrated to the target cluster if those columns don't exist on the target cluster. Before you decide to omit a column from the target cluster, make sure that it is acceptable to permanently lose that data after the migration. In some cases, you can change the primary key on the target cluster. -For example, if your compound primary key is `PRIMARY KEY (A, B)`, and you always provide parameters for the `A` and `B` columns in your {cql-short} statements, then you could change the key to `PRIMARY KEY (B, A)` when creating the schema on the target because your {cql-short} statements will still run successfully. +For example, if your compound primary key is `PRIMARY KEY (A, B)`, and you always provide parameters for the `A` and `B` columns in your CQL statements, then you could change the key to `PRIMARY KEY (B, A)` when creating the schema on the target because your CQL statements will still run successfully. == Request and error handling with {product-proxy} @@ -142,7 +142,7 @@ See xref:ROOT:components.adoc#how-zdm-proxy-handles-reads-and-writes[How {produc [[non-idempotent-operations]] == Lightweight Transactions and other non-idempotent operations -Non-idempotent {cql-short} operations don't produce the exact same result each time the request is executed. +Non-idempotent CQL operations don't produce the exact same result each time the request is executed. Whether the request is executed and the actual value written to the database depend on the column's current value or other conditions at the time of the request. 
Examples of non-idempotent operations include the following: @@ -253,8 +253,8 @@ LZ4 and Snappy compression algorithms require {product-proxy} version 2.4.0 or l The binary protocol used by {astra}, {dse-short}, {hcd-short}, and open-source {cass-short} supports optional compression of transport-level requests and responses that reduces network traffic at the cost of CPU overhead. When establishing connections from client applications, {product-proxy} responds with a list of compression algorithms supported by both clusters. -The compression algorithm configured in your {company}-compatible driver must match any item from the common list, or {cql-short} request compression must be disabled completely. -{product-proxy} cannot decompress and recompress {cql-short} requests using different compression algorithms. +The compression algorithm configured in your {company}-compatible driver must match any item from the common list, or CQL request compression must be disabled completely. +{product-proxy} cannot decompress and recompress CQL requests using different compression algorithms. This isn't related to storage compression, which you can configure on specific tables with the `compression` table property. Storage/table compression doesn't affect the client application or {product-proxy} in any way. @@ -296,25 +296,25 @@ There is no special handling for these requests, so you must consider the traver If the traversals are non-idempotent, then you must thoroughly validate and reconcile the data at the end of xref:ROOT:migrate-and-validate-data.adoc[Phase 2] to ensure that the target cluster is truly consistent with the origin cluster. For more information, see <>. + -{company}'s recommended tools for data migration and reconciliation are {cql-short}-based, so they only support migrations where the origin cluster is a database that uses the new {dse-short} Graph engine ({dse-short} 6.8 and later). 
+{company}'s recommended tools for data migration and reconciliation are CQL-based, so they only support migrations where the origin cluster is a database that uses the new {dse-short} Graph engine ({dse-short} 6.8 and later). They _cannot_ be used with the earlier Graph engine in {dse-short} versions prior to 6.8. {dse-short} Search:: Read-only {dse-short} Search workloads can be moved directly from the origin to the target without {product-proxy} being involved. If your client application uses Search and also issues writes, or if you need the read routing capabilities from {product-proxy}, then you can connect your Search workloads to {product-proxy} as long as you are using xref:datastax-drivers:compatibility:driver-matrix.adoc[{company}-compatible drivers] to submit these queries. -This approach means the queries are regular {cql-short} `SELECT` statements, so {product-proxy} handles them as regular read requests. +This approach means the queries are regular CQL `SELECT` statements, so {product-proxy} handles them as regular read requests. + -If you use the HTTP API then you can either modify your applications to use the {cql-short} API instead, or you must move those applications directly from the origin to the target when the migration is complete, if that is acceptable for your use case. +If you use the HTTP API then you can either modify your applications to use the CQL API instead, or you must move those applications directly from the origin to the target when the migration is complete, if that is acceptable for your use case. == {astra} migrations This section describes specific considerations for {astra} migrations. If you need to make any changes to your data model or application logic for compatibility with {astra}, do so before starting the migration. 
-As mentioned previously, this is because {product-proxy} requires that the same {cql-short} statements can be executed successfully on both clusters, and the data migration tools require matching schemas. +As mentioned previously, this is because {product-proxy} requires that the same CQL statements can be executed successfully on both clusters, and the data migration tools require matching schemas. -{astra} guardrails, limits, and {cql-short} compatibility:: -As a managed database-as-a-service (DBaaS) offering, {astra} implements xref:astra-db-serverless:databases:database-limits.adoc[guardrails and limits] on its databases, and xref:astra-db-serverless:cql:develop-with-cql.adoc[{astra} doesn't support all {cql-short} functionality]. -Make sure your application workloads and {cql-short} statements are compatible with these limitations. +{astra} guardrails, limits, and CQL compatibility:: +As a managed database-as-a-service (DBaaS) offering, {astra} implements xref:astra-db-serverless:databases:database-limits.adoc[guardrails and limits] on its databases, and xref:astra-db-serverless:cql:develop-with-cql.adoc[{astra} doesn't support all CQL functionality]. +Make sure your application workloads and CQL statements are compatible with these limitations. + In self-managed clusters, such as {dse-short} and {cass-short}, you can configure the database limits in `cassandra.yml`. However, the xref:astra-db-serverless:databases:database-limits.adoc#cassandra-configuration-properties[{astra} `cassandra.yml`] cannot be changed unless explicitly approved and implemented by {company}. 
diff --git a/modules/ROOT/pages/introduction.adoc b/modules/ROOT/pages/introduction.adoc index 8fb01b6b..45816c59 100644 --- a/modules/ROOT/pages/introduction.adoc +++ b/modules/ROOT/pages/introduction.adoc @@ -45,7 +45,7 @@ The _target cluster_ is your new {cass-short}-based environment where you want t === Plan and prepare for your migration -From a functional standpoint, before you begin a migration, your client applications perform read/write operations directly with your existing xref:cql:ROOT:index.adoc[{cql-short}]-compatible database, such as {cass}, {dse-short}, {hcd-short}, or {astra-db}. +From a functional standpoint, before you begin a migration, your client applications perform read/write operations directly with your existing xref:cql:ROOT:index.adoc[CQL]-compatible database, such as {cass}, {dse-short}, {hcd-short}, or {astra-db}. Over the course of the migration, the contact points change to incorporate {product-proxy} and eventually switch to the target database. image::ROOT:pre-migration0ra.svg[Before the migration begins, your applications connect exclusively to your origin cluster] diff --git a/modules/ROOT/pages/manage-proxy-instances.adoc b/modules/ROOT/pages/manage-proxy-instances.adoc index 7c83c01d..73fc440e 100644 --- a/modules/ROOT/pages/manage-proxy-instances.adoc +++ b/modules/ROOT/pages/manage-proxy-instances.adoc @@ -136,7 +136,7 @@ Adjust this variable to limit the total number of connections on each instance. Default: `1000` `replace_cql_functions`:: -Whether {product-proxy} replaces standard `now()` {cql-short} function calls in write requests with an explicit timeUUID value computed at proxy level. +Whether {product-proxy} replaces standard `now()` CQL function calls in write requests with an explicit timeUUID value computed at proxy level. + * **`false` (default)**: Replacement of `now()` is disabled. 
* **`true`**: {product-proxy} replaces instances of `now()` in write requests with an explicit timeUUID value before sending the write to each cluster. @@ -201,7 +201,7 @@ This isn't recommended. Set the maximum pool size of available stream IDs managed by {product-proxy} per client connection. Use the same value as your driver's maximum stream IDs configuration. + -In the {cql-short} protocol, every request has a unique stream ID. +In the CQL protocol, every request has a unique stream ID. However, if there are a lot of requests in a given amount of time, errors can occur due to xref:datastax-drivers:developing:speculative-retry.adoc#stream-id-exhaustion[stream ID exhaustion]. + In the client application, the stream IDs are managed internally by the driver, and, in most drivers, the max number is 2048, which is the same default value used by {product-proxy}. diff --git a/modules/ROOT/pages/migrate-and-validate-data.adoc b/modules/ROOT/pages/migrate-and-validate-data.adoc index 7a018a6c..91abb545 100644 --- a/modules/ROOT/pages/migrate-and-validate-data.adoc +++ b/modules/ROOT/pages/migrate-and-validate-data.adoc @@ -16,7 +16,7 @@ To move and validate data, you can use a dedicated data migration tool, such as This tool is exclusively for migrations that move data to {astra-db}. {sstable-sideloader} is a service running in {astra-db} that imports data from snapshots of your existing {cass-reg}-based cluster. -Because it imports data directly, {sstable-sideloader} can offer several advantages over {cql-short}-based tools like {dsbulk-short} and {cass-migrator-short}, including faster, more cost-effective data loading, and minimal performance impacts on your origin cluster and target database. 
+Because it imports data directly, {sstable-sideloader} can offer several advantages over CQL-based tools like {dsbulk-short} and {cass-migrator-short}, including faster, more cost-effective data loading, and minimal performance impacts on your origin cluster and target database. To migrate data with {sstable-sideloader}, you use `nodetool`, a cloud provider's CLI, and the {astra} {devops-api}: diff --git a/modules/ROOT/pages/troubleshooting-tips.adoc b/modules/ROOT/pages/troubleshooting-tips.adoc index e2673714..6d1e1852 100644 --- a/modules/ROOT/pages/troubleshooting-tips.adoc +++ b/modules/ROOT/pages/troubleshooting-tips.adoc @@ -409,7 +409,7 @@ This happens because the application specifies the custom port as part of the co For example, if the {product-proxy} instances were listening on port 14035, the contact points for the {cass-short} Java driver might be specified as `.addContactPoints("172.18.10.36:14035", "172.18.11.48:14035", "172.18.12.61:14035")`. -The contact point is the first point of contact to the cluster, but the driver discovers the rest of the nodes through {cql-short} queries. +The contact point is the first point of contact to the cluster, but the driver discovers the rest of the nodes through CQL queries. However, this discovery process finds the addresses only, not the ports. The driver uses the addresses it discovers with the port that is configured at startup. As a result, the custom port is used for the initial contact point only, and the default port is used with all other nodes. @@ -715,11 +715,11 @@ For performance-related issues, provide the following additional information: * If you use batch statements: + ** Which driver API do you use to create these batches? -** Are you passing a `BEGIN BATCH` {cql-short} query string to a simple/prepared statement, or do you use the actual batch statement objects that the drivers allow you to create? 
+** Are you passing a `BEGIN BATCH` CQL query string to a simple/prepared statement, or do you use the actual batch statement objects that the drivers allow you to create? * How many parameters does each statement have? -* Is {cql-short} function replacement enabled? +* Is CQL function replacement enabled? This feature is disabled by default. To determine if this feature is enabled, check the following variables: + diff --git a/modules/sideloader/pages/migrate-sideloader.adoc b/modules/sideloader/pages/migrate-sideloader.adoc index 86bda946..9f6cba5a 100644 --- a/modules/sideloader/pages/migrate-sideloader.adoc +++ b/modules/sideloader/pages/migrate-sideloader.adoc @@ -11,7 +11,7 @@ Before you use {sstable-sideloader} for a migration, xref:sideloader:sideloader- [#create-snapshots] == Create snapshots -On _each node_ in your origin cluster, use `nodetool` to create a backup of the data that you want to migrate, including all keyspaces and {cql-short} tables that you want to migrate. +On _each node_ in your origin cluster, use `nodetool` to create a backup of the data that you want to migrate, including all keyspaces and CQL tables that you want to migrate. . Be aware of the {sstable-sideloader} limitations related to materialized views, secondary indexes, and encrypted data that are described in xref:sideloader:prepare-sideloader.adoc#origin-cluster-requirements[Origin cluster requirements]. If necessary, modify the data model on your origin cluster to prepare for the migration. 
diff --git a/modules/sideloader/pages/prepare-sideloader.adoc b/modules/sideloader/pages/prepare-sideloader.adoc index dc55c053..881aa0af 100644 --- a/modules/sideloader/pages/prepare-sideloader.adoc +++ b/modules/sideloader/pages/prepare-sideloader.adoc @@ -186,18 +186,18 @@ The following information can help you prepare for specific migration scenarios, [#minimum-migration-scope] === Minimum migration scope -To minimize data reconciliation issues, the recommended minimum migration scope is one {cql-short} table across all nodes. +To minimize data reconciliation issues, the recommended minimum migration scope is one CQL table across all nodes. -This means that a single migration process, from start to finish, should encapsulate the data for one {cql-short} table as it exists on all of your origin nodes. -For example, if you are migrating one table, you need to upload snapshots of all SSTables from all nodes for that {cql-short} table. +This means that a single migration process, from start to finish, should encapsulate the data for one CQL table as it exists on all of your origin nodes. +For example, if you are migrating one table, you need to upload snapshots of all SSTables from all nodes for that CQL table. -Avoid breaking one table into multiple migrations because migrating a subset of SSTables for one {cql-short} table will likely result in data loss, corruption, or resurrection of previously deleted data. +Avoid breaking one table into multiple migrations because migrating a subset of SSTables for one CQL table will likely result in data loss, corruption, or resurrection of previously deleted data. Each migration is performed separately, and each migration has no awareness of prior migrations. This means that data from later migrations can be incorrectly applied to the table. For example, if your first migration includes tombstones, that data could be resurrected if it is present in a subsequent migration from another node. 
-In contrast, if you use a single large migration to migrate all SSTables for a {cql-short} table across all nodes, {astra-db} can reconcile the data across all nodes, ensuring that your migration is accurate and complete. +In contrast, if you use a single large migration to migrate all SSTables for a CQL table across all nodes, {astra-db} can reconcile the data across all nodes, ensuring that your migration is accurate and complete. === Multi-region migrations @@ -297,7 +297,7 @@ If you initialize multiple migrations for the same database, you generate multip This can be useful for breaking large migrations into smaller batches. For example, if you have 100 snapshots, you could initialize 10 migrations, and then upload 10 different snapshots to each migration directory. -However, don't break one {cql-short} table into multiple migrations, as explained in <>. +However, don't break one CQL table into multiple migrations, as explained in <>. You can upload snapshots to multiple migration directories at once. However, when you reach the import phase of the migration, {sstable-sideloader} can import from only one migration directory at a time per database. diff --git a/modules/sideloader/pages/sideloader-overview.adoc b/modules/sideloader/pages/sideloader-overview.adoc index 3d2d8650..0c441f9e 100644 --- a/modules/sideloader/pages/sideloader-overview.adoc +++ b/modules/sideloader/pages/sideloader-overview.adoc @@ -4,7 +4,7 @@ {sstable-sideloader} is a service running in {astra-db} that directly imports data from snapshot backups that you've uploaded to {astra-db} from an existing {cass-reg}, {dse}, or {hcd} cluster. -Because it imports data directly, {sstable-sideloader} can offer several advantages over {cql-short}-based tools like xref:dsbulk:overview:dsbulk-about.adoc[{dsbulk}] and xref:ROOT:cassandra-data-migrator.adoc[{cass-migrator}], including faster, more cost-effective data loading, and minimal performance impacts on your origin cluster and target database. 
+Because it imports data directly, {sstable-sideloader} can offer several advantages over CQL-based tools like xref:dsbulk:overview:dsbulk-about.adoc[{dsbulk}] and xref:ROOT:cassandra-data-migrator.adoc[{cass-migrator}], including faster, more cost-effective data loading, and minimal performance impacts on your origin cluster and target database. == {sstable-sideloader} concepts @@ -19,7 +19,7 @@ A server where you run the migration commands, including CLI commands and {astra It must have SSH access to each node in your origin cluster. Migration:: -A workflow that you initiate within {sstable-sideloader} that encompasses the lifecycle of uploading and importing snapshot backups of a specific set of keyspaces or {cql-short} tables. +A workflow that you initiate within {sstable-sideloader} that encompasses the lifecycle of uploading and importing snapshot backups of a specific set of keyspaces or CQL tables. + This process produces artifacts and parameters including migration buckets, migration IDs, migration directories, and upload credentials. You use these components throughout the migration workflow. @@ -40,7 +40,7 @@ For more information, see xref:sideloader:prepare-sideloader.adoc[]. === Create snapshot backups {sstable-sideloader} uses snapshot backup files to import SSTable data from your existing origin cluster. -Each snapshot for each node in the origin cluster must include all the keyspaces and individual {cql-short} tables that you want to migrate. +Each snapshot for each node in the origin cluster must include all the keyspaces and individual CQL tables that you want to migrate. These snapshots are ideal for database migrations because creating snapshots has a negligible performance impact on the origin cluster, and the snapshots preserve metadata like `writetime` and `ttl` values.