diff --git a/antora.yml b/antora.yml index 737e374b..aaa001c9 100644 --- a/antora.yml +++ b/antora.yml @@ -8,19 +8,26 @@ nav: asciidoc: attributes: - #General attributes company: 'DataStax' support-url: 'https://www.ibm.com/mysupport/s/' - #Other product attributes + spark-reg: 'Apache Spark(TM)' + spark: 'Apache Spark' + spark-short: 'Spark' + hadoop-reg: 'Apache Hadoop(R)' cass-reg: 'Apache Cassandra(R)' cass: 'Apache Cassandra' cass-short: 'Cassandra' + cql: 'Cassandra Query Language (CQL)' + cql-shell: 'CQL shell' + cql-console: 'CQL console' + cql-proxy-url: 'https://github.com/datastax/cql-proxy' + java-driver-url: 'https://github.com/apache/cassandra-java-driver' dse: 'DataStax Enterprise (DSE)' dse-short: 'DSE' + metrics-collector: 'DSE Metrics Collector' hcd: 'Hyper-Converged Database (HCD)' hcd-short: 'HCD' mc: 'Mission Control' - #Astra DB attributes astra-db: 'Astra DB' astra: 'Astra' db-serverless: 'Serverless (non-vector)' @@ -31,14 +38,9 @@ asciidoc: scb: 'Secure Connect Bundle (SCB)' scb-short: 'SCB' scb-brief: 'Secure Connect Bundle' - #Sideloader has a specific name in this repo. It is not identical to the one in the Serverless repo. 
- sstable-sideloader: '{astra-db} Sideloader' - #devops api attributes devops-api: 'DevOps API' devops-api-ref-url: 'xref:astra-api-docs:ROOT:attachment$devops-api/index.html' - #data api attributes data-api: 'Data API' - #Migration docs attributes product: 'Zero Downtime Migration' product-short: 'ZDM' product-proxy: 'ZDM Proxy' @@ -48,9 +50,12 @@ asciidoc: product-automation-repo: 'https://github.com/datastax/zdm-proxy-automation' product-automation-shield: 'image:https://img.shields.io/github/v/release/datastax/zdm-proxy-automation?label=latest[alt="Latest zdm-proxy-automation release on GitHub",link="{product-automation-repo}/releases"]' product-demo: 'ZDM Demo Client' - dsbulk-loader: 'DSBulk Loader' - dsbulk-loader-repo: 'https://github.com/datastax/dsbulk' - cass-migrator: 'Cassandra Data Migrator' + dsbulk: 'DataStax Bulk Loader (DSBulk)' + dsbulk-short: 'DSBulk' + dsbulk-repo: 'https://github.com/datastax/dsbulk' + dsbulk-migrator: 'DSBulk Migrator' + cass-migrator: 'Cassandra Data Migrator (CDM)' cass-migrator-short: 'CDM' cass-migrator-repo: 'https://github.com/datastax/cassandra-data-migrator' - cass-migrator-shield: 'image:https://img.shields.io/github/v/release/datastax/cassandra-data-migrator?label=latest[alt="Latest cassandra-data-migrator release on GitHub",link="{cass-migrator-repo}/packages"]' \ No newline at end of file + cass-migrator-shield: 'image:https://img.shields.io/github/v/release/datastax/cassandra-data-migrator?label=latest[alt="Latest cassandra-data-migrator release on GitHub",link="{cass-migrator-repo}/packages"]' + sstable-sideloader: '{astra-db} Sideloader' \ No newline at end of file diff --git a/local-preview-playbook.yml b/local-preview-playbook.yml index c4c4fa93..cf792e01 100644 --- a/local-preview-playbook.yml +++ b/local-preview-playbook.yml @@ -60,34 +60,176 @@ asciidoc: xrefstyle: short # CUSTOM ATTRIBUTES company: 'DataStax' - astra_db: 'Astra DB' - astra_stream: 'Astra Streaming' - astra-stream: 'Astra Streaming' # AIM: 
Once all instances of astra_stream are removed, keep only the astra-stream attribute. - astra_ui: 'Astra Portal' - astra_cli: 'Astra CLI' - astra-cli: 'Astra CLI' # AIM: Once all instances of astra_cli are removed, keep only the astra-cli attribute. + trust-center: 'IBM Trust Center' + trust-center-url: 'https://www.ibm.com/trust' + trust-center-link: '{trust-center-url}[{trust-center}]' + support-url: 'https://www.ibm.com/mysupport/s/' + dsbulk: 'DataStax Bulk Loader (DSBulk)' + dsbulk-short: 'DSBulk' + dsbulk-repo: 'https://github.com/datastax/dsbulk' + astra: 'Astra' + astra-db: 'Astra DB' + astra-ui: 'Astra Portal' + astra-url: 'https://astra.datastax.com' + astra-ui-link: '{astra-url}[{astra-ui}^]' + db-classic: 'Managed Cluster' + db-serverless: 'Serverless (non-vector)' + db-serverless-vector: 'Serverless (vector)' scb: 'Secure Connect Bundle (SCB)' scb-short: 'SCB' scb-brief: 'Secure Connect Bundle' - astra-streaming-examples-repo: 'https://raw.githubusercontent.com/datastax/astra-streaming-examples/master' - luna-streaming-examples-repo: 'https://raw.githubusercontent.com/datastaxdevs/luna-streaming-examples/main' - support_url: 'https://www.ibm.com/mysupport/s/' - glossary-url: 'https://docs.datastax.com/en/glossary/docs/index.html#' - emoji-tada: "🎉" - emoji-rocket: "🚀" - emoji-smile: "😀" + devops-api: 'DevOps API' + devops-api-ref-url: 'xref:astra-api-docs:ROOT:attachment$devops-api/index.html' + astra-cli: 'Astra CLI' + astra-stream: 'Astra Streaming' + starlight-kafka: 'Starlight for Kafka' + starlight-rabbitmq: 'Starlight for RabbitMQ' + astra-streaming-examples-repo: 'https://github.com/datastax/astra-streaming-examples' + sstable-sideloader: '{astra-db} Sideloader' + zdm: 'Zero Downtime Migration' + zdm-short: 'ZDM' + zdm-proxy: 'ZDM Proxy' + cass-migrator: 'Cassandra Data Migrator (CDM)' + cass-migrator-short: 'CDM' + hcd: 'Hyper-Converged Database (HCD)' + hcd-short: 'HCD' dse: 'DataStax Enterprise (DSE)' - cassandra: 'Apache Cassandra(R)' - 
classic: 'classic' - classic_cap: 'Classic' - serverless: 'serverless' - serverless_cap: 'Serverless' - py-client-api-ref-url: 'xref:astra-api-docs:ROOT:attachment$python-client-1x/astrapy' - ts-client-api-ref-url: 'xref:astra-api-docs:ROOT:attachment$typescript-client-1x' - java-client-api-ref-url: 'xref:astra-api-docs:ROOT:attachment$java-client-1x' + dse-short: 'DSE' + metrics-collector: 'DSE Metrics Collector' + mc: 'Mission Control' + opscenter: 'DSE OpsCenter' + studio: 'DataStax Studio' + cass-reg: 'Apache Cassandra(R)' + cass: 'Apache Cassandra' + cass-short: 'Cassandra' + cql: 'Cassandra Query Language (CQL)' + cql-shell: 'CQL shell' + cql-console: 'CQL console' + cql-service: 'CQL Service' + pulsar-reg: 'Apache Pulsar(TM)' + pulsar: 'Apache Pulsar' + pulsar-short: 'Pulsar' + spark-reg: 'Apache Spark(TM)' + spark: 'Apache Spark' + spark-short: 'Spark' + spark-connect: 'Spark Connect' + spark-connector: 'Apache Cassandra Spark Connector' + spark-connector-short: 'Spark Connector' + kafka-reg: 'Apache Kafka(R)' + kafka: 'Apache Kafka' + kafka-short: 'Kafka' + kafka-connect: 'Kafka Connect' + kafka-connector: 'DataStax Apache Kafka Connector' + kafka-connector-short: 'Kafka Connector' + solr-reg: 'Apache Solr(TM)' + solr: 'Apache Solr' + solr-short: 'Solr' + lucene-reg: 'Apache Lucene(TM)' + lucene: 'Apache Lucene' + lucene-short: 'Lucene' + hadoop-reg: 'Apache Hadoop(R)' + hadoop: 'Apache Hadoop' + hadoop-short: 'Hadoop' + airflow-reg: 'Apache Airflow(R)' + airflow: 'Apache Airflow' + airflow-short: 'Airflow' + maven-reg: 'Apache Maven(TM)' + maven: 'Apache Maven' + maven-short: 'Maven' + flink-reg: 'Apache Flink(R)' + flink: 'Apache Flink' + flink-short: 'Flink' + beam-reg: 'Apache Beam(R)' + beam: 'Apache Beam' + beam-short: 'Beam' + geode-reg: 'Apache Geode(TM)' + geode: 'Apache Geode' + geode-short: 'Geode' + hbase-reg: 'Apache HBase(R)' + hbase: 'Apache HBase' + hbase-short: 'HBase' + kudu-reg: 'Apache Kudu(TM)' + kudu: 'Apache Kudu' + kudu-short: 
'Kudu' + phoenix-reg: 'Apache Phoenix(TM)' + phoenix: 'Apache Phoenix' + phoenix-short: 'Phoenix' + zookeeper-reg: 'Apache ZooKeeper(TM)' + zookeeper: 'Apache ZooKeeper' + zookeeper-short: 'ZooKeeper' + asf: 'Apache Software Foundation (ASF)' + asf-short: 'ASF' + tinkerpop-reg: 'Apache TinkerPop(TM)' + tinkerpop: 'Apache TinkerPop' + tinkerpop-short: 'TinkerPop' + cloudstack-reg: 'Apache CloudStack(R)' + cloudstack: 'Apache CloudStack' + cloudstack-short: 'CloudStack' + tomcat-reg: 'Apache Tomcat(R)' + tomcat: 'Apache Tomcat' + tomcat-short: 'Tomcat' + ajp: 'Apache JServ Protocol (AJP)' + ajp-short: 'AJP' + activemq-reg: 'Apache ActiveMQ(R)' + activemq: 'Apache ActiveMQ' + activemq-short: 'ActiveMQ' + tomee-reg: 'Apache TomEE(TM)' + tomee: 'Apache TomEE' + tomee-short: 'TomEE' + bookkeeper-reg: 'Apache BookKeeper(TM)' + bookkeeper: 'Apache BookKeeper' + bookkeeper-short: 'BookKeeper' + groovy-reg: 'Apache Groovy(TM)' + groovy: 'Apache Groovy' + groovy-short: 'Groovy' + cpp-driver-url: 'https://github.com/datastax/cpp-driver' + csharp-driver-url: 'https://github.com/datastax/csharp-driver' + gocql-astra-url: 'https://github.com/datastax/gocql-astra' + go-driver-url: 'https://github.com/apache/cassandra-gocql-driver' + cql-proxy-url: 'https://github.com/datastax/cql-proxy' + java-driver-url: 'https://github.com/apache/cassandra-java-driver' + nodejs-driver-url: 'https://github.com/datastax/nodejs-driver' + python-driver-url: 'https://github.com/datastax/python-driver' + scala-driver-url: 'https://github.com/apache/cassandra-spark-connector' + cass-driver-cpp-shield: 'image:https://img.shields.io/github/v/tag/datastax/cpp-driver?label=latest[alt="Latest cpp-driver release on GitHub",link="{cpp-driver-url}/tags"]' + cass-driver-csharp-shield: 'image:https://img.shields.io/nuget/v/CassandraCSharpDriver?label=latest[alt="Latest CassandraCSharpDriver release on NuGet",link="https://www.nuget.org/packages/CassandraCSharpDriver"]' + cass-driver-go-shield: 
'image:https://img.shields.io/github/v/tag/apache/cassandra-gocql-driver?label=latest%20gocql[alt="Latest gocql release on GitHub",link="{go-driver-url}/tags"]' + cass-driver-java-shield: 'image:https://img.shields.io/github/v/tag/apache/cassandra-java-driver?label=latest[alt="Latest cassandra-java-driver release on GitHub",link="{java-driver-url}/tags"]' + cass-driver-nodejs-shield: 'image:https://img.shields.io/github/v/tag/datastax/nodejs-driver?label=latest[alt="Latest nodejs-driver release on GitHub",link="{nodejs-driver-url}/tags"]' + cass-driver-python-shield: 'image:https://img.shields.io/github/v/tag/datastax/python-driver?label=latest[alt="Latest python-driver release on GitHub",link="{python-driver-url}/tags"]' + cass-driver-scala-shield: 'image:https://img.shields.io/github/v/tag/apache/cassandra-spark-connector?label=latest[alt="Latest cassandra-spark-connector release on GitHub",link="{scala-driver-url}/releases"]' + data-api: 'Data API' + csharp-client-api-ref-url: 'xref:astra-api-docs:ROOT:attachment$csharp-client' py-client-api-ref-url-2x: 'xref:astra-api-docs:ROOT:attachment$python-client/astrapy' ts-client-api-ref-url-2x: 'xref:astra-api-docs:ROOT:attachment$typescript-client' java-client-api-ref-url-2x: 'xref:astra-api-docs:ROOT:attachment$java-client' + python-client-repo-url: 'https://github.com/datastax/astrapy' + typescript-client-repo-url: 'https://github.com/datastax/astra-db-ts' + typescript-client-examples-url: '{typescript-client-repo-url}/blob/v2.x/examples' + java-client-repo-url: 'https://github.com/datastax/astra-db-java' + csharp-client-repo-url: 'https://github.com/datastax/astra-db-csharp' + python-client-python-version: '3.8' + dataapi-java-client-shield: 'image:https://img.shields.io/maven-central/v/com.datastax.astra/astra-db-java.svg?label=latest[alt="Latest astra-db-java release on Maven Central",link="https://search.maven.org/artifact/com.datastax.astra/astra-db-java"]' + dataapi-python-client-shield: 
'image:https://img.shields.io/github/v/tag/datastax/astrapy?label=latest[alt="Latest astrapy release on GitHub",link="{python-client-repo-url}/releases"]' + dataapi-typescript-client-shield: 'image:https://img.shields.io/github/v/tag/datastax/astra-db-ts?label=latest[alt="Latest astra-db-ts release on GitHub",link="{typescript-client-repo-url}/releases"]' + dataapi-csharp-client-shield: 'image:https://img.shields.io/github/v/tag/datastax/astra-db-csharp?label=latest[alt="Latest astra-db-csharp release on GitHub",link="{csharp-client-repo-url}/releases"]' + agent: 'DataStax Agent' + repair-service: 'Repair Service' + backup-service: 'Backup Service' + performance-service: 'Performance Service' + monitoring-service: 'OpsCenter Monitoring' + nodesync-service: 'NodeSync Service' + bestpractice-service: 'Best Practice Service' + capacity-service: 'Capacity Service' + lcm: 'Lifecycle Manager (LCM)' + lcm-short: 'LCM' + cr: 'custom resource (CR)' + cr-short: 'CR' + crd: 'custom resource definition (CRD)' + crd-short: 'CRD' + # Custom attributes only used in ragstack-ai + astra_db: 'Astra DB' + astra_ui: 'Astra Portal' # Antora Atlas primary-site-url: https://docs.datastax.com/en primary-site-manifest-url: https://docs.datastax.com/en/site-manifest.json diff --git a/modules/ROOT/nav.adoc b/modules/ROOT/nav.adoc index 9cc4ed72..e576fbb3 100644 --- a/modules/ROOT/nav.adoc +++ b/modules/ROOT/nav.adoc @@ -42,5 +42,5 @@ * xref:ROOT:cassandra-data-migrator.adoc[] * {cass-migrator-repo}/releases[{cass-migrator-short} release notes] -.{dsbulk-loader} +.{dsbulk} * xref:dsbulk:overview:dsbulk-about.adoc[] \ No newline at end of file diff --git a/modules/ROOT/pages/astra-migration-paths.adoc b/modules/ROOT/pages/astra-migration-paths.adoc index 54a09736..9b4b8f9f 100644 --- a/modules/ROOT/pages/astra-migration-paths.adoc +++ b/modules/ROOT/pages/astra-migration-paths.adoc @@ -9,7 +9,7 @@ If you have questions about migrating from a specific source to {astra-db}, cont .Migration tool 
compatibility [cols="2,1,1,1,1"] |=== -|Origin |{sstable-sideloader} |{cass-migrator} |{product-proxy} |{dsbulk-loader} +|Origin |{sstable-sideloader} |{cass-migrator} |{product-proxy} |{dsbulk} |Aiven for {cass-short} |icon:check[role="text-success",alt="Supported"] diff --git a/modules/ROOT/pages/cassandra-data-migrator.adoc b/modules/ROOT/pages/cassandra-data-migrator.adoc index 501e6c36..e69cf14d 100644 --- a/modules/ROOT/pages/cassandra-data-migrator.adoc +++ b/modules/ROOT/pages/cassandra-data-migrator.adoc @@ -1,6 +1,6 @@ = Use {cass-migrator} with {product-proxy} -:navtitle: Use {cass-migrator} -:description: You can use {cass-migrator} ({cass-migrator-short}) for data migration and validation between {cass-reg}-based databases. +:navtitle: Use {cass-migrator-short} +:description: You can use {cass-migrator} for data migration and validation between {cass-reg}-based databases. :page-aliases: cdm-parameters.adoc, ROOT:cdm-steps.adoc, ROOT:cdm-overview.adoc {description} @@ -51,41 +51,41 @@ The container's `assets` directory includes all required migration tools: `cassa Install as a JAR file:: + -- -. Install Java 11 or later, which includes Spark binaries. +. Install Java 11 or later, which includes {spark-short} binaries. -. Install https://spark.apache.org/downloads.html[Apache Spark(TM)] version 3.5.x with Scala 2.13 and Hadoop 3.3 and later. +. Install https://spark.apache.org/downloads.html[{spark-reg}] version 3.5.x with Scala 2.13 and {hadoop-reg} 3.3 and later. + [tabs] ==== Single VM:: + -For one-off migrations, you can install the Spark binary on a single VM where you will run the {cass-migrator-short} job. +For one-off migrations, you can install the {spark-short} binary on a single VM where you will run the {cass-migrator-short} job. + -. Get the Spark tarball from the Apache Spark archive. +. Get the {spark-reg} tarball from the {spark} archive. 
+ [source,bash,subs="+quotes"] ---- wget https://archive.apache.org/dist/spark/spark-3.5.**PATCH**/spark-3.5.**PATCH**-bin-hadoop3-scala2.13.tgz ---- + -Replace `**PATCH**` with your Spark patch version. +Replace `**PATCH**` with your {spark-short} patch version. + -. Change to the directory where you want install Spark, and then extract the tarball: +. Change to the directory where you want to install {spark-short}, and then extract the tarball: + [source,bash,subs="+quotes"] ---- tar -xvzf spark-3.5.**PATCH**-bin-hadoop3-scala2.13.tgz ---- + -Replace `**PATCH**` with your Spark patch version. +Replace `**PATCH**` with your {spark-short} patch version. -Spark cluster:: +{spark-reg} cluster:: + -For large (several terabytes) migrations, complex migrations, and use of {cass-migrator-short} as a long-term data transfer utility, {company} recommends that you use a Spark cluster or Spark Serverless platform. +For large (several terabytes) migrations, complex migrations, and use of {cass-migrator-short} as a long-term data transfer utility, {company} recommends that you use a {spark} cluster or {spark-short} Serverless platform. + -If you deploy CDM on a Spark cluster, you must modify your `spark-submit` commands as follows: +If you deploy {cass-migrator-short} on a {spark-short} cluster, you must modify your `spark-submit` commands as follows: + -* Replace `--master "local[*]"` with the host and port for your Spark cluster, as in `--master "spark://**MASTER_HOST**:**PORT**"`. +* Replace `--master "local[*]"` with the host and port for your {spark-short} cluster, as in `--master "spark://**MASTER_HOST**:**PORT**"`. * Remove parameters related to single-VM installations, such as `--driver-memory` and `--executor-memory`. ==== @@ -167,7 +167,7 @@ You can also set `spark.cdm.schema.origin.keyspaceTable` in your properties file * `**VERSION**`: Specify the full {cass-migrator-short} version that you installed, such as `5.2.1`. 
-- -Spark cluster:: +{spark-reg} cluster:: + -- [source,bash,subs="+quotes"] @@ -188,7 +188,7 @@ Depending on where your properties file is stored, you might need to specify the + You can also set `spark.cdm.schema.origin.keyspaceTable` in your properties file using the same format of `**KEYSPACE_NAME**.**TABLE_NAME**`. -* `--master`: Provide the URL of your Spark cluster. +* `--master`: Provide the URL of your {spark-short} cluster. * `**VERSION**`: Specify the full {cass-migrator-short} version that you installed, such as `5.2.1`. -- @@ -236,7 +236,7 @@ You can also set `spark.cdm.schema.origin.keyspaceTable` in your properties file * `**VERSION**`: Specify the full {cass-migrator-short} version that you installed, such as `5.2.1`. -- -Spark cluster:: +{spark-reg} cluster:: + -- [source,bash,subs="+quotes"] @@ -257,7 +257,7 @@ Depending on where your properties file is stored, you might need to specify the + You can also set `spark.cdm.schema.origin.keyspaceTable` in your properties file using the same format of `**KEYSPACE_NAME**.**TABLE_NAME**`. -* `--master`: Provide the URL of your Spark cluster. +* `--master`: Provide the URL of your {spark-short} cluster. * `**VERSION**`: Specify the full {cass-migrator-short} version that you installed, such as `5.2.1`. -- @@ -331,15 +331,15 @@ Specifically, see the {cass-migrator-repo}/blob/main/src/resources/cdm-detailed. 
.Java NoSuchMethodError [%collapsible] ==== -If you installed Spark as a JAR file, and your Spark and Scala versions aren't compatible with your installed version of {cass-migrator-short}, {cass-migrator-short} jobs can throw exceptions such a the following: +If you installed {spark-short} as a JAR file, and your {spark-short} and Scala versions aren't compatible with your installed version of {cass-migrator-short}, {cass-migrator-short} jobs can throw exceptions such as the following: [source,console] ---- Exception in thread "main" java.lang.NoSuchMethodError: 'void scala.runtime.Statics.releaseFence()' ---- -Make sure that your Spark binary is compatible with your {cass-migrator-short} version. -If you installed an earlier version of {cass-migrator-short}, you might need to install an earlier Spark binary. +Make sure that your {spark-short} binary is compatible with your {cass-migrator-short} version. +If you installed an earlier version of {cass-migrator-short}, you might need to install an earlier {spark-short} binary. ==== .Rerun a failed or partially completed job diff --git a/modules/ROOT/pages/components.adoc b/modules/ROOT/pages/components.adoc index 7dcb5c25..408b5ff3 100644 --- a/modules/ROOT/pages/components.adoc +++ b/modules/ROOT/pages/components.adoc @@ -30,7 +30,7 @@ Typically, you only need to update the connection string. === How {product-proxy} handles reads and writes {company} created {product-proxy} to orchestrate requests between a client application and both the origin and target clusters. -These clusters can be any xref:cql:ROOT:index.adoc[CQL]-compatible data store, such as {cass-reg}, {dse}, and {astra-db}. +These clusters can be any data store that supports the xref:cql:ROOT:index.adoc[{cql}], such as {cass-reg}, {dse}, {hcd}, and {astra-db}. During the migration process, you designate one cluster as the _primary cluster_, which serves as the source of truth for reads. 
For the majority of the migration process, this is typically the origin cluster. @@ -145,21 +145,21 @@ For more information, see xref:sideloader:sideloader-overview.adoc[]. === {cass-migrator} -You can use {cass-migrator} ({cass-migrator-short}) for data migration and validation between {cass-reg}-based databases. +You can use {cass-migrator-short} for data migration and validation between {cass-short}-based databases. It offers extensive functionality and configuration options to support large and complex migrations as well as post-migration data validation. You can use {cass-migrator-short} by itself, with {product-proxy}, or for data validation after using another data migration tool. For more information, see xref:ROOT:cassandra-data-migrator.adoc[]. -=== {dsbulk-loader} +=== {dsbulk} -{dsbulk-loader} is a high-performance data loading and unloading tool for {cass-short}-based databases. +{dsbulk-short} is a high-performance data loading and unloading tool for {cass-short}-based databases. You can use it to load, unload, and count records. -Because {dsbulk-loader} doesn't have the same data validation capabilities as {cass-migrator-short}, it is best for migrations that don't require extensive data validation, aside from post-migration row counts. +Because {dsbulk-short} doesn't have the same data validation capabilities as {cass-migrator-short}, it is best for migrations that don't require extensive data validation, aside from post-migration row counts. -You can use {dsbulk-loader} alone or with {product-proxy}. +You can use {dsbulk-short} alone or with {product-proxy}. For more information, see xref:dsbulk:overview:dsbulk-about.adoc[]. @@ -171,7 +171,7 @@ include::ROOT:partial$dsbulk-migrator-deprecation.adoc[] === Other data migration processes Depending on your origin and target databases, there might be other data migration tools available for your migration. 
-For example, if you want to write your own custom data migration processes, you can use a tool like Apache Spark(TM). +For example, if you want to write your own custom data migration processes, you can use a tool like {spark-reg}. To use a data migration tool with {product-proxy}, it must meet the following requirements: diff --git a/modules/ROOT/pages/connect-clients-to-proxy.adoc b/modules/ROOT/pages/connect-clients-to-proxy.adoc index 823683e6..da271394 100644 --- a/modules/ROOT/pages/connect-clients-to-proxy.adoc +++ b/modules/ROOT/pages/connect-clients-to-proxy.adoc @@ -3,12 +3,12 @@ {product-proxy} is designed to mimic communication with a typical cluster based on {cass-reg}. This means that your client applications connect to {product-proxy} in the same way that they already connect to your existing {cass-short}-based clusters. -You can communicate with {product-proxy} using the same xref:cql:ROOT:index.adoc[CQL] statements used in your existing client applications. +You can communicate with {product-proxy} using the same xref:cql:ROOT:index.adoc[{cql}] statements used in your existing client applications. It understands the same messaging protocols used by {cass-short}, {dse-short}, {hcd-short}, and {astra-db}. As a result, most client applications cannot distinguish between connections to {product-proxy} and direct connections to a {cass-short}-based cluster. This page explains how to connect client applications to a {cass-short}-based cluster using {cass-short} drivers, and then describes the how you need to modify your client applications to connect to {product-proxy}. -It also explains how to connect `cqlsh` to {product-proxy}, which is often used in conjunction with {cass-short} drivers and during the migration process for certain validation tasks. +It also explains how to connect the {cql-shell} (`cqlsh`) to {product-proxy}, which is often used in conjunction with {cass-short} drivers and during the migration process for certain validation tasks. 
== Connect applications to {cass-short} clusters @@ -198,18 +198,20 @@ For more information, see the https://github.com/absurdfarce/themis/blob/main/RE In addition to being a validation tool, Themis also provides an example of a larger client application that uses the Java driver to connect to {product-proxy}, or directly to a {cass-short} cluster, and perform operations. The configuration logic, as well as the cluster and session management code, are separated into distinct packages to make them easy to understand. -== Connect cqlsh to {product-proxy} +== Connect the {cql-shell} to {product-proxy} -`cqlsh` is a command-line tool that you can use to send CQL statements and `cqlsh`-specific commands to your {cass-short}-based clusters, including {astra-db}, {dse-short}, {hcd-short}, and {cass} databases. +The {cql-shell} (`cqlsh`) is a command-line tool that you can use to send CQL and `cqlsh` commands to your {cass-short}-based clusters, including {astra-db}, {dse-short}, {hcd-short}, and {cass} databases. -You can use your database's included version of `cqlsh`, or you can download and run a standalone `cqlsh`. +When issuing these commands directly to a cluster, you can use your database's included version of `cqlsh`, or you can download and run a standalone `cqlsh`. + +With {product-proxy}, you use a standalone `cqlsh` installation to issue commands to both clusters through {product-proxy}. Your origin and target clusters must have a common `cql_version` between them. If there is no CQL version that is compatible with both clusters, `cqlsh` won't be able to connect to {product-proxy}. To connect `cqlsh` to a {product-proxy} instance, do the following: -. On a machine that can connect to your {product-proxy} instance, xref:cql:connect:start-cqlsh-linux-mac.adoc[download and install `cqlsh`]. +. On a machine that can connect to your {product-proxy} instance, xref:cql:connect:start-cqlsh-linux-mac.adoc[download and install the {cql-shell}]. 
+ Any version of `cqlsh` can connect to {product-proxy}, but some clusters require a specific `cqlsh` version. diff --git a/modules/ROOT/pages/connect-clients-to-target.adoc b/modules/ROOT/pages/connect-clients-to-target.adoc index 514129ab..8cfa5f62 100644 --- a/modules/ROOT/pages/connect-clients-to-target.adoc +++ b/modules/ROOT/pages/connect-clients-to-target.adoc @@ -8,7 +8,7 @@ This removes the dependency on {product-proxy} and the origin cluster, thereby c image::ROOT:migration-phase5ra.svg[In Phase 5, your applications no longer use the proxy and, instead, connect directly to the target cluster] -The minimum requirements for reconfiguring these connections depend on whether your target cluster is {astra-db} or a generic CQL cluster, such as {cass-reg}, {dse}, or {hcd}. +The minimum requirements for reconfiguring these connections depend on whether your target cluster is {astra-db} or a self-managed {cass-short} cluster, such as {cass-reg}, {dse}, or {hcd}. [WARNING] ==== @@ -18,9 +18,9 @@ From this point onward, the clusters will diverge, and the target cluster become Be sure that you have thoroughly xref:ROOT:migrate-and-validate-data.adoc[validated your data (Phase 2)], xref:ROOT:enable-async-dual-reads.adoc[tested your target cluster's performance (Phase 3)], and xref:ROOT:change-read-routing.adoc[routed all reads to the target (Phase 4)] before permanently switching the connection. ==== -== Connect a driver to a generic CQL cluster +== Connect a driver to a self-managed {cass-short} cluster -If your origin and target clusters are both generic CQL clusters, such as {cass-short}, {dse-short}, or {hcd-short}, then the driver connection strings can be extremely similar. +If your origin and target clusters are both self-managed {cass-short} clusters, such as {cass-short}, {dse-short}, or {hcd-short}, then the driver connection strings can be extremely similar. It's possible that your code requires only minor changes to connect to the target cluster. . 
At minimum, update your driver configuration to use the appropriate contact points for your target cluster. @@ -40,7 +40,7 @@ For more information on supported drivers, see xref:datastax-drivers:compatibili == Connect a driver to {astra-db} -Connections to {astra-db} are different from connections to generic CQL clusters. +Connections to {astra-db} are different from connections to self-managed {cass-short} clusters. === Get {astra-db} credentials @@ -199,7 +199,7 @@ If your driver version is fully compatible with {astra-db}, then it has built-in The changes required to connect your application to {astra-db} are minimal. If your client application uses an earlier driver version without built-in {scb-short} support, {company} strongly recommends upgrading to a compatible driver to simplify configuration and get the latest features and bug fixes. -If you prefer to make this change after the migration, or you must support a legacy application that relies on an earlier driver, you can connect to {astra-db} with https://github.com/datastax/cql-proxy[CQL Proxy], or you must extract the {scb-short} archive and provide the individual files to enable mTLS in your driver's configuration. +If you prefer to make this change after the migration, or you must support a legacy application that relies on an earlier driver, you can connect to {astra-db} with `{cql-proxy-url}[cql-proxy]`, or you must extract the {scb-short} archive and provide the individual files to enable mTLS in your driver's configuration. The following example demonstrates how the {cass-short} Python driver connects to {astra-db} using an application token and {scb-short}: @@ -239,19 +239,19 @@ Similarly, {astra-db} doesn't support {dse-short}-specific features like {dse-sh Depending on your application's requirements, you might need to make these changes immediately, or you might make them after switching the connection. 
-== Switch to the Data API +== Switch to the {data-api} -If you migrated to {astra-db} or {hcd-short}, you have the option of using the Data API instead of, or in addition to, a {cass-short} driver. +If you migrated to {astra-db} or {hcd-short}, you have the option of using the {data-api} instead of, or in addition to, a {cass-short} driver. -Although the Data API can read and write to CQL tables, it is significantly different from driver code. -To use the Data API, you must rewrite your application code or create a new application. +Although the {data-api} can read and write to CQL tables, it is significantly different from driver code. +To use the {data-api}, you must rewrite your application code or create a new application. For more information, see the following: -* xref:astra-db-serverless:api-reference:dataapiclient.adoc[Get started with the Data API in {astra-db}] -* xref:astra-db-serverless:api-reference:compare-dataapi-to-cql.adoc[Migrate to the Data API from CQL in {astra-db}] -* xref:hyper-converged-database:api-reference:dataapiclient.adoc[Get started with the Data API in {hcd-short}] -* xref:hyper-converged-database:api-reference:compare-dataapi-to-cql.adoc[Migrate to the Data API from CQL in {hcd-short}] +* xref:astra-db-serverless:api-reference:dataapiclient.adoc[Get started with the {data-api} in {astra-db}] +* xref:astra-db-serverless:api-reference:compare-dataapi-to-cql.adoc[Migrate to the {data-api} from CQL in {astra-db}] +* xref:hyper-converged-database:api-reference:dataapiclient.adoc[Get started with the {data-api} in {hcd-short}] +* xref:hyper-converged-database:api-reference:compare-dataapi-to-cql.adoc[Migrate to the {data-api} from CQL in {hcd-short}] == Migration complete diff --git a/modules/ROOT/pages/create-target.adoc b/modules/ROOT/pages/create-target.adoc index 7e8f2905..9107357c 100644 --- a/modules/ROOT/pages/create-target.adoc +++ b/modules/ROOT/pages/create-target.adoc @@ -63,7 +63,7 @@ On your new database, the keyspace names, 
table names, column names, data types, Note the following limitations and exceptions for tables in {astra-db}: + * In {astra-db}, you must xref:astra-db-serverless:databases:manage-keyspaces.adoc[create keyspaces in the {astra-ui} or with the {devops-api}] because xref:astra-db-serverless:cql:develop-with-cql.adoc[CQL for {astra-db}] doesn't support `CREATE KEYSPACE`. -* You can use `cqlsh`, drivers, or the {data-api} to xref:astra-db-serverless:databases:manage-collections.adoc#create-a-table[create tables in {astra-db}]. +* You can use the {cql-shell} (`cqlsh`), {cass-short} drivers, or the {data-api} to xref:astra-db-serverless:databases:manage-collections.adoc#create-a-table[create tables in {astra-db}]. However, the only optional table properties that {astra-db} supports are `default_time_to_live` and `comment`. As a best practice, omit xref:astra-db-serverless:cql:develop-with-cql.adoc#unsupported-values-are-ignored[unsupported DDL properties], such as compaction strategy and `gc_grace_seconds`, when creating tables in {astra-db}. * {astra-db} doesn't support Materialized Views (MVs) and certain types of indexes. diff --git a/modules/ROOT/pages/deployment-infrastructure.adoc b/modules/ROOT/pages/deployment-infrastructure.adoc index 28228397..02d53796 100644 --- a/modules/ROOT/pages/deployment-infrastructure.adoc +++ b/modules/ROOT/pages/deployment-infrastructure.adoc @@ -49,7 +49,7 @@ In a sidecar deployment, each client application instance would be connecting to [[_machines]] == Hardware requirements -You need a number of machines to run your {product-proxy} instances, plus additional machines for the centralized jumphost, and for running {dsbulk-loader} or {cass-migrator} (which are recommended data migration and validation tools). 
+You need a number of machines to run your {product-proxy} instances, plus additional machines for the centralized jumphost, and for running {dsbulk} or {cass-migrator} (which are recommended data migration and validation tools). This section uses the term _machine_ broadly to refer to a cloud instance (on any cloud provider), a VM, or a physical server. @@ -83,10 +83,10 @@ The jumphost machine must meet the following specifications: * 200 to 500 GB of storage depending on the amount of metrics history that you want to retain * Equivalent to AWS **c5.2xlarge**, GCP **e2-standard-8**, or Azure **A8 v2** -Data migration tools ({dsbulk-loader} or {cass-migrator}):: -You need at least one machine to run {dsbulk-loader} or {cass-migrator} for the data migration and validation (xref:ROOT:migrate-and-validate-data.adoc[Phase 2]). +Data migration tools ({dsbulk-short} or {cass-migrator-short}):: +You need at least one machine to run {dsbulk-short} or {cass-migrator-short} for the data migration and validation (xref:ROOT:migrate-and-validate-data.adoc[Phase 2]). Even if you plan to use another data migration tool, you might need infrastructure for these tools or your chosen tool. -For example, {cass-migrator} is used for data validation after migrating data with {sstable-sideloader}. +For example, {cass-migrator-short} is used for data validation after migrating data with {sstable-sideloader}. + {company} recommends that you start with at least one VM that meets the following minimum specifications: + @@ -102,7 +102,7 @@ Whether you need additional machines depends on the total amount of data you nee All machines must meet the minimum specifications. For example, if you have 20 TBs of existing data to migrate, you could use 4 VMs to speed up the migration. -Then, you would run {dsbulk-loader} or {cass-migrator} in parallel on each VM with each one responsible for migrating specific tables or a portion of the data, such as 25% or 5 TB. 
+Then, you would run {dsbulk-short} or {cass-migrator-short} in parallel on each VM with each one responsible for migrating specific tables or a portion of the data, such as 25% or 5 TB. If you have one especially large table, such as 75% of the total data in one table, you can use multiple VMs to migrate that one table. For example, if you have 4 VMs, you can use three VMs in parallel for the large table by splitting the table's full token range into three groups. diff --git a/modules/ROOT/pages/faqs.adoc b/modules/ROOT/pages/faqs.adoc index e224f729..20b3035e 100644 --- a/modules/ROOT/pages/faqs.adoc +++ b/modules/ROOT/pages/faqs.adoc @@ -6,7 +6,7 @@ This page includes common questions about the {company} {product} ({product-shor == What is a zero-downtime migration? -A zero-downtime migration with the {company} {product-short} tools means you can reliably migrate your client applications and data between CQL clusters with no interruption of service. +A zero-downtime migration with the {company} {product-short} tools means you can reliably migrate your client applications and data between {cass-short}-based clusters with no interruption of service. == Which platforms and versions are supported by the {product-short} tools? @@ -41,7 +41,7 @@ Endless validation and testing time:: Because your client applications remain fully operational during the migration, and your clusters are kept in sync by {product-proxy}, you can take as much time as you need to validate and test the target cluster before switching over permanently. Migrate to a different platform or perform major version upgrades:: -The {product-short} tools support migrations between different CQL-based platforms, such as open-source {cass-reg} to {astra-db}, as well as major version upgrades of the same platform, such as {dse-short} 5.0 to {dse-short} 6.9. 
+The {product-short} tools support migrations between different {cass-short}-based platforms, such as open-source {cass-reg} to {astra-db}, as well as major version upgrades of the same platform, such as {dse-short} 5.0 to {dse-short} 6.9. == What are the requirements for true zero-downtime migrations? @@ -60,7 +60,7 @@ Yes, you can use the xref:ROOT:introduction.adoc#lab[{product-short} interactive The {product-short} tools are {product-proxy}, {product-utility}, and {product-automation}. These tools orchestrate the traffic between your client applications and the origin and target clusters during the migration process. -For the actual data migration, there are many tools you can use, such as {sstable-sideloader}, {cass-migrator}, {dsbulk-loader}, and custom data migration scripts. +For the actual data migration, there are many tools you can use, such as {sstable-sideloader}, {cass-migrator}, {dsbulk}, and custom data migration scripts. For more information, see xref:ROOT:components.adoc[]. @@ -92,12 +92,12 @@ At the end of the migration process, your client application connects exclusivel Before the {product-short} tools were available, migrating client applications between clusters involved granular and intrusive client application code changes, extensive migration preparation, and a window of downtime for the client application's end users. -With the {product-short} tools, you can migrate your client applications and data between CQL clusters with minimal code changes and no interruption of service. +With the {product-short} tools, you can migrate your client applications and data between {cass-short}-based clusters with minimal code changes and no interruption of service. You can have the confidence that you are using tools designed specifically to handle the complexities of live traffic during large enterprise migrations. == What is the pricing model? 
-{product-proxy}, {product-utility}, {product-automation}, {cass-migrator}, and {dsbulk-loader} are free and open-sourced. +{product-proxy}, {product-utility}, {product-automation}, {cass-migrator-short}, and {dsbulk-short} are free and open-source. {sstable-sideloader} is part of an {astra-db} *Enterprise* subscription plan, and it incurs costs based on usage. @@ -109,12 +109,12 @@ For any observed problems with {product-proxy} or the other open-source {product * {product-proxy-repo}[{product-proxy} repository] * {product-automation-repo}[{product-automation} repository] (includes {product-automation} and {product-utility}) -* {cass-migrator-repo}[{cass-migrator} repository] -* {dsbulk-loader-repo}[{dsbulk-loader} repository] +* {cass-migrator-repo}[{cass-migrator-short} repository] +* {dsbulk-repo}[{dsbulk-short} repository] == Can I contribute to {product-proxy}? -Yes, see `https://github.com/datastax/zdm-proxy/blob/main/CONTRIBUTING.md[CONTRIBUTING.md]`. +Yes, see `{product-proxy-repo}/blob/main/CONTRIBUTING.md[CONTRIBUTING.md]`. == Does {product-proxy} support Transport Layer Security (TLS)? @@ -158,10 +158,10 @@ For more information, see xref:components.adoc#how-zdm-proxy-handles-reads-and-w In the context of the {product-short} process, the terms _cluster_ and _database_ are used interchangeably to refer to the source and destination for the data that you are moving during your migration. -== What happened to DSBulk Migrator? +== What happened to {dsbulk-migrator}? 
include::ROOT:partial$dsbulk-migrator-deprecation.adoc[] == See also -* https://github.com/datastax/zdm-proxy/blob/main/faq.md[{product-proxy} FAQ on GitHub] \ No newline at end of file +* {product-proxy-repo}/blob/main/faq.md[{product-proxy} FAQ on GitHub] \ No newline at end of file diff --git a/modules/ROOT/pages/feasibility-checklists.adoc b/modules/ROOT/pages/feasibility-checklists.adoc index d09d8cdf..9754afb2 100644 --- a/modules/ROOT/pages/feasibility-checklists.adoc +++ b/modules/ROOT/pages/feasibility-checklists.adoc @@ -103,7 +103,7 @@ However, be aware that {product-proxy} itself can have a performance impact, reg === Thrift isn't supported by {product-proxy} -If you are using an earlier driver or cluster version that only supports Thrift, you must change your client application to use CQL, and, if needed, upgrade your cluster before starting the migration process. +If you are using an earlier driver or cluster version that only supports Thrift, you must change your client application to use {cql} statements, and, if needed, upgrade your cluster before starting the migration process. == Supported data store providers, versions, and migration paths @@ -155,7 +155,7 @@ By design, a {product-short} migration involves two separate clusters, and it is If you use non-idempotent operations, {company} recommends adding a reconciliation phase to your migration before and after xref:ROOT:change-read-routing.adoc[Phase 4]. This allows you an additional opportunity to resolve any data inconsistencies that are produced by non-idempotent operations. -The xref:ROOT:cassandra-data-migrator.adoc[{cass-migrator}] is ideal for detecting and reconciling these types of inconsistencies. +The xref:ROOT:cassandra-data-migrator.adoc[{cass-migrator-short}] is ideal for detecting and reconciling these types of inconsistencies. 
However, if your application workloads can tolerate inconsistencies produced by LWTs and non-idempotent operations, you might not need to perform any additional validation or reconciliation steps. This depends entirely on your application business logic and requirements. diff --git a/modules/ROOT/pages/hcd-migration-paths.adoc b/modules/ROOT/pages/hcd-migration-paths.adoc index b1183216..e1a1f147 100644 --- a/modules/ROOT/pages/hcd-migration-paths.adoc +++ b/modules/ROOT/pages/hcd-migration-paths.adoc @@ -25,7 +25,7 @@ During the {product-short} process, you use a xref:ROOT:migrate-and-validate-dat {company} recommends that you do the following: -* Choose a data migration tool that also includes strong validation capabilities, such as xref:ROOT:cassandra-data-migrator.adoc[{cass-migrator} ({cass-migrator-short})]. +* Choose a data migration tool that also includes strong validation capabilities, such as xref:ROOT:cassandra-data-migrator.adoc[{cass-migrator}]. * Be aware of incompatible data types that can fail to migrate from your old cluster. Data validation tools can identify inconsistencies as missing or mismatched data, but you still need to have a plan to resolve them. diff --git a/modules/ROOT/pages/index.adoc b/modules/ROOT/pages/index.adoc index 4b519302..65997e64 100644 --- a/modules/ROOT/pages/index.adoc +++ b/modules/ROOT/pages/index.adoc @@ -68,7 +68,7 @@ svg::sideloader:astra-migration-toolkit.svg[role="absolute bottom-1/2 translate- svg:common:ROOT:icons/datastax/insert-data.svg[role="mx-auto max-w-xs md:mx-0 lg:max-w-none"] -
{cass-migrator-short} can migrate and validate data between {cass-short}-based clusters, with optional logging and reconciliation support.
@@ -81,12 +81,12 @@ svg::sideloader:astra-migration-toolkit.svg[role="absolute bottom-1/2 translate- svg:common:ROOT:icons/datastax/migrate.svg[role="mx-auto max-w-xs md:mx-0 lg:max-w-none"] -{dsbulk-loader} can load, unload, and count large volumes of data from your {cass-short}-based clusters.
+{dsbulk-short} can load, unload, and count large volumes of data from your {cass-short}-based clusters.