Skip to content

Commit 77b2f3f

Browse files
Deploying to gh-pages from @ dstackai/dstack@0219c0b 🚀
1 parent bfa750d commit 77b2f3f

File tree

123 files changed

+850
-813
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

123 files changed

+850
-813
lines changed

assets/images/social/examples.png

-338 Bytes
Loading

assets/images/social/partners.png

-427 Bytes
Loading

assets/javascripts/extra.js

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,4 +155,11 @@ window.addEventListener("DOMContentLoaded", function() {
155155
}
156156
});
157157
})
158+
159+
document.querySelectorAll('a[href^="http"]').forEach(link => {
160+
if (!link.href.includes(location.hostname)) {
161+
link.setAttribute('target', '_blank');
162+
link.setAttribute('rel', 'noopener noreferrer');
163+
}
164+
});
158165
})()

assets/stylesheets/extra.css

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1350,7 +1350,7 @@ html .md-footer-meta.md-typeset a:is(:focus,:hover) {
13501350
visibility: visible;
13511351
}*/
13521352

1353-
.twemoji.external {
1353+
/* .twemoji.external {
13541354
position: relative;
13551355
top: 2.5px;
13561356
height: 18.5px;
@@ -1364,7 +1364,7 @@ html .md-footer-meta.md-typeset a:is(:focus,:hover) {
13641364
position: relative;
13651365
top: 1.5px;
13661366
margin-right: -7px;
1367-
}
1367+
} */
13681368

13691369
/*.md-tabs__item:nth-child(6) .md-tabs__link:before {
13701370
position: relative;
@@ -1801,3 +1801,33 @@ img.border {
18011801
font-size: 12px !important;;
18021802
padding: 30px !important;
18031803
}
1804+
1805+
/* External link indicator */
1806+
a[href^="http"]:not(:where(
1807+
/* exclude http:// dstack links */
1808+
[href^="http://dstack.ai"],
1809+
/* exclude https://dstack.ai links */
1810+
[href^="https://dstack.ai"]
1811+
)):after {
1812+
content: '';
1813+
display: inline-block;
1814+
width: 18.5px;
1815+
height: 18.5px;
1816+
margin-left: 0.15em;
1817+
vertical-align: -0.2em;
1818+
background-color: currentColor;
1819+
mask-image: url('data:image/svg+xml,<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor"><path d="m11.93 5 2.83 2.83L5 17.59 6.42 19l9.76-9.75L19 12.07V5z"></path></svg>');
1820+
mask-size: 100%;
1821+
mask-repeat: no-repeat;
1822+
mask-position: center;
1823+
-webkit-mask-image: url('data:image/svg+xml,<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor"><path d="m11.93 5 2.83 2.83L5 17.59 6.42 19l9.76-9.75L19 12.07V5z"></path></svg>');
1824+
-webkit-mask-size: 100%;
1825+
-webkit-mask-repeat: no-repeat;
1826+
-webkit-mask-position: center;
1827+
text-decoration: none;
1828+
}
1829+
1830+
/* Exclude links inside .md-social */
1831+
.md-social a[href^="http"]:after {
1832+
display: none;
1833+
}

assets/stylesheets/landing.css

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -327,7 +327,7 @@
327327
margin-right: -7px;
328328
}
329329

330-
.md-button-secondary.external:after {
330+
/* .md-button-secondary.external:after {
331331
content: url('data:image/svg+xml,<svg fill="rgba(0, 0, 0, 0.87)" xmlns="http://www.w3.org/2000/svg" width="20px" height="20px" viewBox="0 0 16 16"><polygon points="5 4.31 5 5.69 9.33 5.69 2.51 12.51 3.49 13.49 10.31 6.67 10.31 11 11.69 11 11.69 4.31 5 4.31" data-v-e1bdab2c=""></polygon></svg>');
332332
line-height: 14px;
333333
margin-left: 5px;
@@ -343,7 +343,7 @@
343343
position: relative;
344344
top: 2.5px;
345345
margin-right: -7px;
346-
}
346+
} */
347347

348348
.md-header__buttons .md-button-secondary,
349349
.md-typeset .md-button-secondary,
@@ -702,13 +702,13 @@
702702
line-height: 32px;
703703
}
704704

705-
.tx-landing__highlights_grid h3.external:after {
705+
/* .tx-landing__highlights_grid h3.external:after {
706706
content: url('data:image/svg+xml,<svg fill="black" xmlns="http://www.w3.org/2000/svg" width="22px" height="22px" viewBox="0 0 16 16"><polygon points="5 4.31 5 5.69 9.33 5.69 2.51 12.51 3.49 13.49 10.31 6.67 10.31 11 11.69 11 11.69 4.31 5 4.31" data-v-e1bdab2c=""></polygon></svg>');
707707
margin-left: 2px;
708708
position: relative;
709709
top: 3px;
710710
margin-right: -7px;
711-
}
711+
} */
712712

713713
.tx-landing__highlights_grid p {
714714
font-size: 16px;

blog/amd-mi300x-inference-benchmark/index.html

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4000,7 +4000,7 @@
40004000
<h1 id="benchmarking-llama-31-405b-on-8x-amd-mi300x-gpus">Benchmarking Llama 3.1 405B on 8x AMD MI300X GPUs<a class="headerlink" href="#benchmarking-llama-31-405b-on-8x-amd-mi300x-gpus" title="Permanent link">&para;</a></h1>
40014001
<p>At <code>dstack</code>, we've been adding support for AMD GPUs with <a href="../../docs/concepts/fleets/#ssh-fleets">SSH fleets</a>,
40024002
so we saw this as a great chance to test our integration by benchmarking AMD GPUs. Our friends at
4003-
<a href="https://hotaisle.xyz/" target="_blank">Hot Aisle <span class="twemoji external"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="m11.93 5 2.83 2.83L5 17.59 6.42 19l9.76-9.75L19 12.07V5z"/></svg></span></a>, who build top-tier
4003+
<a href="https://hotaisle.xyz/">Hot Aisle</a>, who build top-tier
40044004
bare metal compute for AMD GPUs, kindly provided the hardware for the benchmark.</p>
40054005
<p><img src="https://dstack.ai/static-assets/static-assets/images/dstack-hotaisle-amd-mi300x-prompt-v5.png" width="750" /></p>
40064006
<!-- more -->
@@ -4085,7 +4085,7 @@ <h2 id="benchmark-setup">Benchmark setup<a class="headerlink" href="#benchmark-s
40854085
<summary>TGI</summary>
40864086
<p>The <code>ghcr.io/huggingface/text-generation-inference:sha-11d7af7-rocm</code> Docker image was used.</p>
40874087
</details>
4088-
<p>For conducting the tests, we've been using the <a href="https://github.com/vllm-project/vllm/blob/main/benchmarks/benchmark_serving.py" target="_blank"><code>benchmark_serving</code> <span class="twemoji external"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="m11.93 5 2.83 2.83L5 17.59 6.42 19l9.76-9.75L19 12.07V5z"/></svg></span></a> provided by vLLM. </p>
4088+
<p>For conducting the tests, we've been using the <a href="https://github.com/vllm-project/vllm/blob/main/benchmarks/benchmark_serving.py"><code>benchmark_serving</code></a> provided by vLLM. </p>
40894089
<h2 id="observations">Observations<a class="headerlink" href="#observations" title="Permanent link">&para;</a></h2>
40904090
<h3 id="tokensec-per-batch-size">Token/sec per batch size<a class="headerlink" href="#tokensec-per-batch-size" title="Permanent link">&para;</a></h3>
40914091
<p>TGI consistently exceeds vLLM in token throughput across all batch sizes, with the performance difference growing larger
@@ -4127,7 +4127,7 @@ <h3 id="vram-consumption">VRAM consumption<a class="headerlink" href="#vram-cons
41274127
<p>When considering VRAM consumption right after loading model weights, TGI allocates approximately 28% less VRAM compared
41284128
to vLLM.</p>
41294129
<p><img src="https://raw.githubusercontent.com/dstackai/benchmarks/refs/heads/main/amd/inference/gpu_vram_tgi_vllm.png" width="750" /></p>
4130-
<p>This difference may be related to how vLLM <a href="https://docs.vllm.ai/en/latest/models/performance.html" target="_blank">pre-allocates GPU cache <span class="twemoji external"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="m11.93 5 2.83 2.83L5 17.59 6.42 19l9.76-9.75L19 12.07V5z"/></svg></span></a>.</p>
4130+
<p>This difference may be related to how vLLM <a href="https://docs.vllm.ai/en/latest/models/performance.html">pre-allocates GPU cache</a>.</p>
41314131
<h2 id="conclusion">Conclusion<a class="headerlink" href="#conclusion" title="Permanent link">&para;</a></h2>
41324132
<ol>
41334133
<li>For small sequence lengths, starting with a batch size of 64, TGI significantly outperforms vLLM in terms of throughput and TTFT.</li>
@@ -4153,17 +4153,17 @@ <h2 id="whats-next">What's next?<a class="headerlink" href="#whats-next" title="
41534153
</blockquote>
41544154
<h3 id="source-code">Source code<a class="headerlink" href="#source-code" title="Permanent link">&para;</a></h3>
41554155
<p>The source code used for this benchmark can be found in our
4156-
<a href="https://github.com/dstackai/benchmarks/tree/main/amd/inference" target="_blank">GitHub repo <span class="twemoji external"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="m11.93 5 2.83 2.83L5 17.59 6.42 19l9.76-9.75L19 12.07V5z"/></svg></span></a>.</p>
4156+
<a href="https://github.com/dstackai/benchmarks/tree/main/amd/inference">GitHub repo</a>.</p>
41574157
<p>If you have questions, feedback, or want to help improve the benchmark, please reach out to our team.</p>
41584158
<h2 id="thanks-to-our-friends">Thanks to our friends<a class="headerlink" href="#thanks-to-our-friends" title="Permanent link">&para;</a></h2>
41594159
<h3 id="hot-aisle">Hot Aisle<a class="headerlink" href="#hot-aisle" title="Permanent link">&para;</a></h3>
4160-
<p><a href="https://hotaisle.xyz/" target="_blank">Hot Aisle <span class="twemoji external"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="m11.93 5 2.83 2.83L5 17.59 6.42 19l9.76-9.75L19 12.07V5z"/></svg></span></a>
4160+
<p><a href="https://hotaisle.xyz/">Hot Aisle</a>
41614161
is the primary sponsor of this benchmark, and we are sincerely grateful for their hardware and support. </p>
41624162
<p>If you'd like to use top-tier bare metal compute with AMD GPUs, we recommend going
41634163
with Hot Aisle. Once you gain access to a cluster, it can be easily accessed via <code>dstack</code>'s <a href="../../docs/concepts/fleets/#ssh-fleets">SSH fleet</a>.</p>
41644164
<h3 id="runpod">RunPod<a class="headerlink" href="#runpod" title="Permanent link">&para;</a></h3>
41654165
<p>If you’d like to use on-demand compute with AMD GPUs at affordable prices, you can configure <code>dstack</code> to
4166-
use <a href="https://runpod.io/" target="_blank">RunPod <span class="twemoji external"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="m11.93 5 2.83 2.83L5 17.59 6.42 19l9.76-9.75L19 12.07V5z"/></svg></span></a>. In
4166+
use <a href="https://runpod.io/">RunPod</a>. In
41674167
this case, <code>dstack</code> will be able to provision fleets automatically when you run dev environments, tasks, and
41684168
services.</p>
41694169

blog/amd-on-runpod/index.html

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3882,14 +3882,14 @@ <h2 id="specification">Specification<a class="headerlink" href="#specification"
38823882
<p>One of the main advantages of the <code>MI300X</code> is its VRAM. For example, with the <code>H100 SXM</code>, you wouldn't be able to fit the FP16
38833883
version of Llama 3.1 405B into a single node with 8 GPUs—you'd have to use FP8 instead. However, with the <code>MI300X</code>, you
38843884
can fit FP16 into a single node with 8 GPUs, and for FP8, you'd only need 4 GPUs.</p>
3885-
<p>With the <a href="https://github.com/dstackai/dstack/releases/0.18.11rc1" target="_blank">latest update <span class="twemoji external"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="m11.93 5 2.83 2.83L5 17.59 6.42 19l9.76-9.75L19 12.07V5z"/></svg></span></a>,
3885+
<p>With the <a href="https://github.com/dstackai/dstack/releases/0.18.11rc1">latest update</a>,
38863886
you can now specify an AMD GPU under <code>resources</code>. Below are a few examples.</p>
38873887
<h2 id="configuration">Configuration<a class="headerlink" href="#configuration" title="Permanent link">&para;</a></h2>
38883888
<div class="tabbed-set tabbed-alternate" data-tabs="1:2"><input checked="checked" id="service" name="__tabbed_1" type="radio" /><input id="dev-environment" name="__tabbed_1" type="radio" /><div class="tabbed-labels"><label for="service">Service</label><label for="dev-environment">Dev environment</label></div>
38893889
<div class="tabbed-content">
38903890
<div class="tabbed-block">
38913891
<p>Here's an example of a <a href="../../docs/concepts/services/">service</a> that deploys
3892-
Llama 3.1 70B in FP16 using <a href="https://huggingface.co/docs/text-generation-inference/en/installation_amd" target="_blank">TGI <span class="twemoji external"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="m11.93 5 2.83 2.83L5 17.59 6.42 19l9.76-9.75L19 12.07V5z"/></svg></span></a>.</p>
3892+
Llama 3.1 70B in FP16 using <a href="https://huggingface.co/docs/text-generation-inference/en/installation_amd">TGI</a>.</p>
38933893
<p><div editor-title="examples/inference/tgi/amd/service.dstack.yml"> </p>
38943894
<div class="highlight"><pre><span></span><code><span class="nt">type</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">service</span>
38953895
<span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">amd-service-tgi</span>
@@ -3917,7 +3917,7 @@ <h2 id="configuration">Configuration<a class="headerlink" href="#configuration"
39173917
</div>
39183918
<div class="tabbed-block">
39193919
<p>Here's an example of a <a href="../../docs/concepts/dev-environments/">dev environment</a> using
3920-
<a href="https://huggingface.co/docs/text-generation-inference/en/installation_amd" target="_blank">TGI <span class="twemoji external"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="m11.93 5 2.83 2.83L5 17.59 6.42 19l9.76-9.75L19 12.07V5z"/></svg></span></a>'s
3920+
<a href="https://huggingface.co/docs/text-generation-inference/en/installation_amd">TGI</a>'s
39213921
Docker image:</p>
39223922
<div class="highlight"><pre><span></span><code><span class="nt">type</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">dev-environment</span>
39233923
<span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">amd-dev-tgi</span>
@@ -3955,14 +3955,14 @@ <h2 id="configuration">Configuration<a class="headerlink" href="#configuration"
39553955
<h2 id="whats-next">What's next?<a class="headerlink" href="#whats-next" title="Permanent link">&para;</a></h2>
39563956
<ol>
39573957
<li>The examples above demonstrate the use of
3958-
<a href="https://huggingface.co/docs/text-generation-inference/en/installation_amd" target="_blank">TGI <span class="twemoji external"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="m11.93 5 2.83 2.83L5 17.59 6.42 19l9.76-9.75L19 12.07V5z"/></svg></span></a>.
3958+
<a href="https://huggingface.co/docs/text-generation-inference/en/installation_amd">TGI</a>.
39593959
AMD accelerators can also be used with other frameworks like vLLM, Ollama, etc., and we'll be adding more examples soon.</li>
39603960
<li>RunPod is the first cloud provider where dstack supports AMD. More cloud providers will be supported soon as well.</li>
3961-
<li>Want to give RunPod and <code>dstack</code> a try? Make sure you've signed up for <a href="https://www.runpod.io/" target="_blank">RunPod <span class="twemoji external"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="m11.93 5 2.83 2.83L5 17.59 6.42 19l9.76-9.75L19 12.07V5z"/></svg></span></a>,
3961+
<li>Want to give RunPod and <code>dstack</code> a try? Make sure you've signed up for <a href="https://www.runpod.io/">RunPod</a>,
39623962
then <a href="../../docs/reference/server/config.yml/#runpod">set up</a> the <code>dstack server</code>. </li>
39633963
</ol>
39643964
<blockquote>
3965-
<p>Have questioned or feedback? Join our <a href="https://discord.gg/u8SmfwPpMd" target="_blank">Discord <span class="twemoji external"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="m11.93 5 2.83 2.83L5 17.59 6.42 19l9.76-9.75L19 12.07V5z"/></svg></span></a>
3965+
<p>Have questions or feedback? Join our <a href="https://discord.gg/u8SmfwPpMd">Discord</a>
39663966
server.</p>
39673967
</blockquote>
39683968

blog/amd-on-tensorwave/index.html

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3885,7 +3885,7 @@ <h1 id="using-ssh-fleets-with-tensorwaves-private-amd-cloud">Using SSH fleets wi
38853885
<p>Since last month, when we introduced support for private clouds and data centers, it has become easier to use <code>dstack</code>
38863886
to orchestrate AI containers with any AI cloud vendor, whether they provide on-demand compute or reserved clusters.</p>
38873887
<p>In this tutorial, we’ll walk you through how <code>dstack</code> can be used with
3888-
<a href="https://tensorwave.com/" target="_blank">TensorWave <span class="twemoji external"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="m11.93 5 2.83 2.83L5 17.59 6.42 19l9.76-9.75L19 12.07V5z"/></svg></span></a> using
3888+
<a href="https://tensorwave.com/">TensorWave</a> using
38893889
<a href="../../docs/concepts/fleets/#ssh-fleets">SSH fleets</a>.</p>
38903890
<p><img src="https://dstack.ai/static-assets/static-assets/images/dstack-tensorwave-v2.png" width="630"/></p>
38913891
<!-- more -->
@@ -4071,7 +4071,7 @@ <h2 id="see-it-in-action">See it in action<a class="headerlink" href="#see-it-in
40714071
<ol>
40724072
<li>See <a href="../../docs/concepts/fleets/#ssh-fleets">SSH fleets</a></li>
40734073
<li>Read about <a href="../../docs/concepts/dev-environments/">dev environments</a>, <a href="../../docs/concepts/tasks/">tasks</a>, and <a href="../../docs/concepts/services/">services</a></li>
4074-
<li>Join <a href="https://discord.gg/u8SmfwPpMd">Discord <span class="twemoji external"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="m11.93 5 2.83 2.83L5 17.59 6.42 19l9.76-9.75L19 12.07V5z"/></svg></span></a></li>
4074+
<li>Join <a href="https://discord.gg/u8SmfwPpMd">Discord</a></li>
40754075
</ol>
40764076
</div>
40774077

blog/archive/ambassador-program/index.html

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3801,10 +3801,10 @@ <h2 id="how-to-apply">How to apply?<a class="headerlink" href="#how-to-apply" ti
38013801
data-tally-overlay="1">
38023802
Get involved
38033803
</a></p>
3804-
<p>Have questions? Reach out via <a href="https://discord.gg/u8SmfwPpMd" target="_blank">Discord <span class="twemoji external"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="m11.93 5 2.83 2.83L5 17.59 6.42 19l9.76-9.75L19 12.07V5z"/></svg></span></a>!</p>
3804+
<p>Have questions? Reach out via <a href="https://discord.gg/u8SmfwPpMd">Discord</a>!</p>
38053805
<blockquote>
38063806
<p>💜 In the meantime, we’re thrilled to
3807-
welcome <a href="https://x.com/algo_diver" target="_blank">Park Chansung <span class="twemoji external"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="m11.93 5 2.83 2.83L5 17.59 6.42 19l9.76-9.75L19 12.07V5z"/></svg></span></a>, the
3807+
welcome <a href="https://x.com/algo_diver">Park Chansung</a>, the
38083808
first <code>dstack</code> ambassador.</p>
38093809
</blockquote>
38103810

0 commit comments

Comments
 (0)