Skip to content

Commit 77b2f3f

Browse files
Deploying to gh-pages from @ dstackai/dstack@0219c0b 🚀
1 parent bfa750d commit 77b2f3f

File tree

123 files changed

+850
-813
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

123 files changed

+850
-813
lines changed

assets/images/social/examples.png

-338 Bytes
Loading

assets/images/social/partners.png

-427 Bytes
Loading

assets/javascripts/extra.js

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,4 +155,11 @@ window.addEventListener("DOMContentLoaded", function() {
155155
}
156156
});
157157
})
158+
159+
document.querySelectorAll('a[href^="http"]').forEach(link => {
160+
if (!link.href.includes(location.hostname)) {
161+
link.setAttribute('target', '_blank');
162+
link.setAttribute('rel', 'noopener noreferrer');
163+
}
164+
});
158165
})()

assets/stylesheets/extra.css

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1350,7 +1350,7 @@ html .md-footer-meta.md-typeset a:is(:focus,:hover) {
13501350
visibility: visible;
13511351
}*/
13521352

1353-
.twemoji.external {
1353+
/* .twemoji.external {
13541354
position: relative;
13551355
top: 2.5px;
13561356
height: 18.5px;
@@ -1364,7 +1364,7 @@ html .md-footer-meta.md-typeset a:is(:focus,:hover) {
13641364
position: relative;
13651365
top: 1.5px;
13661366
margin-right: -7px;
1367-
}
1367+
} */
13681368

13691369
/*.md-tabs__item:nth-child(6) .md-tabs__link:before {
13701370
position: relative;
@@ -1801,3 +1801,33 @@ img.border {
18011801
font-size: 12px !important;;
18021802
padding: 30px !important;
18031803
}
1804+
1805+
/* External link indicator */
1806+
a[href^="http"]:not(:where(
1807+
/* exclude http:// dstack links */
1808+
[href^="http://dstack.ai"],
1809+
/* exclude https://dstack.ai links */
1810+
[href^="https://dstack.ai"]
1811+
)):after {
1812+
content: '';
1813+
display: inline-block;
1814+
width: 18.5px;
1815+
height: 18.5px;
1816+
margin-left: 0.15em;
1817+
vertical-align: -0.2em;
1818+
background-color: currentColor;
1819+
mask-image: url('data:image/svg+xml,<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor"><path d="m11.93 5 2.83 2.83L5 17.59 6.42 19l9.76-9.75L19 12.07V5z"></path></svg>');
1820+
mask-size: 100%;
1821+
mask-repeat: no-repeat;
1822+
mask-position: center;
1823+
-webkit-mask-image: url('data:image/svg+xml,<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor"><path d="m11.93 5 2.83 2.83L5 17.59 6.42 19l9.76-9.75L19 12.07V5z"></path></svg>');
1824+
-webkit-mask-size: 100%;
1825+
-webkit-mask-repeat: no-repeat;
1826+
-webkit-mask-position: center;
1827+
text-decoration: none;
1828+
}
1829+
1830+
/* Exclude links inside .md-social */
1831+
.md-social a[href^="http"]:after {
1832+
display: none;
1833+
}

assets/stylesheets/landing.css

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -327,7 +327,7 @@
327327
margin-right: -7px;
328328
}
329329

330-
.md-button-secondary.external:after {
330+
/* .md-button-secondary.external:after {
331331
content: url('data:image/svg+xml,<svg fill="rgba(0, 0, 0, 0.87)" xmlns="http://www.w3.org/2000/svg" width="20px" height="20px" viewBox="0 0 16 16"><polygon points="5 4.31 5 5.69 9.33 5.69 2.51 12.51 3.49 13.49 10.31 6.67 10.31 11 11.69 11 11.69 4.31 5 4.31" data-v-e1bdab2c=""></polygon></svg>');
332332
line-height: 14px;
333333
margin-left: 5px;
@@ -343,7 +343,7 @@
343343
position: relative;
344344
top: 2.5px;
345345
margin-right: -7px;
346-
}
346+
} */
347347

348348
.md-header__buttons .md-button-secondary,
349349
.md-typeset .md-button-secondary,
@@ -702,13 +702,13 @@
702702
line-height: 32px;
703703
}
704704

705-
.tx-landing__highlights_grid h3.external:after {
705+
/* .tx-landing__highlights_grid h3.external:after {
706706
content: url('data:image/svg+xml,<svg fill="black" xmlns="http://www.w3.org/2000/svg" width="22px" height="22px" viewBox="0 0 16 16"><polygon points="5 4.31 5 5.69 9.33 5.69 2.51 12.51 3.49 13.49 10.31 6.67 10.31 11 11.69 11 11.69 4.31 5 4.31" data-v-e1bdab2c=""></polygon></svg>');
707707
margin-left: 2px;
708708
position: relative;
709709
top: 3px;
710710
margin-right: -7px;
711-
}
711+
} */
712712

713713
.tx-landing__highlights_grid p {
714714
font-size: 16px;

blog/amd-mi300x-inference-benchmark/index.html

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4000,7 +4000,7 @@
40004000
<h1 id="benchmarking-llama-31-405b-on-8x-amd-mi300x-gpus">Benchmarking Llama 3.1 405B on 8x AMD MI300X GPUs<a class="headerlink" href="#benchmarking-llama-31-405b-on-8x-amd-mi300x-gpus" title="Permanent link">&para;</a></h1>
40014001
<p>At <code>dstack</code>, we've been adding support for AMD GPUs with <a href="../../docs/concepts/fleets/#ssh-fleets">SSH fleets</a>,
40024002
so we saw this as a great chance to test our integration by benchmarking AMD GPUs. Our friends at
4003-
<a href="https://hotaisle.xyz/" target="_blank">Hot Aisle <span class="twemoji external"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="m11.93 5 2.83 2.83L5 17.59 6.42 19l9.76-9.75L19 12.07V5z"/></svg></span></a>, who build top-tier
4003+
<a href="https://hotaisle.xyz/">Hot Aisle</a>, who build top-tier
40044004
bare metal compute for AMD GPUs, kindly provided the hardware for the benchmark.</p>
40054005
<p><img src="https://dstack.ai/static-assets/static-assets/images/dstack-hotaisle-amd-mi300x-prompt-v5.png" width="750" /></p>
40064006
<!-- more -->
@@ -4085,7 +4085,7 @@ <h2 id="benchmark-setup">Benchmark setup<a class="headerlink" href="#benchmark-s
40854085
<summary>TGI</summary>
40864086
<p>The <code>ghcr.io/huggingface/text-generation-inference:sha-11d7af7-rocm</code> Docker image was used.</p>
40874087
</details>
4088-
<p>For conducting the tests, we've been using the <a href="https://github.com/vllm-project/vllm/blob/main/benchmarks/benchmark_serving.py" target="_blank"><code>benchmark_serving</code> <span class="twemoji external"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="m11.93 5 2.83 2.83L5 17.59 6.42 19l9.76-9.75L19 12.07V5z"/></svg></span></a> provided by vLLM. </p>
4088+
<p>For conducting the tests, we've been using the <a href="https://github.com/vllm-project/vllm/blob/main/benchmarks/benchmark_serving.py"><code>benchmark_serving</code></a> provided by vLLM. </p>
40894089
<h2 id="observations">Observations<a class="headerlink" href="#observations" title="Permanent link">&para;</a></h2>
40904090
<h3 id="tokensec-per-batch-size">Token/sec per batch size<a class="headerlink" href="#tokensec-per-batch-size" title="Permanent link">&para;</a></h3>
40914091
<p>TGI consistently exceeds vLLM in token throughput across all batch sizes, with the performance difference growing larger
@@ -4127,7 +4127,7 @@ <h3 id="vram-consumption">VRAM consumption<a class="headerlink" href="#vram-cons
41274127
<p>When considering VRAM consumption right after loading model weights, TGI allocates approximately 28% less VRAM compared
41284128
to vLLM.</p>
41294129
<p><img src="https://raw.githubusercontent.com/dstackai/benchmarks/refs/heads/main/amd/inference/gpu_vram_tgi_vllm.png" width="750" /></p>
4130-
<p>This difference may be related to how vLLM <a href="https://docs.vllm.ai/en/latest/models/performance.html" target="_blank">pre-allocates GPU cache <span class="twemoji external"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="m11.93 5 2.83 2.83L5 17.59 6.42 19l9.76-9.75L19 12.07V5z"/></svg></span></a>.</p>
4130+
<p>This difference may be related to how vLLM <a href="https://docs.vllm.ai/en/latest/models/performance.html">pre-allocates GPU cache</a>.</p>
41314131
<h2 id="conclusion">Conclusion<a class="headerlink" href="#conclusion" title="Permanent link">&para;</a></h2>
41324132
<ol>
41334133
<li>For small sequence lengths, starting with a batch size of 64, TGI significantly outperforms vLLM in terms of throughput and TTFT.</li>
@@ -4153,17 +4153,17 @@ <h2 id="whats-next">What's next?<a class="headerlink" href="#whats-next" title="
41534153
</blockquote>
41544154
<h3 id="source-code">Source code<a class="headerlink" href="#source-code" title="Permanent link">&para;</a></h3>
41554155
<p>The source code used for this benchmark can be found in our
4156-
<a href="https://github.com/dstackai/benchmarks/tree/main/amd/inference" target="_blank">GitHub repo <span class="twemoji external"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="m11.93 5 2.83 2.83L5 17.59 6.42 19l9.76-9.75L19 12.07V5z"/></svg></span></a>.</p>
4156+
<a href="https://github.com/dstackai/benchmarks/tree/main/amd/inference">GitHub repo</a>.</p>
41574157
<p>If you have questions, feedback, or want to help improve the benchmark, please reach out to our team.</p>
41584158
<h2 id="thanks-to-our-friends">Thanks to our friends<a class="headerlink" href="#thanks-to-our-friends" title="Permanent link">&para;</a></h2>
41594159
<h3 id="hot-aisle">Hot Aisle<a class="headerlink" href="#hot-aisle" title="Permanent link">&para;</a></h3>
4160-
<p><a href="https://hotaisle.xyz/" target="_blank">Hot Aisle <span class="twemoji external"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="m11.93 5 2.83 2.83L5 17.59 6.42 19l9.76-9.75L19 12.07V5z"/></svg></span></a>
4160+
<p><a href="https://hotaisle.xyz/">Hot Aisle</a>
41614161
is the primary sponsor of this benchmark, and we are sincerely grateful for their hardware and support. </p>
41624162
<p>If you'd like to use top-tier bare metal compute with AMD GPUs, we recommend going
41634163
with Hot Aisle. Once you gain access to a cluster, it can be easily accessed via <code>dstack</code>'s <a href="../../docs/concepts/fleets/#ssh-fleets">SSH fleet</a>.</p>
41644164
<h3 id="runpod">RunPod<a class="headerlink" href="#runpod" title="Permanent link">&para;</a></h3>
41654165
<p>If you’d like to use on-demand compute with AMD GPUs at affordable prices, you can configure <code>dstack</code> to
4166-
use <a href="https://runpod.io/" target="_blank">RunPod <span class="twemoji external"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="m11.93 5 2.83 2.83L5 17.59 6.42 19l9.76-9.75L19 12.07V5z"/></svg></span></a>. In
4166+
use <a href="https://runpod.io/">RunPod</a>. In
41674167
this case, <code>dstack</code> will be able to provision fleets automatically when you run dev environments, tasks, and
41684168
services.</p>
41694169

blog/amd-on-runpod/index.html

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3882,14 +3882,14 @@ <h2 id="specification">Specification<a class="headerlink" href="#specification"
38823882
<p>One of the main advantages of the <code>MI300X</code> is its VRAM. For example, with the <code>H100 SXM</code>, you wouldn't be able to fit the FP16
38833883
version of Llama 3.1 405B into a single node with 8 GPUs—you'd have to use FP8 instead. However, with the <code>MI300X</code>, you
38843884
can fit FP16 into a single node with 8 GPUs, and for FP8, you'd only need 4 GPUs.</p>
3885-
<p>With the <a href="https://github.com/dstackai/dstack/releases/0.18.11rc1" target="_blank">latest update <span class="twemoji external"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="m11.93 5 2.83 2.83L5 17.59 6.42 19l9.76-9.75L19 12.07V5z"/></svg></span></a>,
3885+
<p>With the <a href="https://github.com/dstackai/dstack/releases/0.18.11rc1">latest update</a>,
38863886
you can now specify an AMD GPU under <code>resources</code>. Below are a few examples.</p>
38873887
<h2 id="configuration">Configuration<a class="headerlink" href="#configuration" title="Permanent link">&para;</a></h2>
38883888
<div class="tabbed-set tabbed-alternate" data-tabs="1:2"><input checked="checked" id="service" name="__tabbed_1" type="radio" /><input id="dev-environment" name="__tabbed_1" type="radio" /><div class="tabbed-labels"><label for="service">Service</label><label for="dev-environment">Dev environment</label></div>
38893889
<div class="tabbed-content">
38903890
<div class="tabbed-block">
38913891
<p>Here's an example of a <a href="../../docs/concepts/services/">service</a> that deploys
3892-
Llama 3.1 70B in FP16 using <a href="https://huggingface.co/docs/text-generation-inference/en/installation_amd" target="_blank">TGI <span class="twemoji external"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="m11.93 5 2.83 2.83L5 17.59 6.42 19l9.76-9.75L19 12.07V5z"/></svg></span></a>.</p>
3892+
Llama 3.1 70B in FP16 using <a href="https://huggingface.co/docs/text-generation-inference/en/installation_amd">TGI</a>.</p>
38933893
<p><div editor-title="examples/inference/tgi/amd/service.dstack.yml"> </p>
38943894
<div class="highlight"><pre><span></span><code><span class="nt">type</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">service</span>
38953895
<span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">amd-service-tgi</span>
@@ -3917,7 +3917,7 @@ <h2 id="configuration">Configuration<a class="headerlink" href="#configuration"
39173917
</div>
39183918
<div class="tabbed-block">
39193919
<p>Here's an example of a <a href="../../docs/concepts/dev-environments/">dev environment</a> using
3920-
<a href="https://huggingface.co/docs/text-generation-inference/en/installation_amd" target="_blank">TGI <span class="twemoji external"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="m11.93 5 2.83 2.83L5 17.59 6.42 19l9.76-9.75L19 12.07V5z"/></svg></span></a>'s
3920+
<a href="https://huggingface.co/docs/text-generation-inference/en/installation_amd">TGI</a>'s
39213921
Docker image:</p>
39223922
<div class="highlight"><pre><span></span><code><span class="nt">type</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">dev-environment</span>
39233923
<span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">amd-dev-tgi</span>
@@ -3955,14 +3955,14 @@ <h2 id="configuration">Configuration<a class="headerlink" href="#configuration"
39553955
<h2 id="whats-next">What's next?<a class="headerlink" href="#whats-next" title="Permanent link">&para;</a></h2>
39563956
<ol>
39573957
<li>The examples above demonstrate the use of
3958-
<a href="https://huggingface.co/docs/text-generation-inference/en/installation_amd" target="_blank">TGI <span class="twemoji external"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="m11.93 5 2.83 2.83L5 17.59 6.42 19l9.76-9.75L19 12.07V5z"/></svg></span></a>.
3958+
<a href="https://huggingface.co/docs/text-generation-inference/en/installation_amd">TGI</a>.
39593959
AMD accelerators can also be used with other frameworks like vLLM, Ollama, etc., and we'll be adding more examples soon.</li>
39603960
<li>RunPod is the first cloud provider where dstack supports AMD. More cloud providers will be supported soon as well.</li>
3961-
<li>Want to give RunPod and <code>dstack</code> a try? Make sure you've signed up for <a href="https://www.runpod.io/" target="_blank">RunPod <span class="twemoji external"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="m11.93 5 2.83 2.83L5 17.59 6.42 19l9.76-9.75L19 12.07V5z"/></svg></span></a>,
3961+
<li>Want to give RunPod and <code>dstack</code> a try? Make sure you've signed up for <a href="https://www.runpod.io/">RunPod</a>,
39623962
then <a href="../../docs/reference/server/config.yml/#runpod">set up</a> the <code>dstack server</code>. </li>
39633963
</ol>
39643964
<blockquote>
3965-
<p>Have questioned or feedback? Join our <a href="https://discord.gg/u8SmfwPpMd" target="_blank">Discord <span class="twemoji external"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="m11.93 5 2.83 2.83L5 17.59 6.42 19l9.76-9.75L19 12.07V5z"/></svg></span></a>
3965+
<p>Have questions or feedback? Join our <a href="https://discord.gg/u8SmfwPpMd">Discord</a>
39663966
server.</p>
39673967
</blockquote>
39683968

blog/amd-on-tensorwave/index.html

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3885,7 +3885,7 @@ <h1 id="using-ssh-fleets-with-tensorwaves-private-amd-cloud">Using SSH fleets wi
38853885
<p>Since last month, when we introduced support for private clouds and data centers, it has become easier to use <code>dstack</code>
38863886
to orchestrate AI containers with any AI cloud vendor, whether they provide on-demand compute or reserved clusters.</p>
38873887
<p>In this tutorial, we’ll walk you through how <code>dstack</code> can be used with
3888-
<a href="https://tensorwave.com/" target="_blank">TensorWave <span class="twemoji external"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="m11.93 5 2.83 2.83L5 17.59 6.42 19l9.76-9.75L19 12.07V5z"/></svg></span></a> using
3888+
<a href="https://tensorwave.com/">TensorWave</a> using
38893889
<a href="../../docs/concepts/fleets/#ssh-fleets">SSH fleets</a>.</p>
38903890
<p><img src="https://dstack.ai/static-assets/static-assets/images/dstack-tensorwave-v2.png" width="630"/></p>
38913891
<!-- more -->
@@ -4071,7 +4071,7 @@ <h2 id="see-it-in-action">See it in action<a class="headerlink" href="#see-it-in
40714071
<ol>
40724072
<li>See <a href="../../docs/concepts/fleets/#ssh-fleets">SSH fleets</a></li>
40734073
<li>Read about <a href="../../docs/concepts/dev-environments/">dev environments</a>, <a href="../../docs/concepts/tasks/">tasks</a>, and <a href="../../docs/concepts/services/">services</a></li>
4074-
<li>Join <a href="https://discord.gg/u8SmfwPpMd">Discord <span class="twemoji external"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="m11.93 5 2.83 2.83L5 17.59 6.42 19l9.76-9.75L19 12.07V5z"/></svg></span></a></li>
4074+
<li>Join <a href="https://discord.gg/u8SmfwPpMd">Discord</a></li>
40754075
</ol>
40764076
</div>
40774077

blog/archive/ambassador-program/index.html

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3801,10 +3801,10 @@ <h2 id="how-to-apply">How to apply?<a class="headerlink" href="#how-to-apply" ti
38013801
data-tally-overlay="1">
38023802
Get involved
38033803
</a></p>
3804-
<p>Have questions? Reach out via <a href="https://discord.gg/u8SmfwPpMd" target="_blank">Discord <span class="twemoji external"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="m11.93 5 2.83 2.83L5 17.59 6.42 19l9.76-9.75L19 12.07V5z"/></svg></span></a>!</p>
3804+
<p>Have questions? Reach out via <a href="https://discord.gg/u8SmfwPpMd">Discord</a>!</p>
38053805
<blockquote>
38063806
<p>💜 In the meantime, we’re thrilled to
3807-
welcome <a href="https://x.com/algo_diver" target="_blank">Park Chansung <span class="twemoji external"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="m11.93 5 2.83 2.83L5 17.59 6.42 19l9.76-9.75L19 12.07V5z"/></svg></span></a>, the
3807+
welcome <a href="https://x.com/algo_diver">Park Chansung</a>, the
38083808
first <code>dstack</code> ambassador.</p>
38093809
</blockquote>
38103810

0 commit comments

Comments
 (0)