Skip to content
2 changes: 1 addition & 1 deletion .buildkite/pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ steps:
rocm: "*"
rocmgpu: "*"
if: build.message !~ /\[skip tests\]/
timeout_in_minutes: 60
timeout_in_minutes: 90
matrix:
setup:
julia:
Expand Down
6 changes: 5 additions & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ VectorInterface = "409d34a3-91d5-4945-b6ec-7529ddf182d8"

[weakdeps]
Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e"
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000"
Expand All @@ -27,6 +28,7 @@ cuTENSOR = "011b41b2-24ef-40a8-b3eb-fa098493e9e1"

[extensions]
TensorKitAdaptExt = "Adapt"
TensorKitAMDGPUExt = "AMDGPU"
TensorKitCUDAExt = ["CUDA", "cuTENSOR"]
TensorKitChainRulesCoreExt = "ChainRulesCore"
TensorKitFiniteDifferencesExt = "FiniteDifferences"
Expand All @@ -35,6 +37,7 @@ TensorKitMooncakeExt = "Mooncake"
[compat]
Adapt = "4"
AllocCheck = "0.2.3"
AMDGPU = "2"
Aqua = "0.6, 0.7, 0.8"
ArgParse = "1.2.0"
CUDA = "5.9"
Expand Down Expand Up @@ -67,6 +70,7 @@ julia = "1.10"
[extras]
Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
AllocCheck = "9b6a8646-10ed-4001-bbdc-1d2f46dfbb1a"
AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e"
Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63"
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
Expand All @@ -86,4 +90,4 @@ Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
cuTENSOR = "011b41b2-24ef-40a8-b3eb-fa098493e9e1"

[targets]
test = ["ArgParse", "Adapt", "Aqua", "AllocCheck", "Combinatorics", "CUDA", "cuTENSOR", "GPUArrays", "LinearAlgebra", "SafeTestsets", "TensorOperations", "Test", "TestExtras", "ChainRulesCore", "ChainRulesTestUtils", "FiniteDifferences", "Zygote", "Mooncake", "JET"]
test = ["ArgParse", "Adapt", "Aqua", "AllocCheck", "Combinatorics", "AMDGPU", "CUDA", "cuTENSOR", "GPUArrays", "LinearAlgebra", "SafeTestsets", "TensorOperations", "Test", "TestExtras", "ChainRulesCore", "ChainRulesTestUtils", "FiniteDifferences", "Zygote", "Mooncake", "JET"]
20 changes: 20 additions & 0 deletions ext/TensorKitAMDGPUExt/TensorKitAMDGPUExt.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
module TensorKitAMDGPUExt

# AMDGPU backend extension for TensorKit: provides ROC-backed TensorMap
# storage and the associated constructors/linear-algebra methods.
using AMDGPU, AMDGPU.rocBLAS, AMDGPU.rocSOLVER, LinearAlgebra
using AMDGPU: @allowscalar
# device-specific random constructors, renamed to avoid clashing with Base/Random
import AMDGPU: rand as rocrand, rand! as rocrand!, randn as rocrandn, randn! as rocrandn!

using TensorKit
using TensorKit.Factorizations
using TensorKit.Strided
using TensorKit.Factorizations: AbstractAlgorithm
using TensorKit: SectorDict, tensormaptype, scalar, similarstoragetype, AdjointTensorMap, scalartype, project_symmetric_and_check
import TensorKit: randisometry, rand, randn
using TensorKit: MatrixAlgebraKit

using Random

# ROCTensorMap type aliases and method definitions
include("roctensormap.jl")

end
166 changes: 166 additions & 0 deletions ext/TensorKitAMDGPUExt/roctensormap.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
# TensorMap whose data lives in a `ROCVector` backed by HIP device memory.
const ROCTensorMap{T, S, N₁, N₂} = TensorMap{T, S, N₁, N₂, ROCVector{T, AMDGPU.Mem.HIPBuffer}}
# Convenience alias: a ROCTensorMap with trivial (empty) domain.
const ROCTensor{T, S, N} = ROCTensorMap{T, S, N, 0}

# Lazy adjoint wrapper around a ROCTensorMap.
const AdjointROCTensorMap{T, S, N₁, N₂} = AdjointTensorMap{T, S, N₁, N₂, ROCTensorMap{T, S, N₁, N₂}}

# Construct a ROCTensorMap from a host-side TensorMap by uploading its data
# buffer to the GPU; the space structure is preserved.
function ROCTensorMap(t::TensorMap{T, S, N₁, N₂, A}) where {T, S, N₁, N₂, A}
    return ROCTensorMap{T, S, N₁, N₂}(ROCArray{T}(t.data), space(t))
end

# project_symmetric! doesn't yet work for GPU types, so do this on the host, then copy
function TensorKit.project_symmetric_and_check(::Type{T}, ::Type{A}, data::AbstractArray, V::TensorMapSpace; tol = sqrt(eps(real(float(eltype(data)))))) where {T, A <: ROCVector{T}}
    # build and project a host-side tensor with plain Vector storage
    host = TensorKit.TensorMapWithStorage{T, Vector{T}}(undef, V)
    host = TensorKit.project_symmetric!(host, Array(data))
    # verify the projection did not alter the input beyond tolerance
    if !isapprox(Array(reshape(data, dims(host))), convert(Array, host); atol = tol)
        throw(ArgumentError("Data has non-zero elements at incompatible positions"))
    end
    # upload the projected host data into the requested ROC storage type
    return TensorKit.TensorMapWithStorage{T, A}(A(host.data), V)
end

# `AMDGPU.zeros`/`AMDGPU.ones` constructors for ROCTensorMap, mirroring the
# corresponding Base methods for host TensorMaps.
# NOTE(fix): the `←` operator (building a `HomSpace` from codomain and domain)
# was restored below; it had been lost, leaving `codomain domain`, which is
# not valid Julia.
for (fname, felt) in ((:zeros, :zero), (:ones, :one))
    @eval begin
        # convert separate `codomain`/`domain` arguments into a `HomSpace`
        function AMDGPU.$fname(
                codomain::TensorSpace{S},
                domain::TensorSpace{S} = one(codomain)
            ) where {S <: IndexSpace}
            return AMDGPU.$fname(codomain ← domain)
        end
        function AMDGPU.$fname(
                ::Type{T}, codomain::TensorSpace{S},
                domain::TensorSpace{S} = one(codomain)
            ) where {T, S <: IndexSpace}
            return AMDGPU.$fname(T, codomain ← domain)
        end
        # default element type is Float64
        AMDGPU.$fname(V::TensorMapSpace) = AMDGPU.$fname(Float64, V)
        # implementation: allocate uninitialized, then fill with zero/one
        function AMDGPU.$fname(::Type{T}, V::TensorMapSpace) where {T}
            t = ROCTensorMap{T}(undef, V)
            fill!(t, $felt(T))
            return t
        end
    end
end

# `rocrand`/`rocrandn` constructors for ROCTensorMap, mirroring Base.rand/randn
# for host TensorMaps but producing ROC-backed tensors.
# NOTE(fix): the `←` operator (building a `HomSpace`) was restored below; it
# had been lost in transit, leaving invalid `codomain domain` juxtapositions.
for randfun in (:rocrand, :rocrandn)
    randfun! = Symbol(randfun, :!)
    @eval begin
        # converting `codomain` and `domain` into `HomSpace`
        function $randfun(
                codomain::TensorSpace{S},
                domain::TensorSpace{S} = one(codomain),
            ) where {S <: IndexSpace}
            return $randfun(codomain ← domain)
        end
        function $randfun(
                ::Type{T}, codomain::TensorSpace{S},
                domain::TensorSpace{S} = one(codomain),
            ) where {T, S <: IndexSpace}
            return $randfun(T, codomain ← domain)
        end
        function $randfun(
                rng::Random.AbstractRNG, ::Type{T},
                codomain::TensorSpace{S},
                domain::TensorSpace{S} = one(codomain),
            ) where {T, S <: IndexSpace}
            return $randfun(rng, T, codomain ← domain)
        end

        # filling in default eltype
        $randfun(V::TensorMapSpace) = $randfun(Float64, V)
        function $randfun(rng::Random.AbstractRNG, V::TensorMapSpace)
            return $randfun(rng, Float64, V)
        end

        # filling in default rng
        function $randfun(::Type{T}, V::TensorMapSpace) where {T}
            return $randfun(Random.default_rng(), T, V)
        end

        # implementation: allocate uninitialized, then fill in place
        function $randfun(
                rng::Random.AbstractRNG, ::Type{T},
                V::TensorMapSpace
            ) where {T}
            t = ROCTensorMap{T}(undef, V)
            $randfun!(rng, t)
            return t
        end

        # fill each symmetry block with random values on the device
        function $randfun!(rng::Random.AbstractRNG, t::ROCTensorMap)
            for (_, b) in blocks(t)
                $randfun!(rng, b)
            end
            return t
        end
    end
end
# Scalar implementation
#-----------------------
# Extract the single scalar entry of a rank-(0,0) tensor. Scalar indexing on
# device arrays is normally disallowed, so it is explicitly permitted here for
# the one element that is read.
function TensorKit.scalar(t::ROCTensorMap{T, S, 0, 0}) where {T, S}
    nz = findall(!iszero, t.data)
    isempty(nz) && return zero(scalartype(t))
    return @allowscalar @inbounds t.data[only(nz)]
end

# Convert a compatible AbstractTensorMap to the requested ROCTensorMap type,
# avoiding a copy when the input already has exactly that type.
function Base.convert(
        TT::Type{ROCTensorMap{T, S, N₁, N₂}},
        t::AbstractTensorMap{<:Any, S, N₁, N₂}
    ) where {T, S, N₁, N₂}
    typeof(t) === TT && return t
    tdst = TT(undef, space(t))
    return copy!(tdst, t)
end

# A ROCTensorMap is positive definite iff domain == codomain, the inner
# product is Euclidean, and every symmetry block is hermitian and positive
# definite.
function LinearAlgebra.isposdef(t::ROCTensorMap)
    domain(t) == codomain(t) ||
        throw(SpaceMismatch("`isposdef` requires domain and codomain to be the same"))
    InnerProductStyle(spacetype(t)) === EuclideanInnerProduct() || return false
    # check hermiticity ourselves before wrapping in Hermitian
    return all(blocks(t)) do (c, b)
        MatrixAlgebraKit.ishermitian(b) && isposdef(Hermitian(b))
    end
end

# Promotion between two ROCTensorMaps that share space structure (S, N₁, N₂)
# but differ in scalar type: the result keeps ROC storage, with the scalar
# type determined by VectorInterface's addition-promotion rule.
function Base.promote_rule(
        ::Type{<:TT₁},
        ::Type{<:TT₂}
    ) where {
        S, N₁, N₂, TTT₁, TTT₂,
        TT₁ <: ROCTensorMap{TTT₁, S, N₁, N₂},
        TT₂ <: ROCTensorMap{TTT₂, S, N₁, N₂},
    }
    T = TensorKit.VectorInterface.promote_add(TTT₁, TTT₂)
    return ROCTensorMap{T, S, N₁, N₂}
end

# ROCTensorMap exponentiation: currently restricted to hermitian tensors,
# computed blockwise via the Hermitian wrapper.
function TensorKit.exp!(t::ROCTensorMap)
    domain(t) == codomain(t) ||
        error("Exponential of a tensor only exist when domain == codomain.")
    MatrixAlgebraKit.ishermitian(t) ||
        throw(ArgumentError("`exp!` is currently only supported on hermitian AMDGPU tensors"))
    for (_, b) in blocks(t)
        copy!(b, parent(Base.exp(Hermitian(b))))
    end
    return t
end

# functions that don't map ℝ to (a subset of) ℝ
# These need a complex output eltype, and are currently only supported for
# hermitian tensors, where they are computed blockwise through the Hermitian
# wrapper into a freshly allocated complex tensor.
for f in (:sqrt, :log, :asin, :acos, :acosh, :atanh, :acoth)
    sf = string(f)
    @eval function Base.$f(t::ROCTensorMap)
        domain(t) == codomain(t) ||
            throw(SpaceMismatch("`$($sf)` of a tensor only exists when domain == codomain"))
        !MatrixAlgebraKit.ishermitian(t) && throw(ArgumentError("`$($sf)` is currently only supported on hermitian AMDGPU tensors"))
        # result eltype must be complex, since these functions leave ℝ
        T = complex(float(scalartype(t)))
        tf = similar(t, T)
        for (c, b) in blocks(t)
            copy!(block(tf, c), parent($f(Hermitian(b))))
        end
        return tf
    end
end
Loading
Loading