Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .buildkite/pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ steps:
queue: "juliagpu"
cuda: "*"
if: build.message !~ /\[skip tests\]/
timeout_in_minutes: 30
timeout_in_minutes: 60
matrix:
setup:
julia:
Expand All @@ -36,7 +36,7 @@ steps:
rocm: "*"
rocmgpu: "*"
if: build.message !~ /\[skip tests\]/
timeout_in_minutes: 30
timeout_in_minutes: 60
matrix:
setup:
julia:
Expand Down
23 changes: 21 additions & 2 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,20 +18,33 @@ TupleTools = "9d95972d-f1c8-5527-a6e0-b4b365fa01f6"
VectorInterface = "409d34a3-91d5-4945-b6ec-7529ddf182d8"

[weakdeps]
AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e"
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000"
cuTENSOR = "011b41b2-24ef-40a8-b3eb-fa098493e9e1"

[sources]
GPUArrays = {rev = "master", url = "https://github.com/JuliaGPU/GPUArrays.jl"}
MatrixAlgebraKit = {rev = "main", url = "https://github.com/QuantumKitHub/MatrixAlgebraKit.jl"}

[extensions]
TensorKitAMDGPUExt = "AMDGPU"
TensorKitCUDAExt = ["CUDA", "cuTENSOR"]
TensorKitChainRulesCoreExt = "ChainRulesCore"
TensorKitFiniteDifferencesExt = "FiniteDifferences"

[compat]
AMDGPU = "2"
Adapt = "4"
Aqua = "0.6, 0.7, 0.8"
ArgParse = "1.2.0"
CUDA = "5.9"
ChainRulesCore = "1"
ChainRulesTestUtils = "1"
Combinatorics = "1"
FiniteDifferences = "0.12"
GPUArrays = "11.3.1"
LRUCache = "1.0.2"
LinearAlgebra = "1"
MatrixAlgebraKit = "0.6.0"
Expand All @@ -48,21 +61,27 @@ TestExtras = "0.2,0.3"
TupleTools = "1.1"
VectorInterface = "0.4.8, 0.5"
Zygote = "0.7"
cuTENSOR = "2"
julia = "1.10"

[extras]
ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63"
Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e"
Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63"
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
ChainRulesTestUtils = "cdddcdb0-9152-4a09-a978-84456f9df70a"
Combinatorics = "861a8166-3701-5b0c-9a16-15d98fcdc6aa"
FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000"
GPUArrays = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
SafeTestsets = "1bc83da4-3b8d-516f-aca4-4fe02f6d838f"
TensorOperations = "6aa20fa7-93e2-5fca-9bc0-fbd0db3c71a2"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
TestExtras = "5ed8adda-3752-4e41-b88a-e8b09835ee3a"
Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
cuTENSOR = "011b41b2-24ef-40a8-b3eb-fa098493e9e1"

[targets]
test = ["ArgParse", "Aqua", "Combinatorics", "LinearAlgebra", "TensorOperations", "Test", "TestExtras", "SafeTestsets", "ChainRulesCore", "ChainRulesTestUtils", "FiniteDifferences", "Zygote"]
test = ["ArgParse", "Adapt", "AMDGPU", "Aqua", "Combinatorics", "CUDA", "cuTENSOR", "GPUArrays", "LinearAlgebra", "SafeTestsets", "TensorOperations", "Test", "TestExtras", "ChainRulesCore", "ChainRulesTestUtils", "FiniteDifferences", "Zygote"]
108 changes: 108 additions & 0 deletions ext/TensorKitAMDGPUExt/TensorKitAMDGPUExt.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
module TensorKitAMDGPUExt

using AMDGPU, AMDGPU.rocBLAS, LinearAlgebra
using AMDGPU: @allowscalar
import AMDGPU: rand as rocrand, rand! as rocrand!, randn as rocrandn, randn! as rocrandn!

using TensorKit
using TensorKit.Factorizations
using TensorKit.Strided
using TensorKit.Factorizations: AbstractAlgorithm
using TensorKit: SectorDict, tensormaptype, scalar, similarstoragetype, AdjointTensorMap, scalartype

using TensorKit.MatrixAlgebraKit

using Random

include("roctensormap.jl")

# Alias for a `DiagonalTensorMap` whose data lives in AMD GPU device memory
# (a `ROCVector` backed by a HIP buffer).
const ROCDiagonalTensorMap{T, S} = DiagonalTensorMap{T, S, ROCVector{T, AMDGPU.Mem.HIPBuffer}}

"""
    ROCDiagonalTensorMap{T}(undef, V::TensorMapSpace)

Construct a GPU-backed `DiagonalTensorMap` with uninitialized data from a full
`TensorMapSpace`. The space must have exactly one incoming and one outgoing
index, and its domain and codomain must coincide.
"""
function ROCDiagonalTensorMap{T}(::UndefInitializer, V::TensorMapSpace) where {T}
    # A diagonal map is square: one index in, one index out, same space on both sides.
    if !(numin(V) == numout(V) == 1 && domain(V) == codomain(V))
        throw(ArgumentError("DiagonalTensorMap requires a space with equal domain and codomain and 2 indices"))
    end
    return ROCDiagonalTensorMap{T}(undef, domain(V))
end
# Accept a `ProductSpace` provided it contains exactly one factor, and forward
# that single space to the `IndexSpace` constructor.
function ROCDiagonalTensorMap{T}(::UndefInitializer, V::ProductSpace) where {T}
    if length(V) != 1
        throw(ArgumentError("DiagonalTensorMap requires `numin(d) == numout(d) == 1`"))
    end
    return ROCDiagonalTensorMap{T}(undef, only(V))
end
Comment on lines +28 to +37
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We don't have these generalizations of the constructor for the normal (i.e. non-gpu) DiagonalTensorMap right? Is this useful?

# Forward a bare `IndexSpace` to the fully parametrized constructor.
ROCDiagonalTensorMap{T}(::UndefInitializer, V::S) where {T, S <: IndexSpace} =
    ROCDiagonalTensorMap{T, S}(undef, V)
# Without an explicit scalar type, default to `Float64` (matching `DiagonalTensorMap`).
ROCDiagonalTensorMap(::UndefInitializer, V::IndexSpace) = ROCDiagonalTensorMap{Float64}(undef, V)

# Wrap an existing device vector as the diagonal data, without copying.
ROCDiagonalTensorMap(data::ROCVector{T}, V::S) where {T, S} =
    ROCDiagonalTensorMap{T, S}(data, V)

# Upload a host vector to device memory, then wrap it as the diagonal data.
function ROCDiagonalTensorMap(data::Vector{T}, V::S) where {T, S}
    device_data = ROCVector{T}(data)
    return ROCDiagonalTensorMap{T, S}(device_data, V)
end

# Allocate the outputs of a full SVD of a diagonal tensor map on the GPU:
# `U` over the codomain, real singular values `S` as a diagonal map, and `Vᴴ`
# over the domain.
function TensorKit.Factorizations.MAK.initialize_output(::typeof(svd_full!), t::ROCDiagonalTensorMap, alg::DiagonalAlgorithm)
    Vc = fuse(codomain(t))
    Vd = fuse(domain(t))
    U = similar(t, codomain(t) ← Vc)
    S = ROCDiagonalTensorMap{real(scalartype(t))}(undef, Vc ← Vd)
    Vᴴ = similar(t, Vd ← domain(t))
    return U, S, Vᴴ
end

# Allocate the output for a values-only SVD: a real diagonal map over the
# smaller of the fused codomain and fused domain.
function TensorKit.Factorizations.MAK.initialize_output(::typeof(svd_vals!), t::ROCTensorMap, alg::AbstractAlgorithm)
    V_S = infimum(fuse(codomain(t)), fuse(domain(t)))
    return ROCDiagonalTensorMap{real(scalartype(t))}(undef, V_S)
end

# Allocate the outputs of a compact SVD: both inner spaces collapse to the
# smaller of the fused codomain and fused domain.
function TensorKit.Factorizations.MAK.initialize_output(::typeof(svd_compact!), t::ROCTensorMap, ::AbstractAlgorithm)
    V_S = infimum(fuse(codomain(t)), fuse(domain(t)))
    U = similar(t, codomain(t) ← V_S)
    S = ROCDiagonalTensorMap{real(scalartype(t))}(undef, V_S)
    Vᴴ = similar(t, V_S ← domain(t))
    return U, S, Vᴴ
end

# Allocate outputs for a full Hermitian eigendecomposition: real eigenvalues
# `D` as a diagonal map and eigenvectors `V` over the codomain.
function TensorKit.Factorizations.MAK.initialize_output(::typeof(eigh_full!), t::ROCTensorMap, ::AbstractAlgorithm)
    fused = fuse(domain(t))
    Treal = real(scalartype(t))
    D = ROCDiagonalTensorMap{Treal}(undef, fused)
    V = similar(t, codomain(t) ← fused)
    return D, V
end

# Allocate outputs for a general eigendecomposition: eigenvalues and
# eigenvectors are complex even for real input scalar types.
function TensorKit.Factorizations.MAK.initialize_output(::typeof(eig_full!), t::ROCTensorMap, ::AbstractAlgorithm)
    fused = fuse(domain(t))
    Tcomplex = complex(scalartype(t))
    D = ROCDiagonalTensorMap{Tcomplex}(undef, fused)
    V = similar(t, Tcomplex, codomain(t) ← fused)
    return D, V
end

# Allocate the output for Hermitian eigenvalues only: a real diagonal map over
# the fused domain.
#
# Fix: the original bound `T = real(scalartype(t))` but then returned
# `ROCDiagonalTensorMap{Tc}(undef, V_D)` — `Tc` is never defined in this method
# (it was copied from the `eig_vals!` method below), so every call raised
# `UndefVarError(:Tc)`. Use `T` here instead.
function TensorKit.Factorizations.MAK.initialize_output(::typeof(eigh_vals!), t::ROCTensorMap, alg::AbstractAlgorithm)
    V_D = fuse(domain(t))
    T = real(scalartype(t))  # Hermitian maps have real eigenvalues
    return ROCDiagonalTensorMap{T}(undef, V_D)
end

# Allocate the output for general eigenvalues only: a complex diagonal map
# over the fused domain.
function TensorKit.Factorizations.MAK.initialize_output(::typeof(eig_vals!), t::ROCTensorMap, alg::AbstractAlgorithm)
    fused = fuse(domain(t))
    return ROCDiagonalTensorMap{complex(scalartype(t))}(undef, fused)
end


# TODO
# add VectorInterface extensions for proper AMDGPU promotion
function TensorKit.VectorInterface.promote_add(TA::Type{<:AMDGPU.StridedROCMatrix{Tx}}, TB::Type{<:AMDGPU.StridedROCMatrix{Ty}}, α::Tα = TensorKit.VectorInterface.One(), β::Tβ = TensorKit.VectorInterface.One()) where {Tx, Ty, Tα, Tβ}
return Base.promote_op(add, Tx, Ty, Tα, Tβ)
end

end
Loading
Loading