Skip to content
2 changes: 1 addition & 1 deletion .buildkite/pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ steps:
rocm: "*"
rocmgpu: "*"
if: build.message !~ /\[skip tests\]/
timeout_in_minutes: 60
timeout_in_minutes: 90
matrix:
setup:
julia:
Expand Down
6 changes: 5 additions & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ VectorInterface = "409d34a3-91d5-4945-b6ec-7529ddf182d8"

[weakdeps]
Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e"
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000"
Expand All @@ -27,6 +28,7 @@ cuTENSOR = "011b41b2-24ef-40a8-b3eb-fa098493e9e1"

[extensions]
TensorKitAdaptExt = "Adapt"
TensorKitAMDGPUExt = "AMDGPU"
TensorKitCUDAExt = ["CUDA", "cuTENSOR"]
TensorKitChainRulesCoreExt = "ChainRulesCore"
TensorKitFiniteDifferencesExt = "FiniteDifferences"
Expand All @@ -35,6 +37,7 @@ TensorKitMooncakeExt = "Mooncake"
[compat]
Adapt = "4"
AllocCheck = "0.2.3"
AMDGPU = "2"
Aqua = "0.6, 0.7, 0.8"
ArgParse = "1.2.0"
CUDA = "5.9"
Expand Down Expand Up @@ -67,6 +70,7 @@ julia = "1.10"
[extras]
Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
AllocCheck = "9b6a8646-10ed-4001-bbdc-1d2f46dfbb1a"
AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e"
Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63"
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
Expand All @@ -86,4 +90,4 @@ Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
cuTENSOR = "011b41b2-24ef-40a8-b3eb-fa098493e9e1"

[targets]
test = ["ArgParse", "Adapt", "Aqua", "AllocCheck", "Combinatorics", "CUDA", "cuTENSOR", "GPUArrays", "LinearAlgebra", "SafeTestsets", "TensorOperations", "Test", "TestExtras", "ChainRulesCore", "ChainRulesTestUtils", "FiniteDifferences", "Zygote", "Mooncake", "JET"]
test = ["ArgParse", "Adapt", "Aqua", "AllocCheck", "Combinatorics", "AMDGPU", "CUDA", "cuTENSOR", "GPUArrays", "LinearAlgebra", "SafeTestsets", "TensorOperations", "Test", "TestExtras", "ChainRulesCore", "ChainRulesTestUtils", "FiniteDifferences", "Zygote", "Mooncake", "JET"]
20 changes: 20 additions & 0 deletions ext/TensorKitAMDGPUExt/TensorKitAMDGPUExt.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
module TensorKitAMDGPUExt

# AMDGPU backend extension for TensorKit: provides ROC-backed TensorMap
# storage and the associated constructors/linear-algebra methods.
using AMDGPU, AMDGPU.rocBLAS, AMDGPU.rocSOLVER, LinearAlgebra
using AMDGPU: @allowscalar
# device-specific random constructors, renamed to avoid clashing with Base/Random
import AMDGPU: rand as rocrand, rand! as rocrand!, randn as rocrandn, randn! as rocrandn!

using TensorKit
using TensorKit.Factorizations
using TensorKit.Strided
using TensorKit.Factorizations: AbstractAlgorithm
using TensorKit: SectorDict, tensormaptype, scalar, similarstoragetype, AdjointTensorMap, scalartype, project_symmetric_and_check
import TensorKit: randisometry, rand, randn
using TensorKit: MatrixAlgebraKit

using Random

# ROCTensorMap type aliases and method definitions
include("roctensormap.jl")

end
166 changes: 166 additions & 0 deletions ext/TensorKitAMDGPUExt/roctensormap.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
# TensorMap whose data lives in a `ROCVector` backed by HIP device memory.
const ROCTensorMap{T, S, N₁, N₂} = TensorMap{T, S, N₁, N₂, ROCVector{T, AMDGPU.Mem.HIPBuffer}}
# Convenience alias: a ROCTensorMap with trivial (empty) domain.
const ROCTensor{T, S, N} = ROCTensorMap{T, S, N, 0}

# Lazy adjoint wrapper around a ROCTensorMap.
const AdjointROCTensorMap{T, S, N₁, N₂} = AdjointTensorMap{T, S, N₁, N₂, ROCTensorMap{T, S, N₁, N₂}}

# Construct a ROCTensorMap from a host-side TensorMap by uploading its data
# buffer to the GPU; the space structure is preserved.
function ROCTensorMap(t::TensorMap{T, S, N₁, N₂, A}) where {T, S, N₁, N₂, A}
    return ROCTensorMap{T, S, N₁, N₂}(ROCArray{T}(t.data), space(t))
end

# project_symmetric! doesn't yet work for GPU types, so do this on the host, then copy
function TensorKit.project_symmetric_and_check(::Type{T}, ::Type{A}, data::AbstractArray, V::TensorMapSpace; tol = sqrt(eps(real(float(eltype(data)))))) where {T, A <: ROCVector{T}}
    # build and project a host-side tensor with plain Vector storage
    host = TensorKit.TensorMapWithStorage{T, Vector{T}}(undef, V)
    host = TensorKit.project_symmetric!(host, Array(data))
    # verify the projection did not alter the input beyond tolerance
    if !isapprox(Array(reshape(data, dims(host))), convert(Array, host); atol = tol)
        throw(ArgumentError("Data has non-zero elements at incompatible positions"))
    end
    # upload the projected host data into the requested ROC storage type
    return TensorKit.TensorMapWithStorage{T, A}(A(host.data), V)
end

# `AMDGPU.zeros`/`AMDGPU.ones` constructors for ROCTensorMap, mirroring the
# corresponding Base methods for host TensorMaps.
# NOTE(fix): the `←` operator (building a `HomSpace` from codomain and domain)
# was restored below; it had been lost, leaving `codomain domain`, which is
# not valid Julia.
for (fname, felt) in ((:zeros, :zero), (:ones, :one))
    @eval begin
        # convert separate `codomain`/`domain` arguments into a `HomSpace`
        function AMDGPU.$fname(
                codomain::TensorSpace{S},
                domain::TensorSpace{S} = one(codomain)
            ) where {S <: IndexSpace}
            return AMDGPU.$fname(codomain ← domain)
        end
        function AMDGPU.$fname(
                ::Type{T}, codomain::TensorSpace{S},
                domain::TensorSpace{S} = one(codomain)
            ) where {T, S <: IndexSpace}
            return AMDGPU.$fname(T, codomain ← domain)
        end
        # default element type is Float64
        AMDGPU.$fname(V::TensorMapSpace) = AMDGPU.$fname(Float64, V)
        # implementation: allocate uninitialized, then fill with zero/one
        function AMDGPU.$fname(::Type{T}, V::TensorMapSpace) where {T}
            t = ROCTensorMap{T}(undef, V)
            fill!(t, $felt(T))
            return t
        end
    end
end

# `rocrand`/`rocrandn` constructors for ROCTensorMap, mirroring Base.rand/randn
# for host TensorMaps but producing ROC-backed tensors.
# NOTE(fix): the `←` operator (building a `HomSpace`) was restored below; it
# had been lost in transit, leaving invalid `codomain domain` juxtapositions.
for randfun in (:rocrand, :rocrandn)
    randfun! = Symbol(randfun, :!)
    @eval begin
        # converting `codomain` and `domain` into `HomSpace`
        function $randfun(
                codomain::TensorSpace{S},
                domain::TensorSpace{S} = one(codomain),
            ) where {S <: IndexSpace}
            return $randfun(codomain ← domain)
        end
        function $randfun(
                ::Type{T}, codomain::TensorSpace{S},
                domain::TensorSpace{S} = one(codomain),
            ) where {T, S <: IndexSpace}
            return $randfun(T, codomain ← domain)
        end
        function $randfun(
                rng::Random.AbstractRNG, ::Type{T},
                codomain::TensorSpace{S},
                domain::TensorSpace{S} = one(codomain),
            ) where {T, S <: IndexSpace}
            return $randfun(rng, T, codomain ← domain)
        end

        # filling in default eltype
        $randfun(V::TensorMapSpace) = $randfun(Float64, V)
        function $randfun(rng::Random.AbstractRNG, V::TensorMapSpace)
            return $randfun(rng, Float64, V)
        end

        # filling in default rng
        function $randfun(::Type{T}, V::TensorMapSpace) where {T}
            return $randfun(Random.default_rng(), T, V)
        end

        # implementation: allocate uninitialized, then fill in place
        function $randfun(
                rng::Random.AbstractRNG, ::Type{T},
                V::TensorMapSpace
            ) where {T}
            t = ROCTensorMap{T}(undef, V)
            $randfun!(rng, t)
            return t
        end

        # fill each symmetry block with random values on the device
        function $randfun!(rng::Random.AbstractRNG, t::ROCTensorMap)
            for (_, b) in blocks(t)
                $randfun!(rng, b)
            end
            return t
        end
    end
end
# Scalar implementation
#-----------------------
# Extract the single scalar entry of a rank-(0,0) tensor. Scalar indexing on
# device arrays is normally disallowed, so it is explicitly permitted here for
# the one element that is read.
function TensorKit.scalar(t::ROCTensorMap{T, S, 0, 0}) where {T, S}
    nz = findall(!iszero, t.data)
    isempty(nz) && return zero(scalartype(t))
    return @allowscalar @inbounds t.data[only(nz)]
end

# Convert a compatible AbstractTensorMap to the requested ROCTensorMap type,
# avoiding a copy when the input already has exactly that type.
function Base.convert(
        TT::Type{ROCTensorMap{T, S, N₁, N₂}},
        t::AbstractTensorMap{<:Any, S, N₁, N₂}
    ) where {T, S, N₁, N₂}
    typeof(t) === TT && return t
    tdst = TT(undef, space(t))
    return copy!(tdst, t)
end

# A ROCTensorMap is positive definite iff domain == codomain, the inner
# product is Euclidean, and every symmetry block is hermitian and positive
# definite.
function LinearAlgebra.isposdef(t::ROCTensorMap)
    domain(t) == codomain(t) ||
        throw(SpaceMismatch("`isposdef` requires domain and codomain to be the same"))
    InnerProductStyle(spacetype(t)) === EuclideanInnerProduct() || return false
    # check hermiticity ourselves before wrapping in Hermitian
    return all(blocks(t)) do (c, b)
        MatrixAlgebraKit.ishermitian(b) && isposdef(Hermitian(b))
    end
end

# Promotion between two ROCTensorMaps that share space structure (S, N₁, N₂)
# but differ in scalar type: the result keeps ROC storage, with the scalar
# type determined by VectorInterface's addition-promotion rule.
function Base.promote_rule(
        ::Type{<:TT₁},
        ::Type{<:TT₂}
    ) where {
        S, N₁, N₂, TTT₁, TTT₂,
        TT₁ <: ROCTensorMap{TTT₁, S, N₁, N₂},
        TT₂ <: ROCTensorMap{TTT₂, S, N₁, N₂},
    }
    T = TensorKit.VectorInterface.promote_add(TTT₁, TTT₂)
    return ROCTensorMap{T, S, N₁, N₂}
end

# ROCTensorMap exponentiation: currently restricted to hermitian tensors,
# computed blockwise via the Hermitian wrapper.
function TensorKit.exp!(t::ROCTensorMap)
    domain(t) == codomain(t) ||
        error("Exponential of a tensor only exist when domain == codomain.")
    MatrixAlgebraKit.ishermitian(t) ||
        throw(ArgumentError("`exp!` is currently only supported on hermitian AMDGPU tensors"))
    for (_, b) in blocks(t)
        copy!(b, parent(Base.exp(Hermitian(b))))
    end
    return t
end

# functions that don't map ℝ to (a subset of) ℝ
# These need a complex output eltype, and are currently only supported for
# hermitian tensors, where they are computed blockwise through the Hermitian
# wrapper into a freshly allocated complex tensor.
for f in (:sqrt, :log, :asin, :acos, :acosh, :atanh, :acoth)
    sf = string(f)
    @eval function Base.$f(t::ROCTensorMap)
        domain(t) == codomain(t) ||
            throw(SpaceMismatch("`$($sf)` of a tensor only exists when domain == codomain"))
        !MatrixAlgebraKit.ishermitian(t) && throw(ArgumentError("`$($sf)` is currently only supported on hermitian AMDGPU tensors"))
        # result eltype must be complex, since these functions leave ℝ
        T = complex(float(scalartype(t)))
        tf = similar(t, T)
        for (c, b) in blocks(t)
            copy!(block(tf, c), parent($f(Hermitian(b))))
        end
        return tf
    end
end
Loading
Loading