diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 6c414a31b..250cc4ce3 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -15,7 +15,7 @@ steps: queue: "juliagpu" cuda: "*" if: build.message !~ /\[skip tests\]/ - timeout_in_minutes: 30 + timeout_in_minutes: 60 matrix: setup: julia: @@ -36,7 +36,7 @@ steps: rocm: "*" rocmgpu: "*" if: build.message !~ /\[skip tests\]/ - timeout_in_minutes: 30 + timeout_in_minutes: 60 matrix: setup: julia: diff --git a/Project.toml b/Project.toml index 6c22acc60..a45e112fc 100644 --- a/Project.toml +++ b/Project.toml @@ -18,20 +18,33 @@ TupleTools = "9d95972d-f1c8-5527-a6e0-b4b365fa01f6" VectorInterface = "409d34a3-91d5-4945-b6ec-7529ddf182d8" [weakdeps] +AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e" +CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000" +cuTENSOR = "011b41b2-24ef-40a8-b3eb-fa098493e9e1" + +[sources] +GPUArrays = {rev = "master", url = "https://github.com/JuliaGPU/GPUArrays.jl"} +MatrixAlgebraKit = {rev = "main", url = "https://github.com/QuantumKitHub/MatrixAlgebraKit.jl"} [extensions] +TensorKitAMDGPUExt = "AMDGPU" +TensorKitCUDAExt = ["CUDA", "cuTENSOR"] TensorKitChainRulesCoreExt = "ChainRulesCore" TensorKitFiniteDifferencesExt = "FiniteDifferences" [compat] +AMDGPU = "2" +Adapt = "4" Aqua = "0.6, 0.7, 0.8" ArgParse = "1.2.0" +CUDA = "5.9" ChainRulesCore = "1" ChainRulesTestUtils = "1" Combinatorics = "1" FiniteDifferences = "0.12" +GPUArrays = "11.3.1" LRUCache = "1.0.2" LinearAlgebra = "1" MatrixAlgebraKit = "0.6.0" @@ -48,21 +61,27 @@ TestExtras = "0.2,0.3" TupleTools = "1.1" VectorInterface = "0.4.8, 0.5" Zygote = "0.7" +cuTENSOR = "2" julia = "1.10" [extras] -ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63" +Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" +AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e" Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595" +ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63" +CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" ChainRulesTestUtils = "cdddcdb0-9152-4a09-a978-84456f9df70a" Combinatorics = "861a8166-3701-5b0c-9a16-15d98fcdc6aa" FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000" +GPUArrays = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" SafeTestsets = "1bc83da4-3b8d-516f-aca4-4fe02f6d838f" TensorOperations = "6aa20fa7-93e2-5fca-9bc0-fbd0db3c71a2" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" TestExtras = "5ed8adda-3752-4e41-b88a-e8b09835ee3a" Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" +cuTENSOR = "011b41b2-24ef-40a8-b3eb-fa098493e9e1" [targets] -test = ["ArgParse", "Aqua", "Combinatorics", "LinearAlgebra", "TensorOperations", "Test", "TestExtras", "SafeTestsets", "ChainRulesCore", "ChainRulesTestUtils", "FiniteDifferences", "Zygote"] +test = ["ArgParse", "Adapt", "AMDGPU", "Aqua", "Combinatorics", "CUDA", "cuTENSOR", "GPUArrays", "LinearAlgebra", "SafeTestsets", "TensorOperations", "Test", "TestExtras", "ChainRulesCore", "ChainRulesTestUtils", "FiniteDifferences", "Zygote"] diff --git a/ext/TensorKitAMDGPUExt/TensorKitAMDGPUExt.jl b/ext/TensorKitAMDGPUExt/TensorKitAMDGPUExt.jl new file mode 100644 index 000000000..a9e5bd021 --- /dev/null +++ b/ext/TensorKitAMDGPUExt/TensorKitAMDGPUExt.jl @@ -0,0 +1,108 @@ +module TensorKitAMDGPUExt + +using AMDGPU, AMDGPU.rocBLAS, LinearAlgebra +using AMDGPU: @allowscalar +import AMDGPU: rand as rocrand, rand! as rocrand!, randn as rocrandn, randn! as rocrandn! + +using TensorKit +using TensorKit.Factorizations +using TensorKit.Strided +using TensorKit.Factorizations: AbstractAlgorithm +using TensorKit: SectorDict, tensormaptype, scalar, similarstoragetype, AdjointTensorMap, scalartype + +using TensorKit.MatrixAlgebraKit + +using Random + +include("roctensormap.jl") + +const ROCDiagonalTensorMap{T, S} = DiagonalTensorMap{T, S, ROCVector{T, AMDGPU.Mem.HIPBuffer}} + +""" + ROCDiagonalTensorMap{T}(undef, domain::S) where {T,S<:IndexSpace} + # expert mode: select storage type `A` + DiagonalTensorMap{T,S,A}(undef, domain::S) where {T,S<:IndexSpace,A<:DenseVector{T}} + +Construct a `DiagonalTensorMap` with uninitialized data. +""" +function ROCDiagonalTensorMap{T}(::UndefInitializer, V::TensorMapSpace) where {T} + (numin(V) == numout(V) == 1 && domain(V) == codomain(V)) || + throw(ArgumentError("DiagonalTensorMap requires a space with equal domain and codomain and 2 indices")) + return ROCDiagonalTensorMap{T}(undef, domain(V)) +end +function ROCDiagonalTensorMap{T}(::UndefInitializer, V::ProductSpace) where {T} + length(V) == 1 || + throw(ArgumentError("DiagonalTensorMap requires `numin(d) == numout(d) == 1`")) + return ROCDiagonalTensorMap{T}(undef, only(V)) +end +function ROCDiagonalTensorMap{T}(::UndefInitializer, V::S) where {T, S <: IndexSpace} + return ROCDiagonalTensorMap{T, S}(undef, V) +end +ROCDiagonalTensorMap(::UndefInitializer, V::IndexSpace) = ROCDiagonalTensorMap{Float64}(undef, V) + +function ROCDiagonalTensorMap(data::ROCVector{T}, V::S) where {T, S} + return ROCDiagonalTensorMap{T, S}(data, V) +end + +function ROCDiagonalTensorMap(data::Vector{T}, V::S) where {T, S} + return ROCDiagonalTensorMap{T, S}(ROCVector{T}(data), V) +end + +function TensorKit.Factorizations.MAK.initialize_output(::typeof(svd_full!), t::ROCDiagonalTensorMap, alg::DiagonalAlgorithm) + V_cod = fuse(codomain(t)) + V_dom = fuse(domain(t)) + U = similar(t, codomain(t) ← V_cod) + S = ROCDiagonalTensorMap{real(scalartype(t))}(undef, V_cod ← V_dom) + Vᴴ = similar(t, V_dom ← domain(t)) + return U, S, Vᴴ +end + +function TensorKit.Factorizations.MAK.initialize_output(::typeof(svd_vals!), t::ROCTensorMap, alg::AbstractAlgorithm) + V_cod = infimum(fuse(codomain(t)), fuse(domain(t))) + return ROCDiagonalTensorMap{real(scalartype(t))}(undef, V_cod) +end + +function TensorKit.Factorizations.MAK.initialize_output(::typeof(svd_compact!), t::ROCTensorMap, ::AbstractAlgorithm) + V_cod = V_dom = infimum(fuse(codomain(t)), fuse(domain(t))) + U = similar(t, codomain(t) ← V_cod) + S = ROCDiagonalTensorMap{real(scalartype(t))}(undef, V_cod) + Vᴴ = similar(t, V_dom ← domain(t)) + return U, S, Vᴴ +end + +function TensorKit.Factorizations.MAK.initialize_output(::typeof(eigh_full!), t::ROCTensorMap, ::AbstractAlgorithm) + V_D = fuse(domain(t)) + T = real(scalartype(t)) + D = ROCDiagonalTensorMap{T}(undef, V_D) + V = similar(t, codomain(t) ← V_D) + return D, V +end + +function TensorKit.Factorizations.MAK.initialize_output(::typeof(eig_full!), t::ROCTensorMap, ::AbstractAlgorithm) + V_D = fuse(domain(t)) + Tc = complex(scalartype(t)) + D = ROCDiagonalTensorMap{Tc}(undef, V_D) + V = similar(t, Tc, codomain(t) ← V_D) + return D, V +end + +function TensorKit.Factorizations.MAK.initialize_output(::typeof(eigh_vals!), t::ROCTensorMap, alg::AbstractAlgorithm) + V_D = fuse(domain(t)) + T = real(scalartype(t)) + return D = ROCDiagonalTensorMap{Tc}(undef, V_D) +end + +function TensorKit.Factorizations.MAK.initialize_output(::typeof(eig_vals!), t::ROCTensorMap, alg::AbstractAlgorithm) + V_D = fuse(domain(t)) + Tc = complex(scalartype(t)) + return D = ROCDiagonalTensorMap{Tc}(undef, V_D) +end + + +# TODO +# add VectorInterface extensions for proper AMDGPU promotion +function TensorKit.VectorInterface.promote_add(TA::Type{<:AMDGPU.StridedROCMatrix{Tx}}, TB::Type{<:AMDGPU.StridedROCMatrix{Ty}}, α::Tα = TensorKit.VectorInterface.One(), β::Tβ = TensorKit.VectorInterface.One()) where {Tx, Ty, Tα, Tβ} + return Base.promote_op(add, Tx, Ty, Tα, Tβ) +end + +end diff --git a/ext/TensorKitAMDGPUExt/roctensormap.jl b/ext/TensorKitAMDGPUExt/roctensormap.jl new file mode 100644 index 000000000..f4564c1e2 --- /dev/null +++ b/ext/TensorKitAMDGPUExt/roctensormap.jl @@ -0,0 +1,257 @@ +const ROCTensorMap{T, S, N₁, N₂} = TensorMap{T, S, N₁, N₂, ROCVector{T, AMDGPU.Mem.HIPBuffer}} +const ROCTensor{T, S, N} = ROCTensorMap{T, S, N, 0} + +const AdjointROCTensorMap{T, S, N₁, N₂} = AdjointTensorMap{T, S, N₁, N₂, ROCTensorMap{T, S, N₁, N₂}} + +function TensorKit.tensormaptype(S::Type{<:IndexSpace}, N₁, N₂, TorA::Type{<:StridedROCArray}) + if TorA <: ROCArray + return TensorMap{eltype(TorA), S, N₁, N₂, ROCVector{eltype(TorA), AMDGPU.Mem.HIPBuffer}} + else + throw(ArgumentError("argument $TorA should specify a scalar type (`<:Number`) or a storage type `<:ROCVector{<:Number}`")) + end +end + +function ROCTensorMap{T}(::UndefInitializer, V::TensorMapSpace{S, N₁, N₂}) where {T, S, N₁, N₂} + return ROCTensorMap{T, S, N₁, N₂}(undef, V) +end + +function ROCTensorMap{T}( + ::UndefInitializer, codomain::TensorSpace{S}, + domain::TensorSpace{S} + ) where {T, S} + return ROCTensorMap{T}(undef, codomain ← domain) +end +function ROCTensor{T}(::UndefInitializer, V::TensorSpace{S}) where {T, S} + return ROCTensorMap{T}(undef, V ← one(V)) +end +# constructor starting from block data +""" + ROCTensorMap(data::AbstractDict{<:Sector,<:ROCMatrix}, codomain::ProductSpace{S,N₁}, + domain::ProductSpace{S,N₂}) where {S<:ElementarySpace,N₁,N₂} + ROCTensorMap(data, codomain ← domain) + ROCTensorMap(data, domain → codomain) + +Construct a `ROCTensorMap` by explicitly specifying its block data. + +## Arguments +- `data::AbstractDict{<:Sector,<:ROCMatrix}`: dictionary containing the block data for + each coupled sector `c` as a matrix of size `(blockdim(codomain, c), blockdim(domain, c))`. +- `codomain::ProductSpace{S,N₁}`: the codomain as a `ProductSpace` of `N₁` spaces of type + `S<:ElementarySpace`. +- `domain::ProductSpace{S,N₂}`: the domain as a `ProductSpace` of `N₂` spaces of type + `S<:ElementarySpace`. + +Alternatively, the domain and codomain can be specified by passing a [`HomSpace`](@ref) +using the syntax `codomain ← domain` or `domain → codomain`. +""" +function ROCTensorMap( + data::AbstractDict{<:Sector, <:ROCArray}, + V::TensorMapSpace{S, N₁, N₂} + ) where {S, N₁, N₂} + T = eltype(valtype(data)) + t = ROCTensorMap{T}(undef, V) + for (c, b) in blocks(t) + haskey(data, c) || throw(SectorMismatch("no data for block sector $c")) + datac = data[c] + size(datac) == size(b) || + throw(DimensionMismatch("wrong size of block for sector $c")) + copy!(b, datac) + end + for (c, b) in data + c ∈ blocksectors(t) || isempty(b) || + throw(SectorMismatch("data for block sector $c not expected")) + end + return t +end +function ROCTensorMap{T}( + data::DenseVector{T}, codomain::TensorSpace{S}, + domain::TensorSpace{S} + ) where {T, S} + return ROCTensorMap(data, codomain ← domain) +end +function ROCTensorMap( + data::AbstractDict{<:Sector, <:ROCMatrix}, codom::TensorSpace{S}, + dom::TensorSpace{S} + ) where {S} + return ROCTensorMap(data, codom ← dom) +end + +function ROCTensorMap(ts::TensorMap{T, S, N₁, N₂, A}) where {T, S, N₁, N₂, A} + return ROCTensorMap{T, S, N₁, N₂}(ROCArray(ts.data), ts.space) +end + +for (fname, felt) in ((:zeros, :zero), (:ones, :one)) + @eval begin + function AMDGPU.$fname( + codomain::TensorSpace{S}, + domain::TensorSpace{S} = one(codomain) + ) where {S <: IndexSpace} + return AMDGPU.$fname(codomain ← domain) + end + function AMDGPU.$fname( + ::Type{T}, codomain::TensorSpace{S}, + domain::TensorSpace{S} = one(codomain) + ) where {T, S <: IndexSpace} + return AMDGPU.$fname(T, codomain ← domain) + end + AMDGPU.$fname(V::TensorMapSpace) = AMDGPU.$fname(Float64, V) + function AMDGPU.$fname(::Type{T}, V::TensorMapSpace) where {T} + t = ROCTensorMap{T}(undef, V) + fill!(t, $felt(T)) + return t + end + end +end + +for randfun in (:rocrand, :rocrandn) + randfun! = Symbol(randfun, :!) + @eval begin + $randfun(codomain::TensorSpace{S}, domain::TensorSpace{S} = one(codomain)) where {S} = $randfun(codomain ← domain) + function $randfun(::Type{T}, codomain::TensorSpace{S}, domain::TensorSpace{S} = one(codomain)) where {T, S <: IndexSpace} + return $randfun(T, codomain ← domain) + end + function $randfun( + rng::Random.AbstractRNG, ::Type{T}, + codomain::TensorSpace{S}, + domain::TensorSpace{S} = one(codomain), + ) where {T, S <: IndexSpace} + return $randfun(rng, T, codomain ← domain) + end + + # filling in default eltype + $randfun(V::TensorMapSpace) = $randfun(Float64, V) + function $randfun(rng::Random.AbstractRNG, V::TensorMapSpace) + return $randfun(rng, Float64, V) + end + + # filling in default rng + function $randfun(::Type{T}, V::TensorMapSpace) where {T} + return $randfun(Random.default_rng(), T, V) + end + + # implementation + function $randfun( + rng::Random.AbstractRNG, ::Type{T}, + V::TensorMapSpace + ) where {T} + t = ROCTensorMap{T}(undef, V) + $randfun!(rng, t) + return t + end + end +end + +# converters +# ---------- +function Base.convert(::Type{ROCTensorMap}, d::Dict{Symbol, Any}) + try + codomain = eval(TensorKit, Meta.parse(d[:codomain])) + domain = eval(TensorKit, Meta.parse(d[:domain])) + data = SectorDict(eval(TensorKit, Meta.parse(c)) => ROCArray(b) for (c, b) in d[:data]) + return ROCTensorMap(data, codomain, domain) + catch e # sector unknown in TensorKit.jl; user-defined, hopefully accessible in Main + codomain = Base.eval(Main, Meta.parse(d[:codomain])) + domain = Base.eval(Main, Meta.parse(d[:domain])) + data = SectorDict( + Base.eval(Main, Meta.parse(c)) => ROCArray(b) + for (c, b) in d[:data] + ) + return ROCTensorMap(data, codomain, domain) + end +end + +function Base.convert(::Type{ROCTensorMap}, t::AbstractTensorMap) + return copy!(ROCTensorMap{scalartype(t)}(undef, space(t)), t) +end + +# Scalar implementation +#----------------------- +function TensorKit.scalar(t::ROCTensorMap) + + # TODO: should scalar only work if N₁ == N₂ == 0? + return @allowscalar dim(codomain(t)) == dim(domain(t)) == 1 ? + first(blocks(t))[2][1, 1] : throw(DimensionMismatch()) +end + +TensorKit.scalartype(A::StridedROCArray{T}) where {T} = T +TensorKit.scalartype(::Type{<:ROCTensorMap{T}}) where {T} = T +TensorKit.scalartype(::Type{<:ROCArray{T}}) where {T} = T + +function TensorKit.similarstoragetype(TT::Type{<:ROCTensorMap{TTT, S, N₁, N₂}}, ::Type{T}) where {TTT, T, S, N₁, N₂} + return ROCVector{T, AMDGPU.Mem.HIPBuffer} +end + +function Base.convert( + TT::Type{ROCTensorMap{T, S, N₁, N₂}}, + t::AbstractTensorMap{<:Any, S, N₁, N₂} + ) where {T, S, N₁, N₂} + if typeof(t) === TT + return t + else + tnew = TT(undef, space(t)) + return copy!(tnew, t) + end +end + +function Base.copy!(tdst::ROCTensorMap{T, S, N₁, N₂}, tsrc::ROCTensorMap{T, S, N₁, N₂}) where {T, S, N₁, N₂} + space(tdst) == space(tsrc) || throw(SpaceMismatch("$(space(tdst)) ≠ $(space(tsrc))")) + for ((c, bdst), (_, bsrc)) in zip(blocks(tdst), blocks(tsrc)) + copy!(bdst, bsrc) + end + return tdst +end + +function Base.copy!(tdst::ROCTensorMap, tsrc::TensorKit.AdjointTensorMap) + space(tdst) == space(tsrc) || throw(SpaceMismatch("$(space(tdst)) ≠ $(space(tsrc))")) + for ((c, bdst), (_, bsrc)) in zip(blocks(tdst), blocks(tsrc)) + copy!(bdst, bsrc) + end + return tdst +end + +function Base.promote_rule( + ::Type{<:TT₁}, + ::Type{<:TT₂} + ) where { + S, N₁, N₂, TTT₁, TTT₂, + TT₁ <: ROCTensorMap{TTT₁, S, N₁, N₂}, + TT₂ <: ROCTensorMap{TTT₂, S, N₁, N₂}, + } + T = TensorKit.VectorInterface.promote_add(TTT₁, TTT₂) + return ROCTensorMap{T, S, N₁, N₂} +end + +function LinearAlgebra.isposdef(t::ROCTensorMap) + domain(t) == codomain(t) || + throw(SpaceMismatch("`isposdef` requires domain and codomain to be the same")) + InnerProductStyle(spacetype(t)) === EuclideanInnerProduct() || return false + for (c, b) in blocks(t) + # do our own hermitian check + isherm = TensorKit.MatrixAlgebraKit.ishermitian(b; atol = eps(real(eltype(b))), rtol = eps(real(eltype(b)))) + isherm || return false + isposdef(Hermitian(b)) || return false + end + return true +end + +# Conversion to ROCArray: +#---------------------- +# probably not optimized for speed, only for checking purposes +function Base.convert(::Type{ROCArray}, t::AbstractTensorMap) + I = sectortype(t) + if I === Trivial + convert(ROCArray, t[]) + else + cod = codomain(t) + dom = domain(t) + T = sectorscalartype(I) <: Complex ? complex(scalartype(t)) : + sectorscalartype(I) <: Integer ? scalartype(t) : float(scalartype(t)) + A = AMDGPU.zeros(T, dims(cod)..., dims(dom)...) + for (f₁, f₂) in fusiontrees(t) + F = convert(ROCArray, (f₁, f₂)) + Aslice = StridedView(A)[axes(cod, f₁.uncoupled)..., axes(dom, f₂.uncoupled)...] + add!(Aslice, StridedView(TensorKit._kron(convert(ROCArray, t[f₁, f₂]), F))) + end + return A + end +end diff --git a/ext/TensorKitCUDAExt/TensorKitCUDAExt.jl b/ext/TensorKitCUDAExt/TensorKitCUDAExt.jl new file mode 100644 index 000000000..443e0fff0 --- /dev/null +++ b/ext/TensorKitCUDAExt/TensorKitCUDAExt.jl @@ -0,0 +1,106 @@ +module TensorKitCUDAExt + +using CUDA, CUDA.CUBLAS, LinearAlgebra +using CUDA: @allowscalar +using cuTENSOR: cuTENSOR +import CUDA: rand as curand, rand! as curand!, randn as curandn, randn! as curandn! + +using TensorKit +using TensorKit.Factorizations +using TensorKit.Strided +using TensorKit.Factorizations: AbstractAlgorithm +using TensorKit: SectorDict, tensormaptype, scalar, similarstoragetype, AdjointTensorMap, scalartype +import TensorKit: randisometry + +using TensorKit.MatrixAlgebraKit + +using Random + +include("cutensormap.jl") + +const CuDiagonalTensorMap{T, S} = DiagonalTensorMap{T, S, CuVector{T, CUDA.DeviceMemory}} + +""" + CuDiagonalTensorMap{T}(undef, domain::S) where {T,S<:IndexSpace} + # expert mode: select storage type `A` + DiagonalTensorMap{T,S,A}(undef, domain::S) where {T,S<:IndexSpace,A<:DenseVector{T}} + +Construct a `DiagonalTensorMap` with uninitialized data. +""" +function CuDiagonalTensorMap{T}(::UndefInitializer, V::TensorMapSpace) where {T} + (numin(V) == numout(V) == 1 && domain(V) == codomain(V)) || + throw(ArgumentError("DiagonalTensorMap requires a space with equal domain and codomain and 2 indices")) + return CuDiagonalTensorMap{T}(undef, domain(V)) +end +function CuDiagonalTensorMap{T}(::UndefInitializer, V::ProductSpace) where {T} + length(V) == 1 || + throw(ArgumentError("DiagonalTensorMap requires `numin(d) == numout(d) == 1`")) + return CuDiagonalTensorMap{T}(undef, only(V)) +end +function CuDiagonalTensorMap{T}(::UndefInitializer, V::S) where {T, S <: IndexSpace} + return CuDiagonalTensorMap{T, S}(undef, V) +end +CuDiagonalTensorMap(::UndefInitializer, V::IndexSpace) = CuDiagonalTensorMap{Float64}(undef, V) + +function CuDiagonalTensorMap(data::CuVector{T}, V::S) where {T, S} + return CuDiagonalTensorMap{T, S}(data, V) +end + +function TensorKit.Factorizations.MAK.initialize_output(::typeof(svd_full!), t::CuDiagonalTensorMap, alg::DiagonalAlgorithm) + V_cod = fuse(codomain(t)) + V_dom = fuse(domain(t)) + U = similar(t, codomain(t) ← V_cod) + S = CuDiagonalTensorMap{real(scalartype(t))}(undef, V_cod ← V_dom) + Vᴴ = similar(t, V_dom ← domain(t)) + return U, S, Vᴴ +end + +function TensorKit.Factorizations.MAK.initialize_output(::typeof(svd_vals!), t::CuTensorMap, alg::AbstractAlgorithm) + V_cod = infimum(fuse(codomain(t)), fuse(domain(t))) + return CuDiagonalTensorMap{real(scalartype(t))}(undef, V_cod) +end + +function TensorKit.Factorizations.MAK.initialize_output(::typeof(svd_compact!), t::CuTensorMap, ::AbstractAlgorithm) + V_cod = V_dom = infimum(fuse(codomain(t)), fuse(domain(t))) + U = similar(t, codomain(t) ← V_cod) + S = CuDiagonalTensorMap{real(scalartype(t))}(undef, V_cod) + Vᴴ = similar(t, V_dom ← domain(t)) + return U, S, Vᴴ +end + +function TensorKit.Factorizations.MAK.initialize_output(::typeof(eigh_full!), t::CuTensorMap, ::AbstractAlgorithm) + V_D = fuse(domain(t)) + T = real(scalartype(t)) + D = CuDiagonalTensorMap{T}(undef, V_D) + V = similar(t, codomain(t) ← V_D) + return D, V +end + +function TensorKit.Factorizations.MAK.initialize_output(::typeof(eig_full!), t::CuTensorMap, ::AbstractAlgorithm) + V_D = fuse(domain(t)) + Tc = complex(scalartype(t)) + D = CuDiagonalTensorMap{Tc}(undef, V_D) + V = similar(t, Tc, codomain(t) ← V_D) + return D, V +end + +function TensorKit.Factorizations.MAK.initialize_output(::typeof(eigh_vals!), t::CuTensorMap, alg::AbstractAlgorithm) + V_D = fuse(domain(t)) + T = real(scalartype(t)) + return D = CuDiagonalTensorMap{Tc}(undef, V_D) +end + +function TensorKit.Factorizations.MAK.initialize_output(::typeof(eig_vals!), t::CuTensorMap, alg::AbstractAlgorithm) + V_D = fuse(domain(t)) + Tc = complex(scalartype(t)) + return D = CuDiagonalTensorMap{Tc}(undef, V_D) +end + + +# TODO +# add VectorInterface extensions for proper CUDA promotion +function TensorKit.VectorInterface.promote_add(TA::Type{<:CUDA.StridedCuMatrix{Tx}}, TB::Type{<:CUDA.StridedCuMatrix{Ty}}, α::Tα = TensorKit.VectorInterface.One(), β::Tβ = TensorKit.VectorInterface.One()) where {Tx, Ty, Tα, Tβ} + return Base.promote_op(add, Tx, Ty, Tα, Tβ) +end + +end diff --git a/ext/TensorKitCUDAExt/cutensormap.jl b/ext/TensorKitCUDAExt/cutensormap.jl new file mode 100644 index 000000000..127eb6cf4 --- /dev/null +++ b/ext/TensorKitCUDAExt/cutensormap.jl @@ -0,0 +1,328 @@ +const CuTensorMap{T, S, N₁, N₂} = TensorMap{T, S, N₁, N₂, CuVector{T, CUDA.DeviceMemory}} +const CuTensor{T, S, N} = CuTensorMap{T, S, N, 0} + +const AdjointCuTensorMap{T, S, N₁, N₂} = AdjointTensorMap{T, S, N₁, N₂, CuTensorMap{T, S, N₁, N₂}} + +function TensorKit.tensormaptype(S::Type{<:IndexSpace}, N₁, N₂, TorA::Type{<:StridedCuArray}) + if TorA <: CuArray + return TensorMap{eltype(TorA), S, N₁, N₂, CuVector{eltype(TorA), CUDA.DeviceMemory}} + else + throw(ArgumentError("argument $TorA should specify a scalar type (`<:Number`) or a storage type `<:CuVector{<:Number}`")) + end +end + +function CuTensorMap{T}(::UndefInitializer, V::TensorMapSpace{S, N₁, N₂}) where {T, S, N₁, N₂} + return CuTensorMap{T, S, N₁, N₂}(undef, V) +end + +function CuTensorMap{T}( + ::UndefInitializer, codomain::TensorSpace{S}, + domain::TensorSpace{S} + ) where {T, S} + return CuTensorMap{T}(undef, codomain ← domain) +end +function CuTensor{T}(::UndefInitializer, V::TensorSpace{S}) where {T, S} + return CuTensorMap{T}(undef, V ← one(V)) +end +# constructor starting from block data +""" + CuTensorMap(data::AbstractDict{<:Sector,<:CuMatrix}, codomain::ProductSpace{S,N₁}, + domain::ProductSpace{S,N₂}) where {S<:ElementarySpace,N₁,N₂} + CuTensorMap(data, codomain ← domain) + CuTensorMap(data, domain → codomain) + +Construct a `CuTensorMap` by explicitly specifying its block data. + +## Arguments +- `data::AbstractDict{<:Sector,<:CuMatrix}`: dictionary containing the block data for + each coupled sector `c` as a matrix of size `(blockdim(codomain, c), blockdim(domain, c))`. +- `codomain::ProductSpace{S,N₁}`: the codomain as a `ProductSpace` of `N₁` spaces of type + `S<:ElementarySpace`. +- `domain::ProductSpace{S,N₂}`: the domain as a `ProductSpace` of `N₂` spaces of type + `S<:ElementarySpace`. + +Alternatively, the domain and codomain can be specified by passing a [`HomSpace`](@ref) +using the syntax `codomain ← domain` or `domain → codomain`. +""" +function CuTensorMap( + data::AbstractDict{<:Sector, <:CuArray}, + V::TensorMapSpace{S, N₁, N₂} + ) where {S, N₁, N₂} + T = eltype(valtype(data)) + t = CuTensorMap{T}(undef, V) + for (c, b) in blocks(t) + haskey(data, c) || throw(SectorMismatch("no data for block sector $c")) + datac = data[c] + size(datac) == size(b) || + throw(DimensionMismatch("wrong size of block for sector $c")) + copy!(b, datac) + end + for (c, b) in data + c ∈ blocksectors(t) || isempty(b) || + throw(SectorMismatch("data for block sector $c not expected")) + end + return t +end +function CuTensorMap{T}( + data::DenseVector{T}, codomain::TensorSpace{S}, + domain::TensorSpace{S} + ) where {T, S} + return CuTensorMap(data, codomain ← domain) +end +function CuTensorMap( + data::AbstractDict{<:Sector, <:CuMatrix}, codom::TensorSpace{S}, + dom::TensorSpace{S} + ) where {S} + return CuTensorMap(data, codom ← dom) +end +function CuTensorMap(data::DenseVector{T}, V::TensorMapSpace{S, N₁, N₂}) where {T, S, N₁, N₂} + return CuTensorMap{T, S, N₁, N₂}(data, V) +end +function CuTensorMap(data::CuArray{T}, V::TensorMapSpace{S, N₁, N₂}) where {T, S, N₁, N₂} + return CuTensorMap{T, S, N₁, N₂}(vec(data), V) +end + +for (fname, felt) in ((:zeros, :zero), (:ones, :one)) + @eval begin + function CUDA.$fname( + codomain::TensorSpace{S}, + domain::TensorSpace{S} = one(codomain) + ) where {S <: IndexSpace} + return CUDA.$fname(codomain ← domain) + end + function CUDA.$fname( + ::Type{T}, codomain::TensorSpace{S}, + domain::TensorSpace{S} = one(codomain) + ) where {T, S <: IndexSpace} + return CUDA.$fname(T, codomain ← domain) + end + CUDA.$fname(V::TensorMapSpace) = CUDA.$fname(Float64, V) + function CUDA.$fname(::Type{T}, V::TensorMapSpace) where {T} + t = CuTensorMap{T}(undef, V) + fill!(t, $felt(T)) + return t + end + end +end + +for randfun in (:curand, :curandn) + randfun! = Symbol(randfun, :!) + @eval begin + # converting `codomain` and `domain` into `HomSpace` + function $randfun( + codomain::TensorSpace{S}, + domain::TensorSpace{S} = one(codomain), + ) where {S <: IndexSpace} + return $randfun(codomain ← domain) + end + function $randfun( + ::Type{T}, codomain::TensorSpace{S}, + domain::TensorSpace{S} = one(codomain), + ) where {T, S <: IndexSpace} + return $randfun(T, codomain ← domain) + end + function $randfun( + rng::Random.AbstractRNG, ::Type{T}, + codomain::TensorSpace{S}, + domain::TensorSpace{S} = one(codomain), + ) where {T, S <: IndexSpace} + return $randfun(rng, T, codomain ← domain) + end + + # filling in default eltype + $randfun(V::TensorMapSpace) = $randfun(Float64, V) + function $randfun(rng::Random.AbstractRNG, V::TensorMapSpace) + return $randfun(rng, Float64, V) + end + + # filling in default rng + function $randfun(::Type{T}, V::TensorMapSpace) where {T} + return $randfun(Random.default_rng(), T, V) + end + + # implementation + function $randfun( + rng::Random.AbstractRNG, ::Type{T}, + V::TensorMapSpace + ) where {T} + t = CuTensorMap{T}(undef, V) + $randfun!(rng, t) + return t + end + end +end + +for randfun in (:rand, :randn, :randisometry) + randfun! = Symbol(randfun, :!) + @eval begin + # converting `codomain` and `domain` into `HomSpace` + function $randfun( + ::Type{A}, codomain::TensorSpace{S}, + domain::TensorSpace{S} + ) where {A <: CuArray, S <: IndexSpace} + return $randfun(A, codomain ← domain) + end + function $randfun( + ::Type{T}, ::Type{A}, codomain::TensorSpace{S}, + domain::TensorSpace{S} + ) where {T, S <: IndexSpace, A <: CuArray{T}} + return $randfun(T, A, codomain ← domain) + end + function $randfun( + rng::Random.AbstractRNG, ::Type{T}, ::Type{A}, + codomain::TensorSpace{S}, + domain::TensorSpace{S} + ) where {T, S <: IndexSpace, A <: CuArray{T}} + return $randfun(rng, T, A, codomain ← domain) + end + + # accepting single `TensorSpace` + $randfun(::Type{A}, codomain::TensorSpace) where {A <: CuArray} = $randfun(A, codomain ← one(codomain)) + function $randfun(::Type{T}, ::Type{A}, codomain::TensorSpace) where {T, A <: CuArray{T}} + return $randfun(T, A, codomain ← one(codomain)) + end + function $randfun( + rng::Random.AbstractRNG, ::Type{T}, + ::Type{A}, codomain::TensorSpace + ) where {T, A <: CuArray{T}} + return $randfun(rng, T, A, codomain ← one(domain)) + end + + # filling in default eltype + $randfun(::Type{A}, V::TensorMapSpace) where {A <: CuArray} = $randfun(eltype(A), A, V) + function $randfun(rng::Random.AbstractRNG, ::Type{A}, V::TensorMapSpace) where {A <: CuArray} + return $randfun(rng, eltype(A), A, V) + end + + # filling in default rng + function $randfun(::Type{T}, ::Type{A}, V::TensorMapSpace) where {T, A <: CuArray{T}} + return $randfun(Random.default_rng(), T, A, V) + end + + # implementation + function $randfun( + rng::Random.AbstractRNG, ::Type{T}, + ::Type{A}, V::TensorMapSpace + ) where {T, A <: CuArray{T}} + t = CuTensorMap{T}(undef, V) + $randfun!(rng, t) + return t + end + end +end + +# converters +# ---------- +function Base.convert(::Type{CuTensorMap}, d::Dict{Symbol, Any}) + try + codomain = eval(TensorKit, Meta.parse(d[:codomain])) + domain = eval(TensorKit, Meta.parse(d[:domain])) + data = SectorDict(eval(TensorKit, Meta.parse(c)) => CuArray(b) for (c, b) in d[:data]) + return CuTensorMap(data, codomain, domain) + catch e # sector unknown in TensorKit.jl; user-defined, hopefully accessible in Main + codomain = Base.eval(Main, Meta.parse(d[:codomain])) + domain = Base.eval(Main, Meta.parse(d[:domain])) + data = SectorDict( + Base.eval(Main, Meta.parse(c)) => CuArray(b) + for (c, b) in d[:data] + ) + return CuTensorMap(data, codomain, domain) + end +end + +function Base.convert(::Type{CuTensorMap}, t::AbstractTensorMap) + return copy!(CuTensorMap{scalartype(t)}(undef, space(t)), t) +end + +# Scalar implementation +#----------------------- +function TensorKit.scalar(t::CuTensorMap) + + # TODO: should scalar only work if N₁ == N₂ == 0? + return @allowscalar dim(codomain(t)) == dim(domain(t)) == 1 ? + first(blocks(t))[2][1, 1] : throw(DimensionMismatch()) +end + +TensorKit.scalartype(A::StridedCuArray{T}) where {T} = T +TensorKit.scalartype(::Type{<:CuTensorMap{T}}) where {T} = T +TensorKit.scalartype(::Type{<:CuArray{T}}) where {T} = T + +function TensorKit.similarstoragetype(TT::Type{<:CuTensorMap{TTT, S, N₁, N₂}}, ::Type{T}) where {TTT, T, S, N₁, N₂} + return CuVector{T, CUDA.DeviceMemory} +end + +function Base.convert( + TT::Type{CuTensorMap{T, S, N₁, N₂}}, + t::AbstractTensorMap{<:Any, S, N₁, N₂} + ) where {T, S, N₁, N₂} + if typeof(t) === TT + return t + else + tnew = TT(undef, space(t)) + return copy!(tnew, t) + end +end + +function Base.copy!(tdst::CuTensorMap{T, S, N₁, N₂}, tsrc::CuTensorMap{T, S, N₁, N₂}) where {T, S, N₁, N₂} + space(tdst) == space(tsrc) || throw(SpaceMismatch("$(space(tdst)) ≠ $(space(tsrc))")) + for ((c, bdst), (_, bsrc)) in zip(blocks(tdst), blocks(tsrc)) + copy!(bdst, bsrc) + end + return tdst +end + +function Base.copy!(tdst::CuTensorMap, tsrc::TensorKit.AdjointTensorMap) + space(tdst) == space(tsrc) || throw(SpaceMismatch("$(space(tdst)) ≠ $(space(tsrc))")) + for ((c, bdst), (_, bsrc)) in zip(blocks(tdst), blocks(tsrc)) + copy!(bdst, bsrc) + end + return tdst +end + +function Base.promote_rule( + ::Type{<:TT₁}, + ::Type{<:TT₂} + ) where { + S, N₁, N₂, TTT₁, TTT₂, + TT₁ <: CuTensorMap{TTT₁, S, N₁, N₂}, + TT₂ <: CuTensorMap{TTT₂, S, N₁, N₂}, + } + T = TensorKit.VectorInterface.promote_add(TTT₁, TTT₂) + return CuTensorMap{T, S, N₁, N₂} +end + +function LinearAlgebra.isposdef(t::CuTensorMap) + domain(t) == codomain(t) || + throw(SpaceMismatch("`isposdef` requires domain and codomain to be the same")) + InnerProductStyle(spacetype(t)) === EuclideanInnerProduct() || return false + for (c, b) in blocks(t) + # do our own hermitian check + isherm = TensorKit.MatrixAlgebraKit.ishermitian(b; atol = eps(real(eltype(b))), rtol = eps(real(eltype(b)))) + isherm || return false + isposdef(Hermitian(b)) || return false + end + return true +end + + +# Conversion to CuArray: +#---------------------- +# probably not optimized for speed, only for checking purposes +function Base.convert(::Type{CuArray}, t::AbstractTensorMap) + I = sectortype(t) + if I === Trivial + convert(CuArray, t[]) + else + cod = codomain(t) + dom = domain(t) + T = sectorscalartype(I) <: Complex ? complex(scalartype(t)) : + sectorscalartype(I) <: Integer ? scalartype(t) : float(scalartype(t)) + A = CUDA.zeros(T, dims(cod)..., dims(dom)...) + for (f₁, f₂) in fusiontrees(t) + F = convert(CuArray, (f₁, f₂)) + Aslice = StridedView(A)[axes(cod, f₁.uncoupled)..., axes(dom, f₂.uncoupled)...] + add!(Aslice, StridedView(TensorKit._kron(convert(CuArray, t[f₁, f₂]), F))) + end + return A + end +end diff --git a/src/auxiliary/random.jl b/src/auxiliary/random.jl index 022809518..9aed16509 100644 --- a/src/auxiliary/random.jl +++ b/src/auxiliary/random.jl @@ -1,17 +1,20 @@ """ - randisometry([::Type{T}=Float64], dims::Dims{2}) -> Array{T,2} - randhaar([::Type{T}=Float64], dims::Dims{2}) -> Array{T,2} + randisometry([::Type{T}=Float64], [::Type{A}=Matrix{T}], dims::Dims{2}) -> A + randhaar([::Type{T}=Float64], [::Type{A}=Matrix{T}], dims::Dims{2}) -> A Create a random isometry of size `dims`, uniformly distributed according to the Haar measure. See also [`randuniform`](@ref) and [`randnormal`](@ref). """ -randisometry(dims::Base.Dims{2}) = randisometry(Float64, dims) +randisometry(dims::Base.Dims{2}) = randisometry(Float64, Matrix{Float64}, dims) function randisometry(::Type{T}, dims::Base.Dims{2}) where {T <: Number} return randisometry(Random.default_rng(), T, dims) end -function randisometry(rng::Random.AbstractRNG, ::Type{T}, dims::Base.Dims{2}) where {T <: Number} - return randisometry!(rng, Matrix{T}(undef, dims)) +function randisometry(::Type{T}, ::Type{A}, dims::Base.Dims{2}) where {T <: Number, A <: AbstractArray{T}} + return randisometry(Random.default_rng(), T, A, dims) +end +function randisometry(rng::Random.AbstractRNG, ::Type{T}, ::Type{A}, dims::Base.Dims{2}) where {T <: Number, A <: AbstractArray{T}} + return randisometry!(rng, A(undef, dims)) end randisometry!(A::AbstractMatrix) = randisometry!(Random.default_rng(), A) diff --git a/src/factorizations/adjoint.jl b/src/factorizations/adjoint.jl index 29f27203c..f9c2742e5 100644 --- a/src/factorizations/adjoint.jl +++ b/src/factorizations/adjoint.jl @@ -6,6 +6,8 @@ _adjoint(alg::MAK.LAPACK_HouseholderQR) = MAK.LAPACK_HouseholderLQ(; alg.kwargs. _adjoint(alg::MAK.LAPACK_HouseholderLQ) = MAK.LAPACK_HouseholderQR(; alg.kwargs...) _adjoint(alg::MAK.LAPACK_HouseholderQL) = MAK.LAPACK_HouseholderRQ(; alg.kwargs...) _adjoint(alg::MAK.LAPACK_HouseholderRQ) = MAK.LAPACK_HouseholderQL(; alg.kwargs...) +_adjoint(alg::MAK.LQViaTransposedQR) = alg.qr_alg +_adjoint(alg::MAK.CUSOLVER_HouseholderQR) = MAK.LQViaTransposedQR(alg) _adjoint(alg::MAK.PolarViaSVD) = MAK.PolarViaSVD(_adjoint(alg.svd_alg)) _adjoint(alg::AbstractAlgorithm) = alg diff --git a/src/factorizations/factorizations.jl b/src/factorizations/factorizations.jl index dce2067ee..fe69bf8d9 100644 --- a/src/factorizations/factorizations.jl +++ b/src/factorizations/factorizations.jl @@ -6,7 +6,7 @@ module Factorizations export copy_oftype, factorisation_scalartype, one!, truncspace using ..TensorKit -using ..TensorKit: AdjointTensorMap, SectorDict, blocktype, foreachblock, one! +using ..TensorKit: AdjointTensorMap, SectorDict, blocktype, foreachblock, one!, similarstoragetype using LinearAlgebra: LinearAlgebra, BlasFloat, Diagonal, svdvals, svdvals!, eigen, eigen!, isposdef, isposdef!, ishermitian @@ -28,7 +28,6 @@ include("diagonal.jl") include("pullbacks.jl") TensorKit.one!(A::AbstractMatrix) = MatrixAlgebraKit.one!(A) - #------------------------------# # LinearAlgebra overloads #------------------------------# diff --git a/src/factorizations/matrixalgebrakit.jl b/src/factorizations/matrixalgebrakit.jl index 412fd8761..dc955bdb7 100644 --- a/src/factorizations/matrixalgebrakit.jl +++ b/src/factorizations/matrixalgebrakit.jl @@ -83,14 +83,16 @@ end function MAK.initialize_output(::typeof(svd_compact!), t::AbstractTensorMap, ::AbstractAlgorithm) V_cod = V_dom = infimum(fuse(codomain(t)), fuse(domain(t))) U = similar(t, codomain(t) ← V_cod) - S = DiagonalTensorMap{real(scalartype(t))}(undef, V_cod) + output_type = similarstoragetype(t, real(scalartype(t))) + S = DiagonalTensorMap{real(scalartype(t)), typeof(V_cod), output_type}(undef, V_cod) Vᴴ = similar(t, V_dom ← domain(t)) return U, S, Vᴴ end function MAK.initialize_output(::typeof(svd_vals!), t::AbstractTensorMap, alg::AbstractAlgorithm) V_cod = infimum(fuse(codomain(t)), fuse(domain(t))) - return DiagonalTensorMap{real(scalartype(t))}(undef, V_cod) + output_storage_type = similarstoragetype(t, real(scalartype(t))) + return DiagonalTensorMap{real(scalartype(t)), typeof(V_cod), output_storage_type}(undef, V_cod) end # Eigenvalue decomposition @@ -98,7 +100,7 @@ end function MAK.initialize_output(::typeof(eigh_full!), t::AbstractTensorMap, ::AbstractAlgorithm) V_D = fuse(domain(t)) T = real(scalartype(t)) - D = DiagonalTensorMap{T}(undef, V_D) + D = DiagonalTensorMap{T, typeof(V_D), similarstoragetype(t, T)}(undef, V_D) V = similar(t, codomain(t) ← V_D) return D, V end @@ -106,7 +108,7 @@ end function MAK.initialize_output(::typeof(eig_full!), t::AbstractTensorMap, ::AbstractAlgorithm) V_D = fuse(domain(t)) Tc = complex(scalartype(t)) - D = DiagonalTensorMap{Tc}(undef, V_D) + D = DiagonalTensorMap{Tc, typeof(V_D), similarstoragetype(t, Tc)}(undef, V_D) V = similar(t, Tc, codomain(t) ← V_D) return D, V end @@ -114,13 +116,13 @@ end function MAK.initialize_output(::typeof(eigh_vals!), t::AbstractTensorMap, alg::AbstractAlgorithm) V_D = fuse(domain(t)) T = real(scalartype(t)) - return D = DiagonalTensorMap{Tc}(undef, V_D) + return D = DiagonalTensorMap{Tc, typeof(V_D), storagetype(t)}(undef, V_D) end function MAK.initialize_output(::typeof(eig_vals!), t::AbstractTensorMap, alg::AbstractAlgorithm) V_D = fuse(domain(t)) Tc = complex(scalartype(t)) - return D = DiagonalTensorMap{Tc}(undef, V_D) + return D = DiagonalTensorMap{Tc, typeof(V_D), similarstoragetype(t, Tc)}(undef, V_D) end # QR decomposition diff --git a/src/factorizations/truncation.jl b/src/factorizations/truncation.jl index 84f2569e0..514801642 100644 --- a/src/factorizations/truncation.jl +++ b/src/factorizations/truncation.jl @@ -67,7 +67,7 @@ function MAK.truncate( Ũ = similar(U, codomain(U) ← V_truncated) truncate_domain!(Ũ, U, ind) - S̃ = DiagonalTensorMap{scalartype(S)}(undef, V_truncated) + S̃ = DiagonalTensorMap{scalartype(S), typeof(V_truncated), storagetype(S)}(undef, V_truncated) truncate_diagonal!(S̃, S, ind) Ṽᴴ = similar(Vᴴ, V_truncated ← domain(Vᴴ)) truncate_codomain!(Ṽᴴ, Vᴴ, ind) @@ -132,7 +132,7 @@ for f! in (:eig_trunc!, :eigh_trunc!) ind = MAK.findtruncated(diagview(D), strategy) V_truncated = truncate_space(space(D, 1), ind) - D̃ = DiagonalTensorMap{scalartype(D)}(undef, V_truncated) + D̃ = DiagonalTensorMap{scalartype(D), typeof(V_truncated), storagetype(D)}(undef, V_truncated) truncate_diagonal!(D̃, D, ind) Ṽ = similar(V, codomain(V) ← V_truncated) @@ -162,17 +162,15 @@ function _findnexttruncvalue( ) where {I <: Sector} # early return (isempty(S) || all(iszero, values(truncdim))) && return nothing + mapped_keys = map(keys(truncdim)) do c + d = truncdim[c] + return by(S[c][d]) + end if rev - σmin, imin = findmin(keys(truncdim)) do c - d = truncdim[c] - return by(S[c][d]) - end + σmin, imin = findmin(mapped_keys) return σmin, keys(truncdim)[imin] else - σmax, imax = findmax(keys(truncdim)) do c - d = truncdim[c] - return by(S[c][d]) - end + σmax, imax = findmax(mapped_keys) return σmax, keys(truncdim)[imax] end end diff --git a/src/tensors/diagonal.jl b/src/tensors/diagonal.jl index 356f5dbf4..f66dfa1c3 100644 --- a/src/tensors/diagonal.jl +++ b/src/tensors/diagonal.jl @@ -271,7 +271,7 @@ function LinearAlgebra.mul!( dC::DiagonalTensorMap, dA::DiagonalTensorMap, dB::DiagonalTensorMap, α::Number, β::Number ) dC.domain == dA.domain == dB.domain || throw(SpaceMismatch()) - mul!(Diagonal(dC.data), Diagonal(dA.data), Diagonal(dB.data), α, β) + @. dC.data = (α * dA.data * dB.data) + β * dC.data return dC end diff --git a/src/tensors/linalg.jl b/src/tensors/linalg.jl index 747a44320..f8d09bbf1 100644 --- a/src/tensors/linalg.jl +++ b/src/tensors/linalg.jl @@ -272,7 +272,7 @@ function _norm(blockiter, p::Real, init::Real) end elseif p == 2 n² = mapreduce(+, blockiter; init = init) do (c, b) - return isempty(b) ? init : oftype(init, dim(c) * LinearAlgebra.norm2(b)^2) + return isempty(b) ? init : oftype(init, dim(c) * LinearAlgebra.norm(b, 2)^2) end return sqrt(n²) elseif p == 1 @@ -281,7 +281,7 @@ function _norm(blockiter, p::Real, init::Real) end elseif p > 0 nᵖ = mapreduce(+, blockiter; init = init) do (c, b) - return isempty(b) ? init : oftype(init, dim(c) * LinearAlgebra.normp(b, p)^p) + return isempty(b) ? init : oftype(init, dim(c) * LinearAlgebra.norm(b, p)^p) end return (nᵖ)^inv(oftype(nᵖ, p)) else @@ -299,7 +299,7 @@ function LinearAlgebra.rank( r = 0 * dim(first(allunits(sectortype(t)))) dim(t) == 0 && return r S = LinearAlgebra.svdvals(t) - tol = max(atol, rtol * maximum(first, values(S))) + tol = max(atol, rtol * mapreduce(maximum, max, values(S))) for (c, b) in S if !isempty(b) r += dim(c) * count(>(tol), b) @@ -317,8 +317,8 @@ function LinearAlgebra.cond(t::AbstractTensorMap, p::Real = 2) return zero(real(float(scalartype(t)))) end S = LinearAlgebra.svdvals(t) - maxS = maximum(first, values(S)) - minS = minimum(last, values(S)) + maxS = mapreduce(maximum, max, values(S)) + minS = mapreduce(minimum, min, values(S)) return iszero(maxS) ? oftype(maxS, Inf) : (maxS / minS) else throw(ArgumentError("cond currently only defined for p=2")) diff --git a/src/tensors/tensor.jl b/src/tensors/tensor.jl index fe15d819d..8b82cea2e 100644 --- a/src/tensors/tensor.jl +++ b/src/tensors/tensor.jl @@ -357,7 +357,7 @@ function TensorMap( ) where {S} return TensorMap(data, codom ← dom; kwargs...) end -function Tensor(data::AbstractArray, codom::TensorSpace, ; kwargs...) +function Tensor(data::AbstractArray, codom::TensorSpace; kwargs...) return TensorMap(data, codom ← one(codom); kwargs...) end diff --git a/test/amd/factorizations.jl b/test/amd/factorizations.jl new file mode 100644 index 000000000..8769f4558 --- /dev/null +++ b/test/amd/factorizations.jl @@ -0,0 +1,414 @@ +using Test, TestExtras +using TensorKit +using LinearAlgebra: LinearAlgebra +using AMDGPU + +const AMDGPUExt = Base.get_extension(TensorKit, :TensorKitAMDGPUExt) +@assert !isnothing(AMDGPUExt) +const ROCTensorMap = getglobal(AMDGPUExt, :ROCTensorMap) +const ROCDiagonalTensorMap = getglobal(AMDGPUExt, :ROCDiagonalTensorMap) + +@isdefined(TestSetup) || include("../setup.jl") +using .TestSetup + +spacelist = try + if ENV["CI"] == "true" + println("Detected running on CI") + if Sys.iswindows() + (Vtr, Vℤ₃, VU₁, VfU₁, VCU₁, VSU₂) + elseif Sys.isapple() + (Vtr, Vℤ₃, VfU₁, VfSU₂) + else + (Vtr, VU₁, VCU₁, VSU₂, VfSU₂) + end + else + (Vtr, Vℤ₃, VU₁, VfU₁, VCU₁, VSU₂, VfSU₂) + end +catch + (Vtr, Vℤ₃, VU₁, VfU₁, VCU₁, VSU₂, VfSU₂) +end + +import AMDGPU: rand as rocrand, randn as rocrandn + + +eltypes = (Float32, ComplexF64) + +for V in spacelist + I = sectortype(first(V)) + Istr = TensorKit.type_repr(I) + println("---------------------------------------") + println("Factorizations with symmetry: $Istr") + println("---------------------------------------") + @timedtestset "Factorizations with symmetry: $Istr" verbose = true begin + V1, V2, V3, V4, V5 = V + W = V1 ⊗ V2 + @testset "QR decomposition" begin + @testset "$(typeof(t))($T)" for T in eltypes, + t in ( + rocrand(T, W, W), rocrand(T, W, W)', rocrand(T, W, V1), rocrand(T, V1, W)', + ROCDiagonalTensorMap(rocrand(T, reduceddim(V1)), V1), + ) + + Q, R = @constinferred qr_full(t) + AMDGPU.@allowscalar begin + @test Q * R ≈ t + end + @test isunitary(Q) + + Q, R = @constinferred qr_compact(t) + AMDGPU.@allowscalar begin + @test Q * R ≈ t + end + @test isisometric(Q) + + Q, R = @constinferred left_orth(t; kind = :qr) + AMDGPU.@allowscalar begin + @test Q * R ≈ t + end + @test isisometric(Q) + + N = @constinferred qr_null(t) + @test isisometric(N) + @test norm(N' * t) ≈ 0 atol = 100 * eps(norm(t)) + + N = @constinferred left_null(t; kind = :qr) + @test isisometric(N) + @test norm(N' * t) ≈ 0 atol = 100 * eps(norm(t)) + end + + # empty tensor + for T in eltypes + t = rocrand(T, V1 ⊗ V2, zero(V1)) + + Q, R = @constinferred qr_full(t) + AMDGPU.@allowscalar begin + @test Q * R ≈ t + end + @test isunitary(Q) + @test dim(R) == dim(t) == 0 + + Q, R = @constinferred qr_compact(t) + AMDGPU.@allowscalar begin + @test Q * R ≈ t + end + @test isisometric(Q) + @test dim(Q) == dim(R) == dim(t) + + Q, R = @constinferred left_orth(t; kind = :qr) + AMDGPU.@allowscalar begin + @test Q * R ≈ t + end + @test isisometric(Q) + @test dim(Q) == dim(R) == dim(t) + + N = @constinferred qr_null(t) + @test isunitary(N) + @test norm(N' * t) ≈ 0 atol = 100 * eps(norm(t)) + end + end + @testset "LQ decomposition" begin + for T in eltypes, + t in ( + rocrand(T, W, W), rocrand(T, W, W)', rocrand(T, W, V1), rocrand(T, V1, W)', + ROCDiagonalTensorMap(rocrand(T, reduceddim(V1)), V1), + ) + + L, Q = @constinferred lq_full(t) + AMDGPU.@allowscalar begin + @test L * Q ≈ t + end + @test isunitary(Q) + + L, Q = @constinferred lq_compact(t) + AMDGPU.@allowscalar begin + @test L * Q ≈ t + end + @test isisometric(Q; side = :right) + + L, Q = @constinferred right_orth(t; kind = :lq) + AMDGPU.@allowscalar begin + @test L * Q ≈ t + end + @test isisometric(Q; side = :right) + + Nᴴ = @constinferred lq_null(t) + @test isisometric(Nᴴ; side = :right) + @test norm(t * Nᴴ') ≈ 0 atol = 100 * eps(norm(t)) + end + + for T in eltypes + # empty tensor + t = rocrand(T, zero(V1), V1 ⊗ V2) + + L, Q = @constinferred lq_full(t) + AMDGPU.@allowscalar begin + @test L * Q ≈ t + end + @test isunitary(Q) + @test dim(L) == dim(t) == 0 + + L, Q = @constinferred lq_compact(t) + AMDGPU.@allowscalar begin + @test L * Q ≈ t + end + @test isisometric(Q; side = :right) + @test dim(Q) == dim(L) == dim(t) + + L, Q = @constinferred right_orth(t; kind = :lq) + AMDGPU.@allowscalar begin + @test L * Q ≈ t + end + @test isisometric(Q; side = :right) + @test dim(Q) == dim(L) == dim(t) + + Nᴴ = @constinferred lq_null(t) + @test isunitary(Nᴴ) + @test norm(t * Nᴴ') ≈ 0 atol = 100 * eps(norm(t)) + end + end + @testset "Polar decomposition" begin + @testset "$(typeof(t))($T)" for T in eltypes, + t in ( + rocrand(T, W, W), + rocrand(T, W, W)', + rocrand(T, W, V1), + rocrand(T, V1, W)', + ROCDiagonalTensorMap(rocrand(T, reduceddim(V1)), V1), + ) + + @assert domain(t) ≾ codomain(t) + w, p = @constinferred left_polar(t) + @test w * p ≈ t + @test isisometric(w) + @test isposdef(p) + + w, p = @constinferred left_orth(t; kind = :polar) + @test w * p ≈ t + @test isisometric(w) + end + + @testset "$(typeof(t))($T)" for T in eltypes, + t in ( + rocrand(T, W, W), rocrand(T, W, W)', rocrand(T, V1, W), rocrand(T, W, V1)', + ROCDiagonalTensorMap(rocrand(T, reduceddim(V1)), V1), + ) + + @assert codomain(t) ≾ domain(t) + p, wᴴ = @constinferred right_polar(t) + @test p * wᴴ ≈ t + @test isisometric(wᴴ; side = :right) + @test isposdef(p) + + p, wᴴ = @constinferred right_orth(t; kind = :polar) + @test p * wᴴ ≈ t + @test isisometric(wᴴ; side = :right) + end + end + @testset "SVD" begin + for T in eltypes, + t in ( + rocrand(T, W, W), rocrand(T, W, W)', + rocrand(T, W, V1), rocrand(T, V1, W), + rocrand(T, W, V1)', rocrand(T, V1, W)', + ROCDiagonalTensorMap(rocrand(T, reduceddim(V1)), V1), + ) + + u, s, vᴴ = @constinferred svd_full(t) + @test u * s * vᴴ ≈ t + @test isunitary(u) + @test isunitary(vᴴ) + + u, s, vᴴ = @constinferred svd_compact(t) + @test u * s * vᴴ ≈ t + @test isisometric(u) + @test isposdef(s) + @test isisometric(vᴴ; side = :right) + + s′ = LinearAlgebra.diag(s) + for (c, b) in LinearAlgebra.svdvals(t) + @test b ≈ s′[c] + end + + v, c = @constinferred left_orth(t; kind = :svd) + @test v * c ≈ t + @test isisometric(v) + + N = @constinferred left_null(t; kind = :svd) + @test isisometric(N) + @test norm(N' * t) ≈ 0 atol = 100 * eps(norm(t)) + + Nᴴ = @constinferred right_null(t; kind = :svd) + @test isisometric(Nᴴ; side = :right) + @test norm(t * Nᴴ') ≈ 0 atol = 100 * eps(norm(t)) + end + + # empty tensor + for T in eltypes, t in (rocrand(T, W, zero(V1)), rocrand(T, zero(V1), W)) + U, S, Vᴴ = @constinferred svd_full(t) + @test U * S * Vᴴ ≈ t + @test isunitary(U) + @test isunitary(Vᴴ) + + U, S, Vᴴ = @constinferred svd_compact(t) + @test U * S * Vᴴ ≈ t + @test dim(U) == dim(S) == dim(Vᴴ) == dim(t) == 0 + end + end + + @testset "truncated SVD" begin + for T in eltypes, + # AMDGPU doesn't support randn(ComplexF64) + t in ( + ROCTensorMap(randn(T, W, W)), + ROCTensorMap(randn(T, W, W))', + ROCTensorMap(randn(T, W, V1)), + ROCTensorMap(randn(T, V1, W)), + ROCTensorMap(randn(T, W, V1))', + ROCTensorMap(randn(T, V1, W))', + ROCDiagonalTensorMap(randn(T, reduceddim(V1)), V1), + ) + + @constinferred normalize!(t) + + U, S, Vᴴ = @constinferred svd_trunc(t; trunc = notrunc()) + @test U * S * Vᴴ ≈ t + @test isisometric(U) + @test isisometric(Vᴴ; side = :right) + + trunc = truncrank(dim(domain(S)) ÷ 2) + AMDGPU.@allowscalar begin + U1, S1, Vᴴ1 = @constinferred svd_trunc(t; trunc) + end + @test t * Vᴴ1' ≈ U1 * S1 + @test isisometric(U1) + @test isisometric(Vᴴ1; side = :right) + @test dim(domain(S1)) <= trunc.howmany + + λ = minimum(minimum, values(LinearAlgebra.diag(S1))) + trunc = trunctol(; atol = λ - 10eps(λ)) + AMDGPU.@allowscalar begin + U2, S2, Vᴴ2 = @constinferred svd_trunc(t; trunc) + end + @test t * Vᴴ2' ≈ U2 * S2 + @test isisometric(U2) + @test isisometric(Vᴴ2; side = :right) + @test minimum(minimum, values(LinearAlgebra.diag(S1))) >= λ + @test U2 ≈ U1 + @test S2 ≈ S1 + @test Vᴴ2 ≈ Vᴴ1 + + trunc = truncspace(space(S2, 1)) + AMDGPU.@allowscalar begin + U3, S3, Vᴴ3 = @constinferred svd_trunc(t; trunc) + end + @test t * Vᴴ3' ≈ U3 * S3 + @test isisometric(U3) + @test isisometric(Vᴴ3; side = :right) + @test space(S3, 1) ≾ space(S2, 1) + + trunc = truncerror(; atol = 0.5) + AMDGPU.@allowscalar begin + U4, S4, Vᴴ4 = @constinferred svd_trunc(t; trunc) + end + @test t * Vᴴ4' ≈ U4 * S4 + @test isisometric(U4) + @test isisometric(Vᴴ4; side = :right) + @test norm(t - U4 * S4 * Vᴴ4) <= 0.5 + end + end + + #=@testset "Eigenvalue decomposition" begin + for T in eltypes, + t in (rocrand(T, V1, V1), + rocrand(T, W, W), + rocrand(T, W, W)', + ROCDiagonalTensorMap(rocrand(T, reduceddim(V1)), V1) + ) + + d, v = @constinferred eig_full(t) + @test t * v ≈ v * d + + d′ = LinearAlgebra.diag(d) + for (c, b) in LinearAlgebra.eigvals(t) + @test sort(b; by=abs) ≈ sort(d′[c]; by=abs) + end + + vdv = v' * v + vdv = (vdv + vdv') / 2 + @test @constinferred isposdef(vdv) + t isa ROCDiagonalTensorMap || @test !isposdef(t) # unlikely for non-hermitian map + + AMDGPU.@allowscalar begin # TODO + d, v = @constinferred eig_trunc(t; trunc=truncrank(dim(domain(t)) ÷ 2)) + end + @test t * v ≈ v * d + @test dim(domain(d)) ≤ dim(domain(t)) ÷ 2 + + t2 = (t + t') + D, V = eigen(t2) + @test isisometric(V) + D̃, Ṽ = @constinferred eigh_full(t2) + @test D ≈ D̃ + @test V ≈ Ṽ + λ = minimum(minimum(real(LinearAlgebra.diag(b))) + for (c, b) in blocks(D)) + @test cond(Ṽ) ≈ one(real(T)) + @test isposdef(t2) == isposdef(λ) + @test isposdef(t2 - λ * one(t2) + 0.1 * one(t2)) + @test !isposdef(t2 - λ * one(t2) - 0.1 * one(t2)) + + add!(t, t') + + d, v = @constinferred eigh_full(t) + @test t * v ≈ v * d + @test isunitary(v) + + λ = minimum(minimum(real(LinearAlgebra.diag(b))) for (c, b) in blocks(d)) + @test cond(v) ≈ one(real(T)) + @test isposdef(t) == isposdef(λ) + @test isposdef(t - λ * one(t) + 0.1 * one(t)) + @test !isposdef(t - λ * one(t) - 0.1 * one(t)) + + AMDGPU.@allowscalar begin + d, v = @constinferred eigh_trunc(t; trunc=truncrank(dim(domain(t)) ÷ 2)) + end + @test t * v ≈ v * d + @test dim(domain(d)) ≤ dim(domain(t)) ÷ 2 + end + end=# + + #=@testset "Condition number and rank" begin + for T in eltypes, + t in (rocrand(T, W, W), rocrand(T, W, W)', + rocrand(T, W, V1), rocrand(T, V1, W), + rocrand(T, W, V1)', rocrand(T, V1, W)', + ROCDiagonalTensorMap(rocrand(T, reduceddim(V1)), V1)) + + d1, d2 = dim(codomain(t)), dim(domain(t)) + @test rank(t) == min(d1, d2) + M = left_null(t) + @test @constinferred(rank(M)) + rank(t) == d1 + Mᴴ = right_null(t) + @test rank(Mᴴ) + rank(t) == d2 + end + for T in eltypes + u = unitary(T, V1 ⊗ V2, V1 ⊗ V2) + @test @constinferred(cond(u)) ≈ one(real(T)) + @test @constinferred(rank(u)) == dim(V1 ⊗ V2) + + t = rocrand(T, zero(V1), W) + @test rank(t) == 0 + t2 = rocrand(T, zero(V1) * zero(V2), zero(V1) * zero(V2)) + @test rank(t2) == 0 + @test cond(t2) == 0.0 + end + for T in eltypes, t in (rocrand(T, W, W), rocrand(T, W, W)') + add!(t, t') + vals = @constinferred LinearAlgebra.eigvals(t) + λmax = maximum(s -> maximum(abs, s), values(vals)) + λmin = minimum(s -> minimum(abs, s), values(vals)) + @test cond(t) ≈ λmax / λmin + end + end=# + end +end diff --git a/test/amd/tensors.jl b/test/amd/tensors.jl new file mode 100644 index 000000000..7c93732eb --- /dev/null +++ b/test/amd/tensors.jl @@ -0,0 +1,614 @@ +using Adapt, AMDGPU +using Test, TestExtras +using TensorKit, Combinatorics +ad = adapt(Array) + +const AMDGPUExt = Base.get_extension(TensorKit, :TensorKitAMDGPUExt) +@assert !isnothing(AMDGPUExt) +const ROCTensorMap = getglobal(AMDGPUExt, :ROCTensorMap) +const ROCDiagonalTensorMap = getglobal(AMDGPUExt, :ROCDiagonalTensorMap) + +@isdefined(TestSetup) || include("../setup.jl") +using .TestSetup + +for V in (Vtr, Vℤ₂, Vfℤ₂, Vℤ₃, VU₁, VfU₁, VCU₁, VSU₂, VfSU₂) #, VSU₃) + V1, V2, V3, V4, V5 = V + @assert V3 * V4 * V2 ≿ V1' * V5' # necessary for leftorth tests + @assert V3 * V4 ≾ V1' * V2' * V5' # necessary for rightorth tests +end + +spacelist = try + if ENV["CI"] == "true" + println("Detected running on CI") + if Sys.iswindows() + (Vtr, Vℤ₂, Vfℤ₂, Vℤ₃, VU₁, VfU₁, VCU₁, VSU₂) + elseif Sys.isapple() + (Vtr, Vℤ₂, Vfℤ₂, Vℤ₃, VfU₁, VfSU₂) #, VSU₃) + else + (Vtr, Vℤ₂, Vfℤ₂, VU₁, VCU₁, VSU₂, VfSU₂) #, VSU₃) + end + else + (Vtr, VU₁, VSU₂, Vfℤ₂) + end +catch + (Vtr, Vℤ₂, Vfℤ₂, Vℤ₃, VU₁, VfU₁, VCU₁, VSU₂, VfSU₂) #, VSU₃) +end + +for V in spacelist + I = sectortype(first(V)) + Istr = TensorKit.type_repr(I) + println("---------------------------------------") + println("AMDGPU Tensors with symmetry: $Istr") + println("---------------------------------------") + @timedtestset "Tensors with symmetry: $Istr" verbose = true begin + V1, V2, V3, V4, V5 = V + @timedtestset "Basic tensor properties" begin + W = V1 ⊗ V2 ⊗ V3 ⊗ V4 ⊗ V5 + for T in (Int, Float32, Float64, ComplexF32, ComplexF64) + t = @constinferred AMDGPU.zeros(T, W) + AMDGPU.@allowscalar begin + @test @constinferred(hash(t)) == hash(deepcopy(t)) + end + @test scalartype(t) == T + @test norm(t) == 0 + @test codomain(t) == W + @test space(t) == (W ← one(W)) + @test domain(t) == one(W) + @test typeof(t) == TensorMap{T, spacetype(t), 5, 0, ROCVector{T, AMDGPU.Mem.HIPBuffer}} + # blocks + bs = @constinferred blocks(t) + (c, b1), state = @constinferred Nothing iterate(bs) + @test c == first(blocksectors(W)) + next = @constinferred Nothing iterate(bs, state) + b2 = @constinferred block(t, first(blocksectors(t))) + @test b1 == b2 + @test_broken eltype(bs) === Pair{typeof(c), typeof(b1)} + @test_broken typeof(b1) === TensorKit.blocktype(t) + @test typeof(c) === sectortype(t) + end + end + @timedtestset "Tensor Dict conversion" begin + W = V1 ⊗ V2 ⊗ V3 ← V4 ⊗ V5 + for T in (Int, Float32, ComplexF64) + t = @constinferred AMDGPU.rand(T, W) + d = convert(Dict, t) + @test t == convert(ROCTensorMap, d) + end + end + if hasfusiontensor(I) + @timedtestset "Tensor Array conversion" begin + W = V1 ⊗ V2 ⊗ V3 ← V4 ⊗ V5 + W1 = V1 ← one(V1) + W2 = one(V2) ← V2 + W3 = V1 ⊗ V2 ← one(V1) + W4 = V1 ← V2 + W5 = one(V1) ← V1 ⊗ V2 + W6 = V1 ⊗ V2 ⊗ V3 ← V4 ⊗ V5 + for W in (W1, W2, W3, W4, W5, W6) + for T in (Int, Float32, ComplexF64) + if T == Int + t = ROCTensorMap{T}(undef, W) + for (_, b) in blocks(t) + AMDGPU.@allowscalar AMDGPU.rand!(b, -20:20) + end + else + t = @constinferred AMDGPU.randn(T, W) + end + AMDGPU.@allowscalar begin + a = @constinferred convert(ROCArray, t) + end + b = reshape(a, dim(codomain(W)), dim(domain(W))) + @test t ≈ @constinferred TensorMap(a, W) + @test t ≈ @constinferred TensorMap(b, W) + @test t === @constinferred TensorMap(t.data, W) + end + end + for T in (Int, Float32, ComplexF64) + t = AMDGPU.randn(T, V1 ⊗ V2 ← zero(V1)) + AMDGPU.@allowscalar begin + a = convert(ROCArray, t) + end + @test norm(a) == 0 + end + end + end + @timedtestset "Basic linear algebra" begin + W = V1 ⊗ V2 ⊗ V3 ← V4 ⊗ V5 + for T in (Float32, ComplexF64) + t = @constinferred AMDGPU.rand(T, W) + @test scalartype(t) == T + @test space(t) == W + @test space(t') == W' + @test dim(t) == dim(space(t)) + @test codomain(t) == codomain(W) + @test domain(t) == domain(W) + # blocks for adjoint + bs = @constinferred blocks(t') + (c, b1), state = @constinferred Nothing iterate(bs) + @test c == first(blocksectors(W')) + next = @constinferred Nothing iterate(bs, state) + b2 = @constinferred block(t', first(blocksectors(t'))) + @test b1 == b2 + @test_broken eltype(bs) === Pair{typeof(c), typeof(b1)} + @test_broken typeof(b1) === TensorKit.blocktype(t') + @test typeof(c) === sectortype(t) + # linear algebra + @test isa(@constinferred(norm(t)), real(T)) + @test norm(t)^2 ≈ dot(t, t) + α = rand(T) + @test norm(α * t) ≈ abs(α) * norm(t) + @test norm(t + t, 2) ≈ 2 * norm(t, 2) + @test norm(t + t, 1) ≈ 2 * norm(t, 1) + @test norm(t + t, Inf) ≈ 2 * norm(t, Inf) + p = 3 * rand(Float64) + AMDGPU.@allowscalar begin + @test norm(t + t, p) ≈ 2 * norm(t, p) + end + @test norm(t) ≈ norm(t') + + t2 = @constinferred AMDGPU.rand!(similar(t)) + β = rand(T) + @test @constinferred(dot(β * t2, α * t)) ≈ conj(β) * α * conj(dot(t, t2)) + @test dot(t2, t) ≈ conj(dot(t, t2)) + @test dot(t2, t) ≈ conj(dot(t2', t')) + @test dot(t2, t) ≈ dot(t', t2') + + i1 = @constinferred(isomorphism(T, V1 ⊗ V2, V2 ⊗ V1)) + i2 = @constinferred(isomorphism(Vector{T}, V2 ⊗ V1, V1 ⊗ V2)) + @test i1 * i2 == @constinferred(id(T, V1 ⊗ V2)) + @test i2 * i1 == @constinferred(id(Vector{T}, V2 ⊗ V1)) + + w = @constinferred(isometry(T, V1 ⊗ (oneunit(V1) ⊕ oneunit(V1)), V1)) + @test dim(w) == 2 * dim(V1 ← V1) + @test w' * w == id(Vector{T}, V1) + @test w * w' == (w * w')^2 + end + end + @timedtestset "Trivial space insertion and removal" begin + W = V1 ⊗ V2 ⊗ V3 ← V4 ⊗ V5 + for T in (Float32, ComplexF64) + t = @constinferred AMDGPU.rand(T, W) + t2 = @constinferred insertleftunit(t) + @test t2 == @constinferred insertrightunit(t) + @test numind(t2) == numind(t) + 1 + @test space(t2) == insertleftunit(space(t)) + @test scalartype(t2) === T + @test t.data === t2.data + @test @constinferred(removeunit(t2, $(numind(t2)))) == t + t3 = @constinferred insertleftunit(t; copy = true) + @test t3 == @constinferred insertrightunit(t; copy = true) + @test t.data !== t3.data + for (c, b) in blocks(t) + @test b == block(t3, c) + end + @test @constinferred(removeunit(t3, $(numind(t3)))) == t + t4 = @constinferred insertrightunit(t, 3; dual = true) + @test numin(t4) == numin(t) && numout(t4) == numout(t) + 1 + for (c, b) in blocks(t) + @test b == block(t4, c) + end + @test @constinferred(removeunit(t4, 4)) == t + t5 = @constinferred insertleftunit(t, 4; dual = true) + @test numin(t5) == numin(t) + 1 && numout(t5) == numout(t) + for (c, b) in blocks(t) + @test b == block(t5, c) + end + @test @constinferred(removeunit(t5, 4)) == t + end + end + if hasfusiontensor(I) + @timedtestset "Basic linear algebra: test via conversion" begin + W = V1 ⊗ V2 ⊗ V3 ← V4 ⊗ V5 + for T in (Float32, ComplexF64) + t = AMDGPU.rand(T, W) + t2 = @constinferred AMDGPU.rand!(similar(t)) + @test norm(t, 2) ≈ norm(ad(t), 2) + @test dot(t2, t) ≈ dot(ad(t2), ad(t)) + α = rand(T) + @test ad(α * t) ≈ α * ad(t) + @test ad(t + t) ≈ 2 * ad(t) + end + end + @timedtestset "Real and imaginary parts" begin + W = V1 ⊗ V2 + for T in (Float64, ComplexF64, ComplexF32) + t = @constinferred AMDGPU.randn(T, W, W) + @test real(ad(t)) == ad(@constinferred real(t)) + @test imag(ad(t)) == ad(@constinferred imag(t)) + end + end + end + @timedtestset "Tensor conversion" begin + W = V1 ⊗ V2 + t = @constinferred AMDGPU.randn(W ← W) + @test typeof(convert(ROCTensorMap, t')) == typeof(t) + tc = complex(t) + @test convert(typeof(tc), t) == tc + @test typeof(convert(typeof(tc), t)) == typeof(tc) + @test typeof(convert(typeof(tc), t')) == typeof(tc) + @test Base.promote_typeof(t, tc) == typeof(tc) + @test Base.promote_typeof(tc, t) == typeof(tc + t) + end + @timedtestset "Permutations: test via inner product invariance" begin + W = V1 ⊗ V2 ⊗ V3 ⊗ V4 ⊗ V5 + t = AMDGPU.rand(ComplexF64, W) + t′ = AMDGPU.randn!(similar(t)) + AMDGPU.@allowscalar begin + for k in 0:5 + for p in permutations(1:5) + p1 = ntuple(n -> p[n], k) + p2 = ntuple(n -> p[k + n], 5 - k) + t2 = @constinferred permute(t, (p1, p2)) + t2 = permute(t, (p1, p2)) + @test norm(t2) ≈ norm(t) + t2′ = permute(t′, (p1, p2)) + @test dot(t2′, t2) ≈ dot(t′, t) ≈ dot(transpose(t2′), transpose(t2)) + end + t3 = @constinferred repartition(t, $k) + t3 = repartition(t, k) + @test norm(t3) ≈ norm(t) + t3′ = @constinferred repartition!(similar(t3), t′) + @test norm(t3′) ≈ norm(t′) + @test dot(t′, t) ≈ dot(t3′, t3) + end + end + end + if BraidingStyle(I) isa Bosonic && hasfusiontensor(I) + @timedtestset "Permutations: test via conversion" begin + W = V1 ⊗ V2 ⊗ V3 ⊗ V4 ⊗ V5 + t = AMDGPU.rand(ComplexF64, W) + a = ad(t) + AMDGPU.@allowscalar begin + for k in 0:5 + for p in permutations(1:5) + p1 = ntuple(n -> p[n], k) + p2 = ntuple(n -> p[k + n], 5 - k) + t2 = permute(t, (p1, p2)) + a2 = ad(t2) + @test a2 ≈ permute(a, (p1, p2)) + @test ad(transpose(t2)) ≈ transpose(a2) + end + + t3 = repartition(t, k) + a3 = ad(t3) + @test a3 ≈ repartition(a, k) + end + end + end + end + @timedtestset "Full trace: test self-consistency" begin + t = AMDGPU.rand(ComplexF64, V1 ⊗ V2' ⊗ V2 ⊗ V1') + AMDGPU.@allowscalar begin + t2 = permute(t, ((1, 2), (4, 3))) + s = @constinferred tr(t2) + end + @test conj(s) ≈ tr(t2') + if !isdual(V1) + t2 = twist!(t2, 1) + end + if isdual(V2) + t2 = twist!(t2, 2) + end + AMDGPU.@allowscalar begin + ss = tr(t2) + @tensor s2 = t[a, b, b, a] + @tensor t3[a, b] := t[a, c, c, b] + @tensor s3 = t3[a, a] + end + @test ss ≈ s2 + @test ss ≈ s3 + end + @timedtestset "Partial trace: test self-consistency" begin + t = AMDGPU.rand(ComplexF64, V1 ⊗ V2' ⊗ V3 ⊗ V2 ⊗ V1' ⊗ V3') + AMDGPU.@allowscalar begin + @tensor t2[a, b] := t[c, d, b, d, c, a] + @tensor t4[a, b, c, d] := t[d, e, b, e, c, a] + @tensor t5[a, b] := t4[a, b, c, c] + end + @test t2 ≈ t5 + end + if BraidingStyle(I) isa Bosonic && hasfusiontensor(I) + @timedtestset "Trace: test via conversion" begin + t = AMDGPU.rand(ComplexF64, V1 ⊗ V2' ⊗ V3 ⊗ V2 ⊗ V1' ⊗ V3') + AMDGPU.@allowscalar begin + @tensor t2[a, b] := t[c, d, b, d, c, a] + @tensor t3[a, b] := ad(t)[c, d, b, d, c, a] + end + @test t3 ≈ ad(t2) + end + end + @timedtestset "Trace and contraction" begin + t1 = AMDGPU.rand(ComplexF64, V1 ⊗ V2 ⊗ V3) + t2 = AMDGPU.rand(ComplexF64, V2' ⊗ V4 ⊗ V1') + AMDGPU.@allowscalar begin + t3 = t1 ⊗ t2 + @tensor ta[a, b] := t1[x, y, a] * t2[y, b, x] + @tensor tb[a, b] := t3[x, y, a, y, b, x] + @test ta ≈ tb + end + end + if BraidingStyle(I) isa Bosonic && hasfusiontensor(I) + @timedtestset "Tensor contraction: test via conversion" begin + A1 = AMDGPU.randn(ComplexF64, V1' * V2', V3') + A2 = AMDGPU.randn(ComplexF64, V3 * V4, V5) + rhoL = AMDGPU.randn(ComplexF64, V1, V1) + rhoR = AMDGPU.randn(ComplexF64, V5, V5)' # test adjoint tensor + H = AMDGPU.randn(ComplexF64, V2 * V4, V2 * V4) + AMDGPU.@allowscalar begin + @tensor HrA12[a, s1, s2, c] := rhoL[a, a'] * conj(A1[a', t1, b]) * + A2[b, t2, c'] * rhoR[c', c] * + H[s1, s2, t1, t2] + + @tensor HrA12array[a, s1, s2, c] := ad(rhoL)[a, a'] * + conj(ad(A1)[a', t1, b]) * + ad(A2)[b, t2, c'] * + ad(rhoR)[c', c] * + ad(H)[s1, s2, t1, t2] + end + @test HrA12array ≈ ad(HrA12) + end + end + @timedtestset "Index flipping: test flipping inverse" begin + t = AMDGPU.rand(ComplexF64, V1 ⊗ V1' ← V1' ⊗ V1) + AMDGPU.@allowscalar begin + for i in 1:4 + @test t ≈ flip(flip(t, i), i; inv = true) + @test t ≈ flip(flip(t, i; inv = true), i) + end + end + end + #= + @timedtestset "Index flipping: test via explicit flip" begin + t = AMDGPU.rand(ComplexF64, V1 ⊗ V1' ← V1' ⊗ V1) + F1 = unitary(flip(V1), V1) # TODO wrong storage type + AMDGPU.@allowscalar begin + @tensor tf[a, b; c, d] := F1[a, a'] * t[a', b; c, d] + @test flip(t, 1) ≈ tf + @tensor tf[a, b; c, d] := conj(F1[b, b']) * t[a, b'; c, d] + @test twist!(flip(t, 2), 2) ≈ tf + @tensor tf[a, b; c, d] := F1[c, c'] * t[a, b; c', d] + @test flip(t, 3) ≈ tf + @tensor tf[a, b; c, d] := conj(F1[d, d']) * t[a, b; c, d'] + @test twist!(flip(t, 4), 4) ≈ tf + end + end =# + @timedtestset "Index flipping: test via contraction" begin + t1 = AMDGPU.rand(ComplexF64, V1 ⊗ V2 ⊗ V3 ← V4) + t2 = AMDGPU.rand(ComplexF64, V2' ⊗ V5 ← V4' ⊗ V1) + AMDGPU.@allowscalar begin + @tensor ta[a, b] := t1[x, y, a, z] * t2[y, b, z, x] + @tensor tb[a, b] := flip(t1, 1)[x, y, a, z] * flip(t2, 4)[y, b, z, x] + @test ta ≈ tb + @tensor tb[a, b] := flip(t1, (2, 4))[x, y, a, z] * flip(t2, (1, 3))[y, b, z, x] + @test ta ≈ tb + @tensor tb[a, b] := flip(t1, (1, 2, 4))[x, y, a, z] * flip(t2, (1, 3, 4))[y, b, z, x] + @tensor tb[a, b] := flip(t1, (1, 3))[x, y, a, z] * flip(t2, (2, 4))[y, b, z, x] + @test flip(ta, (1, 2)) ≈ tb + end + end + @timedtestset "Multiplication and inverse: test compatibility" begin + W1 = V1 ⊗ V2 ⊗ V3 + W2 = V4 ⊗ V5 + for T in (Float64, ComplexF64) + t1 = AMDGPU.rand(T, W1, W1) + t2 = AMDGPU.rand(T, W2, W2) + t = AMDGPU.rand(T, W1, W2) + @test t1 * (t1 \ t) ≈ t + @test (t / t2) * t2 ≈ t + AMDGPU.@allowscalar begin + @test t1 \ one(t1) ≈ inv(t1) + end + # @test one(t1) / t1 ≈ pinv(t1) # pinv not available in AMDGPU + @test_throws SpaceMismatch inv(t) + @test_throws SpaceMismatch t2 \ t + @test_throws SpaceMismatch t / t1 + # pinv not available in AMDGPU + # tp = pinv(t) * t + # @test tp ≈ tp * tp + end + end + if BraidingStyle(I) isa Bosonic && hasfusiontensor(I) + @timedtestset "Multiplication and inverse: test via conversion" begin + W1 = V1 ⊗ V2 ⊗ V3 + W2 = V4 ⊗ V5 + for T in (Float32, Float64, ComplexF32, ComplexF64) + t1 = AMDGPU.rand(T, W1, W1) + t2 = AMDGPU.rand(T, W2, W2) + t = AMDGPU.rand(T, W1, W2) + d1 = dim(W1) + d2 = dim(W2) + AMDGPU.@allowscalar begin + At1 = reshape(convert(ROCArray, t1), d1, d1) + At2 = reshape(convert(ROCArray, t2), d2, d2) + At = reshape(convert(ROCArray, t), d1, d2) + end + @test ad(t1 * t) ≈ ad(t1) * ad(t) + @test ad(t1' * t) ≈ ad(t1)' * ad(t) + @test ad(t2 * t') ≈ ad(t2) * ad(t)' + @test ad(t2' * t') ≈ ad(t2)' * ad(t)' + @test ad(inv(t1)) ≈ inv(ad(t1)) + # pinv not available in AMDGPU + #@test ad(pinv(t)) ≈ pinv(ad(t)) + + if T == Float32 || T == ComplexF32 + continue + end + + @test ad(t1 \ t) ≈ ad(t1) \ ad(t) + @test ad(t1' \ t) ≈ ad(t1)' \ ad(t) + @test ad(t2 \ t') ≈ ad(t2) \ ad(t)' + @test ad(t2' \ t') ≈ ad(t2)' \ ad(t)' + + @test ad(t2 / t) ≈ ad(t2) / ad(t) + @test ad(t2' / t) ≈ ad(t2)' / ad(t) + @test ad(t1 / t') ≈ ad(t1) / ad(t)' + @test ad(t1' / t') ≈ ad(t1)' / ad(t)' + end + end + end + #= + @timedtestset "diag/diagm" begin + W = V1 ⊗ V2 ⊗ V3 ← V4 ⊗ V5 + t = AMDGPU.randn(ComplexF64, W) + d = LinearAlgebra.diag(t) + D = LinearAlgebra.diagm(codomain(t), domain(t), d) + @test LinearAlgebra.isdiag(D) + @test LinearAlgebra.diag(D) == d + end=# # TODO + if BraidingStyle(I) isa Bosonic && hasfusiontensor(I) + @timedtestset "Tensor functions" begin + W = V1 ⊗ V2 + for T in (Float64, ComplexF64) + t = AMDGPU.randn(T, W, W) + s = dim(W) + AMDGPU.@allowscalar begin + expt = @constinferred exp(t) + @test ad(expt) ≈ exp(ad(t)) + + @test (@constinferred sqrt(t))^2 ≈ t + @test ad(sqrt(t)) ≈ sqrt(ad(t)) + + @test exp(@constinferred log(expt)) ≈ expt + @test ad(log(expt)) ≈ log(ad(expt)) + + @test (@constinferred cos(t))^2 + (@constinferred sin(t))^2 ≈ + id(storagetype(t), W) + @test (@constinferred tan(t)) ≈ sin(t) / cos(t) + @test (@constinferred cot(t)) ≈ cos(t) / sin(t) + @test (@constinferred cosh(t))^2 - (@constinferred sinh(t))^2 ≈ + id(storagetype(t), W) + @test (@constinferred tanh(t)) ≈ sinh(t) / cosh(t) + @test (@constinferred coth(t)) ≈ cosh(t) / sinh(t) + + t1 = sin(t) + @test sin(@constinferred asin(t1)) ≈ t1 + t2 = cos(t) + @test cos(@constinferred acos(t2)) ≈ t2 + t3 = sinh(t) + @test sinh(@constinferred asinh(t3)) ≈ t3 + t4 = cosh(t) + @test cosh(@constinferred acosh(t4)) ≈ t4 + t5 = tan(t) + @test tan(@constinferred atan(t5)) ≈ t5 + t6 = cot(t) + @test cot(@constinferred acot(t6)) ≈ t6 + t7 = tanh(t) + @test tanh(@constinferred atanh(t7)) ≈ t7 + t8 = coth(t) + @test coth(@constinferred acoth(t8)) ≈ t8 + end + end + end + end + # Sylvester not defined for AMDGPU + # @timedtestset "Sylvester equation" begin + # for T in (Float32, ComplexF64) + # tA = AMDGPU.rand(T, V1 ⊗ V3, V1 ⊗ V3) + # tB = AMDGPU.rand(T, V2 ⊗ V4, V2 ⊗ V4) + # tA = 3 // 2 * leftorth(tA; alg=Polar())[1] + # tB = 1 // 5 * leftorth(tB; alg=Polar())[1] + # tC = AMDGPU.rand(T, V1 ⊗ V3, V2 ⊗ V4) + # t = @constinferred sylvester(tA, tB, tC) + # @test codomain(t) == V1 ⊗ V3 + # @test domain(t) == V2 ⊗ V4 + # @test norm(tA * t + t * tB + tC) < + # (norm(tA) + norm(tB) + norm(tC)) * eps(real(T))^(2 / 3) + # if BraidingStyle(I) isa Bosonic && hasfusiontensor(I) + # matrix(x) = reshape(convert(Array, x), dim(codomain(x)), dim(domain(x))) + # @test matrix(t) ≈ sylvester(matrix(tA), matrix(tB), matrix(tC)) + # end + # end + # end + # + # TODO + @timedtestset "Tensor product: test via norm preservation" begin + for T in (Float32, ComplexF64) + t1 = AMDGPU.rand(T, V2 ⊗ V3 ⊗ V1, V1 ⊗ V2) + t2 = AMDGPU.rand(T, V2 ⊗ V1 ⊗ V3, V1 ⊗ V1) + AMDGPU.@allowscalar begin + t = @constinferred (t1 ⊗ t2) + end + @test norm(t) ≈ norm(t1) * norm(t2) + end + end + if BraidingStyle(I) isa Bosonic && hasfusiontensor(I) + @timedtestset "Tensor product: test via conversion" begin + for T in (Float32, ComplexF64) + t1 = AMDGPU.rand(T, V2 ⊗ V3 ⊗ V1, V1) + t2 = AMDGPU.rand(T, V2 ⊗ V1 ⊗ V3, V2) + AMDGPU.@allowscalar begin + t = @constinferred (t1 ⊗ t2) + end + d1 = dim(codomain(t1)) + d2 = dim(codomain(t2)) + d3 = dim(domain(t1)) + d4 = dim(domain(t2)) + At = ad(t) + AMDGPU.@allowscalar begin + @test ad(t) ≈ ad(t1) ⊗ ad(t2) + end + end + end + end + @timedtestset "Tensor product: test via tensor contraction" begin + for T in (Float32, ComplexF64) + t1 = AMDGPU.rand(T, V2 ⊗ V3 ⊗ V1) + t2 = AMDGPU.rand(T, V2 ⊗ V1 ⊗ V3) + AMDGPU.@allowscalar begin + t = @constinferred (t1 ⊗ t2) + @tensor t′[1, 2, 3, 4, 5, 6] := t1[1, 2, 3] * t2[4, 5, 6] + @test t ≈ t′ + end + end + end + @timedtestset "Tensor absorpsion" begin + # absorbing small into large + t1 = AMDGPU.zeros(V1 ⊕ V1, V2 ⊗ V3) + t2 = AMDGPU.rand(V1, V2 ⊗ V3) + AMDGPU.@allowscalar begin + t3 = @constinferred absorb(t1, t2) + end + @test norm(t3) ≈ norm(t2) + @test norm(t1) == 0 + AMDGPU.@allowscalar begin + t4 = @constinferred absorb!(t1, t2) + end + @test t1 === t4 + @test t3 ≈ t4 + + # absorbing large into small + t1 = AMDGPU.rand(V1 ⊕ V1, V2 ⊗ V3) + t2 = AMDGPU.zeros(V1, V2 ⊗ V3) + AMDGPU.@allowscalar begin + t3 = @constinferred absorb(t2, t1) + end + @test norm(t3) < norm(t1) + @test norm(t2) == 0 + AMDGPU.@allowscalar begin + t4 = @constinferred absorb!(t2, t1) + end + @test t2 === t4 + @test t3 ≈ t4 + end + end + TensorKit.empty_globalcaches!() +end + +@timedtestset "Deligne tensor product: test via conversion" begin + Vlists1 = (Vtr,) # VSU₂) + Vlists2 = (Vtr,) # Vℤ₂) + @testset for Vlist1 in Vlists1, Vlist2 in Vlists2 + V1, V2, V3, V4, V5 = Vlist1 + W1, W2, W3, W4, W5 = Vlist2 + for T in (Float32, ComplexF64) + t1 = AMDGPU.rand(T, V1 ⊗ V2, V3' ⊗ V4) + t2 = AMDGPU.rand(T, W2, W1 ⊗ W1') + t = @constinferred (t1 ⊠ t2) + d1 = dim(codomain(t1)) + d2 = dim(codomain(t2)) + d3 = dim(domain(t1)) + d4 = dim(domain(t2)) + @test ad(t1) ⊠ ad(t2) ≈ ad(t1 ⊠ t2) + end + end +end diff --git a/test/cuda/factorizations.jl b/test/cuda/factorizations.jl new file mode 100644 index 000000000..d095d270d --- /dev/null +++ b/test/cuda/factorizations.jl @@ -0,0 +1,450 @@ +using Test, TestExtras +using TensorKit +using LinearAlgebra: LinearAlgebra +using CUDA, cuTENSOR + +const CUDAExt = Base.get_extension(TensorKit, :TensorKitCUDAExt) +@assert !isnothing(CUDAExt) +const CuTensorMap = getglobal(CUDAExt, :CuTensorMap) +const CuDiagonalTensorMap = getglobal(CUDAExt, :CuDiagonalTensorMap) + +@isdefined(TestSetup) || include("../setup.jl") +using .TestSetup + +spacelist = try + if ENV["CI"] == "true" + println("Detected running on CI") + if Sys.iswindows() + (Vtr, Vℤ₃, VU₁, VfU₁, VCU₁, VSU₂) + elseif Sys.isapple() + (Vtr, Vℤ₃, VfU₁, VfSU₂) + else + (Vtr, VU₁, VCU₁, VSU₂, VfSU₂) + end + else + (Vtr, Vℤ₃, VU₁, VfU₁, VCU₁, VSU₂, VfSU₂) + end +catch + (Vtr, Vℤ₃, VU₁, VfU₁, VCU₁, VSU₂, VfSU₂) +end + +import CUDA: rand as curand, randn as curandn + + +eltypes = (Float32, ComplexF64) + +for V in spacelist + I = sectortype(first(V)) + Istr = TensorKit.type_repr(I) + println("---------------------------------------") + println("CUDA Factorizations with symmetry: $Istr") + println("---------------------------------------") + @timedtestset "CUDA Factorizations with symmetry: $Istr" verbose = true begin + V1, V2, V3, V4, V5 = V + W = V1 ⊗ V2 + @testset "QR decomposition" begin + for T in eltypes, + t in ( + curand(T, W, W), curand(T, W, W)', curand(T, W, V1), curand(T, V1, W)', + CuDiagonalTensorMap(curand(T, reduceddim(V1)), V1), + ) + + Q, R = @constinferred qr_full(t) + @test Q * R ≈ t + @test isunitary(Q) + + Q, R = @constinferred qr_compact(t) + @test Q * R ≈ t + @test isisometric(Q) + + Q, R = @constinferred left_orth(t) + @test Q * R ≈ t + @test isisometric(Q) + + N = @constinferred qr_null(t) + @test isisometric(N) + @test norm(N' * t) ≈ 0 atol = 100 * eps(norm(t)) + + N = @constinferred left_null(t) + @test isisometric(N) + @test norm(N' * t) ≈ 0 atol = 100 * eps(norm(t)) + end + + # empty tensor + for T in eltypes + t = curand(T, V1 ⊗ V2, zero(V1)) + + Q, R = @constinferred qr_full(t) + @test Q * R ≈ t + @test isunitary(Q) + @test dim(R) == dim(t) == 0 + + Q, R = @constinferred qr_compact(t) + @test Q * R ≈ t + @test isisometric(Q) + @test dim(Q) == dim(R) == dim(t) + + Q, R = @constinferred left_orth(t) + @test Q * R ≈ t + @test isisometric(Q) + @test dim(Q) == dim(R) == dim(t) + + N = @constinferred qr_null(t) + @test isunitary(N) + @test norm(N' * t) ≈ 0 atol = 100 * eps(norm(t)) + end + end + @testset "LQ decomposition" begin + for T in eltypes, + t in ( + curand(T, W, W), curand(T, W, W)', curand(T, W, V1), curand(T, V1, W)', + CuDiagonalTensorMap(curand(T, reduceddim(V1)), V1), + ) + + L, Q = @constinferred lq_full(t) + @test L * Q ≈ t + @test isunitary(Q) + + L, Q = @constinferred lq_compact(t) + @test L * Q ≈ t + @test isisometric(Q; side = :right) + + L, Q = @constinferred right_orth(t) + @test L * Q ≈ t + @test isisometric(Q; side = :right) + + Nᴴ = @constinferred lq_null(t) + @test isisometric(Nᴴ; side = :right) + @test norm(t * Nᴴ') ≈ 0 atol = 100 * eps(norm(t)) + end + + for T in eltypes + # empty tensor + t = curand(T, zero(V1), V1 ⊗ V2) + + L, Q = @constinferred lq_full(t) + @test L * Q ≈ t + @test isunitary(Q) + @test dim(L) == dim(t) == 0 + + L, Q = @constinferred lq_compact(t) + @test L * Q ≈ t + @test isisometric(Q; side = :right) + @test dim(Q) == dim(L) == dim(t) + + L, Q = @constinferred right_orth(t) + @test L * Q ≈ t + @test isisometric(Q; side = :right) + @test dim(Q) == dim(L) == dim(t) + + Nᴴ = @constinferred lq_null(t) + @test isunitary(Nᴴ) + @test norm(t * Nᴴ') ≈ 0 atol = 100 * eps(norm(t)) + end + end + @testset "Polar decomposition" begin + @testset for T in eltypes, + t in ( + curand(T, W, W), + curand(T, W, W)', + curand(T, W, V1), + curand(T, V1, W)', + CuDiagonalTensorMap(curand(T, reduceddim(V1)), V1), + ) + + @assert domain(t) ≾ codomain(t) + w, p = @constinferred left_polar(t) + @test w * p ≈ t + @test isisometric(w) + @test isposdef(project_hermitian!(p)) + + w, p = @constinferred left_orth(t; alg = :polar) + @test w * p ≈ t + @test isisometric(w) + end + + @testset for T in eltypes, + t in ( + curand(T, W, W), curand(T, W, W)', curand(T, V1, W), curand(T, W, V1)', + CuDiagonalTensorMap(curand(T, reduceddim(V1)), V1), + ) + + @assert codomain(t) ≾ domain(t) + p, wᴴ = @constinferred right_polar(t) + @test p * wᴴ ≈ t + @test isisometric(wᴴ; side = :right) + @test isposdef(project_hermitian!(p)) + + p, wᴴ = @constinferred right_orth(t; alg = :polar) + @test p * wᴴ ≈ t + @test isisometric(wᴴ; side = :right) + end + end + @testset "SVD" begin + for T in eltypes, + t in ( + curand(T, W, W), curand(T, W, W)', + curand(T, W, V1), curand(T, V1, W), + curand(T, W, V1)', curand(T, V1, W)', + CuDiagonalTensorMap(curand(T, reduceddim(V1)), V1), + ) + + u, s, vᴴ = @constinferred svd_full(t) + @test u * s * vᴴ ≈ t + @test isunitary(u) + @test isunitary(vᴴ) + + u, s, vᴴ = @constinferred svd_compact(t) + @test u * s * vᴴ ≈ t + @test isisometric(u) + @test isposdef(s) + @test isisometric(vᴴ; side = :right) + + s′ = LinearAlgebra.diag(s) + for (c, b) in LinearAlgebra.svdvals(t) + @test b ≈ s′[c] + end + + v, c = @constinferred left_orth(t; alg = :svd) + @test v * c ≈ t + @test isisometric(v) + + N = @constinferred left_null(t; alg = :svd) + @test isisometric(N) + @test norm(N' * t) ≈ 0 atol = 100 * eps(norm(t)) + + Nᴴ = @constinferred right_null(t; alg = :svd) + @test isisometric(Nᴴ; side = :right) + @test norm(t * Nᴴ') ≈ 0 atol = 100 * eps(norm(t)) + end + + # empty tensor + for T in eltypes, t in (curand(T, W, zero(V1)), curand(T, zero(V1), W)) + U, S, Vᴴ = @constinferred svd_full(t) + @test U * S * Vᴴ ≈ t + @test isunitary(U) + @test isunitary(Vᴴ) + + U, S, Vᴴ = @constinferred svd_compact(t) + @test U * S * Vᴴ ≈ t + @test dim(U) == dim(S) == dim(Vᴴ) == dim(t) == 0 + end + end + + @testset "truncated SVD" begin + for T in eltypes, + t in ( + curandn(T, W, W), curandn(T, W, W)', + curandn(T, W, V1), curandn(T, V1, W), + curandn(T, W, V1)', curandn(T, V1, W)', + CuDiagonalTensorMap(curandn(T, reduceddim(V1)), V1), + ) + + @constinferred normalize!(t) + + CUDA.@allowscalar begin + U, S, Vᴴ = @constinferred svd_trunc(t; trunc = notrunc()) + end + @test U * S * Vᴴ ≈ t + @test isisometric(U) + @test isisometric(Vᴴ; side = :right) + + trunc = truncrank(dim(domain(S)) ÷ 2) + CUDA.@allowscalar begin + U1, S1, Vᴴ1 = @constinferred svd_trunc(t; trunc) + end + @test t * Vᴴ1' ≈ U1 * S1 + @test isisometric(U1) + @test isisometric(Vᴴ1; side = :right) + @test dim(domain(S1)) <= trunc.howmany + + λ = minimum(minimum, values(LinearAlgebra.diag(S1))) + trunc = trunctol(; atol = λ - 10eps(λ)) + CUDA.@allowscalar begin + U2, S2, Vᴴ2 = @constinferred svd_trunc(t; trunc) + end + @test t * Vᴴ2' ≈ U2 * S2 + @test isisometric(U2) + @test isisometric(Vᴴ2; side = :right) + @test minimum(minimum, values(LinearAlgebra.diag(S1))) >= λ + @test U2 ≈ U1 + @test S2 ≈ S1 + @test Vᴴ2 ≈ Vᴴ1 + + trunc = truncspace(space(S2, 1)) + CUDA.@allowscalar begin + U3, S3, Vᴴ3 = @constinferred svd_trunc(t; trunc) + end + @test t * Vᴴ3' ≈ U3 * S3 + @test isisometric(U3) + @test isisometric(Vᴴ3; side = :right) + @test space(S3, 1) ≾ space(S2, 1) + + trunc = truncerror(; atol = 0.5) + CUDA.@allowscalar begin + U4, S4, Vᴴ4 = @constinferred svd_trunc(t; trunc) + end + @test t * Vᴴ4' ≈ U4 * S4 + @test isisometric(U4) + @test isisometric(Vᴴ4; side = :right) + @test norm(t - U4 * S4 * Vᴴ4) <= 0.5 + end + end + + @testset "Eigenvalue decomposition" begin + for T in eltypes, + t in ( + curand(T, V1, V1), + curand(T, W, W), + curand(T, W, W)', + CuDiagonalTensorMap(curand(T, reduceddim(V1)), V1), + ) + + d, v = @constinferred eig_full(t) + @test t * v ≈ v * d + + d′ = LinearAlgebra.diag(d) + for (c, b) in LinearAlgebra.eigvals(t) + @test sort(b; by = abs) ≈ sort(d′[c]; by = abs) + end + + vdv = v' * v + vdv = (vdv + vdv') / 2 + @test @constinferred isposdef(vdv) + t isa CuDiagonalTensorMap || @test !isposdef(t) # unlikely for non-hermitian map + + CUDA.@allowscalar begin # TODO + d, v = @constinferred eig_trunc(t; trunc = truncrank(dim(domain(t)) ÷ 2)) + end + @test t * v ≈ v * d + @test dim(domain(d)) ≤ dim(domain(t)) ÷ 2 + + t2 = (t + t') + D, V = eigen(t2) + @test isisometric(V) + D̃, Ṽ = @constinferred eigh_full(t2) + @test D ≈ D̃ + @test V ≈ Ṽ + λ = minimum( + minimum(real(LinearAlgebra.diag(b))) + for (c, b) in blocks(D) + ) + @test cond(Ṽ) ≈ one(real(T)) + @test isposdef(t2) == isposdef(λ) + @test isposdef(t2 - λ * one(t2) + 0.1 * one(t2)) + @test !isposdef(t2 - λ * one(t2) - 0.1 * one(t2)) + + add!(t, t') + + d, v = @constinferred eigh_full(t) + @test t * v ≈ v * d + @test isunitary(v) + + λ = minimum(minimum(real(LinearAlgebra.diag(b))) for (c, b) in blocks(d)) + @test cond(v) ≈ one(real(T)) + @test isposdef(t) == isposdef(λ) + @test isposdef(t - λ * one(t) + 0.1 * one(t)) + @test !isposdef(t - λ * one(t) - 0.1 * one(t)) + + CUDA.@allowscalar begin + d, v = @constinferred eigh_trunc(t; trunc = truncrank(dim(domain(t)) ÷ 2)) + end + @test t * v ≈ v * d + @test dim(domain(d)) ≤ dim(domain(t)) ÷ 2 + end + end + + @testset "Condition number and rank" begin + for T in eltypes, + t in ( + curand(T, W, W), curand(T, W, W)', + curand(T, W, V1), curand(T, V1, W), + curand(T, W, V1)', curand(T, V1, W)', + CuDiagonalTensorMap(curand(T, reduceddim(V1)), V1), + ) + + d1, d2 = dim(codomain(t)), dim(domain(t)) + @test rank(t) == min(d1, d2) + M = left_null(t) + @test @constinferred(rank(M)) + rank(t) == d1 + Mᴴ = right_null(t) + @test rank(Mᴴ) + rank(t) == d2 + end + for T in eltypes + u = unitary(T, V1 ⊗ V2, V1 ⊗ V2) + @test @constinferred(cond(u)) ≈ one(real(T)) + @test @constinferred(rank(u)) == dim(V1 ⊗ V2) + + t = curand(T, zero(V1), W) + @test rank(t) == 0 + t2 = curand(T, zero(V1) * zero(V2), zero(V1) * zero(V2)) + @test rank(t2) == 0 + @test cond(t2) == 0.0 + end + for T in eltypes, t in (curand(T, W, W), curand(T, W, W)') + add!(t, t') + vals = @constinferred LinearAlgebra.eigvals(t) + λmax = maximum(s -> maximum(abs, s), values(vals)) + λmin = minimum(s -> minimum(abs, s), values(vals)) + @test cond(t) ≈ λmax / λmin + end + end + + @testset "Hermitian projections" begin + for T in eltypes, + t in ( + curand(T, V1, V1), curand(T, W, W), curand(T, W, W)', + CuDiagonalTensorMap(curand(T, reduceddim(V1)), V1), + ) + normalize!(t) + noisefactor = eps(real(T))^(3 / 4) + + th = (t + t') / 2 + ta = (t - t') / 2 + tc = copy(t) + + th′ = @constinferred project_hermitian(t) + @test ishermitian(th′) + @test th′ ≈ th + @test t == tc + th_approx = th + noisefactor * ta + @test !ishermitian(th_approx) || (T <: Real && t isa CuDiagonalTensorMap) + @test ishermitian(th_approx; atol = 10 * noisefactor) + + ta′ = project_antihermitian(t) + @test isantihermitian(ta′) + @test ta′ ≈ ta + @test t == tc + ta_approx = ta + noisefactor * th + @test !isantihermitian(ta_approx) + @test isantihermitian(ta_approx; atol = 10 * noisefactor) || (T <: Real && t isa CuDiagonalTensorMap) + end + end + + @testset "Isometric projections" begin + for T in eltypes, + t in ( + curandn(T, W, W), curandn(T, W, W)', + curandn(T, W, V1), curandn(T, V1, W)', + ) + t2 = project_isometric(t) + @test isisometric(t2) + t3 = project_isometric(t2) + @test t3 ≈ t2 # stability of the projection + @test t2 * (t2' * t) ≈ t + + tc = similar(t) + t3 = @constinferred project_isometric!(copy!(tc, t), t2) + @test t3 === t2 + @test isisometric(t2) + + # test that t2 is closer to A then any other isometry + for k in 1:10 + δt = randn!(similar(t)) + t3 = project_isometric(t + δt / 100) + @test norm(t - t3) > norm(t - t2) + end + end + end + end +end diff --git a/test/cuda/tensors.jl b/test/cuda/tensors.jl new file mode 100644 index 000000000..147d8c8af --- /dev/null +++ b/test/cuda/tensors.jl @@ -0,0 +1,608 @@ +using Adapt, CUDA, cuTENSOR +using Test, TestExtras +using TensorKit, Combinatorics +ad = adapt(Array) +const CUDAExt = Base.get_extension(TensorKit, :TensorKitCUDAExt) +@assert !isnothing(CUDAExt) +const CuTensorMap = getglobal(CUDAExt, :CuTensorMap) +const CuDiagonalTensorMap = getglobal(CUDAExt, :CuDiagonalTensorMap) + +@isdefined(TestSetup) || include("../setup.jl") +using .TestSetup + +for V in (Vtr, Vℤ₂, Vfℤ₂, Vℤ₃, VU₁, VfU₁, VCU₁, VSU₂, VfSU₂) #, VSU₃) + V1, V2, V3, V4, V5 = V + @assert V3 * V4 * V2 ≿ V1' * V5' # necessary for leftorth tests + @assert V3 * V4 ≾ V1' * V2' * V5' # necessary for rightorth tests +end + +spacelist = try + if ENV["CI"] == "true" + println("Detected running on CI") + if Sys.iswindows() + (Vtr, Vℤ₂, Vfℤ₂, Vℤ₃, VU₁, VfU₁, VCU₁, VSU₂) + elseif Sys.isapple() + (Vtr, Vℤ₂, Vfℤ₂, Vℤ₃, VfU₁, VfSU₂) #, VSU₃) + else + (Vtr, Vℤ₂, Vfℤ₂, VU₁, VCU₁, VSU₂, VfSU₂) #, VSU₃) + end + else + (Vtr, VU₁, VSU₂, Vfℤ₂) + end +catch + (Vtr, Vℤ₂, Vfℤ₂, Vℤ₃, VU₁, VfU₁, VCU₁, VSU₂, VfSU₂) #, VSU₃) +end + +for V in spacelist + I = sectortype(first(V)) + Istr = TensorKit.type_repr(I) + println("---------------------------------------") + println("CUDA Tensors with symmetry: $Istr") + println("---------------------------------------") + @timedtestset "Tensors with symmetry: $Istr" verbose = true begin + V1, V2, V3, V4, V5 = V + @timedtestset "Basic tensor properties" begin + W = V1 ⊗ V2 ⊗ V3 ⊗ V4 ⊗ V5 + for T in (Int, Float32, Float64, ComplexF32, ComplexF64) + t = @constinferred CUDA.zeros(T, W) + CUDA.@allowscalar begin + @test @constinferred(hash(t)) == hash(deepcopy(t)) + end + @test scalartype(t) == T + @test norm(t) == 0 + @test codomain(t) == W + @test space(t) == (W ← one(W)) + @test domain(t) == one(W) + @test typeof(t) == TensorMap{T, spacetype(t), 5, 0, CuVector{T, CUDA.DeviceMemory}} + # blocks + bs = @constinferred blocks(t) + (c, b1), state = @constinferred Nothing iterate(bs) + @test c == first(blocksectors(W)) + next = @constinferred Nothing iterate(bs, state) + b2 = @constinferred block(t, first(blocksectors(t))) + @test b1 == b2 + @test_broken eltype(bs) === Pair{typeof(c), typeof(b1)} + @test_broken typeof(b1) === TensorKit.blocktype(t) + @test typeof(c) === sectortype(t) + end + end + @timedtestset "Tensor Dict conversion" begin + W = V1 ⊗ V2 ⊗ V3 ← V4 ⊗ V5 + for T in (Int, Float32, ComplexF64) + t = @constinferred CUDA.rand(T, W) + d = convert(Dict, t) + @test t == convert(CuTensorMap, d) + end + end + if hasfusiontensor(I) || I == Trivial + @timedtestset "Tensor Array conversion" begin + W1 = V1 ← one(V1) + W2 = one(V2) ← V2 + W3 = V1 ⊗ V2 ← one(V1) + W4 = V1 ← V2 + W5 = one(V1) ← V1 ⊗ V2 + W6 = V1 ⊗ V2 ⊗ V3 ← V4 ⊗ V5 + for W in (W1, W2, W3, W4, W5, W6) + for T in (Int, Float32, ComplexF64) + if T == Int + t = CuTensorMap{T}(undef, W) + for (_, b) in blocks(t) + CUDA.@allowscalar CUDA.rand!(b, -20:20) + end + else + t = @constinferred CUDA.randn(T, W) + end + #=CUDA.@allowscalar begin + a = @constinferred convert(CuArray, t) + end + b = reshape(a, dim(codomain(W)), dim(domain(W))) + @test t ≈ @constinferred CuTensorMap(a, W) + @test t ≈ @constinferred CuTensorMap(b, W) + @test t === @constinferred CuTensorMap(t.data, W)=# # TODO convert(CuArray broken + end + end + for T in (Int, Float32, ComplexF64) + t = CUDA.randn(T, V1 ⊗ V2 ← zero(V1)) + CUDA.@allowscalar begin + a = convert(CuArray, t) + end + @test norm(a) == 0 + end + end + end + @timedtestset "Basic linear algebra" begin + W = V1 ⊗ V2 ⊗ V3 ← V4 ⊗ V5 + for T in (Float32, ComplexF64) + t = @constinferred CUDA.rand(T, W) + @test scalartype(t) == T + @test space(t) == W + @test space(t') == W' + @test dim(t) == dim(space(t)) + @test codomain(t) == codomain(W) + @test domain(t) == domain(W) + # blocks for adjoint + bs = @constinferred blocks(t') + (c, b1), state = @constinferred Nothing iterate(bs) + @test c == first(blocksectors(W')) + next = @constinferred Nothing iterate(bs, state) + b2 = @constinferred block(t', first(blocksectors(t'))) + @test b1 == b2 + @test_broken eltype(bs) === Pair{typeof(c), typeof(b1)} + @test_broken typeof(b1) === TensorKit.blocktype(t') + @test typeof(c) === sectortype(t) + # linear algebra + @test isa(@constinferred(norm(t)), real(T)) + @test norm(t)^2 ≈ dot(t, t) + α = rand(T) + @test norm(α * t) ≈ abs(α) * norm(t) + @test norm(t + t, 2) ≈ 2 * norm(t, 2) + @test norm(t + t, 1) ≈ 2 * norm(t, 1) + @test norm(t + t, Inf) ≈ 2 * norm(t, Inf) + p = 3 * rand(Float64) + CUDA.@allowscalar begin + @test norm(t + t, p) ≈ 2 * norm(t, p) + end + @test norm(t) ≈ norm(t') + + t2 = @constinferred rand!(similar(t)) + β = rand(T) + #@test @constinferred(dot(β * t2, α * t)) ≈ conj(β) * α * conj(dot(t, t2)) # broken for Irrep[CU₁] + @test dot(β * t2, α * t) ≈ conj(β) * α * conj(dot(t, t2)) + @test dot(t2, t) ≈ conj(dot(t, t2)) + @test dot(t2, t) ≈ conj(dot(t2', t')) + @test dot(t2, t) ≈ dot(t', t2') + + i1 = @constinferred(isomorphism(T, V1 ⊗ V2, V2 ⊗ V1)) + i2 = @constinferred(isomorphism(CuVector{T}, V2 ⊗ V1, V1 ⊗ V2)) + CUDA.@allowscalar begin + #@test i1 * i2 == @constinferred(id(T, V1 ⊗ V2)) # TODO + #@test i2 * i1 == @constinferred(id(CuVector{T}, V2 ⊗ V1)) # TODO + w = @constinferred(isometry(T, V1 ⊗ (oneunit(V1) ⊕ oneunit(V1)), V1)) + @test dim(w) == 2 * dim(V1 ← V1) + @test w' * w == id(CuVector{T}, V1) + @test w * w' == (w * w')^2 + end + end + end + @timedtestset "Trivial space insertion and removal" begin + W = V1 ⊗ V2 ⊗ V3 ← V4 ⊗ V5 + for T in (Float32, ComplexF64) + t = @constinferred CUDA.rand(T, W) + t2 = @constinferred insertleftunit(t) + @test t2 == @constinferred insertrightunit(t) + @test numind(t2) == numind(t) + 1 + @test space(t2) == insertleftunit(space(t)) + @test scalartype(t2) === T + @test t.data === t2.data + @test @constinferred(removeunit(t2, $(numind(t2)))) == t + t3 = @constinferred insertleftunit(t; copy = true) + @test t3 == @constinferred insertrightunit(t; copy = true) + @test t.data !== t3.data + for (c, b) in blocks(t) + @test b == block(t3, c) + end + @test @constinferred(removeunit(t3, $(numind(t3)))) == t + t4 = @constinferred insertrightunit(t, 3; dual = true) + @test numin(t4) == numin(t) && numout(t4) == numout(t) + 1 + for (c, b) in blocks(t) + @test b == block(t4, c) + end + @test @constinferred(removeunit(t4, 4)) == t + t5 = @constinferred insertleftunit(t, 4; dual = true) + @test numin(t5) == numin(t) + 1 && numout(t5) == numout(t) + for (c, b) in blocks(t) + @test b == block(t5, c) + end + @test @constinferred(removeunit(t5, 4)) == t + end + end + if hasfusiontensor(I) + #=@timedtestset "Basic linear algebra: test via conversion" begin + W = V1 ⊗ V2 ⊗ V3 ← V4 ⊗ V5 + for T in (Float32, ComplexF64) + t = CUDA.rand(T, W) + t2 = @constinferred CUDA.rand!(similar(t)) + @test norm(t, 2) ≈ norm(convert(CuArray, t), 2) + @test dot(t2, t) ≈ dot(convert(CuArray, t2), convert(CuArray, t)) + α = rand(T) + @test convert(CuArray, α * t) ≈ α * convert(CuArray, t) + @test convert(CuArray, t + t) ≈ 2 * convert(CuArray, t) + end + end + @timedtestset "Real and imaginary parts" begin + W = V1 ⊗ V2 + for T in (Float64, ComplexF64, ComplexF32) + t = @constinferred CUDA.randn(T, W, W) + + tr = @constinferred real(t) + @test scalartype(tr) <: Real + @test real(convert(CuArray, t)) == convert(CuArray, tr) + + ti = @constinferred imag(t) + @test scalartype(ti) <: Real + @test imag(convert(CuArray, t)) == convert(CuArray, ti) + + tc = @inferred complex(t) + @test scalartype(tc) <: Complex + @test complex(convert(CuArray, t)) == convert(CuArray, tc) + + tc2 = @inferred complex(tr, ti) + @test tc2 ≈ tc + end + end=# # TODO convert(CuArray is broken + end + @timedtestset "Tensor conversion" begin + W = V1 ⊗ V2 + t = @constinferred CUDA.randn(W ← W) + @test typeof(convert(CuTensorMap, t')) == typeof(t) + tc = complex(t) + @test convert(typeof(tc), t) == tc + @test typeof(convert(typeof(tc), t)) == typeof(tc) + @test typeof(convert(typeof(tc), t')) == typeof(tc) + @test Base.promote_typeof(t, tc) == typeof(tc) + @test Base.promote_typeof(tc, t) == typeof(tc + t) + end + #=@timedtestset "diag/diagm" begin + W = V1 ⊗ V2 ⊗ V3 ← V4 ⊗ V5 + t = CUDA.randn(ComplexF64, W) + d = LinearAlgebra.diag(t) + # TODO find a way to use CUDA here + D = LinearAlgebra.diagm(codomain(t), domain(t), d) + @test LinearAlgebra.isdiag(D) + @test LinearAlgebra.diag(D) == d + end=# + @timedtestset "Permutations: test via inner product invariance" begin + W = V1 ⊗ V2 ⊗ V3 ⊗ V4 ⊗ V5 + t = CUDA.rand(ComplexF64, W) + t′ = CUDA.randn!(similar(t)) + for k in 0:5 + for p in permutations(1:5) + p1 = ntuple(n -> p[n], k) + p2 = ntuple(n -> p[k + n], 5 - k) + CUDA.@allowscalar begin + t2 = @constinferred permute(t, (p1, p2)) + t2 = permute(t, (p1, p2)) + @test norm(t2) ≈ norm(t) + t2′ = permute(t′, (p1, p2)) + @test dot(t2′, t2) ≈ dot(t′, t) ≈ dot(transpose(t2′), transpose(t2)) + end + end + + CUDA.@allowscalar begin + t3 = @constinferred repartition(t, $k) + t3 = repartition(t, k) + @test norm(t3) ≈ norm(t) + t3′ = @constinferred repartition!(similar(t3), t′) + @test norm(t3′) ≈ norm(t′) + @test dot(t′, t) ≈ dot(t3′, t3) + end + end + end + #=if BraidingStyle(I) isa Bosonic && hasfusiontensor(I) + @timedtestset "Permutations: test via conversion" begin + W = V1 ⊗ V2 ⊗ V3 ⊗ V4 ⊗ V5 + t = CUDA.rand(ComplexF64, W) + a = convert(CuArray, t) + for k in 0:5 + for p in permutations(1:5) + p1 = ntuple(n -> p[n], k) + p2 = ntuple(n -> p[k + n], 5 - k) + t2 = permute(t, (p1, p2)) + a2 = convert(CuArray, t2) + @test a2 ≈ permutedims(a, (p1..., p2...)) + @test convert(CuArray, transpose(t2)) ≈ + permutedims(a2, (5, 4, 3, 2, 1)) + end + + t3 = repartition(t, k) + a3 = convert(CuArray, t3) + @test a3 ≈ permutedims( + a, (ntuple(identity, k)..., reverse(ntuple(i -> i + k, 5 - k))...) + ) + end + end + end=# # convert(CuArray broken + @timedtestset "Full trace: test self-consistency" begin + t = CUDA.rand(ComplexF64, V1 ⊗ V2' ⊗ V2 ⊗ V1') + CUDA.@allowscalar begin + t2 = permute(t, ((1, 2), (4, 3))) + s = @constinferred tr(t2) + @test conj(s) ≈ tr(t2') + if !isdual(V1) + t2 = twist!(t2, 1) + end + if isdual(V2) + t2 = twist!(t2, 2) + end + ss = tr(t2) + @tensor s2 = t[a, b, b, a] + @tensor t3[a, b] := t[a, c, c, b] + @tensor s3 = t3[a, a] + end + @test ss ≈ s2 + @test ss ≈ s3 + end + @timedtestset "Partial trace: test self-consistency" begin + t = CUDA.rand(ComplexF64, V1 ⊗ V2' ⊗ V3 ⊗ V2 ⊗ V1' ⊗ V3') + @tensor t2[a, b] := t[c, d, b, d, c, a] + @tensor t4[a, b, c, d] := t[d, e, b, e, c, a] + @tensor t5[a, b] := t4[a, b, c, c] + @test t2 ≈ t5 + end + if BraidingStyle(I) isa Bosonic && hasfusiontensor(I) + @timedtestset "Trace: test via conversion" begin + t = CUDA.rand(ComplexF64, V1 ⊗ V2' ⊗ V3 ⊗ V2 ⊗ V1' ⊗ V3') + CUDA.@allowscalar begin + @tensor t2[a, b] := t[c, d, b, d, c, a] + @tensor t3[a, b] := ad(t)[c, d, b, d, c, a] + end + @test t3 ≈ ad(t2) + end + end + @timedtestset "Trace and contraction" begin + t1 = CUDA.rand(ComplexF64, V1 ⊗ V2 ⊗ V3) + t2 = CUDA.rand(ComplexF64, V2' ⊗ V4 ⊗ V1') + CUDA.@allowscalar begin + t3 = t1 ⊗ t2 + @tensor ta[a, b] := t1[x, y, a] * t2[y, b, x] + @tensor tb[a, b] := t3[x, y, a, y, b, x] + end + @test ta ≈ tb + end + if BraidingStyle(I) isa Bosonic && hasfusiontensor(I) + @timedtestset "Tensor contraction: test via conversion" begin + A1 = CUDA.randn(ComplexF64, V1' * V2', V3') + A2 = CUDA.randn(ComplexF64, V3 * V4, V5) + rhoL = CUDA.randn(ComplexF64, V1, V1) + rhoR = CUDA.randn(ComplexF64, V5, V5)' # test adjoint tensor + H = CUDA.randn(ComplexF64, V2 * V4, V2 * V4) + CUDA.@allowscalar begin + @tensor HrA12[a, s1, s2, c] := rhoL[a, a'] * conj(A1[a', t1, b]) * + A2[b, t2, c'] * rhoR[c', c] * + H[s1, s2, t1, t2] + + @tensor HrA12array[a, s1, s2, c] := ad(rhoL)[a, a'] * + conj(ad(A1)[a', t1, b]) * + ad(A2)[b, t2, c'] * + ad(rhoR)[c', c] * + ad(H)[s1, s2, t1, t2] + end + @test HrA12array ≈ ad(HrA12) + end + end + @timedtestset "Index flipping: test flipping inverse" begin + t = CUDA.rand(ComplexF64, V1 ⊗ V1' ← V1' ⊗ V1) + for i in 1:4 + CUDA.@allowscalar begin + @test t ≈ flip(flip(t, i), i; inv = true) + @test t ≈ flip(flip(t, i; inv = true), i) + end + end + end + #=@timedtestset "Index flipping: test via explicit flip" begin + t = CUDA.rand(ComplexF64, V1 ⊗ V1' ← V1' ⊗ V1) + F1 = unitary(flip(V1), V1) + + CUDA.@allowscalar begin + @tensor tf[a, b; c, d] := F1[a, a'] * t[a', b; c, d] + @test flip(t, 1) ≈ tf + @tensor tf[a, b; c, d] := conj(F1[b, b']) * t[a, b'; c, d] + @test twist!(flip(t, 2), 2) ≈ tf + @tensor tf[a, b; c, d] := F1[c, c'] * t[a, b; c', d] + @test flip(t, 3) ≈ tf + @tensor tf[a, b; c, d] := conj(F1[d, d']) * t[a, b; c, d'] + @test twist!(flip(t, 4), 4) ≈ tf + end + end=# # TODO + @timedtestset "Index flipping: test via contraction" begin + t1 = CUDA.rand(ComplexF64, V1 ⊗ V2 ⊗ V3 ← V4) + t2 = CUDA.rand(ComplexF64, V2' ⊗ V5 ← V4' ⊗ V1) + CUDA.@allowscalar begin + @tensor ta[a, b] := t1[x, y, a, z] * t2[y, b, z, x] + @tensor tb[a, b] := flip(t1, 1)[x, y, a, z] * flip(t2, 4)[y, b, z, x] + @test ta ≈ tb + @tensor tb[a, b] := flip(t1, (2, 4))[x, y, a, z] * flip(t2, (1, 3))[y, b, z, x] + @test ta ≈ tb + @tensor tb[a, b] := flip(t1, (1, 2, 4))[x, y, a, z] * flip(t2, (1, 3, 4))[y, b, z, x] + @tensor tb[a, b] := flip(t1, (1, 3))[x, y, a, z] * flip(t2, (2, 4))[y, b, z, x] + @test flip(ta, (1, 2)) ≈ tb + end + end + @timedtestset "Multiplication of isometries: test properties" begin + W2 = V4 ⊗ V5 + W1 = W2 ⊗ (oneunit(V1) ⊕ oneunit(V1)) + for T in (Float64, ComplexF64) + t1 = randisometry(T, CuMatrix{T}, W1, W2) + t2 = randisometry(T, CuMatrix{T}, W2 ← W2) + @test isisometric(t1) + @test isunitary(t2) + P = t1 * t1' + @test P * P ≈ P + end + end + @timedtestset "Multiplication and inverse: test compatibility" begin + W1 = V1 ⊗ V2 ⊗ V3 + W2 = V4 ⊗ V5 + for T in (Float64, ComplexF64) + t1 = CUDA.rand(T, W1, W1) + t2 = CUDA.rand(T, W2, W2) + t = CUDA.rand(T, W1, W2) + @test t1 * (t1 \ t) ≈ t + @test (t / t2) * t2 ≈ t + @test t1 \ one(t1) ≈ inv(t1) + @test one(t1) / t1 ≈ pinv(t1) + @test_throws SpaceMismatch inv(t) + @test_throws SpaceMismatch t2 \ t + @test_throws SpaceMismatch t / t1 + tp = pinv(t) * t + @test tp ≈ tp * tp + end + end + #=if BraidingStyle(I) isa Bosonic && hasfusiontensor(I) + @timedtestset "Multiplication and inverse: test via conversion" begin + W1 = V1 ⊗ V2 ⊗ V3 + W2 = V4 ⊗ V5 + for T in (Float32, Float64, ComplexF32, ComplexF64) + t1 = CUDA.rand(T, W1, W1) + t2 = CUDA.rand(T, W2, W2) + t = CUDA.rand(T, W1, W2) + d1 = dim(W1) + d2 = dim(W2) + At1 = reshape(convert(CuArray, t1), d1, d1) + At2 = reshape(convert(CuArray, t2), d2, d2) + At = reshape(convert(CuArray, t), d1, d2) + @test reshape(convert(CuArray, t1 * t), d1, d2) ≈ At1 * At + @test reshape(convert(CuArray, t1' * t), d1, d2) ≈ At1' * At + @test reshape(convert(CuArray, t2 * t'), d2, d1) ≈ At2 * At' + @test reshape(convert(CuArray, t2' * t'), d2, d1) ≈ At2' * At' + + @test reshape(convert(CuArray, inv(t1)), d1, d1) ≈ inv(At1) + @test reshape(convert(CuArray, pinv(t)), d2, d1) ≈ pinv(At) + + if T == Float32 || T == ComplexF32 + continue + end + + @test reshape(convert(CuArray, t1 \ t), d1, d2) ≈ At1 \ At + @test reshape(convert(CuArray, t1' \ t), d1, d2) ≈ At1' \ At + @test reshape(convert(CuArray, t2 \ t'), d2, d1) ≈ At2 \ At' + @test reshape(convert(CuArray, t2' \ t'), d2, d1) ≈ At2' \ At' + + @test reshape(convert(CuArray, t2 / t), d2, d1) ≈ At2 / At + @test reshape(convert(CuArray, t2' / t), d2, d1) ≈ At2' / At + @test reshape(convert(CuArray, t1 / t'), d1, d2) ≈ At1 / At' + @test reshape(convert(CuArray, t1' / t'), d1, d2) ≈ At1' / At' + end + end + end=# # convert(CuArray broken + if BraidingStyle(I) isa Bosonic && hasfusiontensor(I) + @timedtestset "Tensor functions" begin + W = V1 ⊗ V2 + for T in (Float64, ComplexF64) + t = CUDA.randn(T, W, W) + s = dim(W) + CUDA.@allowscalar begin + #=@test (@constinferred sqrt(t))^2 ≈ t + @test ad(sqrt(t)) ≈ sqrt(ad(t)) + + expt = @constinferred exp(t) + @test ad(expt) ≈ exp(ad(t)) + + @test exp(@constinferred log(expt)) ≈ expt + @test ad(log(expt)) ≈ log(ad(expt)) + + @test (@constinferred cos(t))^2 + (@constinferred sin(t))^2 ≈ + id(storagetype(t), W) + @test (@constinferred tan(t)) ≈ sin(t) / cos(t) + @test (@constinferred cot(t)) ≈ cos(t) / sin(t) + @test (@constinferred cosh(t))^2 - (@constinferred sinh(t))^2 ≈ + id(storagetype(t), W) + @test (@constinferred tanh(t)) ≈ sinh(t) / cosh(t) + @test (@constinferred coth(t)) ≈ cosh(t) / sinh(t) + + t1 = sin(t) + @test sin(@constinferred asin(t1)) ≈ t1 + t2 = cos(t) + @test cos(@constinferred acos(t2)) ≈ t2 + t3 = sinh(t) + @test sinh(@constinferred asinh(t3)) ≈ t3 + t4 = cosh(t) + @test cosh(@constinferred acosh(t4)) ≈ t4 + t5 = tan(t) + @test tan(@constinferred atan(t5)) ≈ t5 + t6 = cot(t) + @test cot(@constinferred acot(t6)) ≈ t6 + t7 = tanh(t) + @test tanh(@constinferred atanh(t7)) ≈ t7 + t8 = coth(t) + @test coth(@constinferred acoth(t8)) ≈ t8 + =# #exp not supported for CuArray + end + end + end + end + # Sylvester not defined for CUDA + # @timedtestset "Sylvester equation" begin + # for T in (Float32, ComplexF64) + # tA = CUDA.rand(T, V1 ⊗ V3, V1 ⊗ V3) + # tB = CUDA.rand(T, V2 ⊗ V4, V2 ⊗ V4) + # tA = 3 // 2 * leftorth(tA; alg=Polar())[1] + # tB = 1 // 5 * leftorth(tB; alg=Polar())[1] + # tC = CUDA.rand(T, V1 ⊗ V3, V2 ⊗ V4) + # t = @constinferred sylvester(tA, tB, tC) + # @test codomain(t) == V1 ⊗ V3 + # @test domain(t) == V2 ⊗ V4 + # @test norm(tA * t + t * tB + tC) < + # (norm(tA) + norm(tB) + norm(tC)) * eps(real(T))^(2 / 3) + # if BraidingStyle(I) isa Bosonic && hasfusiontensor(I) + # matrix(x) = reshape(convert(Array, x), dim(codomain(x)), dim(domain(x))) + # @test matrix(t) ≈ sylvester(matrix(tA), matrix(tB), matrix(tC)) + # end + # end + # end + # + # TODO + @timedtestset "Tensor product: test via norm preservation" begin + for T in (Float32, ComplexF64) + t1 = CUDA.rand(T, V2 ⊗ V3 ⊗ V1, V1 ⊗ V2) + t2 = CUDA.rand(T, V2 ⊗ V1 ⊗ V3, V1 ⊗ V1) + CUDA.@allowscalar begin + t = @constinferred (t1 ⊗ t2) + end + @test norm(t) ≈ norm(t1) * norm(t2) + end + end + if BraidingStyle(I) isa Bosonic && hasfusiontensor(I) + @timedtestset "Tensor product: test via conversion" begin + for T in (Float32, ComplexF64) + t1 = CUDA.rand(T, V2 ⊗ V3 ⊗ V1, V1) + t2 = CUDA.rand(T, V2 ⊗ V1 ⊗ V3, V2) + d1 = dim(codomain(t1)) + d2 = dim(codomain(t2)) + d3 = dim(domain(t1)) + d4 = dim(domain(t2)) + CUDA.@allowscalar begin + t = @constinferred (t1 ⊗ t2) + At = ad(t) + @test ad(t) ≈ ad(t1) ⊗ ad(t2) + end + end + end + end + @timedtestset "Tensor product: test via tensor contraction" begin + for T in (Float32, ComplexF64) + t1 = CUDA.rand(T, V2 ⊗ V3 ⊗ V1) + t2 = CUDA.rand(T, V2 ⊗ V1 ⊗ V3) + CUDA.@allowscalar begin + t = @constinferred (t1 ⊗ t2) + @tensor t′[1, 2, 3, 4, 5, 6] := t1[1, 2, 3] * t2[4, 5, 6] + # @test t ≈ t′ # TODO broken for symmetry: Irrep[ℤ₃] + end + end + end + end + TensorKit.empty_globalcaches!() +end + +@timedtestset "Deligne tensor product: test via conversion" begin + Vlists1 = (Vtr,) # VSU₂) + Vlists2 = (Vtr,) # Vℤ₂) + @testset for Vlist1 in Vlists1, Vlist2 in Vlists2 + V1, V2, V3, V4, V5 = Vlist1 + W1, W2, W3, W4, W5 = Vlist2 + for T in (Float32, ComplexF64) + t1 = CUDA.rand(T, V1 ⊗ V2, V3' ⊗ V4) + t2 = CUDA.rand(T, W2, W1 ⊗ W1') + CUDA.@allowscalar begin + t = @constinferred (t1 ⊠ t2) + end + d1 = dim(codomain(t1)) + d2 = dim(codomain(t2)) + d3 = dim(domain(t1)) + d4 = dim(domain(t2)) + CUDA.@allowscalar begin + @test ad(t1) ⊠ ad(t2) ≈ ad(t1 ⊠ t2) + end + end + end +end diff --git a/test/runtests.jl b/test/runtests.jl index 854991a7f..2c63a8dde 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -2,8 +2,6 @@ # ------------ using ArgParse: ArgParse using SafeTestsets: @safetestset -# using CUDA: CUDA -# using AMDGPU: AMDGPU function parse_commandline(args = ARGS) s = ArgParse.ArgParseSettings() @@ -49,9 +47,15 @@ istestfile(fn) = endswith(fn, ".jl") && !contains(fn, "setup") # handle GPU cases separately if group == "cuda" - # CUDA.functional() || continue + using CUDA + CUDA.functional() || continue + @time include("cuda/tensors.jl") + @time include("cuda/factorizations.jl") elseif group == "amd" - # AMDGPU.functional() || continue + using AMDGPU + AMDGPU.functional() || continue + #@time include("amd/tensors.jl") + @time include("amd/factorizations.jl") elseif is_buildkite continue end