diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 6c414a31b..2b7905fc4 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -15,12 +15,12 @@ steps: queue: "juliagpu" cuda: "*" if: build.message !~ /\[skip tests\]/ - timeout_in_minutes: 30 + timeout_in_minutes: 60 matrix: setup: julia: - "1.10" - - "1.11" + - "1.12" - label: "Julia {{matrix.julia}} -- AMDGPU" plugins: @@ -36,9 +36,9 @@ steps: rocm: "*" rocmgpu: "*" if: build.message !~ /\[skip tests\]/ - timeout_in_minutes: 30 + timeout_in_minutes: 60 matrix: setup: julia: - "1.10" - - "1.11" + - "1.12" diff --git a/Project.toml b/Project.toml index 6c22acc60..f56b35ba7 100644 --- a/Project.toml +++ b/Project.toml @@ -18,23 +18,29 @@ TupleTools = "9d95972d-f1c8-5527-a6e0-b4b365fa01f6" VectorInterface = "409d34a3-91d5-4945-b6ec-7529ddf182d8" [weakdeps] +CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000" +cuTENSOR = "011b41b2-24ef-40a8-b3eb-fa098493e9e1" [extensions] +TensorKitCUDAExt = ["CUDA", "cuTENSOR"] TensorKitChainRulesCoreExt = "ChainRulesCore" TensorKitFiniteDifferencesExt = "FiniteDifferences" [compat] +Adapt = "4" Aqua = "0.6, 0.7, 0.8" ArgParse = "1.2.0" +CUDA = "5.9" ChainRulesCore = "1" ChainRulesTestUtils = "1" Combinatorics = "1" FiniteDifferences = "0.12" +GPUArrays = "11.3.1" LRUCache = "1.0.2" LinearAlgebra = "1" -MatrixAlgebraKit = "0.6.0" +MatrixAlgebraKit = "0.6.1" OhMyThreads = "0.8.0" Printf = "1" Random = "1" @@ -48,21 +54,26 @@ TestExtras = "0.2,0.3" TupleTools = "1.1" VectorInterface = "0.4.8, 0.5" Zygote = "0.7" +cuTENSOR = "2" julia = "1.10" [extras] -ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63" +Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595" +ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63" +CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" ChainRulesTestUtils = "cdddcdb0-9152-4a09-a978-84456f9df70a" Combinatorics = "861a8166-3701-5b0c-9a16-15d98fcdc6aa" FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000" +GPUArrays = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" SafeTestsets = "1bc83da4-3b8d-516f-aca4-4fe02f6d838f" TensorOperations = "6aa20fa7-93e2-5fca-9bc0-fbd0db3c71a2" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" TestExtras = "5ed8adda-3752-4e41-b88a-e8b09835ee3a" Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" +cuTENSOR = "011b41b2-24ef-40a8-b3eb-fa098493e9e1" [targets] -test = ["ArgParse", "Aqua", "Combinatorics", "LinearAlgebra", "TensorOperations", "Test", "TestExtras", "SafeTestsets", "ChainRulesCore", "ChainRulesTestUtils", "FiniteDifferences", "Zygote"] +test = ["ArgParse", "Adapt", "Aqua", "Combinatorics", "CUDA", "cuTENSOR", "GPUArrays", "LinearAlgebra", "SafeTestsets", "TensorOperations", "Test", "TestExtras", "ChainRulesCore", "ChainRulesTestUtils", "FiniteDifferences", "Zygote"] diff --git a/ext/TensorKitCUDAExt/TensorKitCUDAExt.jl b/ext/TensorKitCUDAExt/TensorKitCUDAExt.jl new file mode 100644 index 000000000..417970a02 --- /dev/null +++ b/ext/TensorKitCUDAExt/TensorKitCUDAExt.jl @@ -0,0 +1,21 @@ +module TensorKitCUDAExt + +using CUDA, CUDA.CUBLAS, CUDA.CUSOLVER, LinearAlgebra +using CUDA: @allowscalar +using cuTENSOR: cuTENSOR +import CUDA: rand as curand, rand! as curand!, randn as curandn, randn! as curandn! 
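+# NB: `import ... as` aliases CUDA's own `rand`/`randn` (not `Base.rand`), so the
+# methods defined in cutensormap.jl extend CUDA's functions for TensorKit's own
+# space types, without committing type piracy.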
+ +using TensorKit +using TensorKit.Factorizations +using TensorKit.Strided +using TensorKit.Factorizations: AbstractAlgorithm +using TensorKit: SectorDict, tensormaptype, scalar, similarstoragetype, AdjointTensorMap, scalartype, project_symmetric_and_check +import TensorKit: randisometry, rand, randn + +using TensorKit: MatrixAlgebraKit + +using Random + +include("cutensormap.jl") + +end diff --git a/ext/TensorKitCUDAExt/cutensormap.jl b/ext/TensorKitCUDAExt/cutensormap.jl new file mode 100644 index 000000000..b4aa7f7a2 --- /dev/null +++ b/ext/TensorKitCUDAExt/cutensormap.jl @@ -0,0 +1,166 @@ +const CuTensorMap{T, S, N₁, N₂} = TensorMap{T, S, N₁, N₂, CuVector{T, CUDA.DeviceMemory}} +const CuTensor{T, S, N} = CuTensorMap{T, S, N, 0} + +const AdjointCuTensorMap{T, S, N₁, N₂} = AdjointTensorMap{T, S, N₁, N₂, CuTensorMap{T, S, N₁, N₂}} + +function CuTensorMap(t::TensorMap{T, S, N₁, N₂, A}) where {T, S, N₁, N₂, A} + return CuTensorMap{T, S, N₁, N₂}(CuArray{T}(t.data), space(t)) +end + +# project_symmetric! doesn't yet work for GPU types, so do this on the host, then copy +function TensorKit.project_symmetric_and_check(::Type{T}, ::Type{A}, data::AbstractArray, V::TensorMapSpace; tol = sqrt(eps(real(float(eltype(data)))))) where {T, A <: CuVector{T}} + h_t = TensorKit.TensorMapWithStorage{T, Vector{T}}(undef, V) + h_t = TensorKit.project_symmetric!(h_t, Array(data)) + # verify result + isapprox(Array(reshape(data, dims(h_t))), convert(Array, h_t); atol = tol) || + throw(ArgumentError("Data has non-zero elements at incompatible positions")) + return TensorKit.TensorMapWithStorage{T, A}(A(h_t.data), V) +end + +for (fname, felt) in ((:zeros, :zero), (:ones, :one)) + @eval begin + function CUDA.$fname( + codomain::TensorSpace{S}, + domain::TensorSpace{S} = one(codomain) + ) where {S <: IndexSpace} + return CUDA.$fname(codomain ← domain) + end + function CUDA.$fname( + ::Type{T}, codomain::TensorSpace{S}, + domain::TensorSpace{S} = one(codomain) + ) where {T, S <: IndexSpace} + return CUDA.$fname(T, codomain ← domain) + end + CUDA.$fname(V::TensorMapSpace) = CUDA.$fname(Float64, V) + function CUDA.$fname(::Type{T}, V::TensorMapSpace) where {T} + t = CuTensorMap{T}(undef, V) + fill!(t, $felt(T)) + return t + end + end +end + +for randfun in (:curand, :curandn) + randfun! = Symbol(randfun, :!) 
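+    # mirror TensorKit's own constructor boilerplate for CUDA's rand/randn:
+    # accept (codomain, domain) arguments, fill in the default element type
+    # (Float64) and default RNG, then fill the freshly allocated CuTensorMap
+    # block by block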
+    @eval begin
+        # converting `codomain` and `domain` into `HomSpace`
+        function $randfun(
+                codomain::TensorSpace{S},
+                domain::TensorSpace{S} = one(codomain),
+            ) where {S <: IndexSpace}
+            return $randfun(codomain ← domain)
+        end
+        function $randfun(
+                ::Type{T}, codomain::TensorSpace{S},
+                domain::TensorSpace{S} = one(codomain),
+            ) where {T, S <: IndexSpace}
+            return $randfun(T, codomain ← domain)
+        end
+        function $randfun(
+                rng::Random.AbstractRNG, ::Type{T},
+                codomain::TensorSpace{S},
+                domain::TensorSpace{S} = one(codomain),
+            ) where {T, S <: IndexSpace}
+            return $randfun(rng, T, codomain ← domain)
+        end
+
+        # filling in default eltype
+        $randfun(V::TensorMapSpace) = $randfun(Float64, V)
+        function $randfun(rng::Random.AbstractRNG, V::TensorMapSpace)
+            return $randfun(rng, Float64, V)
+        end
+
+        # filling in default rng
+        function $randfun(::Type{T}, V::TensorMapSpace) where {T}
+            return $randfun(Random.default_rng(), T, V)
+        end
+
+        # implementation
+        function $randfun(
+                rng::Random.AbstractRNG, ::Type{T},
+                V::TensorMapSpace
+            ) where {T}
+            t = CuTensorMap{T}(undef, V)
+            $randfun!(rng, t)
+            return t
+        end
+
+        function $randfun!(rng::Random.AbstractRNG, t::CuTensorMap)
+            for (_, b) in blocks(t)
+                $randfun!(rng, b)
+            end
+            return t
+        end
+    end
+end
+
+# Scalar implementation
+#-----------------------
+function TensorKit.scalar(t::CuTensorMap{T, S, 0, 0}) where {T, S}
+    inds = findall(!iszero, t.data)
+    return isempty(inds) ? zero(scalartype(t)) : @allowscalar @inbounds t.data[only(inds)]
+end
+
+function Base.convert(
+        TT::Type{CuTensorMap{T, S, N₁, N₂}},
+        t::AbstractTensorMap{<:Any, S, N₁, N₂}
+    ) where {T, S, N₁, N₂}
+    if typeof(t) === TT
+        return t
+    else
+        tnew = TT(undef, space(t))
+        return copy!(tnew, t)
+    end
+end
+
+function LinearAlgebra.isposdef(t::CuTensorMap)
+    domain(t) == codomain(t) ||
+        throw(SpaceMismatch("`isposdef` requires domain and codomain to be the same"))
+    InnerProductStyle(spacetype(t)) === EuclideanInnerProduct() || return false
+    for (c, b) in blocks(t)
+        # do our own hermitian check
+        isherm = MatrixAlgebraKit.ishermitian(b; atol = eps(real(eltype(b))), rtol = eps(real(eltype(b))))
+        isherm || return false
+        isposdef(Hermitian(b)) || return false
+    end
+    return true
+end
+
+function Base.promote_rule(
+        ::Type{<:TT₁},
+        ::Type{<:TT₂}
+    ) where {
+        S, N₁, N₂, TTT₁, TTT₂,
+        TT₁ <: CuTensorMap{TTT₁, S, N₁, N₂},
+        TT₂ <: CuTensorMap{TTT₂, S, N₁, N₂},
+    }
+    T = TensorKit.VectorInterface.promote_add(TTT₁, TTT₂)
+    return CuTensorMap{T, S, N₁, N₂}
+end
+
+# CuTensorMap exponentiation:
+function TensorKit.exp!(t::CuTensorMap)
+    domain(t) == codomain(t) ||
+        error("Exponential of a tensor only exists when domain == codomain.")
+    !MatrixAlgebraKit.ishermitian(t) && throw(ArgumentError("`exp!` is currently only supported on hermitian CUDA tensors"))
+    for (c, b) in blocks(t)
+        copy!(b, parent(Base.exp(Hermitian(b))))
+    end
+    return t
+end
+
+# functions that don't map ℝ to (a subset of) ℝ
+for f in (:sqrt, :log, :asin, :acos, :acosh, :atanh, :acoth)
+    sf = string(f)
+    @eval function Base.$f(t::CuTensorMap)
+        domain(t) == codomain(t) ||
+            throw(SpaceMismatch("`$($sf)` of a tensor only exists when domain == codomain"))
+        !MatrixAlgebraKit.ishermitian(t) && throw(ArgumentError("`$($sf)` is currently only supported on hermitian CUDA tensors"))
+        T = complex(float(scalartype(t)))
+        tf = similar(t, T)
+        for (c, b) in blocks(t)
+            copy!(block(tf, c), parent($f(Hermitian(b))))
+        end
+        return tf
+    end
+end
diff --git a/src/tensors/linalg.jl b/src/tensors/linalg.jl
index
44e7be002..4b2c7d4ee 100644 --- a/src/tensors/linalg.jl +++ b/src/tensors/linalg.jl @@ -270,20 +270,11 @@ function _norm(blockiter, p::Real, init::Real) return mapreduce(max, blockiter; init = init) do (c, b) return isempty(b) ? init : oftype(init, LinearAlgebra.normInf(b)) end - elseif p == 2 - n² = mapreduce(+, blockiter; init = init) do (c, b) - return isempty(b) ? init : oftype(init, dim(c) * LinearAlgebra.norm2(b)^2) + elseif p > 0 # finite positive p + np = sum(blockiter; init) do (c, b) + return oftype(init, dim(c) * norm(b, p)^p) end - return sqrt(n²) - elseif p == 1 - return mapreduce(+, blockiter; init = init) do (c, b) - return isempty(b) ? init : oftype(init, dim(c) * sum(abs, b)) - end - elseif p > 0 - nᵖ = mapreduce(+, blockiter; init = init) do (c, b) - return isempty(b) ? init : oftype(init, dim(c) * LinearAlgebra.normp(b, p)^p) - end - return (nᵖ)^inv(oftype(nᵖ, p)) + return np^(inv(oftype(np, p))) else msg = "Norm with non-positive p is not defined for `AbstractTensorMap`" throw(ArgumentError(msg)) @@ -298,8 +289,8 @@ function LinearAlgebra.rank( ) r = 0 * dim(first(allunits(sectortype(t)))) dim(t) == 0 && return r - S = LinearAlgebra.svdvals(t) - tol = max(atol, rtol * maximum(first, values(S))) + S = MatrixAlgebraKit.svd_vals(t) + tol = max(atol, rtol * maximum(parent(S))) for (c, b) in pairs(S) if !isempty(b) r += dim(c) * count(>(tol), b) @@ -316,9 +307,9 @@ function LinearAlgebra.cond(t::AbstractTensorMap, p::Real = 2) throw(SpaceMismatch("`cond` requires domain and codomain to be the same")) return zero(real(float(scalartype(t)))) end - S = LinearAlgebra.svdvals(t) - maxS = maximum(first, values(S)) - minS = minimum(last, values(S)) + S = MatrixAlgebraKit.svd_vals(t) + maxS = maximum(parent(S)) + minS = minimum(parent(S)) return iszero(maxS) ? oftype(maxS, Inf) : (maxS / minS) else throw(ArgumentError("cond currently only defined for p=2")) diff --git a/src/tensors/tensor.jl b/src/tensors/tensor.jl index 452072a87..300654632 100644 --- a/src/tensors/tensor.jl +++ b/src/tensors/tensor.jl @@ -198,6 +198,15 @@ TensorMap(data::AbstractArray, codom::TensorSpace, dom::TensorSpace; kwargs...) Tensor(data::AbstractArray, codom::TensorSpace; kwargs...) = TensorMap(data, codom ← one(codom); kwargs...) +function project_symmetric_and_check(::Type{T}, ::Type{A}, data::AbstractArray, V::TensorMapSpace; tol = sqrt(eps(real(float(eltype(data)))))) where {T, A <: DenseVector{T}} + t = TensorMapWithStorage{T, A}(undef, V) + t = project_symmetric!(t, data) + # verify result + isapprox(reshape(data, dims(t)), convert(Array, t); atol = tol) || + throw(ArgumentError("Data has non-zero elements at incompatible positions")) + return t +end + function TensorMapWithStorage{T, A}( data::AbstractArray, V::TensorMapSpace; tol = sqrt(eps(real(float(eltype(data))))) ) where {T, A} @@ -209,15 +218,7 @@ function TensorMapWithStorage{T, A}( sectortype(V) === Trivial && return tensormaptype(spacetype(V), numout(V), numin(V), A)(reshape(data, length(data)), V) - # do projection - t = TensorMapWithStorage{T, A}(undef, V) - t = project_symmetric!(t, data) - - # verify result - isapprox(reshape(data, dims(t)), convert(Array, t); atol = tol) || - throw(ArgumentError("Data has non-zero elements at incompatible positions")) - - return t + return project_symmetric_and_check(T, A, data, V; tol) end TensorMapWithStorage{T, A}(data::AbstractArray, codom::TensorSpace, dom::TensorSpace; kwargs...) where {T, A} = TensorMapWithStorage{T, A}(data, codom ← dom; kwargs...) 
@@ -315,11 +316,15 @@ end
 for randf in (:rand, :randn, :randexp, :randisometry)
     _docstr = """
-        $randf([rng=default_rng()], [T=Float64], codomain::ProductSpace{S,N₁},
+        $randf([rng=default_rng()], [TorA=Float64], codomain::ProductSpace{S,N₁},
                domain::ProductSpace{S,N₂}) where {S,N₁,N₂,T} -> t
-        $randf([rng=default_rng()], [T=Float64], codomain ← domain) -> t
-
+        $randf([rng=default_rng()], [TorA=Float64], codomain ← domain) -> t
+
     Generate a tensor `t` with entries generated by `$randf`.
+    The type `TorA` can be used to control the element type and
+    data type generated. For example, if `TorA` is a `CuVector{ComplexF32}`
+    or `ROCVector{Float64}`, then the final output `TensorMap` will have
+    that as its storage type.
 
     See also [`Random.$(randf)!`](@ref).
     """
@@ -348,25 +353,25 @@ for randf in (:rand, :randn, :randexp, :randisometry)
         return $randfun(codomain ← domain)
     end
     function $randfun(
-            ::Type{T}, codomain::TensorSpace{S}, domain::TensorSpace{S}
-        ) where {T, S <: IndexSpace}
-        return $randfun(T, codomain ← domain)
+            ::Type{TorA}, codomain::TensorSpace{S}, domain::TensorSpace{S}
+        ) where {TorA, S <: IndexSpace}
+        return $randfun(TorA, codomain ← domain)
     end
     function $randfun(
-            rng::Random.AbstractRNG, ::Type{T}, codomain::TensorSpace{S}, domain::TensorSpace{S}
-        ) where {T, S <: IndexSpace}
-        return $randfun(rng, T, codomain ← domain)
+            rng::Random.AbstractRNG, ::Type{TorA}, codomain::TensorSpace{S}, domain::TensorSpace{S}
+        ) where {TorA, S <: IndexSpace}
+        return $randfun(rng, TorA, codomain ← domain)
    end
 
     # accepting single `TensorSpace`
     $randfun(codomain::TensorSpace) = $randfun(codomain ← one(codomain))
-    function $randfun(::Type{T}, codomain::TensorSpace) where {T}
-        return $randfun(T, codomain ← one(codomain))
+    function $randfun(::Type{TorA}, codomain::TensorSpace) where {TorA}
+        return $randfun(TorA, codomain ← one(codomain))
    end
     function $randfun(
-            rng::Random.AbstractRNG, ::Type{T}, codomain::TensorSpace
-        ) where {T}
-        return $randfun(rng, T, codomain ← one(domain))
+            rng::Random.AbstractRNG, ::Type{TorA}, codomain::TensorSpace
+        ) where {TorA}
+        return $randfun(rng, TorA, codomain ← one(codomain))
    end
 
     # filling in default eltype
@@ -376,16 +381,16 @@ for randf in (:rand, :randn, :randexp, :randisometry)
     end
 
     # filling in default rng
-    function $randfun(::Type{T}, V::TensorMapSpace) where {T}
-        return $randfun(Random.default_rng(), T, V)
+    function $randfun(::Type{TorA}, V::TensorMapSpace) where {TorA}
+        return $randfun(Random.default_rng(), TorA, V)
    end
     $randfun!(t::AbstractTensorMap) = $randfun!(Random.default_rng(), t)
 
     # implementation
     function $randfun(
-            rng::Random.AbstractRNG, ::Type{T}, V::TensorMapSpace
-        ) where {T}
-        t = TensorMap{T}(undef, V)
+            rng::Random.AbstractRNG, ::Type{TorA}, V::TensorMapSpace
+        ) where {TorA}
+        t = tensormaptype(spacetype(V), numout(V), numin(V), TorA)(undef, V)
         $randfun!(rng, t)
         return t
    end
@@ -399,12 +404,21 @@ for randf in (:rand, :randn, :randexp, :randisometry)
     end
 end
 
+# Moving arbitrary TensorMaps to CPU
+#-----------------------------
+to_cpu(t::TensorMapWithStorage{T, Vector{T}}) where {T} = t # no op
+to_cpu(t::TensorMap) = convert(TensorMapWithStorage{scalartype(t), similarstoragetype(scalartype(t))}, t)
+
 # Efficient copy constructors
 #-----------------------------
 Base.copy(t::TensorMap) = typeof(t)(copy(t.data), t.space)
 
 # Conversion between TensorMap and Dict, for read and write purpose
 #------------------------------------------------------------------
+# We want to store the block data using simple data types,
+# rather than reshaped views or some other wrapped array type.
+# Since this method is meant for storing data on disk, we can
+# freely collect data to the CPU
 function Base.convert(::Type{Dict}, t::AbstractTensorMap)
     d = Dict{Symbol, Any}()
     d[:codomain] = repr(codomain(t))
@@ -521,6 +535,11 @@ function Base.convert(::Type{TensorMap}, t::AbstractTensorMap)
     return copy!(TensorMap{scalartype(t)}(undef, space(t)), t)
 end
 
+function Base.convert(::Type{TensorMapWithStorage{T, A}}, t::TensorMap) where {T, A}
+    d_data = convert(A, t.data)
+    return TensorMapWithStorage{T, A}(d_data, space(t))
+end
+
 function Base.convert(
         TT::Type{TensorMap{T, S, N₁, N₂, A}}, t::AbstractTensorMap{<:Any, S, N₁, N₂}
     ) where {T, S, N₁, N₂, A}
@@ -535,7 +554,7 @@ end
 function Base.promote_rule(
         ::Type{<:TT₁}, ::Type{<:TT₂}
     ) where {S, N₁, N₂, TT₁ <: TensorMap{<:Any, S, N₁, N₂}, TT₂ <: TensorMap{<:Any, S, N₁, N₂}}
-    A = VectorInterface.promote_add(storagetype(TT₁), storagetype(TT₂))
-    T = scalartype(A)
-    return TensorMap{T, S, N₁, N₂, A}
+    T = VectorInterface.promote_add(scalartype(TT₁), scalartype(TT₂))
+    A = promote_storagetype(similarstoragetype(TT₁, T), similarstoragetype(TT₂, T))
+    return tensormaptype(S, N₁, N₂, A)
 end
diff --git a/test/cuda/tensors.jl b/test/cuda/tensors.jl
new file mode 100644
index 000000000..8a89a8ad8
--- /dev/null
+++ b/test/cuda/tensors.jl
@@ -0,0 +1,606 @@
+using Adapt, CUDA, cuTENSOR
+using Test, TestExtras
+using TensorKit, Combinatorics
+ad = adapt(Array)
+const CUDAExt = Base.get_extension(TensorKit, :TensorKitCUDAExt)
+@assert !isnothing(CUDAExt)
+const CuTensorMap = getglobal(CUDAExt, :CuTensorMap)
+using CUDA: rand as curand, rand! as curand!, randn as curandn, randn! as curandn!
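+# NB: `ad` adapts device-backed tensors to plain `Array` storage for the
+# host-side comparisons in the conversion tests below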
+ +@isdefined(TestSetup) || include("../setup.jl") +using .TestSetup + +for V in (Vtr, Vℤ₂, Vfℤ₂, Vℤ₃, VU₁, VfU₁, VCU₁, VSU₂, VfSU₂) #, VSU₃) + V1, V2, V3, V4, V5 = V + @assert V3 * V4 * V2 ≿ V1' * V5' # necessary for leftorth tests + @assert V3 * V4 ≾ V1' * V2' * V5' # necessary for rightorth tests +end + +spacelist = try + if ENV["CI"] == "true" + println("Detected running on CI") + if Sys.iswindows() + (Vtr, Vℤ₂, Vfℤ₂, Vℤ₃, VU₁, VfU₁, VCU₁, VSU₂) + elseif Sys.isapple() + (Vtr, Vℤ₂, Vfℤ₂, Vℤ₃, VfU₁, VfSU₂) #, VSU₃) + else + (Vtr, Vℤ₂, Vfℤ₂, VU₁, VCU₁, VSU₂, VfSU₂) #, VSU₃) + end + else + (Vtr, VU₁, VSU₂, Vfℤ₂) + end +catch + (Vtr, Vℤ₂, Vfℤ₂, Vℤ₃, VU₁, VfU₁, VCU₁, VSU₂, VfSU₂) #, VSU₃) +end + +for V in spacelist + I = sectortype(first(V)) + Istr = TensorKit.type_repr(I) + println("---------------------------------------") + println("CUDA Tensors with symmetry: $Istr") + println("---------------------------------------") + @timedtestset "Tensors with symmetry: $Istr" verbose = true begin + V1, V2, V3, V4, V5 = V + @timedtestset "Basic tensor properties" begin + W = V1 ⊗ V2 ⊗ V3 ⊗ V4 ⊗ V5 + # test default pass-throughs + for f in (CUDA.zeros, CUDA.ones, curand, curandn) + t = @constinferred f(W) + @test scalartype(t) == Float64 + @test codomain(t) == W + @test space(t) == (W ← one(W)) + @test domain(t) == one(W) + @test typeof(t) == TensorMap{Float64, spacetype(t), 5, 0, CuVector{Float64, CUDA.DeviceMemory}} + end + for f in (rand, randn) + t = @constinferred f(CuVector{Float64, CUDA.DeviceMemory}, W) + @test scalartype(t) == Float64 + @test codomain(t) == W + @test space(t) == (W ← one(W)) + @test domain(t) == one(W) + @test typeof(t) == TensorMap{Float64, spacetype(t), 5, 0, CuVector{Float64, CUDA.DeviceMemory}} + end + for f! in (curand!, curandn!) 
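+                # in-place variants: fill an existing tensor with random entries,
+                # without changing its space or storage type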
+ t = @constinferred CUDA.zeros(W) + f!(t) + @test scalartype(t) == Float64 + @test codomain(t) == W + @test space(t) == (W ← one(W)) + @test domain(t) == one(W) + @test typeof(t) == TensorMap{Float64, spacetype(t), 5, 0, CuVector{Float64, CUDA.DeviceMemory}} + end + for T in (Int, Float32, Float64, ComplexF32, ComplexF64) + t = @constinferred CUDA.zeros(T, W) + CUDA.@allowscalar begin + @test @constinferred(hash(t)) == hash(deepcopy(t)) + end + @test scalartype(t) == T + @test norm(t) == 0 + @test codomain(t) == W + @test space(t) == (W ← one(W)) + @test domain(t) == one(W) + @test typeof(t) == TensorMap{T, spacetype(t), 5, 0, CuVector{T, CUDA.DeviceMemory}} + # blocks + bs = @constinferred blocks(t) + (c, b1), state = @constinferred Nothing iterate(bs) + @test c == first(blocksectors(W)) + next = @constinferred Nothing iterate(bs, state) + b2 = @constinferred block(t, first(blocksectors(t))) + @test b1 == b2 + @test_broken eltype(bs) === Pair{typeof(c), typeof(b1)} + @test_broken typeof(b1) === TensorKit.blocktype(t) + @test typeof(c) === sectortype(t) + end + end + @timedtestset "Conversion to/from host" begin + W = V1 ⊗ V2 ⊗ V3 ⊗ V4 ⊗ V5 + for T in (Int, Float32, ComplexF64) + h_t = @constinferred rand(T, W) + t1 = convert(CuTensorMap{T}, h_t) + @test collect(t1.data) == h_t.data + @test space(t1) == space(h_t) + @test scalartype(t1) == T + @test codomain(t1) == W + @test space(t1) == (W ← one(W)) + @test domain(t1) == one(W) + t2 = CuTensorMap(h_t) + @test collect(t2.data) == h_t.data + @test space(t2) == space(h_t) + @test scalartype(t2) == T + @test codomain(t2) == W + @test space(t2) == (W ← one(W)) + @test domain(t2) == one(W) + end + end + @timedtestset "Tensor Dict conversion" begin + W = V1 ⊗ V2 ⊗ V3 ← V4 ⊗ V5 + for T in (Int, Float32, ComplexF64) + t = @constinferred CUDA.rand(T, W) + d = convert(Dict, t) + @test TensorKit.to_cpu(t) == convert(TensorMap, d) + end + end + @timedtestset "Basic linear algebra" begin + W = V1 ⊗ V2 ⊗ V3 ← V4 ⊗ V5 + for T in (Float32, ComplexF64) + t = @constinferred CUDA.rand(T, W) + @test scalartype(t) == T + @test space(t) == W + @test space(t') == W' + @test dim(t) == dim(space(t)) + @test codomain(t) == codomain(W) + @test domain(t) == domain(W) + # blocks for adjoint + bs = @constinferred blocks(t') + (c, b1), state = @constinferred Nothing iterate(bs) + @test c == first(blocksectors(W')) + next = @constinferred Nothing iterate(bs, state) + b2 = @constinferred block(t', first(blocksectors(t'))) + @test b1 == b2 + @test_broken eltype(bs) === Pair{typeof(c), typeof(b1)} + @test_broken typeof(b1) === TensorKit.blocktype(t') + @test typeof(c) === sectortype(t) + # linear algebra + @test isa(@constinferred(norm(t)), real(T)) + @test norm(t)^2 ≈ dot(t, t) + α = rand(T) + @test norm(α * t) ≈ abs(α) * norm(t) + @test norm(t + t, 2) ≈ 2 * norm(t, 2) + @test norm(t + t, 1) ≈ 2 * norm(t, 1) + @test norm(t + t, Inf) ≈ 2 * norm(t, Inf) + p = 3 * rand(Float64) + @test norm(t + t, p) ≈ 2 * norm(t, p) + @test norm(t) ≈ norm(t') + + t2 = @constinferred rand!(similar(t)) + β = rand(T) + #@test @constinferred(dot(β * t2, α * t)) ≈ conj(β) * α * conj(dot(t, t2)) # broken for Irrep[CU₁] + @test dot(β * t2, α * t) ≈ conj(β) * α * conj(dot(t, t2)) + @test dot(t2, t) ≈ conj(dot(t, t2)) + @test dot(t2, t) ≈ conj(dot(t2', t')) + @test dot(t2, t) ≈ dot(t', t2') + + i1 = @constinferred(isomorphism(CuVector{T, CUDA.DeviceMemory}, V1 ⊗ V2, V2 ⊗ V1)) + i2 = @constinferred(isomorphism(CuVector{T, CUDA.DeviceMemory}, V2 ⊗ V1, V1 ⊗ V2)) + @test i1 * i2 == 
@constinferred(id(CuVector{T, CUDA.DeviceMemory}, V1 ⊗ V2)) + @test i2 * i1 == @constinferred(id(CuVector{T, CUDA.DeviceMemory}, V2 ⊗ V1)) + w = @constinferred(isometry(CuVector{T, CUDA.DeviceMemory}, V1 ⊗ (oneunit(V1) ⊕ oneunit(V1)), V1)) + @test dim(w) == 2 * dim(V1 ← V1) + @test w' * w == id(CuVector{T, CUDA.DeviceMemory}, V1) + @test w * w' == (w * w')^2 + end + end + @timedtestset "Trivial space insertion and removal" begin + W = V1 ⊗ V2 ⊗ V3 ← V4 ⊗ V5 + for T in (Float32, ComplexF64) + t = @constinferred CUDA.rand(T, W) + t2 = @constinferred insertleftunit(t) + @test t2 == @constinferred insertrightunit(t) + @test numind(t2) == numind(t) + 1 + @test space(t2) == insertleftunit(space(t)) + @test scalartype(t2) === T + @test t.data === t2.data + @test @constinferred(removeunit(t2, $(numind(t2)))) == t + t3 = @constinferred insertleftunit(t; copy = true) + @test t3 == @constinferred insertrightunit(t; copy = true) + @test t.data !== t3.data + for (c, b) in blocks(t) + @test b == block(t3, c) + end + @test @constinferred(removeunit(t3, $(numind(t3)))) == t + t4 = @constinferred insertrightunit(t, 3; dual = true) + @test numin(t4) == numin(t) && numout(t4) == numout(t) + 1 + for (c, b) in blocks(t) + @test b == block(t4, c) + end + @test @constinferred(removeunit(t4, 4)) == t + t5 = @constinferred insertleftunit(t, 4; dual = true) + @test numin(t5) == numin(t) + 1 && numout(t5) == numout(t) + for (c, b) in blocks(t) + @test b == block(t5, c) + end + @test @constinferred(removeunit(t5, 4)) == t + end + end + if hasfusiontensor(I) + @timedtestset "Basic linear algebra: test via CPU" begin + W = V1 ⊗ V2 ⊗ V3 ← V4 ⊗ V5 + for T in (Float32, ComplexF64) + t = CUDA.rand(T, W) + t2 = @constinferred CUDA.rand!(similar(t)) + α = rand(T) + @test norm(t, 2) ≈ norm(TensorKit.to_cpu(t), 2) + @test dot(t2, t) ≈ dot(TensorKit.to_cpu(t2), TensorKit.to_cpu(t)) + @test TensorKit.to_cpu(α * t) ≈ α * TensorKit.to_cpu(t) + @test TensorKit.to_cpu(t + t) ≈ 2 * TensorKit.to_cpu(t) + end + end + @timedtestset "Real and imaginary parts" begin + W = V1 ⊗ V2 + for T in (Float64, ComplexF64, ComplexF32) + t = @constinferred CUDA.randn(T, W, W) + + tr = @constinferred real(t) + @test scalartype(tr) <: Real + @test real(TensorKit.to_cpu(t)) == TensorKit.to_cpu(tr) + @test storagetype(tr) == CuVector{real(T), CUDA.DeviceMemory} + + ti = @constinferred imag(t) + @test scalartype(ti) <: Real + @test imag(TensorKit.to_cpu(t)) == TensorKit.to_cpu(ti) + @test storagetype(ti) == CuVector{real(T), CUDA.DeviceMemory} + + tc = @inferred complex(t) + @test scalartype(tc) <: Complex + @test complex(TensorKit.to_cpu(t)) == TensorKit.to_cpu(tc) + @test storagetype(tc) == CuVector{complex(T), CUDA.DeviceMemory} + + tc2 = @inferred complex(tr, ti) + @test tc2 ≈ tc + @test storagetype(tc2) == CuVector{complex(T), CUDA.DeviceMemory} + end + end + end + @timedtestset "Tensor conversion" begin # TODO adjoint conversion methods don't work yet + W = V1 ⊗ V2 + t = @constinferred CUDA.randn(W ← W) + #@test typeof(convert(TensorMap, t')) == typeof(t) # TODO Adjoint not supported yet + tc = complex(t) + @test convert(typeof(tc), t) == tc + @test typeof(convert(typeof(tc), t)) == typeof(tc) + # @test typeof(convert(typeof(tc), t')) == typeof(tc) # TODO Adjoint not supported yet + @test Base.promote_typeof(t, tc) == typeof(tc) + @test Base.promote_typeof(tc, t) == typeof(tc + t) + end + #=@timedtestset "diag/diagm" begin + W = V1 ⊗ V2 ⊗ V3 ← V4 ⊗ V5 + t = CUDA.randn(ComplexF64, W) + d = LinearAlgebra.diag(t) + # TODO find a way to use CUDA here + 
D = LinearAlgebra.diagm(codomain(t), domain(t), d) + @test LinearAlgebra.isdiag(D) + @test LinearAlgebra.diag(D) == d + end=# + @timedtestset "Permutations: test via inner product invariance" begin + W = V1 ⊗ V2 ⊗ V3 ⊗ V4 ⊗ V5 + t = CUDA.rand(ComplexF64, W) + t′ = CUDA.randn!(similar(t)) + for k in 0:5 + for p in permutations(1:5) + p1 = ntuple(n -> p[n], k) + p2 = ntuple(n -> p[k + n], 5 - k) + CUDA.@allowscalar begin + t2 = @constinferred permute(t, (p1, p2)) + t2 = permute(t, (p1, p2)) + @test norm(t2) ≈ norm(t) + t2′ = permute(t′, (p1, p2)) + @test dot(t2′, t2) ≈ dot(t′, t) ≈ dot(transpose(t2′), transpose(t2)) + end + end + + CUDA.@allowscalar begin + t3 = @constinferred repartition(t, $k) + t3 = repartition(t, k) + @test norm(t3) ≈ norm(t) + t3′ = @constinferred repartition!(similar(t3), t′) + @test norm(t3′) ≈ norm(t′) + @test dot(t′, t) ≈ dot(t3′, t3) + end + end + end + if BraidingStyle(I) isa SymmetricBraiding + @timedtestset "Permutations: test via CPU" begin + W = V1 ⊗ V2 ⊗ V3 ⊗ V4 ⊗ V5 + t = CUDA.rand(ComplexF64, W) + for k in 0:5 + for p in permutations(1:5) + p1 = ntuple(n -> p[n], k) + p2 = ntuple(n -> p[k + n], 5 - k) + dt2 = CUDA.@allowscalar permute(t, (p1, p2)) + ht2 = permute(TensorKit.to_cpu(t), (p1, p2)) + @test ht2 == TensorKit.to_cpu(dt2) + end + + dt3 = CUDA.@allowscalar repartition(t, k) + ht3 = repartition(TensorKit.to_cpu(t), k) + @test ht3 == TensorKit.to_cpu(dt3) + end + end + end + @timedtestset "Full trace: test self-consistency" begin + t = CUDA.rand(ComplexF64, V1 ⊗ V2' ⊗ V2 ⊗ V1') + CUDA.@allowscalar begin + t2 = permute(t, ((1, 2), (4, 3))) + s = @constinferred tr(t2) + @test conj(s) ≈ tr(t2') + if !isdual(V1) + t2 = twist!(t2, 1) + end + if isdual(V2) + t2 = twist!(t2, 2) + end + ss = tr(t2) + @tensor s2 = t[a, b, b, a] + @tensor t3[a, b] := t[a, c, c, b] + @tensor s3 = t3[a, a] + end + @test ss ≈ s2 + @test ss ≈ s3 + end + @timedtestset "Partial trace: test self-consistency" begin + t = CUDA.rand(ComplexF64, V1 ⊗ V2' ⊗ V3 ⊗ V2 ⊗ V1' ⊗ V3') + @tensor t2[a, b] := t[c, d, b, d, c, a] + @tensor t4[a, b, c, d] := t[d, e, b, e, c, a] + @tensor t5[a, b] := t4[a, b, c, c] + @test t2 ≈ t5 + end + if BraidingStyle(I) isa Bosonic && hasfusiontensor(I) + @timedtestset "Trace: test via conversion" begin + t = CUDA.rand(ComplexF64, V1 ⊗ V2' ⊗ V3 ⊗ V2 ⊗ V1' ⊗ V3') + CUDA.@allowscalar begin + @tensor t2[a, b] := t[c, d, b, d, c, a] + @tensor t3[a, b] := ad(t)[c, d, b, d, c, a] + end + @test t3 ≈ ad(t2) + end + end + @timedtestset "Trace and contraction" begin + t1 = CUDA.rand(ComplexF64, V1 ⊗ V2 ⊗ V3) + t2 = CUDA.rand(ComplexF64, V2' ⊗ V4 ⊗ V1') + CUDA.@allowscalar begin + t3 = t1 ⊗ t2 + @tensor ta[a, b] := t1[x, y, a] * t2[y, b, x] + @tensor tb[a, b] := t3[x, y, a, y, b, x] + end + @test ta ≈ tb + end + #=if BraidingStyle(I) isa Bosonic && hasfusiontensor(I) + @timedtestset "Tensor contraction: test via CPU" begin + dA1 = CUDA.randn(ComplexF64, V1' * V2', V3') + dA2 = CUDA.randn(ComplexF64, V3 * V4, V5) + drhoL = CUDA.randn(ComplexF64, V1, V1) + drhoR = CUDA.randn(ComplexF64, V5, V5)' # test adjoint tensor + dH = CUDA.randn(ComplexF64, V2 * V4, V2 * V4) + @tensor dHrA12[a, s1, s2, c] := drhoL[a, a'] * conj(dA1[a', t1, b]) * + dA2[b, t2, c'] * drhoR[c', c] * + dH[s1, s2, t1, t2] + @tensor hHrA12[a, s1, s2, c] := TensorKit.to_cpu(drhoL)[a, a'] * conj(TensorKit.to_cpu(dA1)[a', t1, b]) * + TensorKit.to_cpu(dA2)[b, t2, c'] * TensorKit.to_cpu(drhoR)[c', c] * + TensorKit.to_cpu(dH)[s1, s2, t1, t2] + @test TensorKit.to_cpu(dHrA12) ≈ hHrA12 + end + end=# # doesn't yet work 
because of AdjointTensor + @timedtestset "Index flipping: test flipping inverse" begin + t = CUDA.rand(ComplexF64, V1 ⊗ V1' ← V1' ⊗ V1) + for i in 1:4 + CUDA.@allowscalar begin + @test t ≈ flip(flip(t, i), i; inv = true) + @test t ≈ flip(flip(t, i; inv = true), i) + end + end + end + #=@timedtestset "Index flipping: test via explicit flip" begin + t = CUDA.rand(ComplexF64, V1 ⊗ V1' ← V1' ⊗ V1) + F1 = unitary(flip(V1), V1) + + CUDA.@allowscalar begin + @tensor tf[a, b; c, d] := F1[a, a'] * t[a', b; c, d] + @test flip(t, 1) ≈ tf + @tensor tf[a, b; c, d] := conj(F1[b, b']) * t[a, b'; c, d] + @test twist!(flip(t, 2), 2) ≈ tf + @tensor tf[a, b; c, d] := F1[c, c'] * t[a, b; c', d] + @test flip(t, 3) ≈ tf + @tensor tf[a, b; c, d] := conj(F1[d, d']) * t[a, b; c, d'] + @test twist!(flip(t, 4), 4) ≈ tf + end + end + @timedtestset "Index flipping: test via contraction" begin + t1 = CUDA.rand(ComplexF64, V1 ⊗ V2 ⊗ V3 ← V4) + t2 = CUDA.rand(ComplexF64, V2' ⊗ V5 ← V4' ⊗ V1) + CUDA.@allowscalar begin + @tensor ta[a, b] := t1[x, y, a, z] * t2[y, b, z, x] + @tensor tb[a, b] := flip(t1, 1)[x, y, a, z] * flip(t2, 4)[y, b, z, x] + @test ta ≈ tb + @tensor tb[a, b] := flip(t1, (2, 4))[x, y, a, z] * flip(t2, (1, 3))[y, b, z, x] + @test ta ≈ tb + @tensor tb[a, b] := flip(t1, (1, 2, 4))[x, y, a, z] * flip(t2, (1, 3, 4))[y, b, z, x] + @tensor tb[a, b] := flip(t1, (1, 3))[x, y, a, z] * flip(t2, (2, 4))[y, b, z, x] + @test flip(ta, (1, 2)) ≈ tb + end + end=# # TODO + @timedtestset "Multiplication of isometries: test properties" begin + W2 = V4 ⊗ V5 + W1 = W2 ⊗ (oneunit(V1) ⊕ oneunit(V1)) + for T in (Float64, ComplexF64) + t1 = randisometry(CuMatrix{T}, W1, W2) + t2 = randisometry(CuMatrix{T}, W2 ← W2) + @test isisometric(t1) + @test isunitary(t2) + P = t1 * t1' + @test P * P ≈ P + end + end + @timedtestset "Multiplication and inverse: test compatibility" begin + W1 = V1 ⊗ V2 ⊗ V3 + W2 = V4 ⊗ V5 + for T in (Float64, ComplexF64) + t1 = CUDA.rand(T, W1, W1) + t2 = CUDA.rand(T, W2, W2) + t = CUDA.rand(T, W1, W2) + @test t1 * (t1 \ t) ≈ t + @test (t / t2) * t2 ≈ t + @test t1 \ one(t1) ≈ inv(t1) + @test one(t1) / t1 ≈ pinv(t1) + @test_throws SpaceMismatch inv(t) + @test_throws SpaceMismatch t2 \ t + @test_throws SpaceMismatch t / t1 + tp = pinv(t) * t + @test tp ≈ tp * tp + end + end + @timedtestset "Multiplication and inverse: test via CPU" begin + W1 = V1 ⊗ V2 ⊗ V3 + W2 = V4 ⊗ V5 + for T in (Float32, Float64, ComplexF32, ComplexF64) + t1 = CUDA.rand(T, W1, W1) + t2 = CUDA.rand(T, W2, W2) + t = CUDA.rand(T, W1, W2) + ht1 = TensorKit.to_cpu(t1) + ht2 = TensorKit.to_cpu(t2) + ht = TensorKit.to_cpu(t) + @test TensorKit.to_cpu(t1 * t) ≈ ht1 * ht + @test TensorKit.to_cpu(t1' * t) ≈ ht1' * ht + @test TensorKit.to_cpu(t2 * t') ≈ ht2 * ht' + @test TensorKit.to_cpu(t2' * t') ≈ ht2' * ht' + + @test TensorKit.to_cpu(inv(t1)) ≈ inv(ht1) + @test TensorKit.to_cpu(pinv(t)) ≈ pinv(ht) + + if T == Float32 || T == ComplexF32 + continue + end + + @test TensorKit.to_cpu(t1 \ t) ≈ ht1 \ ht + @test TensorKit.to_cpu(t1' \ t) ≈ ht1' \ ht + @test TensorKit.to_cpu(t2 \ t') ≈ ht2 \ ht' + @test TensorKit.to_cpu(t2' \ t') ≈ ht2' \ ht' + + @test TensorKit.to_cpu(t2 / t) ≈ ht2 / ht + @test TensorKit.to_cpu(t2' / t) ≈ ht2' / ht + @test TensorKit.to_cpu(t1 / t') ≈ ht1 / ht' + @test TensorKit.to_cpu(t1' / t') ≈ ht1' / ht' + end + end + if BraidingStyle(I) isa Bosonic && hasfusiontensor(I) + @timedtestset "Tensor functions" begin + W = V1 ⊗ V2 + for T in (Float64, ComplexF64) + t = project_hermitian!(CUDA.randn(T, W, W)) + s = dim(W) + #@test 
(@constinferred sqrt(t))^2 ≈ t + #@test TensorKit.to_cpu(sqrt(t)) ≈ sqrt(TensorKit.to_cpu(t)) + + expt = @constinferred exp(t) + @test TensorKit.to_cpu(expt) ≈ exp(TensorKit.to_cpu(t)) + + # log doesn't work on CUDA yet (scalar indexing) + #@test exp(@constinferred log(project_hermitian!(expt))) ≈ expt + #@test TensorKit.to_cpu(log(project_hermitian!(expt))) ≈ log(TensorKit.to_cpu(expt)) + + #=@test (@constinferred cos(t))^2 + (@constinferred sin(t))^2 ≈ + id(storagetype(t), W) + @test (@constinferred tan(t)) ≈ sin(t) / cos(t) + @test (@constinferred cot(t)) ≈ cos(t) / sin(t) + @test (@constinferred cosh(t))^2 - (@constinferred sinh(t))^2 ≈ + id(storagetype(t), W) + @test (@constinferred tanh(t)) ≈ sinh(t) / cosh(t) + @test (@constinferred coth(t)) ≈ cosh(t) / sinh(t)=# # TODO in CUDA + + #=t1 = sin(t) + @test sin(@constinferred asin(t1)) ≈ t1 + t2 = cos(t) + @test cos(@constinferred acos(t2)) ≈ t2 + t3 = sinh(t) + @test sinh(@constinferred asinh(t3)) ≈ t3 + t4 = cosh(t) + @test cosh(@constinferred acosh(t4)) ≈ t4 + t5 = tan(t) + @test tan(@constinferred atan(t5)) ≈ t5 + t6 = cot(t) + @test cot(@constinferred acot(t6)) ≈ t6 + t7 = tanh(t) + @test tanh(@constinferred atanh(t7)) ≈ t7 + t8 = coth(t) + @test coth(@constinferred acoth(t8)) ≈ t8=# + # TODO in CUDA + end + end + end + # Sylvester not defined for CUDA + # @timedtestset "Sylvester equation" begin + # for T in (Float32, ComplexF64) + # tA = CUDA.rand(T, V1 ⊗ V3, V1 ⊗ V3) + # tB = CUDA.rand(T, V2 ⊗ V4, V2 ⊗ V4) + # tA = 3 // 2 * leftorth(tA; alg=Polar())[1] + # tB = 1 // 5 * leftorth(tB; alg=Polar())[1] + # tC = CUDA.rand(T, V1 ⊗ V3, V2 ⊗ V4) + # t = @constinferred sylvester(tA, tB, tC) + # @test codomain(t) == V1 ⊗ V3 + # @test domain(t) == V2 ⊗ V4 + # @test norm(tA * t + t * tB + tC) < + # (norm(tA) + norm(tB) + norm(tC)) * eps(real(T))^(2 / 3) + # if BraidingStyle(I) isa Bosonic && hasfusiontensor(I) + # matrix(x) = reshape(convert(Array, x), dim(codomain(x)), dim(domain(x))) + # @test matrix(t) ≈ sylvester(matrix(tA), matrix(tB), matrix(tC)) + # end + # end + # end + # + # TODO + @timedtestset "Tensor product: test via norm preservation" begin + for T in (Float32, ComplexF64) + t1 = CUDA.rand(T, V2 ⊗ V3 ⊗ V1, V1 ⊗ V2) + t2 = CUDA.rand(T, V2 ⊗ V1 ⊗ V3, V1 ⊗ V1) + CUDA.@allowscalar begin + t = @constinferred (t1 ⊗ t2) + end + @test norm(t) ≈ norm(t1) * norm(t2) + end + end + if BraidingStyle(I) isa Bosonic && hasfusiontensor(I) + @timedtestset "Tensor product: test via conversion" begin + for T in (Float32, ComplexF64) + t1 = CUDA.rand(T, V2 ⊗ V3 ⊗ V1, V1) + t2 = CUDA.rand(T, V2 ⊗ V1 ⊗ V3, V2) + d1 = dim(codomain(t1)) + d2 = dim(codomain(t2)) + d3 = dim(domain(t1)) + d4 = dim(domain(t2)) + CUDA.@allowscalar begin + t = @constinferred (t1 ⊗ t2) + At = ad(t) + @test ad(t) ≈ ad(t1) ⊗ ad(t2) + end + end + end + end + @timedtestset "Tensor product: test via tensor contraction" begin + for T in (Float32, ComplexF64) + t1 = CUDA.rand(T, V2 ⊗ V3 ⊗ V1) + t2 = CUDA.rand(T, V2 ⊗ V1 ⊗ V3) + CUDA.@allowscalar begin + t = @constinferred (t1 ⊗ t2) + @tensor t′[1, 2, 3, 4, 5, 6] := t1[1, 2, 3] * t2[4, 5, 6] + # @test t ≈ t′ # TODO broken for symmetry: Irrep[ℤ₃] + end + end + end + end + TensorKit.empty_globalcaches!() +end + +@timedtestset "Deligne tensor product: test via conversion" begin + Vlists1 = (Vtr,) # VSU₂) + Vlists2 = (Vtr,) # Vℤ₂) + @testset for Vlist1 in Vlists1, Vlist2 in Vlists2 + V1, V2, V3, V4, V5 = Vlist1 + W1, W2, W3, W4, W5 = Vlist2 + for T in (Float32, ComplexF64) + t1 = CUDA.rand(T, V1 ⊗ V2, V3' ⊗ V4) + t2 = CUDA.rand(T, W2, 
W1 ⊗ W1') + CUDA.@allowscalar begin + t = @constinferred (t1 ⊠ t2) + end + d1 = dim(codomain(t1)) + d2 = dim(codomain(t2)) + d3 = dim(domain(t1)) + d4 = dim(domain(t2)) + CUDA.@allowscalar begin + @test ad(t1) ⊠ ad(t2) ≈ ad(t1 ⊠ t2) + end + end + end +end diff --git a/test/runtests.jl b/test/runtests.jl index 854991a7f..9513378db 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -2,8 +2,6 @@ # ------------ using ArgParse: ArgParse using SafeTestsets: @safetestset -# using CUDA: CUDA -# using AMDGPU: AMDGPU function parse_commandline(args = ARGS) s = ArgParse.ArgParseSettings() @@ -49,9 +47,9 @@ istestfile(fn) = endswith(fn, ".jl") && !contains(fn, "setup") # handle GPU cases separately if group == "cuda" - # CUDA.functional() || continue - elseif group == "amd" - # AMDGPU.functional() || continue + using CUDA + CUDA.functional() || continue + @time include("cuda/tensors.jl") elseif is_buildkite continue end
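Taken together, the extension routes GPU-backed tensors through the ordinary TensorKit API. A minimal usage sketch (hypothetical session, not part of the diff; it assumes a functional CUDA device, and `to_cpu` is the unexported helper added in src/tensors/tensor.jl above):

    using TensorKit, CUDA, cuTENSOR

    V = ℂ^2 ⊗ ℂ^3
    t = CUDA.rand(Float64, V ← V)                            # GPU-backed TensorMap via the extension
    t2 = rand(CuVector{Float64, CUDA.DeviceMemory}, V ← V)   # storage type passed as `TorA`
    norm(t + t2)                                             # block-wise linear algebra stays on the device
    h = TensorKit.to_cpu(t)                                  # copy back to Array-backed storage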