From 3d1ba4e018085a1d8719ce655990482e31f3df41 Mon Sep 17 00:00:00 2001 From: Katharine Hyatt Date: Tue, 7 Oct 2025 10:45:42 -0400 Subject: [PATCH 01/19] Start on CUDA extension --- .buildkite/pipeline.yml | 8 +- Project.toml | 15 +- ext/TensorKitCUDAExt/TensorKitCUDAExt.jl | 21 + ext/TensorKitCUDAExt/cutensormap.jl | 161 ++++++ src/tensors/diagonal.jl | 10 +- src/tensors/linalg.jl | 23 +- src/tensors/tensor.jl | 73 +-- test/cuda/tensors.jl | 604 +++++++++++++++++++++++ test/runtests.jl | 8 +- 9 files changed, 860 insertions(+), 63 deletions(-) create mode 100644 ext/TensorKitCUDAExt/TensorKitCUDAExt.jl create mode 100644 ext/TensorKitCUDAExt/cutensormap.jl create mode 100644 test/cuda/tensors.jl diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 6c414a31b..2b7905fc4 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -15,12 +15,12 @@ steps: queue: "juliagpu" cuda: "*" if: build.message !~ /\[skip tests\]/ - timeout_in_minutes: 30 + timeout_in_minutes: 60 matrix: setup: julia: - "1.10" - - "1.11" + - "1.12" - label: "Julia {{matrix.julia}} -- AMDGPU" plugins: @@ -36,9 +36,9 @@ steps: rocm: "*" rocmgpu: "*" if: build.message !~ /\[skip tests\]/ - timeout_in_minutes: 30 + timeout_in_minutes: 60 matrix: setup: julia: - "1.10" - - "1.11" + - "1.12" diff --git a/Project.toml b/Project.toml index 6c22acc60..c0c05603f 100644 --- a/Project.toml +++ b/Project.toml @@ -18,20 +18,26 @@ TupleTools = "9d95972d-f1c8-5527-a6e0-b4b365fa01f6" VectorInterface = "409d34a3-91d5-4945-b6ec-7529ddf182d8" [weakdeps] +CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000" +cuTENSOR = "011b41b2-24ef-40a8-b3eb-fa098493e9e1" [extensions] +TensorKitCUDAExt = ["CUDA", "cuTENSOR"] TensorKitChainRulesCoreExt = "ChainRulesCore" TensorKitFiniteDifferencesExt = "FiniteDifferences" [compat] +Adapt = "4" Aqua = "0.6, 0.7, 0.8" ArgParse = "1.2.0" +CUDA = "5.9" ChainRulesCore = "1" ChainRulesTestUtils = "1" Combinatorics = "1" FiniteDifferences = "0.12" +GPUArrays = "11.3.1" LRUCache = "1.0.2" LinearAlgebra = "1" MatrixAlgebraKit = "0.6.0" @@ -48,21 +54,26 @@ TestExtras = "0.2,0.3" TupleTools = "1.1" VectorInterface = "0.4.8, 0.5" Zygote = "0.7" +cuTENSOR = "2" julia = "1.10" [extras] -ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63" +Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595" +ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63" +CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" ChainRulesTestUtils = "cdddcdb0-9152-4a09-a978-84456f9df70a" Combinatorics = "861a8166-3701-5b0c-9a16-15d98fcdc6aa" FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000" +GPUArrays = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" SafeTestsets = "1bc83da4-3b8d-516f-aca4-4fe02f6d838f" TensorOperations = "6aa20fa7-93e2-5fca-9bc0-fbd0db3c71a2" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" TestExtras = "5ed8adda-3752-4e41-b88a-e8b09835ee3a" Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" +cuTENSOR = "011b41b2-24ef-40a8-b3eb-fa098493e9e1" [targets] -test = ["ArgParse", "Aqua", "Combinatorics", "LinearAlgebra", "TensorOperations", "Test", "TestExtras", "SafeTestsets", "ChainRulesCore", "ChainRulesTestUtils", "FiniteDifferences", "Zygote"] +test = ["ArgParse", "Adapt", "Aqua", "Combinatorics", "CUDA", "cuTENSOR", "GPUArrays", "LinearAlgebra", "SafeTestsets", 
"TensorOperations", "Test", "TestExtras", "ChainRulesCore", "ChainRulesTestUtils", "FiniteDifferences", "Zygote"] diff --git a/ext/TensorKitCUDAExt/TensorKitCUDAExt.jl b/ext/TensorKitCUDAExt/TensorKitCUDAExt.jl new file mode 100644 index 000000000..0562acf61 --- /dev/null +++ b/ext/TensorKitCUDAExt/TensorKitCUDAExt.jl @@ -0,0 +1,21 @@ +module TensorKitCUDAExt + +using CUDA, CUDA.CUBLAS, CUDA.CUSOLVER, LinearAlgebra +using CUDA: @allowscalar +using cuTENSOR: cuTENSOR +import CUDA: rand as curand, rand! as curand!, randn as curandn, randn! as curandn! + +using TensorKit +using TensorKit.Factorizations +using TensorKit.Strided +using TensorKit.Factorizations: AbstractAlgorithm +using TensorKit: SectorDict, tensormaptype, scalar, similarstoragetype, AdjointTensorMap, scalartype, project_symmetric_and_check +import TensorKit: randisometry, rand, randn + +using TensorKit.MatrixAlgebraKit + +using Random + +include("cutensormap.jl") + +end diff --git a/ext/TensorKitCUDAExt/cutensormap.jl b/ext/TensorKitCUDAExt/cutensormap.jl new file mode 100644 index 000000000..5ae602cfa --- /dev/null +++ b/ext/TensorKitCUDAExt/cutensormap.jl @@ -0,0 +1,161 @@ +const CuTensorMap{T, S, N₁, N₂} = TensorMap{T, S, N₁, N₂, CuVector{T, CUDA.DeviceMemory}} +const CuTensor{T, S, N} = CuTensorMap{T, S, N, 0} + +const AdjointCuTensorMap{T, S, N₁, N₂} = AdjointTensorMap{T, S, N₁, N₂, CuTensorMap{T, S, N₁, N₂}} + +function CuTensorMap(t::TensorMap{T, S, N₁, N₂, A}) where {T, S, N₁, N₂, A} + return CuTensorMap{T, S, N₁, N₂}(CuArray{T}(t.data), space(t)) +end + +function Base.collect(t::CuTensorMap{T}) where {T} + return convert(TensorKit.TensorMapWithStorage{T, Vector{T}}, t) +end + +# project_symmetric! doesn't yet work for GPU types, so do this on the host, then copy +function TensorKit.project_symmetric_and_check(::Type{T}, ::Type{A}, data::AbstractArray, V::TensorMapSpace; tol = sqrt(eps(real(float(eltype(data)))))) where {T, A <: CuVector{T}} + h_t = TensorKit.TensorMapWithStorage{T, Vector{T}}(undef, V) + h_t = TensorKit.project_symmetric!(h_t, Array(data)) + # verify result + isapprox(Array(reshape(data, dims(h_t))), convert(Array, h_t); atol = tol) || + throw(ArgumentError("Data has non-zero elements at incompatible positions")) + return TensorKit.TensorMapWithStorage{T, A}(A(h_t.data), V) +end + +for (fname, felt) in ((:zeros, :zero), (:ones, :one)) + @eval begin + function CUDA.$fname( + codomain::TensorSpace{S}, + domain::TensorSpace{S} = one(codomain) + ) where {S <: IndexSpace} + return CUDA.$fname(codomain ← domain) + end + function CUDA.$fname( + ::Type{T}, codomain::TensorSpace{S}, + domain::TensorSpace{S} = one(codomain) + ) where {T, S <: IndexSpace} + return CUDA.$fname(T, codomain ← domain) + end + CUDA.$fname(V::TensorMapSpace) = CUDA.$fname(Float64, V) + function CUDA.$fname(::Type{T}, V::TensorMapSpace) where {T} + t = CuTensorMap{T}(undef, V) + fill!(t, $felt(T)) + return t + end + end +end + +for randfun in (:curand, :curandn) + randfun! = Symbol(randfun, :!) 
+ @eval begin + # converting `codomain` and `domain` into `HomSpace` + function $randfun( + codomain::TensorSpace{S}, + domain::TensorSpace{S} = one(codomain), + ) where {S <: IndexSpace} + return $randfun(codomain ← domain) + end + function $randfun( + ::Type{T}, codomain::TensorSpace{S}, + domain::TensorSpace{S} = one(codomain), + ) where {T, S <: IndexSpace} + return $randfun(T, codomain ← domain) + end + function $randfun( + rng::Random.AbstractRNG, ::Type{T}, + codomain::TensorSpace{S}, + domain::TensorSpace{S} = one(codomain), + ) where {T, S <: IndexSpace} + return $randfun(rng, T, codomain ← domain) + end + + # filling in default eltype + $randfun(V::TensorMapSpace) = $randfun(Float64, V) + function $randfun(rng::Random.AbstractRNG, V::TensorMapSpace) + return $randfun(rng, Float64, V) + end + + # filling in default rng + function $randfun(::Type{T}, V::TensorMapSpace) where {T} + return $randfun(Random.default_rng(), T, V) + end + + # implementation + function $randfun( + rng::Random.AbstractRNG, ::Type{T}, + V::TensorMapSpace + ) where {T} + t = CuTensorMap{T}(undef, V) + $randfun!(rng, t) + return t + end + end +end + +# Scalar implementation +#----------------------- +function TensorKit.scalar(t::CuTensorMap{T, S, 0, 0}) where {T, S} + inds = findall(!iszero, t.data) + return isempty(inds) ? zero(scalartype(t)) : @allowscalar @inbounds t.data[only(inds)] +end + +function Base.convert( + TT::Type{CuTensorMap{T, S, N₁, N₂}}, + t::AbstractTensorMap{<:Any, S, N₁, N₂} + ) where {T, S, N₁, N₂} + if typeof(t) === TT + return t + else + tnew = TT(undef, space(t)) + return copy!(tnew, t) + end +end + +function LinearAlgebra.isposdef(t::CuTensorMap) + domain(t) == codomain(t) || + throw(SpaceMismatch("`isposdef` requires domain and codomain to be the same")) + InnerProductStyle(spacetype(t)) === EuclideanInnerProduct() || return false + for (c, b) in blocks(t) + # do our own hermitian check + isherm = TensorKit.MatrixAlgebraKit.ishermitian(b; atol = eps(real(eltype(b))), rtol = eps(real(eltype(b)))) + isherm || return false + isposdef(Hermitian(b)) || return false + end + return true +end + +function Base.promote_rule( + ::Type{<:TT₁}, + ::Type{<:TT₂} + ) where { + S, N₁, N₂, TTT₁, TTT₂, + TT₁ <: CuTensorMap{TTT₁, S, N₁, N₂}, + TT₂ <: CuTensorMap{TTT₂, S, N₁, N₂}, + } + T = TensorKit.VectorInterface.promote_add(TTT₁, TTT₂) + return CuTensorMap{T, S, N₁, N₂} +end + +# CuTensorMap exponentation: +function TensorKit.exp!(t::CuTensorMap) + domain(t) == codomain(t) || + error("Exponential of a tensor only exist when domain == codomain.") + for (c, b) in blocks(t) + copy!(b, parent(Base.exp(Hermitian(b)))) + end + return t +end + +# functions that don't map ℝ to (a subset of) ℝ +for f in (:sqrt, :log, :asin, :acos, :acosh, :atanh, :acoth) + sf = string(f) + @eval function Base.$f(t::CuTensorMap) + domain(t) == codomain(t) || + throw(SpaceMismatch("`$($sf)` of a tensor only exist when domain == codomain")) + T = complex(float(scalartype(t))) + tf = similar(t, T) + for (c, b) in blocks(t) + copy!(block(tf, c), parent($f(Hermitian(b)))) + end + return tf + end +end diff --git a/src/tensors/diagonal.jl b/src/tensors/diagonal.jl index e35c2ffdd..356f5dbf4 100644 --- a/src/tensors/diagonal.jl +++ b/src/tensors/diagonal.jl @@ -78,12 +78,10 @@ function DiagonalTensorMap(t::AbstractTensorMap{T, S, 1, 1}) where {T, S} return d end -Base.similar(d::DiagonalTensorMap) = similar_diagonal(d) -Base.similar(d::DiagonalTensorMap, ::Type{T}) where {T} = similar_diagonal(d, T) - 
-similar_diagonal(d::DiagonalTensorMap) = DiagonalTensorMap(similar(d.data), d.domain) -similar_diagonal(d::DiagonalTensorMap, ::Type{T}) where {T <: Number} = - DiagonalTensorMap(similar(d.data, T), d.domain) +Base.similar(d::DiagonalTensorMap) = DiagonalTensorMap(similar(d.data), d.domain) +function Base.similar(d::DiagonalTensorMap, ::Type{T}) where {T <: Number} + return DiagonalTensorMap(similar(d.data, T), d.domain) +end # TODO: more constructors needed? diff --git a/src/tensors/linalg.jl b/src/tensors/linalg.jl index 44e7be002..caae1d488 100644 --- a/src/tensors/linalg.jl +++ b/src/tensors/linalg.jl @@ -270,20 +270,11 @@ function _norm(blockiter, p::Real, init::Real) return mapreduce(max, blockiter; init = init) do (c, b) return isempty(b) ? init : oftype(init, LinearAlgebra.normInf(b)) end - elseif p == 2 - n² = mapreduce(+, blockiter; init = init) do (c, b) - return isempty(b) ? init : oftype(init, dim(c) * LinearAlgebra.norm2(b)^2) + elseif p > 0 # finite positive p + np = sum(blockiter; init) do (c, b) + return oftype(init, dim(c) * norm(b, p)^p) end - return sqrt(n²) - elseif p == 1 - return mapreduce(+, blockiter; init = init) do (c, b) - return isempty(b) ? init : oftype(init, dim(c) * sum(abs, b)) - end - elseif p > 0 - nᵖ = mapreduce(+, blockiter; init = init) do (c, b) - return isempty(b) ? init : oftype(init, dim(c) * LinearAlgebra.normp(b, p)^p) - end - return (nᵖ)^inv(oftype(nᵖ, p)) + return np^(inv(oftype(np, p))) else msg = "Norm with non-positive p is not defined for `AbstractTensorMap`" throw(ArgumentError(msg)) @@ -299,7 +290,7 @@ function LinearAlgebra.rank( r = 0 * dim(first(allunits(sectortype(t)))) dim(t) == 0 && return r S = LinearAlgebra.svdvals(t) - tol = max(atol, rtol * maximum(first, values(S))) + tol = max(atol, rtol * maximum(parent(S))) for (c, b) in pairs(S) if !isempty(b) r += dim(c) * count(>(tol), b) @@ -317,8 +308,8 @@ function LinearAlgebra.cond(t::AbstractTensorMap, p::Real = 2) return zero(real(float(scalartype(t)))) end S = LinearAlgebra.svdvals(t) - maxS = maximum(first, values(S)) - minS = minimum(last, values(S)) + maxS = maximum(parent(S)) + minS = minimum(parent(S)) return iszero(maxS) ? oftype(maxS, Inf) : (maxS / minS) else throw(ArgumentError("cond currently only defined for p=2")) diff --git a/src/tensors/tensor.jl b/src/tensors/tensor.jl index 452072a87..c6a441a12 100644 --- a/src/tensors/tensor.jl +++ b/src/tensors/tensor.jl @@ -198,6 +198,15 @@ TensorMap(data::AbstractArray, codom::TensorSpace, dom::TensorSpace; kwargs...) Tensor(data::AbstractArray, codom::TensorSpace; kwargs...) = TensorMap(data, codom ← one(codom); kwargs...) 
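+# Factored out of the `TensorMapWithStorage` constructor below so that package
+# extensions can overload the projection per storage type; the CUDA extension
+# provides a `CuVector` method that projects on the host and copies back.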
+function project_symmetric_and_check(::Type{T}, ::Type{A}, data::AbstractArray, V::TensorMapSpace; tol = sqrt(eps(real(float(eltype(data)))))) where {T, A <: DenseVector{T}} + t = TensorMapWithStorage{T, A}(undef, V) + t = project_symmetric!(t, data) + # verify result + isapprox(reshape(data, dims(t)), convert(Array, t); atol = tol) || + throw(ArgumentError("Data has non-zero elements at incompatible positions")) + return t +end + function TensorMapWithStorage{T, A}( data::AbstractArray, V::TensorMapSpace; tol = sqrt(eps(real(float(eltype(data))))) ) where {T, A} @@ -209,15 +218,7 @@ function TensorMapWithStorage{T, A}( sectortype(V) === Trivial && return tensormaptype(spacetype(V), numout(V), numin(V), A)(reshape(data, length(data)), V) - # do projection - t = TensorMapWithStorage{T, A}(undef, V) - t = project_symmetric!(t, data) - - # verify result - isapprox(reshape(data, dims(t)), convert(Array, t); atol = tol) || - throw(ArgumentError("Data has non-zero elements at incompatible positions")) - - return t + return project_symmetric_and_check(T, A, data, V; tol) end TensorMapWithStorage{T, A}(data::AbstractArray, codom::TensorSpace, dom::TensorSpace; kwargs...) where {T, A} = TensorMapWithStorage{T, A}(data, codom ← dom; kwargs...) @@ -315,11 +316,14 @@ end for randf in (:rand, :randn, :randexp, :randisometry) _docstr = """ - $randf([rng=default_rng()], [T=Float64], codomain::ProductSpace{S,N₁}, + $randf([rng=default_rng()], [TorA=Float64], codomain::ProductSpace{S,N₁}, domain::ProductSpace{S,N₂}) where {S,N₁,N₂,T} -> t - $randf([rng=default_rng()], [T=Float64], codomain ← domain) -> t + $randf([rng=default_rng()], [TorA=Float64], codomain ← domain) -> t Generate a tensor `t` with entries generated by `$randf`. + The type `TorA` can be used to control the element type and + data type generated. For example, if `TorA` is a `SparseVector{ComplexF32}`, + then the final output `TensorMap` will have that as its storage type. See also [`Random.$(randf)!`](@ref). 
""" @@ -348,25 +352,25 @@ for randf in (:rand, :randn, :randexp, :randisometry) return $randfun(codomain ← domain) end function $randfun( - ::Type{T}, codomain::TensorSpace{S}, domain::TensorSpace{S} - ) where {T, S <: IndexSpace} - return $randfun(T, codomain ← domain) + ::Type{TorA}, codomain::TensorSpace{S}, domain::TensorSpace{S} + ) where {TorA, S <: IndexSpace} + return $randfun(TorA, codomain ← domain) end function $randfun( - rng::Random.AbstractRNG, ::Type{T}, codomain::TensorSpace{S}, domain::TensorSpace{S} - ) where {T, S <: IndexSpace} - return $randfun(rng, T, codomain ← domain) + rng::Random.AbstractRNG, ::Type{TorA}, codomain::TensorSpace{S}, domain::TensorSpace{S} + ) where {TorA, S <: IndexSpace} + return $randfun(rng, TorA, codomain ← domain) end # accepting single `TensorSpace` $randfun(codomain::TensorSpace) = $randfun(codomain ← one(codomain)) - function $randfun(::Type{T}, codomain::TensorSpace) where {T} - return $randfun(T, codomain ← one(codomain)) + function $randfun(::Type{TorA}, codomain::TensorSpace) where {TorA} + return $randfun(TorA, codomain ← one(codomain)) end function $randfun( - rng::Random.AbstractRNG, ::Type{T}, codomain::TensorSpace - ) where {T} - return $randfun(rng, T, codomain ← one(domain)) + rng::Random.AbstractRNG, ::Type{TorA}, codomain::TensorSpace + ) where {TorA} + return $randfun(rng, TorA, codomain ← one(domain)) end # filling in default eltype @@ -376,16 +380,16 @@ for randf in (:rand, :randn, :randexp, :randisometry) end # filling in default rng - function $randfun(::Type{T}, V::TensorMapSpace) where {T} - return $randfun(Random.default_rng(), T, V) + function $randfun(::Type{TorA}, V::TensorMapSpace) where {TorA} + return $randfun(Random.default_rng(), TorA, V) end $randfun!(t::AbstractTensorMap) = $randfun!(Random.default_rng(), t) # implementation function $randfun( - rng::Random.AbstractRNG, ::Type{T}, V::TensorMapSpace - ) where {T} - t = TensorMap{T}(undef, V) + rng::Random.AbstractRNG, ::Type{TorA}, V::TensorMapSpace + ) where {TorA} + t = tensormaptype(spacetype(V), numout(V), numin(V), TorA)(undef, V) $randfun!(rng, t) return t end @@ -405,6 +409,10 @@ Base.copy(t::TensorMap) = typeof(t)(copy(t.data), t.space) # Conversion between TensorMap and Dict, for read and write purpose #------------------------------------------------------------------ +# We want to store the block data using simple data types, +# rather tha reshaped views or some other wrapped array type. 
+# Since this method is meant for storing data on disk, we can +# freely collect data to the CPU function Base.convert(::Type{Dict}, t::AbstractTensorMap) d = Dict{Symbol, Any}() d[:codomain] = repr(codomain(t)) @@ -521,6 +529,11 @@ function Base.convert(::Type{TensorMap}, t::AbstractTensorMap) return copy!(TensorMap{scalartype(t)}(undef, space(t)), t) end +function Base.convert(::Type{TensorMapWithStorage{T, A}}, t::TensorMap) where {T, A} + d_data = convert(A, t.data) + return TensorMapWithStorage{T, A}(d_data, space(t)) +end + function Base.convert( TT::Type{TensorMap{T, S, N₁, N₂, A}}, t::AbstractTensorMap{<:Any, S, N₁, N₂} ) where {T, S, N₁, N₂, A} @@ -535,7 +548,7 @@ end function Base.promote_rule( ::Type{<:TT₁}, ::Type{<:TT₂} ) where {S, N₁, N₂, TT₁ <: TensorMap{<:Any, S, N₁, N₂}, TT₂ <: TensorMap{<:Any, S, N₁, N₂}} - A = VectorInterface.promote_add(storagetype(TT₁), storagetype(TT₂)) - T = scalartype(A) - return TensorMap{T, S, N₁, N₂, A} + T = VectorInterface.promote_add(scalartype(TT₁), scalartype(TT₂)) + A = promote_storagetype(similarstoragetype(TT₁, T), similarstoragetype(TT₂, T)) + return tensormaptype(S, N₁, N₂, A) end diff --git a/test/cuda/tensors.jl b/test/cuda/tensors.jl new file mode 100644 index 000000000..678d7fc2a --- /dev/null +++ b/test/cuda/tensors.jl @@ -0,0 +1,604 @@ +using Adapt, CUDA, cuTENSOR +using Test, TestExtras +using TensorKit, Combinatorics +ad = adapt(Array) +const CUDAExt = Base.get_extension(TensorKit, :TensorKitCUDAExt) +@assert !isnothing(CUDAExt) +const CuTensorMap = getglobal(CUDAExt, :CuTensorMap) +const curand = getglobal(CUDAExt, :curand) +const curandn = getglobal(CUDAExt, :curandn) + +@isdefined(TestSetup) || include("../setup.jl") +using .TestSetup + +for V in (Vtr, Vℤ₂, Vfℤ₂, Vℤ₃, VU₁, VfU₁, VCU₁, VSU₂, VfSU₂) #, VSU₃) + V1, V2, V3, V4, V5 = V + @assert V3 * V4 * V2 ≿ V1' * V5' # necessary for leftorth tests + @assert V3 * V4 ≾ V1' * V2' * V5' # necessary for rightorth tests +end + +spacelist = try + if ENV["CI"] == "true" + println("Detected running on CI") + if Sys.iswindows() + (Vtr, Vℤ₂, Vfℤ₂, Vℤ₃, VU₁, VfU₁, VCU₁, VSU₂) + elseif Sys.isapple() + (Vtr, Vℤ₂, Vfℤ₂, Vℤ₃, VfU₁, VfSU₂) #, VSU₃) + else + (Vtr, Vℤ₂, Vfℤ₂, VU₁, VCU₁, VSU₂, VfSU₂) #, VSU₃) + end + else + (Vtr, VU₁, VSU₂, Vfℤ₂) + end +catch + (Vtr, Vℤ₂, Vfℤ₂, Vℤ₃, VU₁, VfU₁, VCU₁, VSU₂, VfSU₂) #, VSU₃) +end + +for V in spacelist + I = sectortype(first(V)) + Istr = TensorKit.type_repr(I) + println("---------------------------------------") + println("CUDA Tensors with symmetry: $Istr") + println("---------------------------------------") + @timedtestset "Tensors with symmetry: $Istr" verbose = true begin + V1, V2, V3, V4, V5 = V + @timedtestset "Basic tensor properties" begin + W = V1 ⊗ V2 ⊗ V3 ⊗ V4 ⊗ V5 + # test default pass-throughs + for f in (CUDA.zeros, CUDA.ones, curand, curandn) + t = @constinferred f(W) + @test scalartype(t) == Float64 + @test codomain(t) == W + @test space(t) == (W ← one(W)) + @test domain(t) == one(W) + @test typeof(t) == TensorMap{Float64, spacetype(t), 5, 0, CuVector{Float64, CUDA.DeviceMemory}} + end + for f in (rand, randn) + t = @constinferred f(CuVector{Float64, CUDA.DeviceMemory}, W) + @test scalartype(t) == Float64 + @test codomain(t) == W + @test space(t) == (W ← one(W)) + @test domain(t) == one(W) + @test typeof(t) == TensorMap{Float64, spacetype(t), 5, 0, CuVector{Float64, CUDA.DeviceMemory}} + end + for T in (Int, Float32, Float64, ComplexF32, ComplexF64) + t = @constinferred CUDA.zeros(T, W) + CUDA.@allowscalar begin + @test 
@constinferred(hash(t)) == hash(deepcopy(t)) + end + @test scalartype(t) == T + @test norm(t) == 0 + @test codomain(t) == W + @test space(t) == (W ← one(W)) + @test domain(t) == one(W) + @test typeof(t) == TensorMap{T, spacetype(t), 5, 0, CuVector{T, CUDA.DeviceMemory}} + # blocks + bs = @constinferred blocks(t) + (c, b1), state = @constinferred Nothing iterate(bs) + @test c == first(blocksectors(W)) + next = @constinferred Nothing iterate(bs, state) + b2 = @constinferred block(t, first(blocksectors(t))) + @test b1 == b2 + @test_broken eltype(bs) === Pair{typeof(c), typeof(b1)} + @test_broken typeof(b1) === TensorKit.blocktype(t) + @test typeof(c) === sectortype(t) + end + end + @timedtestset "Conversion to/from host" begin + W = V1 ⊗ V2 ⊗ V3 ⊗ V4 ⊗ V5 + for T in (Int, Float32, ComplexF64) + h_t = @constinferred rand(T, W) + t1 = convert(CuTensorMap{T}, h_t) + @test collect(t1.data) == h_t.data + @test space(t1) == space(h_t) + @test scalartype(t1) == T + @test codomain(t1) == W + @test space(t1) == (W ← one(W)) + @test domain(t1) == one(W) + t2 = CuTensorMap(h_t) + @test collect(t2.data) == h_t.data + @test space(t2) == space(h_t) + @test scalartype(t2) == T + @test codomain(t2) == W + @test space(t2) == (W ← one(W)) + @test domain(t2) == one(W) + end + end + @timedtestset "Tensor Dict conversion" begin + W = V1 ⊗ V2 ⊗ V3 ← V4 ⊗ V5 + for T in (Int, Float32, ComplexF64) + t = @constinferred CUDA.rand(T, W) + d = convert(Dict, t) + @test collect(t) == convert(TensorMap, d) + end + end + @timedtestset "Basic linear algebra" begin + W = V1 ⊗ V2 ⊗ V3 ← V4 ⊗ V5 + for T in (Float32, ComplexF64) + t = @constinferred CUDA.rand(T, W) + @test scalartype(t) == T + @test space(t) == W + @test space(t') == W' + @test dim(t) == dim(space(t)) + @test codomain(t) == codomain(W) + @test domain(t) == domain(W) + # blocks for adjoint + bs = @constinferred blocks(t') + (c, b1), state = @constinferred Nothing iterate(bs) + @test c == first(blocksectors(W')) + next = @constinferred Nothing iterate(bs, state) + b2 = @constinferred block(t', first(blocksectors(t'))) + @test b1 == b2 + @test_broken eltype(bs) === Pair{typeof(c), typeof(b1)} + @test_broken typeof(b1) === TensorKit.blocktype(t') + @test typeof(c) === sectortype(t) + # linear algebra + @test isa(@constinferred(norm(t)), real(T)) + @test norm(t)^2 ≈ dot(t, t) + α = rand(T) + @test norm(α * t) ≈ abs(α) * norm(t) + @test norm(t + t, 2) ≈ 2 * norm(t, 2) + @test norm(t + t, 1) ≈ 2 * norm(t, 1) + @test norm(t + t, Inf) ≈ 2 * norm(t, Inf) + p = 3 * rand(Float64) + @test norm(t + t, p) ≈ 2 * norm(t, p) + @test norm(t) ≈ norm(t') + + t2 = @constinferred rand!(similar(t)) + β = rand(T) + #@test @constinferred(dot(β * t2, α * t)) ≈ conj(β) * α * conj(dot(t, t2)) # broken for Irrep[CU₁] + @test dot(β * t2, α * t) ≈ conj(β) * α * conj(dot(t, t2)) + @test dot(t2, t) ≈ conj(dot(t, t2)) + @test dot(t2, t) ≈ conj(dot(t2', t')) + @test dot(t2, t) ≈ dot(t', t2') + + i1 = @constinferred(isomorphism(T, V1 ⊗ V2, V2 ⊗ V1)) + i2 = @constinferred(isomorphism(CuVector{T}, V2 ⊗ V1, V1 ⊗ V2)) + CUDA.@allowscalar begin + @test i1 * i2 == @constinferred(id(T, V1 ⊗ V2)) + @test i2 * i1 == @constinferred(id(CuVector{T, CUDA.DeviceMemory}, V2 ⊗ V1)) + w = @constinferred(isometry(T, V1 ⊗ (oneunit(V1) ⊕ oneunit(V1)), V1)) + @test dim(w) == 2 * dim(V1 ← V1) + @test w' * w == id(CuVector{T, CUDA.DeviceMemory}, V1) + @test w * w' == (w * w')^2 + end + end + end + @timedtestset "Trivial space insertion and removal" begin + W = V1 ⊗ V2 ⊗ V3 ← V4 ⊗ V5 + for T in (Float32, ComplexF64) + t = 
@constinferred CUDA.rand(T, W) + t2 = @constinferred insertleftunit(t) + @test t2 == @constinferred insertrightunit(t) + @test numind(t2) == numind(t) + 1 + @test space(t2) == insertleftunit(space(t)) + @test scalartype(t2) === T + @test t.data === t2.data + @test @constinferred(removeunit(t2, $(numind(t2)))) == t + t3 = @constinferred insertleftunit(t; copy = true) + @test t3 == @constinferred insertrightunit(t; copy = true) + @test t.data !== t3.data + for (c, b) in blocks(t) + @test b == block(t3, c) + end + @test @constinferred(removeunit(t3, $(numind(t3)))) == t + t4 = @constinferred insertrightunit(t, 3; dual = true) + @test numin(t4) == numin(t) && numout(t4) == numout(t) + 1 + for (c, b) in blocks(t) + @test b == block(t4, c) + end + @test @constinferred(removeunit(t4, 4)) == t + t5 = @constinferred insertleftunit(t, 4; dual = true) + @test numin(t5) == numin(t) + 1 && numout(t5) == numout(t) + for (c, b) in blocks(t) + @test b == block(t5, c) + end + @test @constinferred(removeunit(t5, 4)) == t + end + end + if hasfusiontensor(I) + @timedtestset "Basic linear algebra: test via CPU" begin + W = V1 ⊗ V2 ⊗ V3 ← V4 ⊗ V5 + for T in (Float32, ComplexF64) + t = CUDA.rand(T, W) + t2 = @constinferred CUDA.rand!(similar(t)) + α = rand(T) + @test norm(t, 2) ≈ norm(collect(t), 2) + @test dot(t2, t) ≈ dot(collect(t2), collect(t)) + @test collect(α * t) ≈ α * collect(t) + @test collect(t + t) ≈ 2 * collect(t) + end + end + @timedtestset "Real and imaginary parts" begin + W = V1 ⊗ V2 + for T in (Float64, ComplexF64, ComplexF32) + t = @constinferred CUDA.randn(T, W, W) + + tr = @constinferred real(t) + @test scalartype(tr) <: Real + @test real(collect(t)) == collect(tr) + @test storagetype(tr) == CuVector{real(T), CUDA.DeviceMemory} + + ti = @constinferred imag(t) + @test scalartype(ti) <: Real + @test imag(collect(t)) == collect(ti) + @test storagetype(ti) == CuVector{real(T), CUDA.DeviceMemory} + + tc = @inferred complex(t) + @test scalartype(tc) <: Complex + @test complex(collect(t)) == collect(tc) + @test storagetype(tc) == CuVector{complex(T), CUDA.DeviceMemory} + + tc2 = @inferred complex(tr, ti) + @test tc2 ≈ tc + @test storagetype(tc2) == CuVector{complex(T), CUDA.DeviceMemory} + end + end + end + @timedtestset "Tensor conversion" begin # TODO adjoint conversion methods don't work yet + W = V1 ⊗ V2 + t = @constinferred CUDA.randn(W ← W) + @test typeof(convert(TensorMap, t')) == typeof(t) + tc = complex(t) + @test convert(typeof(tc), t) == tc + @test typeof(convert(typeof(tc), t)) == typeof(tc) + @test typeof(convert(typeof(tc), t')) == typeof(tc) + @test Base.promote_typeof(t, tc) == typeof(tc) + @test Base.promote_typeof(tc, t) == typeof(tc + t) + end + #=@timedtestset "diag/diagm" begin + W = V1 ⊗ V2 ⊗ V3 ← V4 ⊗ V5 + t = CUDA.randn(ComplexF64, W) + d = LinearAlgebra.diag(t) + # TODO find a way to use CUDA here + D = LinearAlgebra.diagm(codomain(t), domain(t), d) + @test LinearAlgebra.isdiag(D) + @test LinearAlgebra.diag(D) == d + end=# + @timedtestset "Permutations: test via inner product invariance" begin + W = V1 ⊗ V2 ⊗ V3 ⊗ V4 ⊗ V5 + t = CUDA.rand(ComplexF64, W) + t′ = CUDA.randn!(similar(t)) + for k in 0:5 + for p in permutations(1:5) + p1 = ntuple(n -> p[n], k) + p2 = ntuple(n -> p[k + n], 5 - k) + CUDA.@allowscalar begin + t2 = @constinferred permute(t, (p1, p2)) + t2 = permute(t, (p1, p2)) + @test norm(t2) ≈ norm(t) + t2′ = permute(t′, (p1, p2)) + @test dot(t2′, t2) ≈ dot(t′, t) ≈ dot(transpose(t2′), transpose(t2)) + end + end + + CUDA.@allowscalar begin + t3 = @constinferred 
repartition(t, $k) + t3 = repartition(t, k) + @test norm(t3) ≈ norm(t) + t3′ = @constinferred repartition!(similar(t3), t′) + @test norm(t3′) ≈ norm(t′) + @test dot(t′, t) ≈ dot(t3′, t3) + end + end + end + if BraidingStyle(I) isa Bosonic && hasfusiontensor(I) + @timedtestset "Permutations: test via CPU" begin + W = V1 ⊗ V2 ⊗ V3 ⊗ V4 ⊗ V5 + t = CUDA.rand(ComplexF64, W) + a = convert(Array, collect(t)) + for k in 0:5 + for p in permutations(1:5) + p1 = ntuple(n -> p[n], k) + p2 = ntuple(n -> p[k + n], 5 - k) + t2 = CUDA.@allowscalar permute(t, (p1, p2)) + a2 = convert(Array, collect(t2)) + @test a2 ≈ permutedims(a, (p1..., p2...)) + @test convert(Array, collect(transpose(t2))) ≈ + permutedims(a2, (5, 4, 3, 2, 1)) + end + + t3 = CUDA.@allowscalar repartition(t, k) + a3 = convert(Array, collect(t3)) + @test a3 ≈ permutedims( + a, (ntuple(identity, k)..., reverse(ntuple(i -> i + k, 5 - k))...) + ) + end + end + end + @timedtestset "Full trace: test self-consistency" begin + t = CUDA.rand(ComplexF64, V1 ⊗ V2' ⊗ V2 ⊗ V1') + CUDA.@allowscalar begin + t2 = permute(t, ((1, 2), (4, 3))) + s = @constinferred tr(t2) + @test conj(s) ≈ tr(t2') + if !isdual(V1) + t2 = twist!(t2, 1) + end + if isdual(V2) + t2 = twist!(t2, 2) + end + ss = tr(t2) + @tensor s2 = t[a, b, b, a] + @tensor t3[a, b] := t[a, c, c, b] + @tensor s3 = t3[a, a] + end + @test ss ≈ s2 + @test ss ≈ s3 + end + @timedtestset "Partial trace: test self-consistency" begin + t = CUDA.rand(ComplexF64, V1 ⊗ V2' ⊗ V3 ⊗ V2 ⊗ V1' ⊗ V3') + @tensor t2[a, b] := t[c, d, b, d, c, a] + @tensor t4[a, b, c, d] := t[d, e, b, e, c, a] + @tensor t5[a, b] := t4[a, b, c, c] + @test t2 ≈ t5 + end + if BraidingStyle(I) isa Bosonic && hasfusiontensor(I) + @timedtestset "Trace: test via conversion" begin + t = CUDA.rand(ComplexF64, V1 ⊗ V2' ⊗ V3 ⊗ V2 ⊗ V1' ⊗ V3') + CUDA.@allowscalar begin + @tensor t2[a, b] := t[c, d, b, d, c, a] + @tensor t3[a, b] := ad(t)[c, d, b, d, c, a] + end + @test t3 ≈ ad(t2) + end + end + @timedtestset "Trace and contraction" begin + t1 = CUDA.rand(ComplexF64, V1 ⊗ V2 ⊗ V3) + t2 = CUDA.rand(ComplexF64, V2' ⊗ V4 ⊗ V1') + CUDA.@allowscalar begin + t3 = t1 ⊗ t2 + @tensor ta[a, b] := t1[x, y, a] * t2[y, b, x] + @tensor tb[a, b] := t3[x, y, a, y, b, x] + end + @test ta ≈ tb + end + if BraidingStyle(I) isa Bosonic && hasfusiontensor(I) + @timedtestset "Tensor contraction: test via CPU" begin + dA1 = CUDA.randn(ComplexF64, V1' * V2', V3') + dA2 = CUDA.randn(ComplexF64, V3 * V4, V5) + drhoL = CUDA.randn(ComplexF64, V1, V1) + drhoR = CUDA.randn(ComplexF64, V5, V5)' # test adjoint tensor + dH = CUDA.randn(ComplexF64, V2 * V4, V2 * V4) + @tensor dHrA12[a, s1, s2, c] := drhoL[a, a'] * conj(dA1[a', t1, b]) * + dA2[b, t2, c'] * drhoR[c', c] * + dH[s1, s2, t1, t2] + @tensor hHrA12[a, s1, s2, c] := collect(drhoL)[a, a'] * conj(collect(dA1)[a', t1, b]) * + collect(dA2)[b, t2, c'] * collect(drhoR)[c', c] * + collect(dH)[s1, s2, t1, t2] + @test collect(dHrA12) ≈ hHrA12 + end + end + @timedtestset "Index flipping: test flipping inverse" begin + t = CUDA.rand(ComplexF64, V1 ⊗ V1' ← V1' ⊗ V1) + for i in 1:4 + CUDA.@allowscalar begin + @test t ≈ flip(flip(t, i), i; inv = true) + @test t ≈ flip(flip(t, i; inv = true), i) + end + end + end + #=@timedtestset "Index flipping: test via explicit flip" begin + t = CUDA.rand(ComplexF64, V1 ⊗ V1' ← V1' ⊗ V1) + F1 = unitary(flip(V1), V1) + + CUDA.@allowscalar begin + @tensor tf[a, b; c, d] := F1[a, a'] * t[a', b; c, d] + @test flip(t, 1) ≈ tf + @tensor tf[a, b; c, d] := conj(F1[b, b']) * t[a, b'; c, d] + @test 
twist!(flip(t, 2), 2) ≈ tf + @tensor tf[a, b; c, d] := F1[c, c'] * t[a, b; c', d] + @test flip(t, 3) ≈ tf + @tensor tf[a, b; c, d] := conj(F1[d, d']) * t[a, b; c, d'] + @test twist!(flip(t, 4), 4) ≈ tf + end + end + @timedtestset "Index flipping: test via contraction" begin + t1 = CUDA.rand(ComplexF64, V1 ⊗ V2 ⊗ V3 ← V4) + t2 = CUDA.rand(ComplexF64, V2' ⊗ V5 ← V4' ⊗ V1) + CUDA.@allowscalar begin + @tensor ta[a, b] := t1[x, y, a, z] * t2[y, b, z, x] + @tensor tb[a, b] := flip(t1, 1)[x, y, a, z] * flip(t2, 4)[y, b, z, x] + @test ta ≈ tb + @tensor tb[a, b] := flip(t1, (2, 4))[x, y, a, z] * flip(t2, (1, 3))[y, b, z, x] + @test ta ≈ tb + @tensor tb[a, b] := flip(t1, (1, 2, 4))[x, y, a, z] * flip(t2, (1, 3, 4))[y, b, z, x] + @tensor tb[a, b] := flip(t1, (1, 3))[x, y, a, z] * flip(t2, (2, 4))[y, b, z, x] + @test flip(ta, (1, 2)) ≈ tb + end + end=# # TODO + @timedtestset "Multiplication of isometries: test properties" begin + W2 = V4 ⊗ V5 + W1 = W2 ⊗ (oneunit(V1) ⊕ oneunit(V1)) + for T in (Float64, ComplexF64) + t1 = randisometry(CuMatrix{T}, W1, W2) + t2 = randisometry(CuMatrix{T}, W2 ← W2) + @test isisometric(t1) + @test isunitary(t2) + P = t1 * t1' + @test P * P ≈ P + end + end + @timedtestset "Multiplication and inverse: test compatibility" begin + W1 = V1 ⊗ V2 ⊗ V3 + W2 = V4 ⊗ V5 + for T in (Float64, ComplexF64) + t1 = CUDA.rand(T, W1, W1) + t2 = CUDA.rand(T, W2, W2) + t = CUDA.rand(T, W1, W2) + @test t1 * (t1 \ t) ≈ t + @test (t / t2) * t2 ≈ t + @test t1 \ one(t1) ≈ inv(t1) + @test one(t1) / t1 ≈ pinv(t1) + @test_throws SpaceMismatch inv(t) + @test_throws SpaceMismatch t2 \ t + @test_throws SpaceMismatch t / t1 + tp = pinv(t) * t + @test tp ≈ tp * tp + end + end + if BraidingStyle(I) isa Bosonic && hasfusiontensor(I) + @timedtestset "Multiplication and inverse: test via CPU" begin + W1 = V1 ⊗ V2 ⊗ V3 + W2 = V4 ⊗ V5 + for T in (Float32, Float64, ComplexF32, ComplexF64) + t1 = CUDA.rand(T, W1, W1) + t2 = CUDA.rand(T, W2, W2) + t = CUDA.rand(T, W1, W2) + ht1 = collect(t1) + ht2 = collect(t2) + ht = collect(t) + @test collect(t1 * t) ≈ ht1 * ht + @test collect(t1' * t) ≈ ht1' * ht + @test collect(t2 * t') ≈ ht2 * ht' + @test collect(t2' * t') ≈ ht2' * ht' + + @test collect(inv(t1)) ≈ inv(ht1) + @test collect(pinv(t)) ≈ pinv(ht) + + if T == Float32 || T == ComplexF32 + continue + end + + @test collect(t1 \ t) ≈ ht1 \ ht + @test collect(t1' \ t) ≈ ht1' \ ht + @test collect(t2 \ t') ≈ ht2 \ ht' + @test collect(t2' \ t') ≈ ht2' \ ht' + + @test collect(t2 / t) ≈ ht2 / ht + @test collect(t2' / t) ≈ ht2' / ht + @test collect(t1 / t') ≈ ht1 / ht' + @test collect(t1' / t') ≈ ht1' / ht' + end + end + end + if BraidingStyle(I) isa Bosonic && hasfusiontensor(I) + @timedtestset "Tensor functions" begin + W = V1 ⊗ V2 + for T in (Float64, ComplexF64) + t = CUDA.randn(T, W, W) + s = dim(W) + @test (@constinferred sqrt(t))^2 ≈ t + #@test collect(sqrt(t)) ≈ sqrt(collect(t)) # schur not supported for CuArray + + expt = @constinferred exp(t) + @test collect(expt) ≈ exp(collect(t)) + + # log doesn't work on CUDA yet + @test exp(@constinferred log(expt)) ≈ expt + @test collect(log(expt)) ≈ log(collect(expt)) + + #=@test (@constinferred cos(t))^2 + (@constinferred sin(t))^2 ≈ + id(storagetype(t), W) + @test (@constinferred tan(t)) ≈ sin(t) / cos(t) + @test (@constinferred cot(t)) ≈ cos(t) / sin(t) + @test (@constinferred cosh(t))^2 - (@constinferred sinh(t))^2 ≈ + id(storagetype(t), W) + @test (@constinferred tanh(t)) ≈ sinh(t) / cosh(t) + @test (@constinferred coth(t)) ≈ cosh(t) / sinh(t)=# # TODO in CUDA + + #=t1 = 
sin(t) + @test sin(@constinferred asin(t1)) ≈ t1 + t2 = cos(t) + @test cos(@constinferred acos(t2)) ≈ t2 + t3 = sinh(t) + @test sinh(@constinferred asinh(t3)) ≈ t3 + t4 = cosh(t) + @test cosh(@constinferred acosh(t4)) ≈ t4 + t5 = tan(t) + @test tan(@constinferred atan(t5)) ≈ t5 + t6 = cot(t) + @test cot(@constinferred acot(t6)) ≈ t6 + t7 = tanh(t) + @test tanh(@constinferred atanh(t7)) ≈ t7 + t8 = coth(t) + @test coth(@constinferred acoth(t8)) ≈ t8=# + # TODO in CUDA + end + end + end + # Sylvester not defined for CUDA + # @timedtestset "Sylvester equation" begin + # for T in (Float32, ComplexF64) + # tA = CUDA.rand(T, V1 ⊗ V3, V1 ⊗ V3) + # tB = CUDA.rand(T, V2 ⊗ V4, V2 ⊗ V4) + # tA = 3 // 2 * leftorth(tA; alg=Polar())[1] + # tB = 1 // 5 * leftorth(tB; alg=Polar())[1] + # tC = CUDA.rand(T, V1 ⊗ V3, V2 ⊗ V4) + # t = @constinferred sylvester(tA, tB, tC) + # @test codomain(t) == V1 ⊗ V3 + # @test domain(t) == V2 ⊗ V4 + # @test norm(tA * t + t * tB + tC) < + # (norm(tA) + norm(tB) + norm(tC)) * eps(real(T))^(2 / 3) + # if BraidingStyle(I) isa Bosonic && hasfusiontensor(I) + # matrix(x) = reshape(convert(Array, x), dim(codomain(x)), dim(domain(x))) + # @test matrix(t) ≈ sylvester(matrix(tA), matrix(tB), matrix(tC)) + # end + # end + # end + # + # TODO + @timedtestset "Tensor product: test via norm preservation" begin + for T in (Float32, ComplexF64) + t1 = CUDA.rand(T, V2 ⊗ V3 ⊗ V1, V1 ⊗ V2) + t2 = CUDA.rand(T, V2 ⊗ V1 ⊗ V3, V1 ⊗ V1) + CUDA.@allowscalar begin + t = @constinferred (t1 ⊗ t2) + end + @test norm(t) ≈ norm(t1) * norm(t2) + end + end + if BraidingStyle(I) isa Bosonic && hasfusiontensor(I) + @timedtestset "Tensor product: test via conversion" begin + for T in (Float32, ComplexF64) + t1 = CUDA.rand(T, V2 ⊗ V3 ⊗ V1, V1) + t2 = CUDA.rand(T, V2 ⊗ V1 ⊗ V3, V2) + d1 = dim(codomain(t1)) + d2 = dim(codomain(t2)) + d3 = dim(domain(t1)) + d4 = dim(domain(t2)) + CUDA.@allowscalar begin + t = @constinferred (t1 ⊗ t2) + At = ad(t) + @test ad(t) ≈ ad(t1) ⊗ ad(t2) + end + end + end + end + @timedtestset "Tensor product: test via tensor contraction" begin + for T in (Float32, ComplexF64) + t1 = CUDA.rand(T, V2 ⊗ V3 ⊗ V1) + t2 = CUDA.rand(T, V2 ⊗ V1 ⊗ V3) + CUDA.@allowscalar begin + t = @constinferred (t1 ⊗ t2) + @tensor t′[1, 2, 3, 4, 5, 6] := t1[1, 2, 3] * t2[4, 5, 6] + # @test t ≈ t′ # TODO broken for symmetry: Irrep[ℤ₃] + end + end + end + end + TensorKit.empty_globalcaches!() +end + +@timedtestset "Deligne tensor product: test via conversion" begin + Vlists1 = (Vtr,) # VSU₂) + Vlists2 = (Vtr,) # Vℤ₂) + @testset for Vlist1 in Vlists1, Vlist2 in Vlists2 + V1, V2, V3, V4, V5 = Vlist1 + W1, W2, W3, W4, W5 = Vlist2 + for T in (Float32, ComplexF64) + t1 = CUDA.rand(T, V1 ⊗ V2, V3' ⊗ V4) + t2 = CUDA.rand(T, W2, W1 ⊗ W1') + CUDA.@allowscalar begin + t = @constinferred (t1 ⊠ t2) + end + d1 = dim(codomain(t1)) + d2 = dim(codomain(t2)) + d3 = dim(domain(t1)) + d4 = dim(domain(t2)) + CUDA.@allowscalar begin + @test ad(t1) ⊠ ad(t2) ≈ ad(t1 ⊠ t2) + end + end + end +end diff --git a/test/runtests.jl b/test/runtests.jl index 854991a7f..9513378db 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -2,8 +2,6 @@ # ------------ using ArgParse: ArgParse using SafeTestsets: @safetestset -# using CUDA: CUDA -# using AMDGPU: AMDGPU function parse_commandline(args = ARGS) s = ArgParse.ArgParseSettings() @@ -49,9 +47,9 @@ istestfile(fn) = endswith(fn, ".jl") && !contains(fn, "setup") # handle GPU cases separately if group == "cuda" - # CUDA.functional() || continue - elseif group == "amd" - # AMDGPU.functional() || 
continue + using CUDA + CUDA.functional() || continue + @time include("cuda/tensors.jl") elseif is_buildkite continue end From b174787b3f2dade736ecddd45bd7c1d86f287a20 Mon Sep 17 00:00:00 2001 From: Katharine Hyatt Date: Thu, 18 Dec 2025 06:09:00 -0500 Subject: [PATCH 02/19] More small fixes --- test/cuda/tensors.jl | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/test/cuda/tensors.jl b/test/cuda/tensors.jl index 678d7fc2a..3a133c9cf 100644 --- a/test/cuda/tensors.jl +++ b/test/cuda/tensors.jl @@ -155,9 +155,9 @@ for V in spacelist i1 = @constinferred(isomorphism(T, V1 ⊗ V2, V2 ⊗ V1)) i2 = @constinferred(isomorphism(CuVector{T}, V2 ⊗ V1, V1 ⊗ V2)) CUDA.@allowscalar begin - @test i1 * i2 == @constinferred(id(T, V1 ⊗ V2)) + @test i1 * i2 == @constinferred(id(CuVector{T, CUDA.DeviceMemory}, V1 ⊗ V2)) @test i2 * i1 == @constinferred(id(CuVector{T, CUDA.DeviceMemory}, V2 ⊗ V1)) - w = @constinferred(isometry(T, V1 ⊗ (oneunit(V1) ⊕ oneunit(V1)), V1)) + w = @constinferred(isometry(CuVector{T, CUDA.DeviceMemory}, V1 ⊗ (oneunit(V1) ⊕ oneunit(V1)), V1)) @test dim(w) == 2 * dim(V1 ← V1) @test w' * w == id(CuVector{T, CUDA.DeviceMemory}, V1) @test w * w' == (w * w')^2 @@ -353,7 +353,7 @@ for V in spacelist end @test ta ≈ tb end - if BraidingStyle(I) isa Bosonic && hasfusiontensor(I) + #=if BraidingStyle(I) isa Bosonic && hasfusiontensor(I) @timedtestset "Tensor contraction: test via CPU" begin dA1 = CUDA.randn(ComplexF64, V1' * V2', V3') dA2 = CUDA.randn(ComplexF64, V3 * V4, V5) @@ -368,7 +368,7 @@ for V in spacelist collect(dH)[s1, s2, t1, t2] @test collect(dHrA12) ≈ hHrA12 end - end + end=# # doesn't yet work because of AdjointTensor @timedtestset "Index flipping: test flipping inverse" begin t = CUDA.rand(ComplexF64, V1 ⊗ V1' ← V1' ⊗ V1) for i in 1:4 @@ -478,15 +478,15 @@ for V in spacelist for T in (Float64, ComplexF64) t = CUDA.randn(T, W, W) s = dim(W) - @test (@constinferred sqrt(t))^2 ≈ t - #@test collect(sqrt(t)) ≈ sqrt(collect(t)) # schur not supported for CuArray + @test_broken (@constinferred sqrt(t))^2 ≈ t + @test_broken collect(sqrt(t)) ≈ sqrt(collect(t)) expt = @constinferred exp(t) - @test collect(expt) ≈ exp(collect(t)) + @test_broken collect(expt) ≈ exp(collect(t)) - # log doesn't work on CUDA yet - @test exp(@constinferred log(expt)) ≈ expt - @test collect(log(expt)) ≈ log(collect(expt)) + # log doesn't work on CUDA yet (scalar indexing) + #@test exp(@constinferred log(expt)) ≈ expt + #@test collect(log(expt)) ≈ log(collect(expt)) #=@test (@constinferred cos(t))^2 + (@constinferred sin(t))^2 ≈ id(storagetype(t), W) From 7da48358a184667ecc4e1768f5025b55aba50bee Mon Sep 17 00:00:00 2001 From: Katharine Hyatt Date: Thu, 18 Dec 2025 08:59:01 -0500 Subject: [PATCH 03/19] move collect in --- ext/TensorKitCUDAExt/cutensormap.jl | 4 ---- src/tensors/tensor.jl | 4 ++++ test/cuda/tensors.jl | 28 ++++++++++++++-------------- 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/ext/TensorKitCUDAExt/cutensormap.jl b/ext/TensorKitCUDAExt/cutensormap.jl index 5ae602cfa..9496a74c1 100644 --- a/ext/TensorKitCUDAExt/cutensormap.jl +++ b/ext/TensorKitCUDAExt/cutensormap.jl @@ -7,10 +7,6 @@ function CuTensorMap(t::TensorMap{T, S, N₁, N₂, A}) where {T, S, N₁, N₂, return CuTensorMap{T, S, N₁, N₂}(CuArray{T}(t.data), space(t)) end -function Base.collect(t::CuTensorMap{T}) where {T} - return convert(TensorKit.TensorMapWithStorage{T, Vector{T}}, t) -end - # project_symmetric! 
doesn't yet work for GPU types, so do this on the host, then copy function TensorKit.project_symmetric_and_check(::Type{T}, ::Type{A}, data::AbstractArray, V::TensorMapSpace; tol = sqrt(eps(real(float(eltype(data)))))) where {T, A <: CuVector{T}} h_t = TensorKit.TensorMapWithStorage{T, Vector{T}}(undef, V) diff --git a/src/tensors/tensor.jl b/src/tensors/tensor.jl index c6a441a12..0f6b2f75d 100644 --- a/src/tensors/tensor.jl +++ b/src/tensors/tensor.jl @@ -403,6 +403,10 @@ for randf in (:rand, :randn, :randexp, :randisometry) end end +# Collecting arbitrary TensorMaps +#----------------------------- +Base.collect(t::TensorMap) = convert(TensorMapWithStorage{scalartype(t), similarstoragetype(scalartype(t))}, t) + # Efficient copy constructors #----------------------------- Base.copy(t::TensorMap) = typeof(t)(copy(t.data), t.space) diff --git a/test/cuda/tensors.jl b/test/cuda/tensors.jl index 3a133c9cf..9ce69fdfe 100644 --- a/test/cuda/tensors.jl +++ b/test/cuda/tensors.jl @@ -152,16 +152,14 @@ for V in spacelist @test dot(t2, t) ≈ conj(dot(t2', t')) @test dot(t2, t) ≈ dot(t', t2') - i1 = @constinferred(isomorphism(T, V1 ⊗ V2, V2 ⊗ V1)) - i2 = @constinferred(isomorphism(CuVector{T}, V2 ⊗ V1, V1 ⊗ V2)) - CUDA.@allowscalar begin - @test i1 * i2 == @constinferred(id(CuVector{T, CUDA.DeviceMemory}, V1 ⊗ V2)) - @test i2 * i1 == @constinferred(id(CuVector{T, CUDA.DeviceMemory}, V2 ⊗ V1)) - w = @constinferred(isometry(CuVector{T, CUDA.DeviceMemory}, V1 ⊗ (oneunit(V1) ⊕ oneunit(V1)), V1)) - @test dim(w) == 2 * dim(V1 ← V1) - @test w' * w == id(CuVector{T, CUDA.DeviceMemory}, V1) - @test w * w' == (w * w')^2 - end + i1 = @constinferred(isomorphism(CuVector{T, CUDA.DeviceMemory}, V1 ⊗ V2, V2 ⊗ V1)) + i2 = @constinferred(isomorphism(CuVector{T, CUDA.DeviceMemory}, V2 ⊗ V1, V1 ⊗ V2)) + @test i1 * i2 == @constinferred(id(CuVector{T, CUDA.DeviceMemory}, V1 ⊗ V2)) + @test i2 * i1 == @constinferred(id(CuVector{T, CUDA.DeviceMemory}, V2 ⊗ V1)) + w = @constinferred(isometry(CuVector{T, CUDA.DeviceMemory}, V1 ⊗ (oneunit(V1) ⊕ oneunit(V1)), V1)) + @test dim(w) == 2 * dim(V1 ← V1) + @test w' * w == id(CuVector{T, CUDA.DeviceMemory}, V1) + @test w * w' == (w * w')^2 end end @timedtestset "Trivial space insertion and removal" begin @@ -238,11 +236,11 @@ for V in spacelist @timedtestset "Tensor conversion" begin # TODO adjoint conversion methods don't work yet W = V1 ⊗ V2 t = @constinferred CUDA.randn(W ← W) - @test typeof(convert(TensorMap, t')) == typeof(t) + #@test typeof(convert(TensorMap, t')) == typeof(t) # TODO Adjoint not supported yet tc = complex(t) @test convert(typeof(tc), t) == tc @test typeof(convert(typeof(tc), t)) == typeof(tc) - @test typeof(convert(typeof(tc), t')) == typeof(tc) + # @test typeof(convert(typeof(tc), t')) == typeof(tc) # TODO Adjoint not supported yet @test Base.promote_typeof(t, tc) == typeof(tc) @test Base.promote_typeof(tc, t) == typeof(tc + t) end @@ -294,8 +292,10 @@ for V in spacelist t2 = CUDA.@allowscalar permute(t, (p1, p2)) a2 = convert(Array, collect(t2)) @test a2 ≈ permutedims(a, (p1..., p2...)) - @test convert(Array, collect(transpose(t2))) ≈ - permutedims(a2, (5, 4, 3, 2, 1)) + CUDA.@allowscalar begin + @test convert(Array, collect(transpose(t2))) ≈ + permutedims(a2, (5, 4, 3, 2, 1)) + end end t3 = CUDA.@allowscalar repartition(t, k) From e4f6d824907147157488ff4f4ee14ec5e50294e7 Mon Sep 17 00:00:00 2001 From: Katharine Hyatt Date: Thu, 18 Dec 2025 09:47:33 -0500 Subject: [PATCH 04/19] More test via CPU --- src/tensors/tensor.jl | 2 +- test/cuda/tensors.jl | 18 
++++++------------ 2 files changed, 7 insertions(+), 13 deletions(-) diff --git a/src/tensors/tensor.jl b/src/tensors/tensor.jl index 0f6b2f75d..a7364435d 100644 --- a/src/tensors/tensor.jl +++ b/src/tensors/tensor.jl @@ -403,7 +403,7 @@ for randf in (:rand, :randn, :randexp, :randisometry) end end -# Collecting arbitrary TensorMaps +# Collecting arbitrary TensorMaps #----------------------------- Base.collect(t::TensorMap) = convert(TensorMapWithStorage{scalartype(t), similarstoragetype(scalartype(t))}, t) diff --git a/test/cuda/tensors.jl b/test/cuda/tensors.jl index 9ce69fdfe..ac8fe36d1 100644 --- a/test/cuda/tensors.jl +++ b/test/cuda/tensors.jl @@ -289,20 +289,14 @@ for V in spacelist for p in permutations(1:5) p1 = ntuple(n -> p[n], k) p2 = ntuple(n -> p[k + n], 5 - k) - t2 = CUDA.@allowscalar permute(t, (p1, p2)) - a2 = convert(Array, collect(t2)) - @test a2 ≈ permutedims(a, (p1..., p2...)) - CUDA.@allowscalar begin - @test convert(Array, collect(transpose(t2))) ≈ - permutedims(a2, (5, 4, 3, 2, 1)) - end + dt2 = CUDA.@allowscalar permute(t, (p1, p2)) + ht2 = permute(collect(t), (p1, p2)) + @test ht2 == collect(dt2) end - t3 = CUDA.@allowscalar repartition(t, k) - a3 = convert(Array, collect(t3)) - @test a3 ≈ permutedims( - a, (ntuple(identity, k)..., reverse(ntuple(i -> i + k, 5 - k))...) - ) + dt3 = CUDA.@allowscalar repartition(t, k) + ht3 = repartition(collect(t), k) + @test ht3 == collect(dt3) end end end From a384cdc03582e922bc41bd1cef412678ebbd3594 Mon Sep 17 00:00:00 2001 From: Katharine Hyatt Date: Fri, 19 Dec 2025 12:06:51 +0100 Subject: [PATCH 05/19] Update example --- src/tensors/tensor.jl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/tensors/tensor.jl b/src/tensors/tensor.jl index a7364435d..8bd110cc4 100644 --- a/src/tensors/tensor.jl +++ b/src/tensors/tensor.jl @@ -322,8 +322,9 @@ for randf in (:rand, :randn, :randexp, :randisometry) Generate a tensor `t` with entries generated by `$randf`. The type `TorA` can be used to control the element type and - data type generated. For example, if `TorA` is a `SparseVector{ComplexF32}`, - then the final output `TensorMap` will have that as its storage type. + data type generated. For example, if `TorA` is a `CuVector{ComplexF32}` + or `ROCVector{Float64}`, then the final output `TensorMap` will have + that as its storage type. See also [`Random.$(randf)!`](@ref). 
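+
+    For instance, `$randf(CuVector{ComplexF32}, ℂ^2 ⊗ ℂ^3)` would then return a
+    `TensorMap` with GPU storage, provided the corresponding package extension
+    (here TensorKitCUDAExt) is loaded.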
""" From 084f2ee04588a85f104297261c274400cf93b1a5 Mon Sep 17 00:00:00 2001 From: Katharine Hyatt Date: Fri, 19 Dec 2025 06:57:14 -0500 Subject: [PATCH 06/19] Comments --- ext/TensorKitCUDAExt/TensorKitCUDAExt.jl | 2 +- ext/TensorKitCUDAExt/cutensormap.jl | 13 +++++++++++-- src/tensors/linalg.jl | 4 ++-- test/cuda/tensors.jl | 11 +++++++++++ 4 files changed, 25 insertions(+), 5 deletions(-) diff --git a/ext/TensorKitCUDAExt/TensorKitCUDAExt.jl b/ext/TensorKitCUDAExt/TensorKitCUDAExt.jl index 0562acf61..417970a02 100644 --- a/ext/TensorKitCUDAExt/TensorKitCUDAExt.jl +++ b/ext/TensorKitCUDAExt/TensorKitCUDAExt.jl @@ -12,7 +12,7 @@ using TensorKit.Factorizations: AbstractAlgorithm using TensorKit: SectorDict, tensormaptype, scalar, similarstoragetype, AdjointTensorMap, scalartype, project_symmetric_and_check import TensorKit: randisometry, rand, randn -using TensorKit.MatrixAlgebraKit +using TensorKit: MatrixAlgebraKit using Random diff --git a/ext/TensorKitCUDAExt/cutensormap.jl b/ext/TensorKitCUDAExt/cutensormap.jl index 9496a74c1..e17547789 100644 --- a/ext/TensorKitCUDAExt/cutensormap.jl +++ b/ext/TensorKitCUDAExt/cutensormap.jl @@ -84,6 +84,13 @@ for randfun in (:curand, :curandn) $randfun!(rng, t) return t end + + function $randfun!(rng::Random.AbstractRNG, t::CuTensorMap) + for (_, b) in blocks(t) + $randfun!(rng, b) + end + return t + end end end @@ -112,7 +119,7 @@ function LinearAlgebra.isposdef(t::CuTensorMap) InnerProductStyle(spacetype(t)) === EuclideanInnerProduct() || return false for (c, b) in blocks(t) # do our own hermitian check - isherm = TensorKit.MatrixAlgebraKit.ishermitian(b; atol = eps(real(eltype(b))), rtol = eps(real(eltype(b)))) + isherm = MatrixAlgebraKit.ishermitian(b; atol = eps(real(eltype(b))), rtol = eps(real(eltype(b)))) isherm || return false isposdef(Hermitian(b)) || return false end @@ -135,6 +142,7 @@ end function TensorKit.exp!(t::CuTensorMap) domain(t) == codomain(t) || error("Exponential of a tensor only exist when domain == codomain.") + !MatrixAlgebraKit.ishermitian(t) && throw(ArgumentError("`exp!` is only supported on hermitian CUDA tensors")) for (c, b) in blocks(t) copy!(b, parent(Base.exp(Hermitian(b)))) end @@ -146,7 +154,8 @@ for f in (:sqrt, :log, :asin, :acos, :acosh, :atanh, :acoth) sf = string(f) @eval function Base.$f(t::CuTensorMap) domain(t) == codomain(t) || - throw(SpaceMismatch("`$($sf)` of a tensor only exist when domain == codomain")) + throw(SpaceMismatch("`$($sf)` of a tensor only exists when domain == codomain")) + !MatrixAlgebraKit.ishermitian(t) && throw(ArgumentError("`$($sf)` is only supported on hermitian CUDA tensors")) T = complex(float(scalartype(t))) tf = similar(t, T) for (c, b) in blocks(t) diff --git a/src/tensors/linalg.jl b/src/tensors/linalg.jl index caae1d488..cc19ceaef 100644 --- a/src/tensors/linalg.jl +++ b/src/tensors/linalg.jl @@ -289,7 +289,7 @@ function LinearAlgebra.rank( ) r = 0 * dim(first(allunits(sectortype(t)))) dim(t) == 0 && return r - S = LinearAlgebra.svdvals(t) + S = svd_vals(t) tol = max(atol, rtol * maximum(parent(S))) for (c, b) in pairs(S) if !isempty(b) @@ -307,7 +307,7 @@ function LinearAlgebra.cond(t::AbstractTensorMap, p::Real = 2) throw(SpaceMismatch("`cond` requires domain and codomain to be the same")) return zero(real(float(scalartype(t)))) end - S = LinearAlgebra.svdvals(t) + S = svd_vals(t) maxS = maximum(parent(S)) minS = minimum(parent(S)) return iszero(maxS) ? 
oftype(maxS, Inf) : (maxS / minS) diff --git a/test/cuda/tensors.jl b/test/cuda/tensors.jl index ac8fe36d1..67981b5ae 100644 --- a/test/cuda/tensors.jl +++ b/test/cuda/tensors.jl @@ -7,6 +7,8 @@ const CUDAExt = Base.get_extension(TensorKit, :TensorKitCUDAExt) const CuTensorMap = getglobal(CUDAExt, :CuTensorMap) const curand = getglobal(CUDAExt, :curand) const curandn = getglobal(CUDAExt, :curandn) +const curand! = getglobal(CUDAExt, :curand!) +const curandn! = getglobal(CUDAExt, :curandn!) @isdefined(TestSetup) || include("../setup.jl") using .TestSetup @@ -61,6 +63,15 @@ for V in spacelist @test domain(t) == one(W) @test typeof(t) == TensorMap{Float64, spacetype(t), 5, 0, CuVector{Float64, CUDA.DeviceMemory}} end + for f! in (curand!, curandn!) + t = @constinferred CUDA.zeros(W) + f!(t) + @test scalartype(t) == Float64 + @test codomain(t) == W + @test space(t) == (W ← one(W)) + @test domain(t) == one(W) + @test typeof(t) == TensorMap{Float64, spacetype(t), 5, 0, CuVector{Float64, CUDA.DeviceMemory}} + end for T in (Int, Float32, Float64, ComplexF32, ComplexF64) t = @constinferred CUDA.zeros(T, W) CUDA.@allowscalar begin From bd1966c0ff93b3dd318887e6eff0e84457bc84fd Mon Sep 17 00:00:00 2001 From: Katharine Hyatt Date: Fri, 19 Dec 2025 07:02:31 -0500 Subject: [PATCH 07/19] Fixup diagonal --- src/tensors/diagonal.jl | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/tensors/diagonal.jl b/src/tensors/diagonal.jl index 356f5dbf4..e35c2ffdd 100644 --- a/src/tensors/diagonal.jl +++ b/src/tensors/diagonal.jl @@ -78,10 +78,12 @@ function DiagonalTensorMap(t::AbstractTensorMap{T, S, 1, 1}) where {T, S} return d end -Base.similar(d::DiagonalTensorMap) = DiagonalTensorMap(similar(d.data), d.domain) -function Base.similar(d::DiagonalTensorMap, ::Type{T}) where {T <: Number} - return DiagonalTensorMap(similar(d.data, T), d.domain) -end +Base.similar(d::DiagonalTensorMap) = similar_diagonal(d) +Base.similar(d::DiagonalTensorMap, ::Type{T}) where {T} = similar_diagonal(d, T) + +similar_diagonal(d::DiagonalTensorMap) = DiagonalTensorMap(similar(d.data), d.domain) +similar_diagonal(d::DiagonalTensorMap, ::Type{T}) where {T <: Number} = + DiagonalTensorMap(similar(d.data, T), d.domain) # TODO: more constructors needed? 
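
[Editorial usage sketch, not part of the patch series: the in-place fill methods
from PATCH 06 as exercised in test/cuda/tensors.jl. Assumes a functional CUDA
device; `W` is a placeholder space.]

    using TensorKit, CUDA, cuTENSOR
    using CUDA: randn! as curandn!

    W = ℂ^2 ⊗ ℂ^3
    t = CUDA.zeros(Float64, W)   # CuTensorMap with all blocks zeroed (PATCH 01)
    curandn!(t)                  # refill every block in place on the device (PATCH 06)
    @assert norm(t) > 0
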
From 832ed15681fdba4c384c2fe22cda253182ec323f Mon Sep 17 00:00:00 2001 From: Katharine Hyatt Date: Fri, 19 Dec 2025 10:08:59 -0500 Subject: [PATCH 08/19] More comments --- ext/TensorKitCUDAExt/cutensormap.jl | 4 +- src/tensors/linalg.jl | 4 +- src/tensors/tensor.jl | 5 +- test/cuda/tensors.jl | 78 ++++++++++++++--------------- 4 files changed, 46 insertions(+), 45 deletions(-) diff --git a/ext/TensorKitCUDAExt/cutensormap.jl b/ext/TensorKitCUDAExt/cutensormap.jl index e17547789..b4aa7f7a2 100644 --- a/ext/TensorKitCUDAExt/cutensormap.jl +++ b/ext/TensorKitCUDAExt/cutensormap.jl @@ -142,7 +142,7 @@ end function TensorKit.exp!(t::CuTensorMap) domain(t) == codomain(t) || error("Exponential of a tensor only exist when domain == codomain.") - !MatrixAlgebraKit.ishermitian(t) && throw(ArgumentError("`exp!` is only supported on hermitian CUDA tensors")) + !MatrixAlgebraKit.ishermitian(t) && throw(ArgumentError("`exp!` is currently only supported on hermitian CUDA tensors")) for (c, b) in blocks(t) copy!(b, parent(Base.exp(Hermitian(b)))) end @@ -155,7 +155,7 @@ for f in (:sqrt, :log, :asin, :acos, :acosh, :atanh, :acoth) @eval function Base.$f(t::CuTensorMap) domain(t) == codomain(t) || throw(SpaceMismatch("`$($sf)` of a tensor only exists when domain == codomain")) - !MatrixAlgebraKit.ishermitian(t) && throw(ArgumentError("`$($sf)` is only supported on hermitian CUDA tensors")) + !MatrixAlgebraKit.ishermitian(t) && throw(ArgumentError("`$($sf)` is currently only supported on hermitian CUDA tensors")) T = complex(float(scalartype(t))) tf = similar(t, T) for (c, b) in blocks(t) diff --git a/src/tensors/linalg.jl b/src/tensors/linalg.jl index cc19ceaef..4b2c7d4ee 100644 --- a/src/tensors/linalg.jl +++ b/src/tensors/linalg.jl @@ -289,7 +289,7 @@ function LinearAlgebra.rank( ) r = 0 * dim(first(allunits(sectortype(t)))) dim(t) == 0 && return r - S = svd_vals(t) + S = MatrixAlgebraKit.svd_vals(t) tol = max(atol, rtol * maximum(parent(S))) for (c, b) in pairs(S) if !isempty(b) @@ -307,7 +307,7 @@ function LinearAlgebra.cond(t::AbstractTensorMap, p::Real = 2) throw(SpaceMismatch("`cond` requires domain and codomain to be the same")) return zero(real(float(scalartype(t)))) end - S = svd_vals(t) + S = MatrixAlgebraKit.svd_vals(t) maxS = maximum(parent(S)) minS = minimum(parent(S)) return iszero(maxS) ? 
oftype(maxS, Inf) : (maxS / minS) diff --git a/src/tensors/tensor.jl b/src/tensors/tensor.jl index 8bd110cc4..d8118bcd7 100644 --- a/src/tensors/tensor.jl +++ b/src/tensors/tensor.jl @@ -404,9 +404,10 @@ for randf in (:rand, :randn, :randexp, :randisometry) end end -# Collecting arbitrary TensorMaps +# Moving arbitrary TensorMaps to CPU #----------------------------- -Base.collect(t::TensorMap) = convert(TensorMapWithStorage{scalartype(t), similarstoragetype(scalartype(t))}, t) +to_cpu(t::TensorMapWithStorage{T, Vector{T}}) where {T} = t # no op +to_cpu(t::TensorMap) = convert(TensorMapWithStorage{scalartype(t), similarstoragetype(scalartype(t))}, t) # Efficient copy constructors #----------------------------- diff --git a/test/cuda/tensors.jl b/test/cuda/tensors.jl index 67981b5ae..f453bcda2 100644 --- a/test/cuda/tensors.jl +++ b/test/cuda/tensors.jl @@ -120,7 +120,7 @@ for V in spacelist for T in (Int, Float32, ComplexF64) t = @constinferred CUDA.rand(T, W) d = convert(Dict, t) - @test collect(t) == convert(TensorMap, d) + @test TensorKit.to_cpu(t) == convert(TensorMap, d) end end @timedtestset "Basic linear algebra" begin @@ -212,10 +212,10 @@ for V in spacelist t = CUDA.rand(T, W) t2 = @constinferred CUDA.rand!(similar(t)) α = rand(T) - @test norm(t, 2) ≈ norm(collect(t), 2) - @test dot(t2, t) ≈ dot(collect(t2), collect(t)) - @test collect(α * t) ≈ α * collect(t) - @test collect(t + t) ≈ 2 * collect(t) + @test norm(t, 2) ≈ norm(TensorKit.to_cpu(t), 2) + @test dot(t2, t) ≈ dot(TensorKit.to_cpu(t2), TensorKit.to_cpu(t)) + @test TensorKit.to_cpu(α * t) ≈ α * TensorKit.to_cpu(t) + @test TensorKit.to_cpu(t + t) ≈ 2 * TensorKit.to_cpu(t) end end @timedtestset "Real and imaginary parts" begin @@ -225,17 +225,17 @@ for V in spacelist tr = @constinferred real(t) @test scalartype(tr) <: Real - @test real(collect(t)) == collect(tr) + @test real(TensorKit.to_cpu(t)) == TensorKit.to_cpu(tr) @test storagetype(tr) == CuVector{real(T), CUDA.DeviceMemory} ti = @constinferred imag(t) @test scalartype(ti) <: Real - @test imag(collect(t)) == collect(ti) + @test imag(TensorKit.to_cpu(t)) == TensorKit.to_cpu(ti) @test storagetype(ti) == CuVector{real(T), CUDA.DeviceMemory} tc = @inferred complex(t) @test scalartype(tc) <: Complex - @test complex(collect(t)) == collect(tc) + @test complex(TensorKit.to_cpu(t)) == TensorKit.to_cpu(tc) @test storagetype(tc) == CuVector{complex(T), CUDA.DeviceMemory} tc2 = @inferred complex(tr, ti) @@ -295,19 +295,19 @@ for V in spacelist @timedtestset "Permutations: test via CPU" begin W = V1 ⊗ V2 ⊗ V3 ⊗ V4 ⊗ V5 t = CUDA.rand(ComplexF64, W) - a = convert(Array, collect(t)) + a = convert(Array, TensorKit.to_cpu(t)) for k in 0:5 for p in permutations(1:5) p1 = ntuple(n -> p[n], k) p2 = ntuple(n -> p[k + n], 5 - k) dt2 = CUDA.@allowscalar permute(t, (p1, p2)) - ht2 = permute(collect(t), (p1, p2)) - @test ht2 == collect(dt2) + ht2 = permute(TensorKit.to_cpu(t), (p1, p2)) + @test ht2 == TensorKit.to_cpu(dt2) end dt3 = CUDA.@allowscalar repartition(t, k) - ht3 = repartition(collect(t), k) - @test ht3 == collect(dt3) + ht3 = repartition(TensorKit.to_cpu(t), k) + @test ht3 == TensorKit.to_cpu(dt3) end end end @@ -368,10 +368,10 @@ for V in spacelist @tensor dHrA12[a, s1, s2, c] := drhoL[a, a'] * conj(dA1[a', t1, b]) * dA2[b, t2, c'] * drhoR[c', c] * dH[s1, s2, t1, t2] - @tensor hHrA12[a, s1, s2, c] := collect(drhoL)[a, a'] * conj(collect(dA1)[a', t1, b]) * - collect(dA2)[b, t2, c'] * collect(drhoR)[c', c] * - collect(dH)[s1, s2, t1, t2] - @test collect(dHrA12) ≈ hHrA12 + @tensor 
hHrA12[a, s1, s2, c] := TensorKit.to_cpu(drhoL)[a, a'] * conj(TensorKit.to_cpu(dA1)[a', t1, b]) * + TensorKit.to_cpu(dA2)[b, t2, c'] * TensorKit.to_cpu(drhoR)[c', c] * + TensorKit.to_cpu(dH)[s1, s2, t1, t2] + @test TensorKit.to_cpu(dHrA12) ≈ hHrA12 end end=# # doesn't yet work because of AdjointTensor @timedtestset "Index flipping: test flipping inverse" begin @@ -450,48 +450,48 @@ for V in spacelist t1 = CUDA.rand(T, W1, W1) t2 = CUDA.rand(T, W2, W2) t = CUDA.rand(T, W1, W2) - ht1 = collect(t1) - ht2 = collect(t2) - ht = collect(t) - @test collect(t1 * t) ≈ ht1 * ht - @test collect(t1' * t) ≈ ht1' * ht - @test collect(t2 * t') ≈ ht2 * ht' - @test collect(t2' * t') ≈ ht2' * ht' + ht1 = TensorKit.to_cpu(t1) + ht2 = TensorKit.to_cpu(t2) + ht = TensorKit.to_cpu(t) + @test TensorKit.to_cpu(t1 * t) ≈ ht1 * ht + @test TensorKit.to_cpu(t1' * t) ≈ ht1' * ht + @test TensorKit.to_cpu(t2 * t') ≈ ht2 * ht' + @test TensorKit.to_cpu(t2' * t') ≈ ht2' * ht' - @test collect(inv(t1)) ≈ inv(ht1) - @test collect(pinv(t)) ≈ pinv(ht) + @test TensorKit.to_cpu(inv(t1)) ≈ inv(ht1) + @test TensorKit.to_cpu(pinv(t)) ≈ pinv(ht) if T == Float32 || T == ComplexF32 continue end - @test collect(t1 \ t) ≈ ht1 \ ht - @test collect(t1' \ t) ≈ ht1' \ ht - @test collect(t2 \ t') ≈ ht2 \ ht' - @test collect(t2' \ t') ≈ ht2' \ ht' + @test TensorKit.to_cpu(t1 \ t) ≈ ht1 \ ht + @test TensorKit.to_cpu(t1' \ t) ≈ ht1' \ ht + @test TensorKit.to_cpu(t2 \ t') ≈ ht2 \ ht' + @test TensorKit.to_cpu(t2' \ t') ≈ ht2' \ ht' - @test collect(t2 / t) ≈ ht2 / ht - @test collect(t2' / t) ≈ ht2' / ht - @test collect(t1 / t') ≈ ht1 / ht' - @test collect(t1' / t') ≈ ht1' / ht' + @test TensorKit.to_cpu(t2 / t) ≈ ht2 / ht + @test TensorKit.to_cpu(t2' / t) ≈ ht2' / ht + @test TensorKit.to_cpu(t1 / t') ≈ ht1 / ht' + @test TensorKit.to_cpu(t1' / t') ≈ ht1' / ht' end end end if BraidingStyle(I) isa Bosonic && hasfusiontensor(I) - @timedtestset "Tensor functions" begin + #=@timedtestset "Tensor functions" begin W = V1 ⊗ V2 for T in (Float64, ComplexF64) t = CUDA.randn(T, W, W) s = dim(W) @test_broken (@constinferred sqrt(t))^2 ≈ t - @test_broken collect(sqrt(t)) ≈ sqrt(collect(t)) + @test_broken TensorKit.to_cpu(sqrt(t)) ≈ sqrt(TensorKit.to_cpu(t)) expt = @constinferred exp(t) - @test_broken collect(expt) ≈ exp(collect(t)) + @test_broken TensorKit.to_cpu(expt) ≈ exp(TensorKit.to_cpu(t)) # log doesn't work on CUDA yet (scalar indexing) #@test exp(@constinferred log(expt)) ≈ expt - #@test collect(log(expt)) ≈ log(collect(expt)) + #@test TensorKit.to_cpu(log(expt)) ≈ log(TensorKit.to_cpu(expt)) #=@test (@constinferred cos(t))^2 + (@constinferred sin(t))^2 ≈ id(storagetype(t), W) @@ -520,7 +520,7 @@ for V in spacelist @test coth(@constinferred acoth(t8)) ≈ t8=# # TODO in CUDA end - end + end=# end # Sylvester not defined for CUDA # @timedtestset "Sylvester equation" begin From 68b282a720e998d722125bf6df38dbe3e9aab261 Mon Sep 17 00:00:00 2001 From: Katharine Hyatt Date: Fri, 19 Dec 2025 18:07:59 +0100 Subject: [PATCH 09/19] Update test/cuda/tensors.jl Co-authored-by: Lukas Devos --- test/cuda/tensors.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/cuda/tensors.jl b/test/cuda/tensors.jl index f453bcda2..1c8125ba2 100644 --- a/test/cuda/tensors.jl +++ b/test/cuda/tensors.jl @@ -8,7 +8,7 @@ const CuTensorMap = getglobal(CUDAExt, :CuTensorMap) const curand = getglobal(CUDAExt, :curand) const curandn = getglobal(CUDAExt, :curandn) const curand! = getglobal(CUDAExt, :curand!) -const curandn! = getglobal(CUDAExt, :curandn!) 
+using CUDA: rand as curand, rand! as curand!, randn as curandn, randn! as curandn! @isdefined(TestSetup) || include("../setup.jl") using .TestSetup From f188aed7a8a0b6a1b0bb63f6dc2ec322c2c82026 Mon Sep 17 00:00:00 2001 From: Katharine Hyatt Date: Fri, 19 Dec 2025 18:08:07 +0100 Subject: [PATCH 10/19] Update test/cuda/tensors.jl Co-authored-by: Lukas Devos --- test/cuda/tensors.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/test/cuda/tensors.jl b/test/cuda/tensors.jl index 1c8125ba2..ae979c01e 100644 --- a/test/cuda/tensors.jl +++ b/test/cuda/tensors.jl @@ -295,7 +295,6 @@ for V in spacelist @timedtestset "Permutations: test via CPU" begin W = V1 ⊗ V2 ⊗ V3 ⊗ V4 ⊗ V5 t = CUDA.rand(ComplexF64, W) - a = convert(Array, TensorKit.to_cpu(t)) for k in 0:5 for p in permutations(1:5) p1 = ntuple(n -> p[n], k) From 329afe6342e79bdd4430e0dea10a6f6aa078328c Mon Sep 17 00:00:00 2001 From: Katharine Hyatt Date: Fri, 19 Dec 2025 18:08:17 +0100 Subject: [PATCH 11/19] Update test/cuda/tensors.jl Co-authored-by: Lukas Devos --- test/cuda/tensors.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/test/cuda/tensors.jl b/test/cuda/tensors.jl index ae979c01e..c5863ac43 100644 --- a/test/cuda/tensors.jl +++ b/test/cuda/tensors.jl @@ -441,7 +441,6 @@ for V in spacelist @test tp ≈ tp * tp end end - if BraidingStyle(I) isa Bosonic && hasfusiontensor(I) @timedtestset "Multiplication and inverse: test via CPU" begin W1 = V1 ⊗ V2 ⊗ V3 W2 = V4 ⊗ V5 From e55f3380673cdb07480be5f053eb8e9ead85942b Mon Sep 17 00:00:00 2001 From: Katharine Hyatt Date: Fri, 19 Dec 2025 18:08:28 +0100 Subject: [PATCH 12/19] Update test/cuda/tensors.jl Co-authored-by: Lukas Devos --- test/cuda/tensors.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/cuda/tensors.jl b/test/cuda/tensors.jl index c5863ac43..4ebf99c73 100644 --- a/test/cuda/tensors.jl +++ b/test/cuda/tensors.jl @@ -291,7 +291,7 @@ for V in spacelist end end end - if BraidingStyle(I) isa Bosonic && hasfusiontensor(I) + if BraidingStyle(I) isa SymmetricBraiding @timedtestset "Permutations: test via CPU" begin W = V1 ⊗ V2 ⊗ V3 ⊗ V4 ⊗ V5 t = CUDA.rand(ComplexF64, W) From 61ba421d1bc0c52a17802e10815faa47cfe5ac52 Mon Sep 17 00:00:00 2001 From: Katharine Hyatt Date: Mon, 22 Dec 2025 05:39:43 -0500 Subject: [PATCH 13/19] Fix bad end --- test/cuda/tensors.jl | 55 ++++++++++++++++++++++---------------------- 1 file changed, 27 insertions(+), 28 deletions(-) diff --git a/test/cuda/tensors.jl b/test/cuda/tensors.jl index 4ebf99c73..9f52161a8 100644 --- a/test/cuda/tensors.jl +++ b/test/cuda/tensors.jl @@ -441,38 +441,37 @@ for V in spacelist @test tp ≈ tp * tp end end - @timedtestset "Multiplication and inverse: test via CPU" begin - W1 = V1 ⊗ V2 ⊗ V3 - W2 = V4 ⊗ V5 - for T in (Float32, Float64, ComplexF32, ComplexF64) - t1 = CUDA.rand(T, W1, W1) - t2 = CUDA.rand(T, W2, W2) - t = CUDA.rand(T, W1, W2) - ht1 = TensorKit.to_cpu(t1) - ht2 = TensorKit.to_cpu(t2) - ht = TensorKit.to_cpu(t) - @test TensorKit.to_cpu(t1 * t) ≈ ht1 * ht - @test TensorKit.to_cpu(t1' * t) ≈ ht1' * ht - @test TensorKit.to_cpu(t2 * t') ≈ ht2 * ht' - @test TensorKit.to_cpu(t2' * t') ≈ ht2' * ht' + @timedtestset "Multiplication and inverse: test via CPU" begin + W1 = V1 ⊗ V2 ⊗ V3 + W2 = V4 ⊗ V5 + for T in (Float32, Float64, ComplexF32, ComplexF64) + t1 = CUDA.rand(T, W1, W1) + t2 = CUDA.rand(T, W2, W2) + t = CUDA.rand(T, W1, W2) + ht1 = TensorKit.to_cpu(t1) + ht2 = TensorKit.to_cpu(t2) + ht = TensorKit.to_cpu(t) + @test TensorKit.to_cpu(t1 * t) ≈ ht1 * ht + @test TensorKit.to_cpu(t1' * t) ≈ ht1' 
* ht + @test TensorKit.to_cpu(t2 * t') ≈ ht2 * ht' + @test TensorKit.to_cpu(t2' * t') ≈ ht2' * ht' - @test TensorKit.to_cpu(inv(t1)) ≈ inv(ht1) - @test TensorKit.to_cpu(pinv(t)) ≈ pinv(ht) + @test TensorKit.to_cpu(inv(t1)) ≈ inv(ht1) + @test TensorKit.to_cpu(pinv(t)) ≈ pinv(ht) - if T == Float32 || T == ComplexF32 - continue - end + if T == Float32 || T == ComplexF32 + continue + end - @test TensorKit.to_cpu(t1 \ t) ≈ ht1 \ ht - @test TensorKit.to_cpu(t1' \ t) ≈ ht1' \ ht - @test TensorKit.to_cpu(t2 \ t') ≈ ht2 \ ht' - @test TensorKit.to_cpu(t2' \ t') ≈ ht2' \ ht' + @test TensorKit.to_cpu(t1 \ t) ≈ ht1 \ ht + @test TensorKit.to_cpu(t1' \ t) ≈ ht1' \ ht + @test TensorKit.to_cpu(t2 \ t') ≈ ht2 \ ht' + @test TensorKit.to_cpu(t2' \ t') ≈ ht2' \ ht' - @test TensorKit.to_cpu(t2 / t) ≈ ht2 / ht - @test TensorKit.to_cpu(t2' / t) ≈ ht2' / ht - @test TensorKit.to_cpu(t1 / t') ≈ ht1 / ht' - @test TensorKit.to_cpu(t1' / t') ≈ ht1' / ht' - end + @test TensorKit.to_cpu(t2 / t) ≈ ht2 / ht + @test TensorKit.to_cpu(t2' / t) ≈ ht2' / ht + @test TensorKit.to_cpu(t1 / t') ≈ ht1 / ht' + @test TensorKit.to_cpu(t1' / t') ≈ ht1' / ht' end end if BraidingStyle(I) isa Bosonic && hasfusiontensor(I) From 1c75de55adfa3847bf53af7fb9cfb294eaf4dcc3 Mon Sep 17 00:00:00 2001 From: Katharine Hyatt Date: Mon, 22 Dec 2025 05:53:44 -0500 Subject: [PATCH 14/19] Make format happy --- src/tensors/tensor.jl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/tensors/tensor.jl b/src/tensors/tensor.jl index d8118bcd7..69467963c 100644 --- a/src/tensors/tensor.jl +++ b/src/tensors/tensor.jl @@ -316,15 +316,15 @@ end for randf in (:rand, :randn, :randexp, :randisometry) _docstr = """ - $randf([rng=default_rng()], [TorA=Float64], codomain::ProductSpace{S,N₁}, + $randf([rng=default_rng()], [TorA=Float64], codomain::ProductSpace{S,N₁}, domain::ProductSpace{S,N₂}) where {S,N₁,N₂,T} -> t - $randf([rng=default_rng()], [TorA=Float64], codomain ← domain) -> t + $randf([rng=default_rng()], [TorA=Float64], codomain ← domain) -> t Generate a tensor `t` with entries generated by `$randf`. - The type `TorA` can be used to control the element type and - data type generated. For example, if `TorA` is a `CuVector{ComplexF32}` - or `ROCVector{Float64}`, then the final output `TensorMap` will have - that as its storage type. + The type `TorA` can be used to control the element type and + data type generated. For example, if `TorA` is a `CuVector{ComplexF32}` + or `ROCVector{Float64}`, then the final output `TensorMap` will have + that as its storage type. See also [`Random.$(randf)!`](@ref). 
""" From 3949ec000b79cadbd6aed5496f5df229c582535f Mon Sep 17 00:00:00 2001 From: Katharine Hyatt Date: Mon, 22 Dec 2025 14:27:26 -0500 Subject: [PATCH 15/19] Small updates --- Project.toml | 3 +++ test/cuda/tensors.jl | 16 ++++++++-------- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/Project.toml b/Project.toml index c0c05603f..41d8a8002 100644 --- a/Project.toml +++ b/Project.toml @@ -77,3 +77,6 @@ cuTENSOR = "011b41b2-24ef-40a8-b3eb-fa098493e9e1" [targets] test = ["ArgParse", "Adapt", "Aqua", "Combinatorics", "CUDA", "cuTENSOR", "GPUArrays", "LinearAlgebra", "SafeTestsets", "TensorOperations", "Test", "TestExtras", "ChainRulesCore", "ChainRulesTestUtils", "FiniteDifferences", "Zygote"] + +[sources] +MatrixAlgebraKit = {url="https://github.com/QuantumKitHub/MatrixAlgebraKit.jl", rev="main"} diff --git a/test/cuda/tensors.jl b/test/cuda/tensors.jl index 9f52161a8..8a89a8ad8 100644 --- a/test/cuda/tensors.jl +++ b/test/cuda/tensors.jl @@ -475,20 +475,20 @@ for V in spacelist end end if BraidingStyle(I) isa Bosonic && hasfusiontensor(I) - #=@timedtestset "Tensor functions" begin + @timedtestset "Tensor functions" begin W = V1 ⊗ V2 for T in (Float64, ComplexF64) - t = CUDA.randn(T, W, W) + t = project_hermitian!(CUDA.randn(T, W, W)) s = dim(W) - @test_broken (@constinferred sqrt(t))^2 ≈ t - @test_broken TensorKit.to_cpu(sqrt(t)) ≈ sqrt(TensorKit.to_cpu(t)) + #@test (@constinferred sqrt(t))^2 ≈ t + #@test TensorKit.to_cpu(sqrt(t)) ≈ sqrt(TensorKit.to_cpu(t)) expt = @constinferred exp(t) - @test_broken TensorKit.to_cpu(expt) ≈ exp(TensorKit.to_cpu(t)) + @test TensorKit.to_cpu(expt) ≈ exp(TensorKit.to_cpu(t)) # log doesn't work on CUDA yet (scalar indexing) - #@test exp(@constinferred log(expt)) ≈ expt - #@test TensorKit.to_cpu(log(expt)) ≈ log(TensorKit.to_cpu(expt)) + #@test exp(@constinferred log(project_hermitian!(expt))) ≈ expt + #@test TensorKit.to_cpu(log(project_hermitian!(expt))) ≈ log(TensorKit.to_cpu(expt)) #=@test (@constinferred cos(t))^2 + (@constinferred sin(t))^2 ≈ id(storagetype(t), W) @@ -517,7 +517,7 @@ for V in spacelist @test coth(@constinferred acoth(t8)) ≈ t8=# # TODO in CUDA end - end=# + end end # Sylvester not defined for CUDA # @timedtestset "Sylvester equation" begin From 7172691443dd2bb01bb14d564282e6dc025da15e Mon Sep 17 00:00:00 2001 From: Katharine Hyatt Date: Sun, 28 Dec 2025 15:43:03 +0100 Subject: [PATCH 16/19] Update Project.toml --- Project.toml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/Project.toml b/Project.toml index 41d8a8002..f56b35ba7 100644 --- a/Project.toml +++ b/Project.toml @@ -40,7 +40,7 @@ FiniteDifferences = "0.12" GPUArrays = "11.3.1" LRUCache = "1.0.2" LinearAlgebra = "1" -MatrixAlgebraKit = "0.6.0" +MatrixAlgebraKit = "0.6.1" OhMyThreads = "0.8.0" Printf = "1" Random = "1" @@ -77,6 +77,3 @@ cuTENSOR = "011b41b2-24ef-40a8-b3eb-fa098493e9e1" [targets] test = ["ArgParse", "Adapt", "Aqua", "Combinatorics", "CUDA", "cuTENSOR", "GPUArrays", "LinearAlgebra", "SafeTestsets", "TensorOperations", "Test", "TestExtras", "ChainRulesCore", "ChainRulesTestUtils", "FiniteDifferences", "Zygote"] - -[sources] -MatrixAlgebraKit = {url="https://github.com/QuantumKitHub/MatrixAlgebraKit.jl", rev="main"} From 5a9c8cdfaf3f21cc1310b8b15a06ac59efe933d2 Mon Sep 17 00:00:00 2001 From: Katharine Hyatt Date: Sun, 28 Dec 2025 18:14:18 +0100 Subject: [PATCH 17/19] Try to fix spacing --- src/tensors/tensor.jl | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/tensors/tensor.jl 
b/src/tensors/tensor.jl index 69467963c..e60d4d936 100644 --- a/src/tensors/tensor.jl +++ b/src/tensors/tensor.jl @@ -316,15 +316,15 @@ end for randf in (:rand, :randn, :randexp, :randisometry) _docstr = """ - $randf([rng=default_rng()], [TorA=Float64], codomain::ProductSpace{S,N₁}, - domain::ProductSpace{S,N₂}) where {S,N₁,N₂,T} -> t - $randf([rng=default_rng()], [TorA=Float64], codomain ← domain) -> t + $randf([rng=default_rng()], [TorA=Float64], codomain::ProductSpace{S,N₁}, + domain::ProductSpace{S,N₂}) where {S,N₁,N₂,T} -> t + $randf([rng=default_rng()], [TorA=Float64], codomain ← domain) -> t Generate a tensor `t` with entries generated by `$randf`. - The type `TorA` can be used to control the element type and - data type generated. For example, if `TorA` is a `CuVector{ComplexF32}` - or `ROCVector{Float64}`, then the final output `TensorMap` will have - that as its storage type. + The type `TorA` can be used to control the element type and + data type generated. For example, if `TorA` is a `CuVector{ComplexF32}` + or `ROCVector{Float64}`, then the final output `TensorMap` will have + that as its storage type. See also [`Random.$(randf)!`](@ref). """ From 658235a1dd1843d5fb80b6f3264179b728ec65e3 Mon Sep 17 00:00:00 2001 From: Katharine Hyatt Date: Wed, 31 Dec 2025 12:13:42 +0100 Subject: [PATCH 18/19] one more format attempt --- src/tensors/tensor.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tensors/tensor.jl b/src/tensors/tensor.jl index e60d4d936..d8118bcd7 100644 --- a/src/tensors/tensor.jl +++ b/src/tensors/tensor.jl @@ -317,7 +317,7 @@ end for randf in (:rand, :randn, :randexp, :randisometry) _docstr = """ $randf([rng=default_rng()], [TorA=Float64], codomain::ProductSpace{S,N₁}, - domain::ProductSpace{S,N₂}) where {S,N₁,N₂,T} -> t + domain::ProductSpace{S,N₂}) where {S,N₁,N₂,T} -> t $randf([rng=default_rng()], [TorA=Float64], codomain ← domain) -> t Generate a tensor `t` with entries generated by `$randf`. From 4d79f1445eccde9561c9445e16148c965fc3c1e8 Mon Sep 17 00:00:00 2001 From: Lukas Devos Date: Thu, 1 Jan 2026 13:48:08 +0100 Subject: [PATCH 19/19] two more format attempts --- src/tensors/tensor.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tensors/tensor.jl b/src/tensors/tensor.jl index d8118bcd7..300654632 100644 --- a/src/tensors/tensor.jl +++ b/src/tensors/tensor.jl @@ -319,7 +319,7 @@ for randf in (:rand, :randn, :randexp, :randisometry) $randf([rng=default_rng()], [TorA=Float64], codomain::ProductSpace{S,N₁}, domain::ProductSpace{S,N₂}) where {S,N₁,N₂,T} -> t $randf([rng=default_rng()], [TorA=Float64], codomain ← domain) -> t - + Generate a tensor `t` with entries generated by `$randf`. The type `TorA` can be used to control the element type and data type generated. For example, if `TorA` is a `CuVector{ComplexF32}`
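
For reviewers following the series, a minimal end-to-end sketch of the surface these patches build up. This is not part of the patches themselves: it assumes the extension activates when CUDA and cuTENSOR are loaded alongside TensorKit, that `CUDA.rand` and the unexported `TensorKit.to_cpu` behave as in the test diffs of patch 08, and that the storage-type form of `randn` matches the docstring revised in patches 14-19; the space `V` is an arbitrary illustrative choice.

    using TensorKit, CUDA, cuTENSOR
    using LinearAlgebra: norm

    V = ℂ^2                                  # small complex vector space for illustration
    t = CUDA.rand(ComplexF64, V ⊗ V, V)      # CuTensorMap: TensorMap with CuVector storage
    ht = TensorKit.to_cpu(t)                 # patch 08 helper; a no-op on CPU-backed tensors
    @assert norm(t) ≈ norm(ht)               # device and host agree, as checked in the tests

    # Per the revised docstring, the storage type can be requested directly
    # (hedged: signature taken from that docstring, not verified here):
    t32 = randn(CuVector{ComplexF32}, V ⊗ V ← V)

Replacing the earlier `Base.collect`-based conversion with an explicit `to_cpu` helper presumably makes the host transfer visible at call sites and leaves `collect` free for its usual meaning.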