Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 6 additions & 7 deletions src/ArrowTypes/src/ArrowTypes.jl
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ For a given type `T`, define its "arrow type kind", or the general category of a
* [`ArrowTypes.MapKind`](@ref): any `AbstractDict`
* [`ArrowTypes.StructKind`](@ref): any `NamedTuple` or plain struct (mutable or otherwise)
* [`ArrowTypes.UnionKind`](@ref): any `Union`
* [`ArrowTypes.DictEncodedKind`](@ref): array types that implement the `DataAPI.refarray` interface
* [`ArrowTypes.DictEncodedKind`](@ref): array types that implement the `DataAPI.refpool` interface

The `ArrowKind`s listed above translate to different ways to physically store data as supported by the arrow data format.
See the docs for each for an idea of whether they might be an appropriate fit for a custom type.
Expand Down Expand Up @@ -404,17 +404,15 @@ concrete_or_concreteunion(T) =
function ToArrow(x::A) where {A}
S = eltype(A)
T = ArrowType(S)
if S === T && concrete_or_concreteunion(S)
fi = firstindex(x)
if S === T && concrete_or_concreteunion(S) && fi == 1
return x
elseif !concrete_or_concreteunion(T)
# arrow needs concrete types, so try to find a concrete common type, preferring unions
if isempty(x)
return Missing[]
end
T = typeof(toarrow(x[1]))
for i = 2:length(x)
@inbounds T = promoteunion(T, typeof(toarrow(x[i])))
end
T = mapreduce(typeof ∘ toarrow, promoteunion, x)
if T === Missing && concrete_or_concreteunion(S)
T = promoteunion(T, typeof(toarrow(default(S))))
end
Expand Down Expand Up @@ -442,6 +440,7 @@ function _convert(::Type{T}, x) where {T}
return convert(T, x)
end
end
# Element access for the lazy `ToArrow` wrapper: fetch the raw element at `i`
# from the wrapped data, pass it through `toarrow`, then `_convert` it to `T`.
function Base.getindex(x::ToArrow{T}, i::Int) where {T}
    raw = getindex(x.data, i)
    return _convert(T, toarrow(raw))
end
# Element access for the lazy `ToArrow` wrapper. The incoming index `i` is
# shifted by the wrapped array's `firstindex` before the lookup, so `i == 1`
# reads the first element of `x.data` whatever its native first index is.
# The raw element then goes through `toarrow` and is `_convert`ed to `T`.
function Base.getindex(x::ToArrow{T}, i::Int) where {T}
    src = x.data
    raw = getindex(src, i + firstindex(src) - 1)
    return _convert(T, toarrow(raw))
end

end # module ArrowTypes
1 change: 1 addition & 0 deletions src/ArrowTypes/test/Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
# under the License.

[deps]
OffsetArrays = "6fe1bfb0-de20-5000-8ca7-80f57d26f881"
Sockets = "6462fe0b-24de-5631-8697-dd941f90decc"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
2 changes: 1 addition & 1 deletion src/ArrowTypes/test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,6 @@
# specific language governing permissions and limitations
# under the License.

using Test, ArrowTypes, UUIDs, Sockets
using Test, ArrowTypes, UUIDs, Sockets, OffsetArrays

include("tests.jl")
10 changes: 10 additions & 0 deletions src/ArrowTypes/test/tests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,16 @@ end
@test eltype(x) == Union{Float64,String}
@test x == [1.0, 3.14, "hey"]

x = ArrowTypes.ToArrow(OffsetArray([1, 2, 3], -3:-1))
@test x isa ArrowTypes.ToArrow{Int,OffsetVector{Int,Vector{Int}}}
@test eltype(x) == Int
@test x == [1, 2, 3]

x = ArrowTypes.ToArrow(OffsetArray(Any[1, 3.14], -3:-2))
@test x isa ArrowTypes.ToArrow{Float64,OffsetVector{Any,Vector{Any}}}
@test eltype(x) == Float64
@test x == [1, 3.14]

@testset "respect non-missing concrete type" begin
struct DateTimeTZ
instant::Int64
Expand Down
24 changes: 3 additions & 21 deletions src/arraytypes/dictencoding.jl
Original file line number Diff line number Diff line change
Expand Up @@ -220,29 +220,15 @@ function arrowvector(
if DataAPI.refarray(x) === x || DataAPI.refpool(x) === nothing
# need to encode ourselves
x = PooledArray(x; signed=true, compress=true)
inds = DataAPI.refarray(x)
inds = refa = DataAPI.refarray(x)
pool = DataAPI.refpool(x)
else
pool = DataAPI.refpool(x)
refa = DataAPI.refarray(x)
inds = copyto!(similar(Vector{signedtype(length(pool))}, length(refa)), refa)
end
# horrible hack? yes. better than taking CategoricalArrays dependency? also yes.
if typeof(pool).name.name == :CategoricalRefPool
if eltype(x) >: Missing
pool = vcat(missing, DataAPI.levels(x))
else
pool = DataAPI.levels(x)
for i = 1:length(inds)
@inbounds inds[i] -= 1
end
end
else
# adjust to "offset" instead of index
for i = 1:length(inds)
@inbounds inds[i] -= 1
end
end
# adjust to "offset" instead of index
inds .-= firstindex(refa)
data = arrowvector(
pool,
i,
Expand Down Expand Up @@ -278,11 +264,7 @@ function arrowvector(
)
deltas = eltype(x)[]
inds = Vector{ET}(undef, len)
categorical = typeof(x).name.name == :CategoricalArray
for (j, val) in enumerate(x)
if categorical
val = get(val)
end
@inbounds inds[j] = get!(pool, val) do
push!(deltas, val)
return length(pool)
Expand Down
2 changes: 2 additions & 0 deletions test/Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
FilePathsBase = "48062228-2e41-5def-b9a4-89aafe57970f"
JSON3 = "0f8b85d8-7281-11e9-16c2-39a750bddbf1"
Mmap = "a63ad114-7e13-5084-954f-fe012c677804"
OffsetArrays = "6fe1bfb0-de20-5000-8ca7-80f57d26f881"
PooledArrays = "2dfb63ee-cc39-5dd5-95bd-886bf059d720"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
Sockets = "6462fe0b-24de-5631-8697-dd941f90decc"
Expand All @@ -40,6 +41,7 @@ DataAPI = "1"
DataFrames = "1"
FilePathsBase = "0.9"
JSON3 = "1"
OffsetArrays = "1"
PooledArrays = "1"
StructTypes = "1"
SentinelArrays = "1"
Expand Down
1 change: 1 addition & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ using CategoricalArrays
using DataAPI
using FilePathsBase
using DataFrames
using OffsetArrays
import Random: randstring

include(joinpath(dirname(pathof(ArrowTypes)), "../test/tests.jl"))
Expand Down
Loading