Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

## Unreleased

- V2 NoCompressor writes: replace `append!` over the reinterpret view with bulk `resize!` + `copyto!` in the generic `zcompress!` fallback [#280](https://github.com/JuliaIO/Zarr.jl/pull/280)
Comment thread
lazarusA marked this conversation as resolved.
Outdated
- V2 NoCompressor reads: add bulk-copy `zuncompress!` method dispatched on `::NoCompressor` to bypass `copyto!(::Array, ::ReinterpretArray)`'s element-by-element walk [#280](https://github.com/JuliaIO/Zarr.jl/pull/280)
- V2 read+write chunk allocation: add `getchunkarray_undef` and skip the dead zero-fill of the chunk-shaped scratch buffer on full-overwrite paths [#280](https://github.com/JuliaIO/Zarr.jl/pull/280)
- Fix CondaPkg branch in CI, use release version instead [#273](https://github.com/JuliaIO/Zarr.jl/pull/273)
- Fix creation of on-disk arrays that do not fit in memory [#269](https://github.com/JuliaIO/Zarr.jl/pull/269)

Expand Down
21 changes: 17 additions & 4 deletions src/Compressors/Compressors.jl
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,11 @@ getCompressor(::Nothing) = NoCompressor()
# When the trailing (fourth) argument is `nothing`, there is nothing extra to
# apply: forward straight to the three-argument mutating methods.
function zcompress!(compressed, data, c, ::Nothing)
    return zcompress!(compressed, data, c)
end

function zuncompress!(data, compressed, c, ::Nothing)
    return zuncompress!(data, compressed, c)
end

# Fallback definition of mutating form of compress and uncompress
function zcompress!(compressed, data, c)
empty!(compressed)
append!(compressed,zcompress(data, c))
# Generic mutating encode: run the non-mutating `zcompress`, size the output
# buffer to the encoded length once, then bulk-copy into it. This replaces an
# `append!`-based approach to avoid elementwise growth over `NoCompressor`'s view.
# Returns whatever `copyto!` returns (the destination buffer).
function zcompress!(compressed, data, c)
    encoded = zcompress(data, c)
    resize!(compressed, length(encoded))
    return copyto!(compressed, encoded)
end
# Generic mutating decode: decode `compressed` with the non-mutating
# `zuncompress` (asking for `eltype(data)`), then copy the result into `data`.
function zuncompress!(data, compressed, c)
    decoded = zuncompress(compressed, c, eltype(data))
    return copyto!(data, decoded)
end

Expand Down Expand Up @@ -72,6 +73,18 @@ function zcompress(a, ::NoCompressor)
_reinterpret(UInt8,a)
end

# `NoCompressor` decode specialised for `Array` output and `Vector{UInt8}` input.
# For `isbitstype` element types the raw bytes are moved with a single
# `unsafe_copyto!`, sidestepping the elementwise `ReinterpretArray` copy that the
# generic fallback performs. Other element types take the `_reinterpret` route.
#
# Throws `DimensionMismatch` when the encoded byte count differs from
# `sizeof(data)`. Returns `data` on the fast path.
function zuncompress!(data::Array{T}, compressed::Vector{UInt8}, ::NoCompressor) where {T}
    if !isbitstype(T)
        return copyto!(data, _reinterpret(T, compressed))
    end
    n = sizeof(data)
    if n != length(compressed)
        throw(DimensionMismatch(
            "Encoded byte length $(length(compressed)) does not match output byte size $n"
        ))
    end
    # Keep both arrays rooted while raw pointers into them are in use.
    GC.@preserve data compressed begin
        dest = Ptr{UInt8}(pointer(data))
        unsafe_copyto!(dest, pointer(compressed), n)
    end
    return data
end

# JSON lowering of a `NoCompressor` yields `nothing`.
JSON.lower(::NoCompressor) = nothing

# Register `NoCompressor` in the `compressortypes` table under the key `nothing`.
compressortypes[nothing] = NoCompressor
29 changes: 23 additions & 6 deletions src/ZArray.jl
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,19 @@ _zero(::Type{<:Vector{T}}) where T = T[]
# The "zero" element for `Char` is the NUL character (code point 0).
_zero(::Type{Char}) = '\0'
# Allocate an array of shape `z.metadata.chunks` with every element set to
# `_zero(eltype(z))`.
function getchunkarray(z::ZArray)
    zeroelem = _zero(eltype(z))
    return fill(zeroelem, z.metadata.chunks)
end

# Allocate a chunk-shaped buffer without initialising it, i.e. `getchunkarray`
# minus the zero/fill_value fill. Only use this when the caller guarantees that
# every element is overwritten before any is read (e.g. decode-into or
# full-chunk-write paths).
#
# Element types that admit `Missing` still go through the standard
# `getchunkarray`: the codec pipeline requires the underlying buffer to be the
# `isbits` inner of a `SenMissArray`, not an `Array{Union{Missing,T}}` directly
# (Blosc and friends reject non-isbits eltypes).
function getchunkarray_undef(z::ZArray{T}) where {T}
    if Missing <: T
        return getchunkarray(z)
    end
    chunkshape = z.metadata.chunks
    return Array{T}(undef, chunkshape)
end

# A plain `Array` has no wrapper to strip: hand back the very same object.
function maybeinner(a::Array)
    return a
end
# A `SenMissArray` is unwrapped to its `x` field.
function maybeinner(a::SenMissArray)
    return a.x
end
# Re-initialise buffer `a` with fill value `fv`. With `fv === nothing` the buffer
# is left untouched and `true` is returned; otherwise the result of `fill!`
# (the buffer itself) is returned.
function resetbuffer!(fv, a::Array)
    return fv === nothing ? true : fill!(a, fv)
end
Expand All @@ -172,9 +185,11 @@ function readblock!(aout::AbstractArray{<:Any,N}, z::ZArray{<:Any, N}, r::Cartes
output_base_offsets = map(i->first(i)-1,r.indices)
# Determines which chunks are affected
blockr = CartesianIndices(map(trans_ind, r.indices, z.metadata.chunks))
# Allocate an array of the size of a chunk where uncompressed data can be held
#bufferdict = IdDict((current_task()=>getchunkarray(z),))
a = getchunkarray(z)
# Allocate the chunk-shaped scratch buffer. Reads always either fill it
# from a decode (which writes every element) or fall through to the
# fill-value path (which calls `fill!` itself), so we don't need to
# pre-zero it.
a = getchunkarray_undef(z)
# Now loop through the chunks
c = Channel{Pair{eltype(blockr),Union{Nothing,Vector{UInt8}}}}(channelsize(z.storage))

Expand Down Expand Up @@ -208,9 +223,11 @@ function writeblock!(ain::AbstractArray{<:Any,N}, z::ZArray{<:Any, N}, r::Cartes
input_base_offsets = map(i->first(i)-1,r.indices)
# Determines which chunks are affected
blockr = CartesianIndices(map(trans_ind, r.indices, z.metadata.chunks))
# Allocate an array of the size of a chunk where uncompressed data can be held
#bufferdict = IdDict((current_task()=>getchunkarray(z),))
a = getchunkarray(z)
# If `fill_value === nothing`, the legacy behaviour is that un-written
# cells in a partial-write to a new chunk default to zero (via the
# zero-fill of `getchunkarray`). Preserve that. Otherwise `resetbuffer!`
# handles initialisation explicitly and we save the dead memset.
a = z.metadata.fill_value === nothing ? getchunkarray(z) : getchunkarray_undef(z)
# Now loop through the chunks
readchannel = Channel{Pair{eltype(blockr),Union{Nothing,Vector{UInt8}}}}(channelsize(z.storage))

Expand Down
Loading