Skip to content

Commit aa53dfc

Browse files
authored
Use new BufferedIO type in Parsers for IO inputs for performance. Add… (#355)
* Use new BufferedIO type in Parsers for IO inputs for performance. Addresses #350
* Require Parsers 0.2.13
* Bump Parsers dependency
1 parent 5d98bd1 commit aa53dfc

5 files changed

Lines changed: 16 additions & 16 deletions

File tree

REQUIRE

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,4 @@ DataFrames 0.14.0
44
WeakRefStrings 0.4.1
55
CategoricalArrays 0.3.0
66
Tables
7-
Parsers
7+
Parsers 0.2.14

src/CSV.jl

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -190,16 +190,16 @@ function File(source::Union{String, IO};
190190
end
191191
end
192192

193-
!transpose && seek(io, positions[1])
193+
!transpose && Parsers.fastseek!(io, positions[1])
194194
return File{transpose, columnaccess, typeof(io), typeof(parsinglayers), typeof(kwargs)}(names, finaltypes, getname(source), io, parsinglayers, positions, originalpositions, Ref(1), ref, Ref(Parsers.SUCCESS), kwargs, pools, strict, silencewarnings)
195195
end
196196

197197
include("filedetection.jl")
198198
include("typedetection.jl")
199199
include("tables.jl")
200200

201-
getio(str::String, use_mmap) = IOBuffer(use_mmap ? Mmap.mmap(str) : Base.read(str))
202-
getio(io::IO, use_mmap) = io
201+
getio(str::String, use_mmap) = use_mmap ? IOBuffer(Mmap.mmap(str)) : Parsers.BufferedIO(open(str))
202+
getio(io::IO, use_mmap) = Parsers.BufferedIO(io)
203203
getname(str::String) = str
204204
getname(io::I) where {I <: IO} = string("<", I, ">")
205205

@@ -208,8 +208,8 @@ function consumeBOM!(io)
208208
startpos = position(io)
209209
if !eof(io) && Parsers.peekbyte(io) == 0xef
210210
Parsers.readbyte(io)
211-
(!eof(io) && Parsers.readbyte(io) == 0xbb) || seek(io, startpos)
212-
(!eof(io) && Parsers.readbyte(io) == 0xbf) || seek(io, startpos)
211+
(!eof(io) && Parsers.readbyte(io) == 0xbb) || Parsers.fastseek!(io, startpos)
212+
(!eof(io) && Parsers.readbyte(io) == 0xbf) || Parsers.fastseek!(io, startpos)
213213
end
214214
return
215215
end
@@ -310,7 +310,7 @@ function read(fullpath::Union{AbstractString,IO}, sink=DataFrame, args...; appen
310310
f = CSV.File(fullpath; kwargs...)
311311
if !isempty(transforms)
312312
Base.depwarn("`CSV.read(source; transforms=Dict(...)` is deprecated in favor of `CSV.File(source) |> transform(col1=x->...) |> DataFrame`", nothing)
313-
return f |> transform(transforms) |> sink
313+
return sink(transform(f, transforms))
314314
end
315315
return f |> sink
316316
end

src/filedetection.jl

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ function datalayout_transpose(header, parsinglayers, io, datarow, footerskip, no
7676
push!(columnpositions, position(io))
7777
readline!(parsinglayers, io)
7878
end
79-
seek(io, datapos)
79+
Parsers.fastseek!(io, datapos)
8080
elseif isa(header, AbstractRange)
8181
# column names span several columns
8282
throw(ArgumentError("not implemented for transposed csv files"))
@@ -104,7 +104,7 @@ function datalayout_transpose(header, parsinglayers, io, datarow, footerskip, no
104104
push!(columnpositions, position(io))
105105
readline!(parsinglayers, io)
106106
end
107-
seek(io, datapos)
107+
Parsers.fastseek!(io, datapos)
108108
end
109109
rows = rows - footerskip # rows now equals the actual number of rows in the dataset
110110
return rows, makeunique(map(x->normalizenames ? normalizename(x) : Symbol(x), columnnames)), columnpositions
@@ -117,7 +117,7 @@ function datalayout(header::Integer, parsinglayers, io, datarow, normalizenames,
117117
skipto!(parsinglayers, io, 1, datarow)
118118
datapos = position(io)
119119
row_vals = readsplitline(parsinglayers, io, cmt)
120-
seek(io, datapos)
120+
Parsers.fastseek!(io, datapos)
121121
columnnames = [Symbol("Column$i") for i = eachindex(row_vals)]
122122
else
123123
skipto!(parsinglayers, io, 1, header)
@@ -148,7 +148,7 @@ function datalayout(header::Vector, parsinglayers, io, datarow, normalizenames,
148148
columnnames = makeunique([normalizenames ? normalizename(nm) : Symbol(nm) for nm in header])
149149
else
150150
row_vals = readsplitline(parsinglayers, io, cmt)
151-
seek(io, datapos)
151+
Parsers.fastseek!(io, datapos)
152152
if isempty(header)
153153
columnnames = [Symbol("Column$i") for i in eachindex(row_vals)]
154154
else

src/transforms.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ Base.IteratorSize(::Type{<:Transforms{T}}) where {T} = Base.IteratorSize(T)
5353
Base.length(t::Transforms) = length(getfield(t, 1))
5454
Base.eltype(t::Transforms{T, F}) where {T, F} = TransformsRow{eltype(getfield(t, 1)), F}
5555

56-
function Base.iterate(t::Transforms, st=())
56+
@inline function Base.iterate(t::Transforms, st=())
5757
state = iterate(getfield(t, 1), st...)
5858
state === nothing && return nothing
5959
return TransformsRow(state[1], getfield(t, 2)), (state[2],)

src/typedetection.jl

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -90,18 +90,18 @@ function detect(types, io, positions, parsinglayers, kwargs, typemap, categorica
9090
for startingrow in rng
9191
for row = trunc(Int64, startingrow):trunc(Int64, startingrow + step - 1)
9292
rows += 1
93-
!transpose && seek(io, positions[row])
93+
!transpose && Parsers.fastseek!(io, positions[row])
9494
lastcode[] = Parsers.SUCCESS
9595
for col = 1:cols
9696
if !transpose && newline(lastcode[])
9797
typecodes[col] = promote_typecode(typecodes[col], MISSING)
9898
continue
9999
end
100-
transpose && seek(io, positions[col])
100+
transpose && Parsers.fastseek!(io, positions[col])
101101
# if debug
102102
# pos = position(io)
103103
# result = Parsers.parse(parsinglayers, io, String)
104-
# seek(io, pos)
104+
# Parsers.fastseek!(io, pos)
105105
# end
106106
@inbounds T = typecodes[col]
107107
if T === USER
@@ -145,7 +145,7 @@ function incr!(dict::Dict{String, Int}, key::Tuple{Ptr{UInt8}, Int})
145145
end
146146

147147
@inline function trytype(io, pos, layers, T, kwargs, lastcode)
148-
seek(io, pos)
148+
Parsers.fastseek!(io, pos)
149149
res = Parsers.parse(layers, io, T; kwargs...)
150150
lastcode[] = res.code
151151
return Parsers.ok(res.code) ? typecode(res.result) : EMPTY

0 commit comments

Comments
 (0)