Skip to content

Commit 58d160f

Browse files
authored
Adjust the columns threshold based on various sized benchmark results (#303)
1 parent 2b74858 commit 58d160f

2 files changed

Lines changed: 30 additions & 1 deletion

File tree

benchmark/benchmarks.jl

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -371,3 +371,31 @@ function Base.getproperty(csvrow::Row{F}, ::Type{T}, col::Int, name::Symbol) whe
371371
end
372372
return r
373373
end
374+
375+
376+
# code for generating various-sized csv files
377+
# used to determine the row vs. column-access threshold
378+
"""
    gencsv(rows, cols)

Write a CSV file of random numbers with `rows` rows and `cols` columns.

Each column holds `rows` values drawn with `rand` and rounded to 4 digits; columns are
named `col1`, `col2`, ..., `col<cols>`. The file is written to
`random_<rows>_<cols>.csv` in the current working directory, and the value returned by
`CSV.write` is returned.
"""
function gencsv(rows, cols)
    # One independent random vector per column, rounded to keep the files compact.
    columns = [round.(rand(rows), digits=4) for _ in 1:cols]
    colnames = Symbol.(["col$i" for i in 1:cols])
    df = DataFrame(columns, colnames)
    return CSV.write("random_$(rows)_$(cols).csv", df)
end
382+
383+
"""
    go(compile=true)

Time reading each pre-generated `random_<rows>_<cols>.csv` file into a `DataFrame` and
write the timings to `results_<compile>.csv`.

`compile` is not used to change how the files are read; it is only stored in the
`compiled` field of every result row and interpolated into the output file name, so that
separate runs can be told apart.

The input files must already exist in the current working directory; see the
commented-out loop below for how to generate them with `gencsv`.
"""
function go(compile=true)
    # for cols in (10, 25, 50, 75, 100, 250)
    #     for rows in (10, 50, 100, 1000, 5000, 10000, 50000, 100000)
    #         println("generating $rows by $cols csv file...")
    #         @time gencsv(rows, cols)
    #     end
    # end

    # Four field names need four field types; `compiled` holds the `compile` flag (Bool).
    rt = NamedTuple{(:compiled, :rows, :cols, :time), Tuple{Bool, Int, Int, Float64}}[]
    for cols in (10, 25, 50, 75, 100, 250)
        for rows in (10, 50, 100, 1000, 5000, 10000, 50000, 100000)
            println("reading $rows by $cols csv file...")
            # Time the full parse plus materialization into a DataFrame.
            e = @elapsed begin
                CSV.File("random_$(rows)_$(cols).csv") |> DataFrame
            end
            push!(rt, (compiled=compile, rows=rows, cols=cols, time=e))
        end
    end
    return CSV.write("results_$compile.csv", rt)
end

src/CSV.jl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,8 @@ function File(source::Union{String, IO};
168168
originalpositions = Int64[]
169169
footerskip > 0 && resize!(positions, length(positions) - footerskip)
170170
ref = Ref{Int}(0)
171-
columnaccess = length(positions) <= 1024
171+
# if the # of cells in the file is less than 500K
172+
columnaccess = (length(names) * length(positions)) < 500_000
172173
debug && @show positions
173174
end
174175

0 commit comments

Comments
 (0)