parquer - write Parquet files with pure Erlang
Rebar3:
{parquer, {git, "https://github.com/emqx/parquer.git", {tag, "0.1.0"}}}

Mix:
{:parquer, github: "emqx/parquer", tag: "0.1.0", manager: :rebar3}

%% Define a schema
Schema =
parquer_schema:root(
<<"root">>,
[ parquer_schema:string(<<"f0">>, optional)
, parquer_schema:bool(<<"f1">>, required)
]).
%% Create a writer
Writer0 = parquer_writer:new(Schema, _WriterOpts = #{}).
%% Append records
{IOData1, Writer1} =
parquer_writer:write_many(Writer0, [
#{<<"f0">> => <<"hello">>, <<"f1">> => true},
#{<<"f0">> => undefined, <<"f1">> => false},
#{<<"f0">> => <<"world!">>, <<"f1">> => false}
]).
%% Finish writing
{IOData2, _WriteMetadata} = parquer_writer:close(Writer1).
%% Save data to a file
file:write_file("/tmp/data.parquet", [IOData1, IOData2]).

Testing the output:
python -m venv .venv
source .venv/bin/activate
pip install -r dev/dev_requirements.txt
python -c "from fastparquet import ParquetFile; pf = ParquetFile('/tmp/data.parquet'); print(pf.info); print(pf.head(10))"
deactivate

Prerequisites:
- Erlang/OTP 27+
- Rebar3
- Elixir 1.17+ (optional, for Mix support)
- Apache Thrift
# scripts/generate_code.sh ## only needed if `priv/parquet.thrift` is changed
scripts/format.sh fix
rebar3 compile