Skip to content
Closed
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion rebar.config
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
{port_specs, [{"priv/bitcask.so", ["c_src/*.c"]}]}.

{deps, [
{meck, "0.8.1", {git, "git://github.com/basho/meck.git", {tag, "0.8.1"}}}
{meck, "0.8.1", {git, "git://github.com/basho/meck.git", {tag, "0.8.1"}}},
{snappy, ".*", {git, "git://github.com/basho/snappy-erlang-nif.git", "master"}}
]}.

{port_env, [
Expand Down
54 changes: 52 additions & 2 deletions src/bitcask.erl
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,8 @@ get(Ref, Key, TryNum) ->
{ok, _Key, ?TOMBSTONE} ->
not_found;
{ok, _Key, Value} ->
{ok, Value};
Val = decompress(Value),
{ok, Val};
{error, eof} ->
not_found;
{error, _} = Err ->
Expand Down Expand Up @@ -1222,9 +1223,11 @@ do_put(Key, Value, #bc_state{write_file = WriteFile} = State, Retries, _LastErr)
end,

Tstamp = bitcask_time:tstamp(),

CValue = compress(Value, State#bc_state.opts),
{ok, WriteFile2, Offset, Size} = bitcask_fileops:write(
State2#bc_state.write_file,
Key, Value, Tstamp),
Key, CValue, Tstamp),
case bitcask_nifs:keydir_put(State2#bc_state.keydir, Key,
bitcask_fileops:file_tstamp(WriteFile2),
Size, Offset, Tstamp, true) of
Expand Down Expand Up @@ -1333,6 +1336,53 @@ expiry_merge([File | Files], LiveKeyDir, Acc0) ->
end,
expiry_merge(Files, LiveKeyDir, Acc).

enable_compression(Opts) ->
case get_opt(enable_compression, Opts) of
true ->
true;
_ ->
false
end.

compression_threshold(Opts) ->
case get_opt(compression_threshold, Opts) of
T when is_float(T) ->
T;
_ ->
0.8
end.

check_compression_threshold(Value, Compressed, Opts) ->
Threshold = compression_threshold(Opts),
case (byte_size(Compressed) / byte_size(Value)) of
Ratio when Ratio < Threshold ->
Compressed;
_ ->
Value
end.

compress(Value, Opts) ->
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd prefer these to be named maybe_compress and maybe_decompress to more clearly state intent.

case enable_compression(Opts) of
true ->
case snappy:compress(Value) of
{ok, Compressed} ->
check_compression_threshold(Value, Compressed, Opts);
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems to me to be a lot of extra work for a non-adaptive setting. I suspect that the better thing to do is to have a simple size threshold below which you don't bother to compress the data.

_ ->
Value
end;
false ->
Value
end.

decompress(Value) ->
case snappy:is_valid(Value) of
true ->
{ok, Val} = snappy:decompress(Value),
Val;
false ->
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

may want to change false here to _, as the function can also return unknown if there is an error, which is possible if some feature of the data confuses snappy enough to throw an error.

Value
end.

%% ===================================================================
%% EUnit tests
%% ===================================================================
Expand Down