Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions librz/bin/bfile_string.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ typedef struct shared_data_t {
size_t min_str_length;
bool check_ascii_freq;
bool prefer_big_endian;
RzVector /*<RzCodePoint>*/ *user_unprintable;
} SharedData;

typedef struct search_thread_data_t {
Expand Down Expand Up @@ -233,6 +234,7 @@ static RzList /*<RzDetectedString *>*/ *string_scan_range(SharedData *shared, co
.min_str_length = shared->min_str_length,
.prefer_big_endian = shared->prefer_big_endian,
.check_ascii_freq = shared->check_ascii_freq,
.user_unprintable = shared->user_unprintable,
};

ut8 *buf = calloc(interval_size, 1);
Expand Down Expand Up @@ -462,6 +464,7 @@ RZ_API void rz_bin_string_search_opt_init(RZ_NONNULL RzBinStringSearchOpt *opt)
opt->raw_alignment = RZ_BIN_STRING_SEARCH_RAW_FILE_ALIGNMENT;
opt->string_encoding = RZ_STRING_ENC_GUESS;
opt->check_ascii_freq = RZ_BIN_STRING_SEARCH_CHECK_ASCII_FREQ;
opt->user_unprintable = NULL;
opt->mode = RZ_BIN_STRING_SEARCH_MODE_AUTO;
}

Expand Down Expand Up @@ -656,6 +659,7 @@ RZ_API RZ_OWN RzPVector /*<RzBinString *>*/ *rz_bin_file_strings(RZ_NONNULL RzBi
.min_str_length = opt->min_length,
.check_ascii_freq = opt->check_ascii_freq,
.prefer_big_endian = prefer_big_endian,
.user_unprintable = opt->user_unprintable,
};

if (shared.min_str_length < 1) {
Expand Down
1 change: 1 addition & 0 deletions librz/bin/bin.c
Original file line number Diff line number Diff line change
Expand Up @@ -525,6 +525,7 @@ RZ_API void rz_bin_free(RZ_NULLABLE RzBin *bin) {
bin->file = NULL;
free(bin->force);
free(bin->srcdir);
rz_vector_free(bin->str_search_cfg.user_unprintable);
// rz_bin_free_bin_files (bin);
rz_list_free(bin->binfiles);

Expand Down
1 change: 1 addition & 0 deletions librz/core/canalysis.c
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ static bool find_string_at(RzCore *core, RzBinObject *bobj, ut64 pointer, char *
.min_str_length = bin->str_search_cfg.min_length,
.prefer_big_endian = big_endian,
.check_ascii_freq = bin->str_search_cfg.check_ascii_freq,
.user_unprintable = bin->str_search_cfg.user_unprintable,
};

rz_io_pread_at(core->io, pointer, buffer, sizeof(buffer));
Expand Down
69 changes: 69 additions & 0 deletions librz/core/cconfig.c
Original file line number Diff line number Diff line change
Expand Up @@ -1000,6 +1000,74 @@ static bool cb_str_encoding(void *user, void *data) {
return true;
}

static bool cb_str_unprintable(void *user, void *data) {
RzCore *core = (RzCore *)user;
RzConfigNode *node = (RzConfigNode *)data;
if (node->value[0] == '?') {
rz_cons_printf("Comma-separated list of Unicode code points treated as non-printable.\n");
rz_cons_printf("Examples:\n");
rz_cons_printf(" e str.unprintable=0x09,0x0a,0x0d,0x1b\n");
rz_cons_printf(" e str.unprintable=0x200B\n");
rz_cons_printf(" e str.unprintable=\n");
rz_cons_printf(" -- reset the list to empty.\n");
return false;
}

if (RZ_STR_ISEMPTY(node->value)) {
rz_vector_free(core->bin->str_search_cfg.user_unprintable);
core->bin->str_search_cfg.user_unprintable = NULL;
check_reload_bin_str_search(core);
return true;
}

char *list = rz_str_dup(node->value);
if (!list) {
return false;
}

int argc = rz_str_split(list, ',');
if (argc < 1) {
free(list);
return false;
}

RzVector *custom = rz_vector_new(sizeof(RzCodePoint), NULL, NULL);
if (!custom) {
free(list);
return false;
}

for (int i = 0; i < argc; i++) {
const char *word = rz_str_word_get0(list, i);
if (RZ_STR_ISEMPTY(word) || !rz_is_valid_input_num_value(core->num, word)) {
RZ_LOG_ERROR("Invalid value for str.unprintable (%s).\n", word ? word : "");
rz_vector_free(custom);
free(list);
return false;
}
ut64 cp = rz_num_math(core->num, word);
if (cp > RZ_UNICODE_LAST_CODE_POINT) {
RZ_LOG_ERROR("str.unprintable code point out of range (%s).\n", word);
rz_vector_free(custom);
free(list);
return false;
}
RzCodePoint point = (RzCodePoint)cp;
if (!rz_vector_push(custom, &point)) {
RZ_LOG_ERROR("Cannot append code point to str.unprintable (%s).\n", word);
rz_vector_free(custom);
free(list);
return false;
}
}
free(list);

rz_vector_free(core->bin->str_search_cfg.user_unprintable);
core->bin->str_search_cfg.user_unprintable = custom;
check_reload_bin_str_search(core);
return true;
}

static bool cb_str_search_mode(void *user, void *data) {
RzCore *core = (RzCore *)user;
RzConfigNode *node = (RzConfigNode *)data;
Expand Down Expand Up @@ -3568,6 +3636,7 @@ RZ_API int rz_core_config_init(RzCore *core) {
n = NODECB("str.encoding", "guess", &cb_str_encoding);
SETDESC(n, "The default string encoding type (when set to guess, it is automatically guessed).");
SETOPTIONS(n, "ascii", "8bit", "utf8", "utf16le", "utf32le", "utf16be", "utf32be", "ibm037", "ibm290", "ebcdices", "ebcdicuk", "ebcdicus", "guess", NULL);
SETCB("str.unprintable", "", &cb_str_unprintable, "Comma-separated hex code points treated as non-printable.");

/* string search options */
SETB("str.search.reload", true, "When enabled, any change to any option `str.search.*` will reload the bin strings.");
Expand Down
3 changes: 2 additions & 1 deletion librz/core/cmd/cmd_print.c
Original file line number Diff line number Diff line change
Expand Up @@ -1846,10 +1846,11 @@ static RzCmdStatus core_print_string_in_block(RzCore *core, bool stop_at_nil, bo
opt.encoding = encoding;
opt.stop_at_nil = stop_at_nil;
opt.stop_at_unprintable = stop_at_unprintable;
opt.user_unprintable = core->bin->str_search_cfg.user_unprintable;
core_print_raw_buffer(&opt);
break;
case RZ_OUTPUT_MODE_JSON:
print_json_string(core, buffer, length, encoding, stop_at_nil, stop_at_nil);
print_json_string(core, buffer, length, encoding, stop_at_nil, stop_at_unprintable);
break;
default:
RZ_LOG_ERROR("core: unsupported output mode\n");
Expand Down
1 change: 1 addition & 0 deletions librz/core/cmeta.c
Original file line number Diff line number Diff line change
Expand Up @@ -381,6 +381,7 @@ static bool meta_string_guess_add(RzCore *core, ut64 addr, size_t limit, char **
.min_str_length = bin->str_search_cfg.min_length,
.prefer_big_endian = big_endian,
.check_ascii_freq = bin->str_search_cfg.check_ascii_freq,
.user_unprintable = bin->str_search_cfg.user_unprintable,
};
RzList *str_list = rz_list_new();
if (!str_list) {
Expand Down
1 change: 1 addition & 0 deletions librz/core/csearch.c
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,7 @@ RZ_API RZ_OWN RzList /*<RzSearchHit *>*/ *rz_core_search_string(RZ_NONNULL RzCor
.min_str_length = RZ_MAX(re_pattern_len, core->bin->str_search_cfg.min_length),
.prefer_big_endian = rz_asm_is_big_endian_set(core->rasm),
.check_ascii_freq = core->bin->str_search_cfg.check_ascii_freq,
.user_unprintable = core->bin->str_search_cfg.user_unprintable,
};

RzList *hits = NULL;
Expand Down
1 change: 1 addition & 0 deletions librz/include/rz_bin.h
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,7 @@ typedef struct rz_bin_string_search_opt_t {
*/
size_t raw_alignment;
bool check_ascii_freq; ///< If true, perform check on ASCII frequencies when looking for false positives
RzVector /*<RzCodePoint>*/ *user_unprintable; ///< User-defined non-printable code points
RzStrEnc string_encoding; ///< The default string encoding type (when set to guess, it is automatically guessed).
RzBinStringSearchMode mode; ///< String search mode (auto, ro sections or raw binary)
} RzBinStringSearchOpt;
Expand Down
2 changes: 2 additions & 0 deletions librz/include/rz_util/rz_str.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include "rz_assert.h"
#include "rz_str_util.h"
#include "rz_list.h"
#include <rz_vector.h>
#include "rz_types.h"

#ifdef __cplusplus
Expand Down Expand Up @@ -288,6 +289,7 @@ typedef struct rz_str_stringify_opt_t {
bool stop_at_nil; ///< When enabled stops printing when '\0' is found.
bool stop_at_unprintable; ///< When enabled stops printing at first non-printable character.
bool urlencode; ///< Encodes the output following RFC 3986.
const RzVector /*<RzCodePoint>*/ *user_unprintable; ///< Borrowed vector of user-defined non-printable code points.
} RzStrStringifyOpt;

RZ_API RzStrEnc rz_str_guess_encoding_from_buffer(RZ_NONNULL const ut8 *buffer, ut32 length);
Expand Down
2 changes: 2 additions & 0 deletions librz/include/rz_util/rz_str_search.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#define RZ_STR_SEARCH_H

#include <rz_util/rz_str.h>
#include <rz_util/rz_unicode.h>
#include <rz_util/rz_assert.h>
#include <rz_util/rz_buf.h>
#include <rz_util/rz_regex.h>
Expand Down Expand Up @@ -41,6 +42,7 @@ typedef struct {
size_t min_str_length; ///< Minimum string length
bool prefer_big_endian; ///< True if the preferred endianess for UTF strings is big-endian
bool check_ascii_freq; ///< If true, perform check on ASCII frequencies when looking for false positives
RzVector /*<RzCodePoint>*/ *user_unprintable; ///< Borrowed vector of user-defined non-printable code points
} RzUtilStrScanOptions;

RZ_API void rz_detected_string_free(RzDetectedString *str);
Expand Down
1 change: 1 addition & 0 deletions librz/include/rz_util/rz_unicode.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ typedef struct rz_unicode_case_mapping_t {
typedef RzUnicodeCaseMapping RzUnicodeCaseMap[];

RZ_API bool rz_unicode_code_point_is_printable(const RzCodePoint c);
RZ_API bool rz_unicode_code_point_is_user_unprintable(const RzCodePoint c, const RzCodePoint *user_unprintable, size_t user_unprintable_count);
RZ_API bool rz_unicode_code_point_is_defined(const RzCodePoint c);
RZ_API bool rz_unicode_code_point_is_legal_decode(const RzCodePoint c);
RZ_API bool rz_unicode_code_point_is_control(const RzCodePoint c);
Expand Down
55 changes: 45 additions & 10 deletions librz/util/str.c
Original file line number Diff line number Diff line change
Expand Up @@ -4259,6 +4259,35 @@ RZ_API RzStrEnc rz_str_guess_encoding_from_buffer(RZ_NONNULL const ut8 *buffer,
return enc == RZ_STRING_ENC_GUESS ? RZ_STRING_ENC_UTF8 : enc;
}

/**
 * \brief Checks whether a code point is in the user-defined non-printable list of the stringify options.
 *
 * Performs a linear scan over the elements of option->user_unprintable.
 *
 * \param option Stringify options; may be NULL.
 * \param cp Code point to look up.
 * \return true if \p cp equals one of the listed code points; false if
 *         \p option is NULL, its user_unprintable vector is NULL, or no element matches.
 */
static inline bool is_user_defined_unprintable(const RzStrStringifyOpt *option, RzCodePoint cp) {
if (!option || !option->user_unprintable) {
return false;
}
const RzCodePoint *user_unprintable = (const RzCodePoint *)rz_vector_head(option->user_unprintable);
for (size_t i = 0, count = rz_vector_len(option->user_unprintable); i < count; i++) {
if (user_unprintable[i] == cp) {
return true;
}
}
Comment on lines +4266 to +4271
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please use rz_vector_foreach It is way cleaner.

return false;
}

/**
 * \brief Tells whether the bytes left in the buffer are too few for one character in the given encoding.
 *
 * \param buf Buffer being stringified.
 * \param buflen Total length of \p buf in bytes.
 * \param i Current offset into \p buf.
 * \param enc Encoding used to decode the buffer.
 * \return For UTF-8, true if the size rz_utf8_size() reports for the sequence at
 *         \p buf + \p i exceeds the bytes left; for UTF-16, true if fewer than 2
 *         bytes are left; for UTF-32, true if fewer than 4 bytes are left;
 *         false for every other encoding.
 */
static inline bool stringification_has_incomplete_tail(const ut8 *buf, ut32 buflen, ut32 i, RzStrEnc enc) {
	const size_t left = buflen - i;

	if (enc == RZ_STRING_ENC_UTF8) {
		return rz_utf8_size(buf + i) > left;
	}
	if (enc == RZ_STRING_ENC_UTF16LE || enc == RZ_STRING_ENC_UTF16BE) {
		return left < 2;
	}
	if (enc == RZ_STRING_ENC_UTF32LE || enc == RZ_STRING_ENC_UTF32BE) {
		return left < 4;
	}
	// Remaining encodings are never reported as having an incomplete tail.
	return false;
}

/**
* \brief Converts a raw buffer to a printable string based on the selected options
*
Expand Down Expand Up @@ -4321,6 +4350,9 @@ RZ_API RZ_OWN char *rz_str_stringify_raw_buffer(RzStrStringifyOpt *option, RZ_NU
}

if (rsize == 0) {
if (stringification_has_incomplete_tail(buf, buflen, i, enc)) {
break;
}
if (option->stop_at_unprintable) {
break;
}
Expand Down Expand Up @@ -4407,17 +4439,20 @@ RZ_API RZ_OWN char *rz_str_stringify_raw_buffer(RzStrStringifyOpt *option, RZ_NU
} else {
if (code_point == '\\') {
rz_strbuf_appendf(&sb, "\\\\");
} else if ((code_point == '\n' && !option->escape_nl) || (rz_unicode_code_point_is_printable(code_point))) {
char tmp[5] = { 0 };
rz_utf8_encode((ut8 *)tmp, code_point);
rz_strbuf_appendf(&sb, "%s", tmp);
} else if (option->stop_at_unprintable) {
break;
} else {
ut8 tmp[4];
int n_enc = rz_utf8_encode((ut8 *)tmp, code_point);
for (int j = 0; j < n_enc; ++j) {
rz_strbuf_appendf(&sb, "\\x%02x", tmp[j]);
const bool user_unprintable = is_user_defined_unprintable(option, code_point);
if (((code_point == '\n' && !option->escape_nl) || rz_unicode_code_point_is_printable(code_point)) && !user_unprintable) {
char tmp[5] = { 0 };
rz_utf8_encode((ut8 *)tmp, code_point);
rz_strbuf_appendf(&sb, "%s", tmp);
} else if (option->stop_at_unprintable) {
break;
} else {
ut8 tmp[4];
int n_enc = rz_utf8_encode((ut8 *)tmp, code_point);
for (int j = 0; j < n_enc; ++j) {
rz_strbuf_appendf(&sb, "\\x%02x", tmp[j]);
}
}
}
}
Expand Down
20 changes: 17 additions & 3 deletions librz/util/str_search.c
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,19 @@ static inline bool is_c_escape_sequence(char ch) {
return strchr("\b\v\f\n\r\t\a\033\\", ch);
}

/**
 * \brief Checks whether a code point is in the user-defined non-printable list of the scan options.
 *
 * Performs a linear scan over the elements of opt->user_unprintable.
 *
 * \param opt String scan options; may be NULL.
 * \param cp Code point to look up.
 * \return true if \p cp equals one of the listed code points; false if
 *         \p opt is NULL, its user_unprintable vector is NULL, or no element matches.
 */
static inline bool is_user_defined_unprintable(const RzUtilStrScanOptions *opt, RzCodePoint cp) {
if (!opt || !opt->user_unprintable) {
return false;
}
const RzCodePoint *user_unprintable = (const RzCodePoint *)rz_vector_head(opt->user_unprintable);
for (size_t i = 0, count = rz_vector_len(opt->user_unprintable); i < count; i++) {
if (user_unprintable[i] == cp) {
return true;
}
}
Comment on lines +96 to +101
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

rz_vector_foreach

return false;
}

static UTF8StringInfo calculate_utf8_string_info(ut8 *str, int size) {
UTF8StringInfo res = {
.num_ascii = 0,
Expand Down Expand Up @@ -351,10 +364,11 @@ static RzDetectedString *process_one_string(const ut8 *buf, const ut64 from, ut6
output_buf = heap_alloc;
}

if (rz_unicode_code_point_is_printable(ucp) && ucp != '\\') {
bool user_defined_unprintable = is_user_defined_unprintable(opt, ucp);
if (rz_unicode_code_point_is_printable(ucp) && !user_defined_unprintable && ucp != '\\') {
char_bytes = rz_utf8_encode(output_buf + i, ucp);
char_count++;
} else if (ucp && ucp < 0x100 && is_c_escape_sequence((char)ucp)) {
} else if (!user_defined_unprintable && ucp && ucp < 0x100 && is_c_escape_sequence((char)ucp)) {
if ((i + 32) < opt->max_str_length && ucp < 93) {
char_bytes = rz_utf8_encode(output_buf + i, ucp);
} else {
Expand Down Expand Up @@ -595,7 +609,7 @@ RZ_API int rz_scan_strings_raw(RZ_NONNULL const ut8 *buf, RZ_NONNULL RzList /*<R
int i = 0;
for (; i < sz; i++) {
rz_str_ibm037_to_unicode(ptr[i], &code_points[i]);
if (!rz_unicode_code_point_is_printable(code_points[i])) {
if (!rz_unicode_code_point_is_printable(code_points[i]) || is_user_defined_unprintable(opt, code_points[i])) {
break;
}
}
Expand Down
16 changes: 16 additions & 0 deletions librz/util/unicode.c
Original file line number Diff line number Diff line change
Expand Up @@ -1028,6 +1028,22 @@ RZ_API bool rz_unicode_code_point_is_printable(const RzCodePoint c) {
!rz_unicode_code_point_is_private(c);
}

/**
 * \brief Returns true if the code point is listed as user-unprintable.
 *
 * Scans the array linearly. A NULL array is accepted and treated as an
 * empty list.
 *
 * \param c Code point to check.
 * \param user_unprintable Array of user-defined non-printable code points; may be NULL.
 * \param user_unprintable_count Number of user-defined non-printable code points.
 * \return true if \p c equals one of the first \p user_unprintable_count entries
 *         of \p user_unprintable; false otherwise.
 */
RZ_API bool rz_unicode_code_point_is_user_unprintable(const RzCodePoint c, const RzCodePoint *user_unprintable, size_t user_unprintable_count) {
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This function seems to be unused?

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please remove it then , if I am not mistaken.

for (size_t i = 0; user_unprintable && i < user_unprintable_count; i++) {
if (user_unprintable[i] == c) {
return true;
}
}
return false;
}

static RzUnicodeCaseMapping bin_search_case_mapping(const RzUnicodeCaseMap map, size_t n, RzCodePoint key) {
size_t lo = 0, hi = n;
while (lo < hi) {
Expand Down
43 changes: 43 additions & 0 deletions test/db/cmd/cmd_search_z
Original file line number Diff line number Diff line change
Expand Up @@ -1522,3 +1522,46 @@ EXPECT=<<EOF
EOF
EXPECT_ERR=
RUN

NAME=String Search - str.unprintable - single code point
FILE=bins/cmd/search/string_encodings/Latin-Lipsum.ascii
CMDS=<<EOF
e str.unprintable=0x20
/z "placerat ut, eu etiam vitae nam" l ascii
ps ascii unprintable @ hit.string.ascii.0
EOF
EXPECT=<<EOF
0x000000fa 31 hit.string.ascii.0 31
placerat
EOF
EXPECT_ERR=
RUN

NAME=String Search - str.unprintable - multiple code points
FILE=bins/cmd/search/string_encodings/Latin-Lipsum.ascii
CMDS=<<EOF
e str.unprintable=0x20,0x2c
/z "placerat ut, eu etiam vitae nam" l ascii
ps ascii unprintable @ hit.string.ascii.0
EOF
EXPECT=<<EOF
0x000000fa 31 hit.string.ascii.0 31
placerat
EOF
EXPECT_ERR=
RUN

NAME=String Search - str.unprintable - utf8 code point
FILE=bins/cmd/search/string_encodings/Arabic-Lipsum.utf8
CMDS=<<EOF
e str.unprintable=0x629
/z "ا الأوضاع, لم بوابة المب" l utf8
b 0x1000
ps utf8 unprintable @ hit.string.utf8.0
EOF
EXPECT=<<EOF
0x00000116 43 hit.string.utf8.0 43
ا الأوضاع, لم بواب
EOF
EXPECT_ERR=
RUN
Expand Down
Loading
Loading