-
-
Notifications
You must be signed in to change notification settings - Fork 553
util: extend string search with user-defined printable characters #6161
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: dev
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -4259,6 +4259,35 @@ RZ_API RzStrEnc rz_str_guess_encoding_from_buffer(RZ_NONNULL const ut8 *buffer, | |
| return enc == RZ_STRING_ENC_GUESS ? RZ_STRING_ENC_UTF8 : enc; | ||
| } | ||
|
|
||
| static inline bool is_user_defined_unprintable(const RzStrStringifyOpt *option, RzCodePoint cp) { | ||
| if (!option || !option->user_unprintable) { | ||
| return false; | ||
| } | ||
| const RzCodePoint *user_unprintable = (const RzCodePoint *)rz_vector_head(option->user_unprintable); | ||
| for (size_t i = 0, count = rz_vector_len(option->user_unprintable); i < count; i++) { | ||
| if (user_unprintable[i] == cp) { | ||
| return true; | ||
| } | ||
| } | ||
|
Comment on lines
+4266
to
+4271
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Please use `rz_vector_foreach` |
||
| return false; | ||
| } | ||
|
|
||
| static inline bool stringification_has_incomplete_tail(const ut8 *buf, ut32 buflen, ut32 i, RzStrEnc enc) { | ||
| const size_t remaining = buflen - i; | ||
| switch (enc) { | ||
| case RZ_STRING_ENC_UTF8: | ||
| return rz_utf8_size(buf + i) > remaining; | ||
| case RZ_STRING_ENC_UTF16LE: | ||
| case RZ_STRING_ENC_UTF16BE: | ||
| return remaining < 2; | ||
| case RZ_STRING_ENC_UTF32LE: | ||
| case RZ_STRING_ENC_UTF32BE: | ||
| return remaining < 4; | ||
| default: | ||
| return false; | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * \brief Converts a raw buffer to a printable string based on the selected options | ||
| * | ||
|
|
@@ -4321,6 +4350,9 @@ RZ_API RZ_OWN char *rz_str_stringify_raw_buffer(RzStrStringifyOpt *option, RZ_NU | |
| } | ||
|
|
||
| if (rsize == 0) { | ||
| if (stringification_has_incomplete_tail(buf, buflen, i, enc)) { | ||
| break; | ||
| } | ||
| if (option->stop_at_unprintable) { | ||
| break; | ||
| } | ||
|
|
@@ -4407,17 +4439,20 @@ RZ_API RZ_OWN char *rz_str_stringify_raw_buffer(RzStrStringifyOpt *option, RZ_NU | |
| } else { | ||
| if (code_point == '\\') { | ||
| rz_strbuf_appendf(&sb, "\\\\"); | ||
| } else if ((code_point == '\n' && !option->escape_nl) || (rz_unicode_code_point_is_printable(code_point))) { | ||
| char tmp[5] = { 0 }; | ||
| rz_utf8_encode((ut8 *)tmp, code_point); | ||
| rz_strbuf_appendf(&sb, "%s", tmp); | ||
| } else if (option->stop_at_unprintable) { | ||
| break; | ||
| } else { | ||
| ut8 tmp[4]; | ||
| int n_enc = rz_utf8_encode((ut8 *)tmp, code_point); | ||
| for (int j = 0; j < n_enc; ++j) { | ||
| rz_strbuf_appendf(&sb, "\\x%02x", tmp[j]); | ||
| const bool user_unprintable = is_user_defined_unprintable(option, code_point); | ||
| if (((code_point == '\n' && !option->escape_nl) || rz_unicode_code_point_is_printable(code_point)) && !user_unprintable) { | ||
| char tmp[5] = { 0 }; | ||
| rz_utf8_encode((ut8 *)tmp, code_point); | ||
| rz_strbuf_appendf(&sb, "%s", tmp); | ||
| } else if (option->stop_at_unprintable) { | ||
| break; | ||
| } else { | ||
| ut8 tmp[4]; | ||
| int n_enc = rz_utf8_encode((ut8 *)tmp, code_point); | ||
| for (int j = 0; j < n_enc; ++j) { | ||
| rz_strbuf_appendf(&sb, "\\x%02x", tmp[j]); | ||
| } | ||
| } | ||
| } | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -89,6 +89,19 @@ static inline bool is_c_escape_sequence(char ch) { | |
| return strchr("\b\v\f\n\r\t\a\033\\", ch); | ||
| } | ||
|
|
||
| static inline bool is_user_defined_unprintable(const RzUtilStrScanOptions *opt, RzCodePoint cp) { | ||
| if (!opt || !opt->user_unprintable) { | ||
| return false; | ||
| } | ||
| const RzCodePoint *user_unprintable = (const RzCodePoint *)rz_vector_head(opt->user_unprintable); | ||
| for (size_t i = 0, count = rz_vector_len(opt->user_unprintable); i < count; i++) { | ||
| if (user_unprintable[i] == cp) { | ||
| return true; | ||
| } | ||
| } | ||
|
Comment on lines
+96
to
+101
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. `rz_vector_foreach` |
||
| return false; | ||
| } | ||
|
|
||
| static UTF8StringInfo calculate_utf8_string_info(ut8 *str, int size) { | ||
| UTF8StringInfo res = { | ||
| .num_ascii = 0, | ||
|
|
@@ -351,10 +364,11 @@ static RzDetectedString *process_one_string(const ut8 *buf, const ut64 from, ut6 | |
| output_buf = heap_alloc; | ||
| } | ||
|
|
||
| if (rz_unicode_code_point_is_printable(ucp) && ucp != '\\') { | ||
| bool user_defined_unprintable = is_user_defined_unprintable(opt, ucp); | ||
| if (rz_unicode_code_point_is_printable(ucp) && !user_defined_unprintable && ucp != '\\') { | ||
| char_bytes = rz_utf8_encode(output_buf + i, ucp); | ||
| char_count++; | ||
| } else if (ucp && ucp < 0x100 && is_c_escape_sequence((char)ucp)) { | ||
| } else if (!user_defined_unprintable && ucp && ucp < 0x100 && is_c_escape_sequence((char)ucp)) { | ||
| if ((i + 32) < opt->max_str_length && ucp < 93) { | ||
| char_bytes = rz_utf8_encode(output_buf + i, ucp); | ||
| } else { | ||
|
|
@@ -595,7 +609,7 @@ RZ_API int rz_scan_strings_raw(RZ_NONNULL const ut8 *buf, RZ_NONNULL RzList /*<R | |
| int i = 0; | ||
| for (; i < sz; i++) { | ||
| rz_str_ibm037_to_unicode(ptr[i], &code_points[i]); | ||
| if (!rz_unicode_code_point_is_printable(code_points[i])) { | ||
| if (!rz_unicode_code_point_is_printable(code_points[i]) || is_user_defined_unprintable(opt, code_points[i])) { | ||
| break; | ||
| } | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1028,6 +1028,22 @@ RZ_API bool rz_unicode_code_point_is_printable(const RzCodePoint c) { | |
| !rz_unicode_code_point_is_private(c); | ||
| } | ||
|
|
||
| /** | ||
| * \brief Returns true if the code point is listed as user-unprintable. | ||
| * | ||
| * \param c Code point to check. | ||
| * \param user_unprintable Array of user-defined non-printable code points. | ||
| * \param user_unprintable_count Number of user-defined non-printable code points. | ||
| */ | ||
| RZ_API bool rz_unicode_code_point_is_user_unprintable(const RzCodePoint c, const RzCodePoint *user_unprintable, size_t user_unprintable_count) { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This function seems to be unused?
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Please remove it then, if I am not mistaken. |
||
| for (size_t i = 0; user_unprintable && i < user_unprintable_count; i++) { | ||
| if (user_unprintable[i] == c) { | ||
| return true; | ||
| } | ||
| } | ||
| return false; | ||
| } | ||
|
|
||
| static RzUnicodeCaseMapping bin_search_case_mapping(const RzUnicodeCaseMap map, size_t n, RzCodePoint key) { | ||
| size_t lo = 0, hi = n; | ||
| while (lo < hi) { | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.