rizinorg · cheese-cakee · Apr 5, 2026 · Apr 8, 2026 · Apr 8, 2026 · Rot127
@@ -23,6 +23,7 @@ typedef struct shared_data_t {
 	size_t min_str_length;
 	bool check_ascii_freq;
 	bool prefer_big_endian;
+	RzVector /*<RzCodePoint>*/ *user_unprintable;
 } SharedData;
 
 typedef struct search_thread_data_t {
@@ -233,6 +234,7 @@ static RzList /*<RzDetectedString *>*/ *string_scan_range(SharedData *shared, co
 		.min_str_length = shared->min_str_length,
 		.prefer_big_endian = shared->prefer_big_endian,
 		.check_ascii_freq = shared->check_ascii_freq,
+		.user_unprintable = shared->user_unprintable,
 	};
 
 	ut8 *buf = calloc(interval_size, 1);
@@ -462,6 +464,7 @@ RZ_API void rz_bin_string_search_opt_init(RZ_NONNULL RzBinStringSearchOpt *opt)
 	opt->raw_alignment = RZ_BIN_STRING_SEARCH_RAW_FILE_ALIGNMENT;
 	opt->string_encoding = RZ_STRING_ENC_GUESS;
 	opt->check_ascii_freq = RZ_BIN_STRING_SEARCH_CHECK_ASCII_FREQ;
+	opt->user_unprintable = NULL;
 	opt->mode = RZ_BIN_STRING_SEARCH_MODE_AUTO;
 }
 
@@ -656,6 +659,7 @@ RZ_API RZ_OWN RzPVector /*<RzBinString *>*/ *rz_bin_file_strings(RZ_NONNULL RzBi
 		.min_str_length = opt->min_length,
 		.check_ascii_freq = opt->check_ascii_freq,
 		.prefer_big_endian = prefer_big_endian,
+		.user_unprintable = opt->user_unprintable,
 	};
 
 	if (shared.min_str_length < 1) {

@@ -525,6 +525,7 @@ RZ_API void rz_bin_free(RZ_NULLABLE RzBin *bin) {
 	bin->file = NULL;
 	free(bin->force);
 	free(bin->srcdir);
+	rz_vector_free(bin->str_search_cfg.user_unprintable);
 	// rz_bin_free_bin_files (bin);
 	rz_list_free(bin->binfiles);
 

@@ -81,6 +81,7 @@ static bool find_string_at(RzCore *core, RzBinObject *bobj, ut64 pointer, char *
 		.min_str_length = bin->str_search_cfg.min_length,
 		.prefer_big_endian = big_endian,
 		.check_ascii_freq = bin->str_search_cfg.check_ascii_freq,
+		.user_unprintable = bin->str_search_cfg.user_unprintable,
 	};
 
 	rz_io_pread_at(core->io, pointer, buffer, sizeof(buffer));

@@ -1000,6 +1000,74 @@ static bool cb_str_encoding(void *user, void *data) {
 	return true;
 }
 
+/**
+ * \brief Config callback for `str.unprintable`.
+ *
+ * Parses the node value as a comma-separated list of code points and installs
+ * it as `core->bin->str_search_cfg.user_unprintable`, replacing (and freeing)
+ * the previous list. An empty value resets the list to NULL. A value starting
+ * with '?' only prints a usage text.
+ *
+ * \param user The RzCore instance.
+ * \param data The RzConfigNode being set.
+ * \return true when the list has been replaced; false when help was printed,
+ *         an allocation failed, or any entry was rejected (the previous list
+ *         is left untouched in those cases).
+ */
+static bool cb_str_unprintable(void *user, void *data) {
+	RzCore *core = (RzCore *)user;
+	RzConfigNode *node = (RzConfigNode *)data;
+
+	if (*node->value == '?') {
+		rz_cons_printf("Comma-separated list of Unicode code points treated as non-printable.\n");
+		rz_cons_printf("Examples:\n");
+		rz_cons_printf("  e str.unprintable=0x09,0x0a,0x0d,0x1b\n");
+		rz_cons_printf("  e str.unprintable=0x200B\n");
+		rz_cons_printf("  e str.unprintable=\n");
+		rz_cons_printf("    -- reset the list to empty.\n");
+		return false;
+	}
+
+	// Stays NULL for an empty value, which is how "no user list" is stored.
+	RzVector *parsed = NULL;
+	char *items = NULL;
+
+	if (!RZ_STR_ISEMPTY(node->value)) {
+		// rz_str_split() modifies its argument, so work on a private copy.
+		items = rz_str_dup(node->value);
+		if (!items) {
+			return false;
+		}
+
+		const int n_items = rz_str_split(items, ',');
+		if (n_items < 1) {
+			goto fail;
+		}
+
+		parsed = rz_vector_new(sizeof(RzCodePoint), NULL, NULL);
+		if (!parsed) {
+			goto fail;
+		}
+
+		for (int idx = 0; idx < n_items; idx++) {
+			const char *token = rz_str_word_get0(items, idx);
+			if (RZ_STR_ISEMPTY(token) || !rz_is_valid_input_num_value(core->num, token)) {
+				RZ_LOG_ERROR("Invalid value for str.unprintable (%s).\n", token ? token : "");
+				goto fail;
+			}
+			const ut64 number = rz_num_math(core->num, token);
+			if (number > RZ_UNICODE_LAST_CODE_POINT) {
+				RZ_LOG_ERROR("str.unprintable code point out of range (%s).\n", token);
+				goto fail;
+			}
+			RzCodePoint code_point = (RzCodePoint)number;
+			if (!rz_vector_push(parsed, &code_point)) {
+				RZ_LOG_ERROR("Cannot append code point to str.unprintable (%s).\n", token);
+				goto fail;
+			}
+		}
+		free(items);
+	}
+
+	// Commit: only reached once the whole value has been accepted.
+	rz_vector_free(core->bin->str_search_cfg.user_unprintable);
+	core->bin->str_search_cfg.user_unprintable = parsed;
+	check_reload_bin_str_search(core);
+	return true;
+
+fail:
+	// Single failure exit; `parsed` may still be NULL here.
+	rz_vector_free(parsed);
+	free(items);
+	return false;
+}
+
 static bool cb_str_search_mode(void *user, void *data) {
 	RzCore *core = (RzCore *)user;
 	RzConfigNode *node = (RzConfigNode *)data;
@@ -3568,6 +3636,7 @@ RZ_API int rz_core_config_init(RzCore *core) {
 	n = NODECB("str.encoding", "guess", &cb_str_encoding);
 	SETDESC(n, "The default string encoding type (when set to guess, it is automatically guessed).");
 	SETOPTIONS(n, "ascii", "8bit", "utf8", "utf16le", "utf32le", "utf16be", "utf32be", "ibm037", "ibm290", "ebcdices", "ebcdicuk", "ebcdicus", "guess", NULL);
+	SETCB("str.unprintable", "", &cb_str_unprintable, "Comma-separated hex code points treated as non-printable.");
 
 	/* string search options */
 	SETB("str.search.reload", true, "When enabled, any change to any option `str.search.*` will reload the bin strings.");

@@ -1846,10 +1846,11 @@ static RzCmdStatus core_print_string_in_block(RzCore *core, bool stop_at_nil, bo
 		opt.encoding = encoding;
 		opt.stop_at_nil = stop_at_nil;
 		opt.stop_at_unprintable = stop_at_unprintable;
+		opt.user_unprintable = core->bin->str_search_cfg.user_unprintable;
 		core_print_raw_buffer(&opt);
 		break;
 	case RZ_OUTPUT_MODE_JSON:
-		print_json_string(core, buffer, length, encoding, stop_at_nil, stop_at_nil);
+		print_json_string(core, buffer, length, encoding, stop_at_nil, stop_at_unprintable);
 		break;
 	default:
 		RZ_LOG_ERROR("core: unsupported output mode\n");

@@ -381,6 +381,7 @@ static bool meta_string_guess_add(RzCore *core, ut64 addr, size_t limit, char **
 		.min_str_length = bin->str_search_cfg.min_length,
 		.prefer_big_endian = big_endian,
 		.check_ascii_freq = bin->str_search_cfg.check_ascii_freq,
+		.user_unprintable = bin->str_search_cfg.user_unprintable,
 	};
 	RzList *str_list = rz_list_new();
 	if (!str_list) {

@@ -268,6 +268,7 @@ RZ_API RZ_OWN RzList /*<RzSearchHit *>*/ *rz_core_search_string(RZ_NONNULL RzCor
 		.min_str_length = RZ_MAX(re_pattern_len, core->bin->str_search_cfg.min_length),
 		.prefer_big_endian = rz_asm_is_big_endian_set(core->rasm),
 		.check_ascii_freq = core->bin->str_search_cfg.check_ascii_freq,
+		.user_unprintable = core->bin->str_search_cfg.user_unprintable,
 	};
 
 	RzList *hits = NULL;

@@ -211,6 +211,7 @@ typedef struct rz_bin_string_search_opt_t {
 	 */
 	size_t raw_alignment;
 	bool check_ascii_freq; ///< If true, perform check on ASCII frequencies when looking for false positives
+	RzVector /*<RzCodePoint>*/ *user_unprintable; ///< User-defined non-printable code points
 	RzStrEnc string_encoding; ///< The default string encoding type (when set to guess, it is automatically guessed).
 	RzBinStringSearchMode mode; ///< String search mode (auto, ro sections or raw binary)
 } RzBinStringSearchOpt;

@@ -5,6 +5,7 @@
 #include "rz_assert.h"
 #include "rz_str_util.h"
 #include "rz_list.h"
+#include <rz_vector.h>
 #include "rz_types.h"
 
 #ifdef __cplusplus
@@ -288,6 +289,7 @@ typedef struct rz_str_stringify_opt_t {
 	bool stop_at_nil; ///< When enabled stops printing when '\0' is found.
 	bool stop_at_unprintable; ///< When enabled stops printing at first non-printable character.
 	bool urlencode; ///< Encodes the output following RFC 3986.
+	const RzVector /*<RzCodePoint>*/ *user_unprintable; ///< Borrowed vector of user-defined non-printable code points.
 } RzStrStringifyOpt;
 
 RZ_API RzStrEnc rz_str_guess_encoding_from_buffer(RZ_NONNULL const ut8 *buffer, ut32 length);

@@ -2,6 +2,7 @@
 #define RZ_STR_SEARCH_H
 
 #include <rz_util/rz_str.h>
+#include <rz_util/rz_unicode.h>
 #include <rz_util/rz_assert.h>
 #include <rz_util/rz_buf.h>
 #include <rz_util/rz_regex.h>
@@ -41,6 +42,7 @@ typedef struct {
 	size_t min_str_length; ///< Minimum string length
 	bool prefer_big_endian; ///< True if the preferred endianess for UTF strings is big-endian
 	bool check_ascii_freq; ///< If true, perform check on ASCII frequencies when looking for false positives
+	RzVector /*<RzCodePoint>*/ *user_unprintable; ///< Borrowed vector of user-defined non-printable code points
 } RzUtilStrScanOptions;
 
 RZ_API void rz_detected_string_free(RzDetectedString *str);

@@ -51,6 +51,7 @@ typedef struct rz_unicode_case_mapping_t {
 typedef RzUnicodeCaseMapping RzUnicodeCaseMap[];
 
 RZ_API bool rz_unicode_code_point_is_printable(const RzCodePoint c);
+RZ_API bool rz_unicode_code_point_is_user_unprintable(const RzCodePoint c, const RzCodePoint *user_unprintable, size_t user_unprintable_count);
 RZ_API bool rz_unicode_code_point_is_defined(const RzCodePoint c);
 RZ_API bool rz_unicode_code_point_is_legal_decode(const RzCodePoint c);
 RZ_API bool rz_unicode_code_point_is_control(const RzCodePoint c);

@@ -4259,6 +4259,35 @@ RZ_API RzStrEnc rz_str_guess_encoding_from_buffer(RZ_NONNULL const ut8 *buffer,
 	return enc == RZ_STRING_ENC_GUESS ? RZ_STRING_ENC_UTF8 : enc;
 }
 
+/**
+ * \brief Checks whether \p cp is in the user-supplied list of non-printable
+ *        code points carried by the stringify options.
+ *
+ * The lookup itself is delegated to rz_unicode_code_point_is_user_unprintable()
+ * so the matching logic is not duplicated here.
+ *
+ * \param option Stringify options; when NULL or without a list nothing matches.
+ * \param cp Code point to look up.
+ * \return true if \p cp is listed in option->user_unprintable, false otherwise.
+ */
+static inline bool is_user_defined_unprintable(const RzStrStringifyOpt *option, RzCodePoint cp) {
+	if (!option || !option->user_unprintable) {
+		return false;
+	}
+	const RzCodePoint *listed = (const RzCodePoint *)rz_vector_head(option->user_unprintable);
+	return rz_unicode_code_point_is_user_unprintable(cp, listed, rz_vector_len(option->user_unprintable));
+}
+
+/**
+ * \brief Tells whether the bytes left in \p buf from offset \p i are too few
+ *        for one complete unit of the encoding \p enc.
+ *
+ * - UTF-8: true when rz_utf8_size(buf + i) exceeds the bytes left.
+ * - UTF-16 (LE/BE): true when fewer than 2 bytes are left.
+ * - UTF-32 (LE/BE): true when fewer than 4 bytes are left.
+ * - Any other encoding: always false.
+ *
+ * NOTE(review): assumes i < buflen; otherwise the subtraction wraps and the
+ * UTF-8 branch reads past the buffer - confirm against the caller.
+ *
+ * \param buf Buffer being stringified.
+ * \param buflen Length of \p buf in bytes.
+ * \param i Offset of the unit that failed to decode.
+ * \param enc Encoding used for decoding.
+ */
+static inline bool stringification_has_incomplete_tail(const ut8 *buf, ut32 buflen, ut32 i, RzStrEnc enc) {
+	const size_t left = buflen - i;
+	if (enc == RZ_STRING_ENC_UTF8) {
+		return rz_utf8_size(buf + i) > left;
+	}
+	if (enc == RZ_STRING_ENC_UTF16LE || enc == RZ_STRING_ENC_UTF16BE) {
+		return left < 2;
+	}
+	if (enc == RZ_STRING_ENC_UTF32LE || enc == RZ_STRING_ENC_UTF32BE) {
+		return left < 4;
+	}
+	return false;
+}
+
 /**
  * \brief Converts a raw buffer to a printable string based on the selected options
  *
@@ -4321,6 +4350,9 @@ RZ_API RZ_OWN char *rz_str_stringify_raw_buffer(RzStrStringifyOpt *option, RZ_NU
 		}
 
 		if (rsize == 0) {
+			if (stringification_has_incomplete_tail(buf, buflen, i, enc)) {
+				break;
+			}
 			if (option->stop_at_unprintable) {
 				break;
 			}
@@ -4407,17 +4439,20 @@ RZ_API RZ_OWN char *rz_str_stringify_raw_buffer(RzStrStringifyOpt *option, RZ_NU
 		} else {
 			if (code_point == '\\') {
 				rz_strbuf_appendf(&sb, "\\\\");
-			} else if ((code_point == '\n' && !option->escape_nl) || (rz_unicode_code_point_is_printable(code_point))) {
-				char tmp[5] = { 0 };
-				rz_utf8_encode((ut8 *)tmp, code_point);
-				rz_strbuf_appendf(&sb, "%s", tmp);
-			} else if (option->stop_at_unprintable) {
-				break;
 			} else {
-				ut8 tmp[4];
-				int n_enc = rz_utf8_encode((ut8 *)tmp, code_point);
-				for (int j = 0; j < n_enc; ++j) {
-					rz_strbuf_appendf(&sb, "\\x%02x", tmp[j]);
+				const bool user_unprintable = is_user_defined_unprintable(option, code_point);
+				if (((code_point == '\n' && !option->escape_nl) || rz_unicode_code_point_is_printable(code_point)) && !user_unprintable) {
+					char tmp[5] = { 0 };
+					rz_utf8_encode((ut8 *)tmp, code_point);
+					rz_strbuf_appendf(&sb, "%s", tmp);
+				} else if (option->stop_at_unprintable) {
+					break;
+				} else {
+					ut8 tmp[4];
+					int n_enc = rz_utf8_encode((ut8 *)tmp, code_point);
+					for (int j = 0; j < n_enc; ++j) {
+						rz_strbuf_appendf(&sb, "\\x%02x", tmp[j]);
+					}
 				}
 			}
 		}

@@ -89,6 +89,19 @@ static inline bool is_c_escape_sequence(char ch) {
 	return strchr("\b\v\f\n\r\t\a\033\\", ch);
 }
 
+/**
+ * \brief Checks whether \p cp is in the user-supplied list of non-printable
+ *        code points carried by the string scan options.
+ *
+ * The lookup itself is delegated to rz_unicode_code_point_is_user_unprintable()
+ * so the matching logic is not duplicated here.
+ *
+ * \param opt Scan options; when NULL or without a list nothing matches.
+ * \param cp Code point to look up.
+ * \return true if \p cp is listed in opt->user_unprintable, false otherwise.
+ */
+static inline bool is_user_defined_unprintable(const RzUtilStrScanOptions *opt, RzCodePoint cp) {
+	if (!opt || !opt->user_unprintable) {
+		return false;
+	}
+	const RzCodePoint *listed = (const RzCodePoint *)rz_vector_head(opt->user_unprintable);
+	return rz_unicode_code_point_is_user_unprintable(cp, listed, rz_vector_len(opt->user_unprintable));
+}
+
 static UTF8StringInfo calculate_utf8_string_info(ut8 *str, int size) {
 	UTF8StringInfo res = {
 		.num_ascii = 0,
@@ -351,10 +364,11 @@ static RzDetectedString *process_one_string(const ut8 *buf, const ut64 from, ut6
 			output_buf = heap_alloc;
 		}
 
-		if (rz_unicode_code_point_is_printable(ucp) && ucp != '\\') {
+		bool user_defined_unprintable = is_user_defined_unprintable(opt, ucp);
+		if (rz_unicode_code_point_is_printable(ucp) && !user_defined_unprintable && ucp != '\\') {
 			char_bytes = rz_utf8_encode(output_buf + i, ucp);
 			char_count++;
-		} else if (ucp && ucp < 0x100 && is_c_escape_sequence((char)ucp)) {
+		} else if (!user_defined_unprintable && ucp && ucp < 0x100 && is_c_escape_sequence((char)ucp)) {
 			if ((i + 32) < opt->max_str_length && ucp < 93) {
 				char_bytes = rz_utf8_encode(output_buf + i, ucp);
 			} else {
@@ -595,7 +609,7 @@ RZ_API int rz_scan_strings_raw(RZ_NONNULL const ut8 *buf, RZ_NONNULL RzList /*<R
 				int i = 0;
 				for (; i < sz; i++) {
 					rz_str_ibm037_to_unicode(ptr[i], &code_points[i]);
-					if (!rz_unicode_code_point_is_printable(code_points[i])) {
+					if (!rz_unicode_code_point_is_printable(code_points[i]) || is_user_defined_unprintable(opt, code_points[i])) {
 						break;
 					}
 				}

@@ -1028,6 +1028,22 @@ RZ_API bool rz_unicode_code_point_is_printable(const RzCodePoint c) {
 		!rz_unicode_code_point_is_private(c);
 }
 
+/**
+ * \brief Linear lookup of a code point in a caller-provided list of
+ *        user-defined non-printable code points.
+ *
+ * \param c Code point to look for.
+ * \param user_unprintable Array of user-defined non-printable code points; may be NULL.
+ * \param user_unprintable_count Number of elements in \p user_unprintable.
+ * \return true if \p c equals one of the first \p user_unprintable_count
+ *         entries; false if it does not or if \p user_unprintable is NULL.
+ */
+RZ_API bool rz_unicode_code_point_is_user_unprintable(const RzCodePoint c, const RzCodePoint *user_unprintable, size_t user_unprintable_count) {
+	if (!user_unprintable) {
+		return false;
+	}
+	size_t idx = 0;
+	while (idx < user_unprintable_count) {
+		if (user_unprintable[idx] == c) {
+			return true;
+		}
+		idx++;
+	}
+	return false;
+}
+
 static RzUnicodeCaseMapping bin_search_case_mapping(const RzUnicodeCaseMap map, size_t n, RzCodePoint key) {
 	size_t lo = 0, hi = n;
 	while (lo < hi) {

@@ -1522,3 +1522,46 @@ EXPECT=<<EOF
 EOF
 EXPECT_ERR=
 RUN
+
+NAME=String Search - str.unprintable - single code point
+FILE=bins/cmd/search/string_encodings/Latin-Lipsum.ascii
+CMDS=<<EOF
+e str.unprintable=0x20
+/z "placerat ut, eu etiam vitae nam" l ascii
+ps ascii unprintable @ hit.string.ascii.0
+EOF
+EXPECT=<<EOF
+0x000000fa 31 hit.string.ascii.0 31
+placerat
+EOF
+EXPECT_ERR=
+RUN
+
+NAME=String Search - str.unprintable - multiple code points
+FILE=bins/cmd/search/string_encodings/Latin-Lipsum.ascii
+CMDS=<<EOF
+e str.unprintable=0x20,0x2c
+/z "placerat ut, eu etiam vitae nam" l ascii
+ps ascii unprintable @ hit.string.ascii.0
+EOF
+EXPECT=<<EOF
+0x000000fa 31 hit.string.ascii.0 31
+placerat
+EOF
+EXPECT_ERR=
+RUN
+
+NAME=String Search - str.unprintable - utf8 code point
+FILE=bins/cmd/search/string_encodings/Arabic-Lipsum.utf8
+CMDS=<<EOF
+e str.unprintable=0x629
+/z "ا الأوضاع, لم بوابة المب" l utf8
+b 0x1000
+ps utf8 unprintable @ hit.string.utf8.0
+EOF
+EXPECT=<<EOF
+0x00000116 43 hit.string.utf8.0 43
+ا الأوضاع, لم بواب
+EOF
+EXPECT_ERR=
+RUN