Skip to content

Commit 628f2d3

Browse files
committed
[perf] start preload of file contents when opening logfile
And various other things: * Do builds with -O3 * Check for null bytes in is_utf8() * Redo scrub_to_utf8() since it was suboptimal * Fix bad line_buffer::lb_compressed_offset value * Only do preloading for line_buffer's used by logfiles. Otherwise, other uses like detect_file_format() will waste time waiting for the preload to finish and clog up the io_looper. * Always do a full_sort if the index is empty. Might be needed if there is a logfile that has a new order, but we skipped the previous iteration because we passed the deadline. * Increase the file name panel width so we can see file size.
1 parent e6ee476 commit 628f2d3

File tree

16 files changed

+104
-42
lines changed

16 files changed

+104
-42
lines changed

.github/actions/muslbuilder/entrypoint.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@ mkdir lbuild
1414
cd lbuild
1515
../configure \
1616
--with-libarchive=/fake.root \
17-
CFLAGS='-static -g1 -gz=zlib -no-pie -O2' \
18-
CXXFLAGS='-static -g1 -gz=zlib -U__unused -no-pie -O2' \
17+
CFLAGS='-static -g1 -gz=zlib -no-pie -O3' \
18+
CXXFLAGS='-static -g1 -gz=zlib -U__unused -no-pie -O3' \
1919
LDFLAGS="-L/fake.root/lib" \
2020
CPPFLAGS="-I/fake.root/include -I/fake.root/include/ncursesw" \
2121
LIBS="-L/fake.root/lib -lssh2 -llzma -lssl -lcrypto -lz -llz4" \

.github/workflows/bins.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -166,8 +166,8 @@ jobs:
166166
--with-pcre2=$(brew --prefix pcre2) \
167167
--with-sqlite3=$(brew --prefix sqlite3) \
168168
"CPPFLAGS=-I$(brew --prefix libunistring)/include" \
169-
"CXXFLAGS=-g2 -O2" \
170-
'CFLAGS=-O2 -g2' \
169+
"CXXFLAGS=-g2 -O3" \
170+
'CFLAGS=-O3 -g2' \
171171
"LDFLAGS=-L$(brew --prefix xz)/lib -L$(brew --prefix zstd)/lib/ -L$(brew --prefix lz4)/lib/ -L$(brew --prefix libunistring)/lib" \
172172
--with-libarchive=$(brew --prefix libarchive) \
173173
"LIBS=-llzma -lexpat -lzstd -liconv -llz4 -lbz2 -lz -lpcre2-8"
@@ -261,7 +261,7 @@ jobs:
261261
../lnav/configure \
262262
--enable-static \
263263
LDFLAGS="-static" \
264-
CPPFLAGS="-O2" \
264+
CPPFLAGS="-O3" \
265265
CXXFLAGS="-fPIC" \
266266
CFLAGS="-fPIC" \
267267
LIBS="-larchive -lssh2 -llzma -lexpat -llz4 -lz -lzstd -lssl -lcrypto -liconv -lunistring -lbrotlicommon -lcrypt32" \

src/base/is_utf8.cc

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,11 @@ is_utf8(string_fragment str, std::optional<unsigned char> terminator)
104104

105105
valid_end = i;
106106
if (ustr[i] <= 0x7F) /* 00..7F */ {
107+
if (ustr[i] == '\x00') {
108+
retval.usr_message = "Null bytes are not allowed";
109+
retval.usr_faulty_bytes = 1;
110+
continue;
111+
}
107112
if (ustr[i] == '\t') {
108113
retval.usr_column_width_guess += 7;
109114
} else if (ustr[i] == '\x1b' || ustr[i] == '\b') {

src/base/string_util.cc

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -45,15 +45,17 @@ using namespace std::string_view_literals;
4545
void
4646
scrub_to_utf8(char* buffer, size_t length)
4747
{
48-
while (true) {
49-
auto frag = string_fragment::from_bytes(buffer, length);
50-
auto scan_res = is_utf8(frag);
51-
52-
if (scan_res.is_valid()) {
53-
break;
54-
}
55-
for (size_t lpc = 0; lpc < scan_res.usr_faulty_bytes; lpc++) {
56-
buffer[scan_res.usr_valid_frag.sf_end + lpc] = '?';
48+
size_t index = 0;
49+
while (index < length) {
50+
auto start_index = index;
51+
auto ch_res = ww898::utf::utf8::read([buffer, &index, length]() {
52+
if (index < length) {
53+
return buffer[index++];
54+
}
55+
return '\x00';
56+
});
57+
if (ch_res.isErr()) {
58+
buffer[start_index] = '?';
5759
}
5860
}
5961
}

src/base/string_util.tests.cc

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,14 @@ TEST_CASE("endswith")
6060
CHECK(endswith(hw, "lo") == true);
6161
}
6262

63+
TEST_CASE("scrub_to_utf8")
64+
{
65+
char buffer[8]{};
66+
67+
scrub_to_utf8(buffer, sizeof(buffer));
68+
CHECK(buffer[0] == '\x00');
69+
}
70+
6371
TEST_CASE("truncate_to")
6472
{
6573
const std::string orig = "0123456789abcdefghijklmnopqrstuvwxyz";

src/bottom_status_source.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -192,7 +192,7 @@ bottom_status_source::update_loading(file_off_t off,
192192
{
193193
auto& sf = this->bss_fields[BSF_LOADING];
194194

195-
require(off >= 0);
195+
require_ge(off, 0);
196196
require_ge(total, off);
197197

198198
if (total == 0) {

src/k_merge_tree.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -436,7 +436,7 @@ void kmerge_tree_c<T, owner_t, iterator_t, comparitor>::compare_nodes(kmerge_tre
436436

437437
/*
438438
pop the top node, factor it down the list to find its
439-
leaf, replace its leaf and then factor that back up
439+
leaf, replace its leaf and then factor that bacck up
440440
*/
441441
template <class T, class owner_t, class iterator_t, class comparitor>
442442
void kmerge_tree_c<T, owner_t, iterator_t, comparitor>::next(void)

src/line_buffer.cc

Lines changed: 37 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -466,8 +466,7 @@ line_buffer::set_fd(auto_fd& fd)
466466
if (this->lb_file_time < 0) {
467467
this->lb_file_time = 0;
468468
}
469-
this->lb_compressed_offset
470-
= lseek(this->lb_fd, 0, SEEK_CUR);
469+
this->lb_compressed_offset = 0;
471470
if (!hdr.empty()) {
472471
this->lb_header = std::move(hdr);
473472
}
@@ -638,6 +637,7 @@ line_buffer::load_next_buffer()
638637
start + this->lb_alt_buffer.value().size(),
639638
this->lb_alt_buffer.value().available());
640639
this->lb_compressed_offset = gi->get_source_offset();
640+
ensure(this->lb_compressed_offset >= 0);
641641
if (rc != -1
642642
&& rc < (ssize_t) this->lb_alt_buffer.value().available()
643643
&& (start + (ssize_t) this->lb_alt_buffer.value().size() + rc
@@ -703,7 +703,7 @@ line_buffer::load_next_buffer()
703703
rc = BZ2_bzread(bz_file,
704704
this->lb_alt_buffer->end(),
705705
this->lb_alt_buffer->available());
706-
this->lb_compressed_offset = lseek(bzfd, 0, SEEK_SET);
706+
this->lb_compressed_offset = 0;
707707
BZ2_bzclose(bz_file);
708708

709709
if (rc != -1
@@ -849,8 +849,8 @@ line_buffer::fill_range(file_off_t start,
849849
{
850850
/* Cache already has the data, nothing to do. */
851851
retval = true;
852-
if (!lnav::pid::in_child && this->lb_seekable && this->lb_buffer.full()
853-
&& !this->lb_loader_file_offset)
852+
if (this->lb_do_preloading && !lnav::pid::in_child && this->lb_seekable
853+
&& this->lb_buffer.full() && !this->lb_loader_file_offset)
854854
{
855855
// log_debug("loader available start=%d", start);
856856
auto last_lf_iter = std::find(
@@ -911,6 +911,7 @@ line_buffer::fill_range(file_off_t start,
911911
this->lb_file_offset + this->lb_buffer.size(),
912912
this->lb_buffer.available());
913913
this->lb_compressed_offset = gi->get_source_offset();
914+
ensure(this->lb_compressed_offset >= 0);
914915
if (rc != -1 && (rc < (ssize_t) this->lb_buffer.available())) {
915916
this->lb_file_size
916917
= (this->lb_file_offset + this->lb_buffer.size() + rc);
@@ -974,7 +975,7 @@ line_buffer::fill_range(file_off_t start,
974975
rc = BZ2_bzread(bz_file,
975976
this->lb_buffer.end(),
976977
this->lb_buffer.available());
977-
this->lb_compressed_offset = lseek(bzfd, 0, SEEK_SET);
978+
this->lb_compressed_offset = 0;
978979
BZ2_bzclose(bz_file);
979980

980981
if (rc != -1 && (rc < (ssize_t) this->lb_buffer.available())) {
@@ -1063,8 +1064,8 @@ line_buffer::fill_range(file_off_t start,
10631064
break;
10641065
}
10651066

1066-
if (!lnav::pid::in_child && this->lb_seekable && this->lb_buffer.full()
1067-
&& !this->lb_loader_file_offset)
1067+
if (this->lb_do_preloading && !lnav::pid::in_child && this->lb_seekable
1068+
&& this->lb_buffer.full() && !this->lb_loader_file_offset)
10681069
{
10691070
// log_debug("loader available2 start=%d", start);
10701071
auto last_lf_iter = std::find(
@@ -1591,3 +1592,31 @@ line_buffer::cleanup_cache()
15911592
}
15921593
});
15931594
}
1595+
1596+
void
1597+
line_buffer::send_initial_load()
1598+
{
1599+
if (!this->lb_seekable) {
1600+
log_warning("file is not seekable, not doing preload");
1601+
return;
1602+
}
1603+
1604+
if (this->lb_loader_future.valid()) {
1605+
log_warning("preload is already active");
1606+
return;
1607+
}
1608+
1609+
log_debug("sending initial load");
1610+
if (!this->lb_alt_buffer) {
1611+
// log_debug("allocating new buffer!");
1612+
this->lb_alt_buffer = auto_buffer::alloc(this->lb_buffer.capacity());
1613+
}
1614+
this->lb_loader_file_offset = 0;
1615+
auto prom = std::make_shared<std::promise<bool>>();
1616+
this->lb_loader_future = prom->get_future();
1617+
this->lb_stats.s_requested_preloads += 1;
1618+
isc::to<io_looper&, io_looper_tag>().send(
1619+
[this, prom](auto& ioloop) mutable {
1620+
prom->set_value(this->load_next_buffer());
1621+
});
1622+
}

src/line_buffer.hh

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,10 @@ public:
145145

146146
virtual ~line_buffer();
147147

148+
void set_do_preloading(bool value) { this->lb_do_preloading = value; }
149+
150+
bool get_do_preloading() const { return this->lb_do_preloading; }
151+
148152
/** @param fd The file descriptor that data should be pulled from. */
149153
void set_fd(auto_fd& fd);
150154

@@ -278,6 +282,8 @@ public:
278282
return this->lb_decompress_error;
279283
}
280284

285+
void send_initial_load();
286+
281287
static void cleanup_cache();
282288

283289
private:
@@ -381,6 +387,7 @@ private:
381387
bool lb_seekable{false}; /*< Flag set for seekable file descriptors. */
382388
bool lb_compressed{false};
383389
bool lb_is_utf8{true};
390+
bool lb_do_preloading{false};
384391
file_off_t lb_last_line_offset{-1}; /*< */
385392

386393
std::vector<uint32_t> lb_line_starts;

src/lnav.cc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3686,6 +3686,7 @@ SELECT tbl_name FROM sqlite_master WHERE sql LIKE 'CREATE VIRTUAL TABLE%'
36863686
}
36873687

36883688
if (mode_flags.mf_check_configs) {
3689+
isc::supervisor root_superv(injector::get<isc::service_list>());
36893690
rescan_files(true);
36903691
for (auto& lf : lnav_data.ld_active_files.fc_files) {
36913692
logfile::rebuild_result_t rebuild_result;

0 commit comments

Comments
 (0)