Skip to content

Commit ff4b81a

Browse files
base,sstable/block: dump block data on checksum mismatch errors
When Pebble encounters a bad checksum error, it is fatal and typically impossible to get more information about what went wrong. This change attaches the raw block data to the checksum mismatch error so it can be extracted into DataCorruptionInfo and logged as a hex dump.

A new transparent wrapper error type CorruptBlockData carries the raw block bytes without affecting Error() output, Sentry reporting, or wire encoding. ValidateChecksum attaches the block data on checksum failure. reportCorruption extracts the data into DataCorruptionInfo, and MakeLoggingEventListener logs it as hex.

Co-Authored-By: roachdev-claude <roachdev-claude-bot@cockroachlabs.com>
1 parent 80da87c commit ff4b81a

File tree

6 files changed

+156
-5
lines changed

6 files changed

+156
-5
lines changed

event.go

Lines changed: 46 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
package pebble
66

77
import (
8+
"encoding/hex"
89
"fmt"
910
"strings"
1011
"sync"
@@ -63,6 +64,9 @@ type DataCorruptionInfo struct {
6364
// Details of the error. See cockroachdb/error for how to format with or
6465
// without redaction.
6566
Details error
67+
// CorruptedBlockData contains raw data of the corrupted block. Nil if not
68+
// applicable.
69+
CorruptedBlockData []byte
6670
}
6771

6872
func (i DataCorruptionInfo) String() string {
@@ -78,6 +82,39 @@ func (i DataCorruptionInfo) SafeFormat(w redact.SafePrinter, _ rune) {
7882
w.Printf("; bounds: %s; details: %+v", i.Bounds.String(), i.Details)
7983
}
8084

85+
// FormatBlockDataAsHex formats the corrupted block data as a hex dump with 64
86+
// bytes per line grouped into 8-byte groups, up to 256 KiB. Returns empty
87+
// string if no block data is available.
88+
func (i DataCorruptionInfo) FormatBlockDataAsHex() string {
89+
data := i.CorruptedBlockData
90+
if len(data) == 0 {
91+
return ""
92+
}
93+
const maxSize = 256 << 10 // 256 KiB
94+
truncated := len(data) > maxSize
95+
if truncated {
96+
data = data[:maxSize]
97+
}
98+
const bytesPerLine = 64
99+
var buf strings.Builder
100+
for j := 0; j < len(data); j += bytesPerLine {
101+
end := min(j+bytesPerLine, len(data))
102+
line := data[j:end]
103+
fmt.Fprintf(&buf, "%6d ", j)
104+
for k := 0; k < len(line); k += 8 {
105+
if k > 0 {
106+
buf.WriteByte(' ')
107+
}
108+
buf.WriteString(hex.EncodeToString(line[k:min(k+8, len(line))]))
109+
}
110+
buf.WriteByte('\n')
111+
}
112+
if truncated {
113+
fmt.Fprintf(&buf, "... (truncated from %d bytes)\n", len(i.CorruptedBlockData))
114+
}
115+
return buf.String()
116+
}
117+
81118
// LevelInfo contains info pertaining to a particular level.
82119
type LevelInfo struct {
83120
Level int
@@ -1147,6 +1184,9 @@ func MakeLoggingEventListener(logger Logger) EventListener {
11471184
},
11481185
DataCorruption: func(info DataCorruptionInfo) {
11491186
logger.Errorf("%s", info)
1187+
if s := info.FormatBlockDataAsHex(); s != "" {
1188+
logger.Errorf("corrupted block data (hex):\n%s", s)
1189+
}
11501190
},
11511191
CompactionBegin: func(info CompactionInfo) {
11521192
logger.Infof("%s", info)
@@ -1421,11 +1461,12 @@ func (d *DB) reportCorruption(meta base.ObjectInfo, err error) error {
14211461
err = errors.WithHintf(err, "path: %s", redact.Safe(path))
14221462
}
14231463
info := DataCorruptionInfo{
1424-
Path: path,
1425-
IsRemote: objMeta.IsRemote(),
1426-
Locator: objMeta.Remote.Locator,
1427-
Bounds: meta.UserKeyBounds(),
1428-
Details: err,
1464+
Path: path,
1465+
IsRemote: objMeta.IsRemote(),
1466+
Locator: objMeta.Remote.Locator,
1467+
Bounds: meta.UserKeyBounds(),
1468+
Details: err,
1469+
CorruptedBlockData: base.ExtractCorruptBlockData(err),
14291470
}
14301471
d.opts.EventListener.DataCorruption(info)
14311472
// We don't use errors.Join() because that also annotates with this stack

event_test.go

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
// Copyright 2026 The LevelDB-Go and Pebble Authors. All rights reserved. Use
2+
// of this source code is governed by a BSD-style license that can be found in
3+
// the LICENSE file.
4+
5+
package pebble
6+
7+
import (
8+
"encoding/hex"
9+
"strings"
10+
"testing"
11+
12+
"github.com/cockroachdb/datadriven"
13+
)
14+
15+
func TestFormatBlockDataAsHex(t *testing.T) {
16+
datadriven.RunTest(t, "testdata/format_block_data_as_hex", func(t *testing.T, td *datadriven.TestData) string {
17+
switch td.Cmd {
18+
case "format":
19+
input := strings.ReplaceAll(td.Input, "\n", "")
20+
input = strings.ReplaceAll(input, " ", "")
21+
data, err := hex.DecodeString(input)
22+
if err != nil {
23+
t.Fatalf("invalid hex input: %v", err)
24+
}
25+
info := DataCorruptionInfo{CorruptedBlockData: data}
26+
return info.FormatBlockDataAsHex()
27+
28+
default:
29+
t.Fatalf("unknown command: %s", td.Cmd)
30+
return ""
31+
}
32+
})
33+
}

internal/base/error.go

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
package base
66

77
import (
8+
"slices"
9+
810
"github.com/cockroachdb/errors"
911
"github.com/cockroachdb/pebble/internal/invariants"
1012
)
@@ -35,6 +37,34 @@ func CorruptionErrorf(format string, args ...interface{}) error {
3537
return errors.Mark(errors.Newf(format, args...), ErrCorruption)
3638
}
3739

40+
// CorruptBlockData is an error wrapper that pairs a corruption error with the
// raw bytes of the block the error refers to.
//
// The wrapper is transparent: Error() returns the cause's message unchanged
// and Unwrap() exposes the cause. It deliberately does not implement
// SafeDetailer and is not registered with the error encoding system, so the
// raw data does not leak to Sentry or over the wire.
type CorruptBlockData struct {
	// cause is the wrapped error.
	cause error
	// Data is the raw block data associated with the error.
	Data []byte
}

// Error implements the error interface by delegating to the wrapped cause.
func (c *CorruptBlockData) Error() string {
	return c.cause.Error()
}

// Unwrap returns the wrapped cause.
func (c *CorruptBlockData) Unwrap() error {
	return c.cause
}
52+
53+
// AttachCorruptBlockData wraps an error with a copy of the raw block data.
54+
func AttachCorruptBlockData(err error, data []byte) error {
55+
return &CorruptBlockData{cause: err, Data: slices.Clone(data)}
56+
}
57+
58+
// ExtractCorruptBlockData extracts the raw block data from the error chain, if
59+
// present.
60+
func ExtractCorruptBlockData(err error) []byte {
61+
var e *CorruptBlockData
62+
if errors.As(err, &e) {
63+
return e.Data
64+
}
65+
return nil
66+
}
67+
3868
// AssertionFailedf creates an assertion error and panics in invariants.Enabled
3969
// builds. It should only be used when it indicates a bug.
4070
func AssertionFailedf(format string, args ...interface{}) error {

internal/base/error_test.go

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
// Copyright 2026 The LevelDB-Go and Pebble Authors. All rights reserved. Use
2+
// of this source code is governed by a BSD-style license that can be found in
3+
// the LICENSE file.
4+
5+
package base
6+
7+
import (
8+
"testing"
9+
10+
"github.com/cockroachdb/errors"
11+
"github.com/stretchr/testify/require"
12+
)
13+
14+
func TestCorruptBlockData(t *testing.T) {
15+
data := []byte("test block data")
16+
inner := errors.New("inner error")
17+
err := AttachCorruptBlockData(inner, data)
18+
// Wrap further.
19+
err = errors.Wrap(err, "outer")
20+
err = errors.WithStack(err)
21+
22+
got := ExtractCorruptBlockData(err)
23+
require.Equal(t, data, got)
24+
25+
// Original error message is preserved.
26+
require.Contains(t, err.Error(), "inner error")
27+
28+
// No data when not attached.
29+
require.Nil(t, ExtractCorruptBlockData(errors.New("plain")))
30+
}

sstable/block/block.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,7 @@ func ValidateChecksum(checksumType ChecksumType, b []byte, bh Handle) error {
197197
err = errors.WithSafeDetails(err, ". bit flip found: byte index %d. got: 0x%x. want: 0x%x.",
198198
errors.Safe(indexFound), errors.Safe(data[indexFound]), errors.Safe(data[indexFound]^(1<<bitFound)))
199199
}
200+
err = base.AttachCorruptBlockData(err, b)
200201
return err
201202
}
202203
return nil

testdata/format_block_data_as_hex

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# 100 bytes (0x00..0x63): one full 64-byte line + a partial second line.
2+
format
3+
000102030405060708090a0b0c0d0e0f
4+
101112131415161718191a1b1c1d1e1f
5+
202122232425262728292a2b2c2d2e2f
6+
303132333435363738393a3b3c3d3e3f
7+
404142434445464748494a4b4c4d4e4f
8+
505152535455565758595a5b5c5d5e5f
9+
60616263
10+
----
11+
0 0001020304050607 08090a0b0c0d0e0f 1011121314151617 18191a1b1c1d1e1f 2021222324252627 28292a2b2c2d2e2f 3031323334353637 38393a3b3c3d3e3f
12+
64 4041424344454647 48494a4b4c4d4e4f 5051525354555657 58595a5b5c5d5e5f 60616263
13+
14+
# Empty block: no output.
15+
format
16+
----

0 commit comments

Comments
 (0)