Skip to content

Commit 1a1841e

Browse files
committed
chunk validation tasks
1 parent 5b03365 commit 1a1841e

16 files changed

Lines changed: 791 additions & 93 deletions

lang/en_us.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -310,6 +310,7 @@ prime_backup:
310310
missing: 'Missing blob amount: {}'
311311
corrupted: 'Corrupted blob amount: {}'
312312
mismatched: 'Mismatched blob amount: {}'
313+
bad_layout: 'Bad-layout blob amount: {}'
313314
orphan: 'Orphan blob amount: {}'
314315
affected: 'Affected range: {} / {} file objects, {} / {} filesets, {} / {} backups'
315316
see_log: 'See log file {} for details and affected stuffs of these bad blobs'
@@ -537,6 +538,8 @@ prime_backup:
537538
blob_hash_not_unique.candidates: 'Found at least {} candidates: {}'
538539
chunk_id_not_found: Chunk with id {} does not exist
539540
chunk_hash_not_found: Chunk with hash {} does not exist
541+
chunk_group_id_not_found: Chunk group with id {} does not exist
542+
chunk_group_hash_not_found: Chunk group with hash {} does not exist
540543
db_locked: Task {} execution error cuz the database is locked. Please retry later
541544
initializing: Plugin initializing
542545
disabled: Plugin disabled

lang/zh_cn.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -310,6 +310,7 @@ prime_backup:
310310
missing: '文件缺失的数据对象: {}个'
311311
corrupted: '文件损坏的数据对象: {}个'
312312
mismatched: '信息不匹配的数据对象: {}个'
313+
bad_layout: '分块布局异常的数据对象: {}个'
313314
orphan: '孤儿数据对象: {}个'
314315
affected: '影响范围: {}/{}个文件对象, {}/{}个文件集, {}/{}个备份'
315316
see_log: '见日志文件 {} 以了解这些数据对象的详细信息及影响范围'
@@ -537,6 +538,8 @@ prime_backup:
537538
blob_hash_not_unique.candidates: '找到了至少{}个可能的数据对象: {}'
538539
chunk_id_not_found: ID为{}的数据块不存在
539540
chunk_hash_not_found: 哈希值为{}的数据块不存在
541+
chunk_group_id_not_found: ID为{}的数据块组不存在
542+
chunk_group_hash_not_found: 哈希值为{}的数据块组不存在
540543
db_locked: 任务{}执行失败, 数据库已被锁定。请稍候再试
541544
initializing: 插件初始化中
542545
disabled: 插件已禁用

prime_backup/action/scan_unknown_blob_files.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,10 +57,13 @@ def run(self) -> ScanUnknownBlobFilesResult:
5757
count += 1
5858
size = unknown_blob_file.stat().st_size
5959
size_sum += size
60-
self.logger.debug('Found unknown blob at {} with size {}, deleting'.format(unknown_blob_file, size))
6160

61+
self.logger.debug('Found unknown blob file at {} with size {}{}'.format(unknown_blob_file, size, ', deleting' if self.delete else ''))
62+
if self.delete:
63+
unknown_chunk_file.unlink(missing_ok=True)
6264
if self.delete:
6365
unknown_blob_file.unlink(missing_ok=True)
66+
6467
if len(unknown_blob_file_samples) < 5:
6568
unknown_blob_file_samples.append(str(unknown_blob_file))
6669
if self.result_sample_limit is not None and len(result_files) < self.result_sample_limit:
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
import dataclasses
2+
import enum
3+
from typing import List
4+
5+
from typing_extensions import override
6+
7+
from prime_backup.action import Action
8+
from prime_backup.db.access import DbAccess
9+
from prime_backup.db.session import DbSession
10+
from prime_backup.types.chunk_group_info import BlobChunkGroupBindingInfo
11+
12+
@enum.unique
class BadBlobChunkGroupBindingItemType(enum.Enum):
    """
    Reason why a blob chunk group binding is reported as bad.
    """
    # the binding refers to a blob that does not exist
    orphan = enum.auto()
    # the referenced blob exists, but its storage method is invalid for a chunk group binding
    bad_storage_method = enum.auto()
16+
17+
@dataclasses.dataclass(frozen=True)
class BadBlobChunkGroupBindingItem:
    """
    Immutable record describing a single bad blob chunk group binding.
    """
    # the binding that failed validation
    binding: BlobChunkGroupBindingInfo
    # category of the problem
    typ: BadBlobChunkGroupBindingItemType
    # human-readable description of the problem
    desc: str
23+
24+
@dataclasses.dataclass
class ValidateBlobChunkGroupBindingsResult:
    """
    Mutable accumulator for the outcome of a blob chunk group binding validation.
    """
    # total number of bindings; filled in by whoever runs the validation
    total: int = 0
    # every binding reported as bad, in insertion order
    bad_bindings: List[BadBlobChunkGroupBindingItem] = dataclasses.field(default_factory=list)

    @property
    def bad(self) -> int:
        """Number of bad bindings recorded so far."""
        return len(self.bad_bindings)

    @property
    def ok(self) -> int:
        """Number of bindings not recorded as bad, i.e. ``total`` minus ``bad``."""
        bad_count = self.bad
        return self.total - bad_count

    def add_bad(self, binding: BlobChunkGroupBindingInfo, typ: BadBlobChunkGroupBindingItemType, msg: str):
        """
        Record one bad binding.

        :param binding: the binding that failed validation
        :param typ: category of the problem
        :param msg: human-readable description, stored as the item's ``desc``
        """
        item = BadBlobChunkGroupBindingItem(binding=binding, typ=typ, desc=msg)
        self.bad_bindings.append(item)
40+
41+
class ValidateBlobChunkGroupBindingsAction(Action[ValidateBlobChunkGroupBindingsResult]):
    """
    Scan the database for bad blob chunk group bindings and collect them into a result.

    A binding returned by the orphan query is reported as:

    - ``orphan`` when it has no associated blob
    - ``bad_storage_method`` when the blob exists (its storage method is reported as invalid)

    NOTE: BlobChunkGroupBinding's .chunk_group_offset and .chunk_group_id checks are done in ValidateBlobsAction
    """

    @override
    def run(self) -> ValidateBlobChunkGroupBindingsResult:
        """
        Run the validation.

        :return: a result holding the total binding count and the list of bad bindings found
        """
        # Log the start exactly once; this message used to be emitted a second time
        # right after the session was opened
        self.logger.info('Scanning all blob chunk group bindings for orphan check')
        result = ValidateBlobChunkGroupBindingsResult()

        session: DbSession
        with DbAccess.open_session() as session:
            result.total = session.get_blob_chunk_group_binding_count()

            # NOTE(review): presumably `limit=1000` caps how many bad bindings are listed;
            # if more exist, `result.ok` would be overstated -- confirm against the session API
            for lor in session.list_orphan_blob_chunk_group_bindings(limit=1000):
                binding_info = BlobChunkGroupBindingInfo.of(lor.binding)
                if lor.blob is None:
                    result.add_bad(binding_info, BadBlobChunkGroupBindingItemType.orphan, f'orphan binding refers to a non-existent blob {lor.binding.blob_id}')
                else:
                    result.add_bad(binding_info, BadBlobChunkGroupBindingItemType.bad_storage_method, f'Chunk group binding has invalid storage method {lor.blob.storage_method}')

        self.logger.info('BlobChunkGroupBindingInfo validation done: total {}, ok {}, bad {}'.format(
            result.total, result.ok, result.bad,
        ))
        return result

0 commit comments

Comments
 (0)