|
5 | 5 | import time |
6 | 6 | from concurrent.futures import Future |
7 | 7 | from pathlib import Path |
8 | | -from typing import BinaryIO, Dict, List, Optional |
| 8 | +from typing import BinaryIO, Dict, List, Optional, Union |
9 | 9 |
|
10 | 10 | from prime_backup.action.helpers.blob_creator_common import BlobLookupRoutine, BlobCreateContext, BlobCreatorBase, _BLOB_ALLOC_PERF_MODE |
11 | 11 | from prime_backup.action.helpers.blob_pre_calc_result import BlobPrecalculateResult |
@@ -88,14 +88,14 @@ def get_or_create(self) -> BlobLookupRoutine[schema.Blob]: |
88 | 88 |
|
89 | 89 | with contextlib.ExitStack() as es: |
90 | 90 | actual_path_to_read = self.__prepare_path_to_read(es, plan.policy) |
91 | | - snapshot = self.__load_or_cut_chunks(actual_path_to_read, plan.pre_cal_result, src_path_str) |
| 91 | + |
| 92 | + snapshot_or_blob = yield from self.__load_or_cut_chunks(actual_path_to_read, plan.pre_cal_result, src_path_str) |
| 93 | + if isinstance(snapshot_or_blob, schema.Blob): |
| 94 | + return snapshot_or_blob |
| 95 | + snapshot: _ChunkedBlobSnapshot = snapshot_or_blob |
92 | 96 | if snapshot.blob_size == 0: |
93 | 97 | self.log_and_raise_blob_file_changed('Blob size becomes zero', self.args.last_chance) |
94 | 98 |
|
95 | | - if (cache := (yield from self.query_cached_blob(snapshot.blob_hash))) is not None: |
96 | | - self.logger.debug('Chunked file {} (hash {}) already exists in DB'.format(src_path_str, snapshot.blob_hash)) |
97 | | - return cache |
98 | | - |
99 | 99 | # notes: the following code cannot be interrupted (yield). |
100 | 100 | # The blob is specifically generated by the generator |
101 | 101 | # if any yield is done, ensure to check blob_by_hash_cache again |
@@ -155,26 +155,42 @@ def __prepare_path_to_read(self, es: contextlib.ExitStack, policy: _ChunkedBlobC |
155 | 155 | return temp_file_path |
156 | 156 | raise AssertionError('bad policy {!r}'.format(policy)) |
157 | 157 |
|
158 | | - def __load_or_cut_chunks(self, actual_path_to_read: Path, pre_cal_result: Optional[BlobPrecalculateResult], src_path_str: str) -> _ChunkedBlobSnapshot: |
| 158 | + def __load_or_cut_chunks(self, actual_path_to_read: Path, pre_cal_result: Optional[BlobPrecalculateResult], src_path_str: str) -> BlobLookupRoutine[Union[_ChunkedBlobSnapshot, schema.Blob]]: |
| 159 | + pre_calc_blob_hash: Optional[str] = None |
159 | 160 | if pre_cal_result is not None: |
160 | | - chunks = pre_cal_result.chunks |
161 | | - blob_hash = pre_cal_result.hash |
162 | | - blob_size = pre_cal_result.size |
163 | | - self.logger.debug('Cut and hashed file {} with size {} into {} chunks using {} (precalc)'.format( |
164 | | - src_path_str, ByteCount(blob_size).auto_str(), len(chunks), self.args.chunk_method.name, |
165 | | - )) |
166 | | - return _ChunkedBlobSnapshot(chunks, blob_hash, blob_size) |
| 161 | + pre_calc_blob_hash = pre_cal_result.hash |
| 162 | + if pre_cal_result.chunks is not None: |
| 163 | + chunks = pre_cal_result.chunks |
| 164 | + blob_size = pre_cal_result.size |
| 165 | + self.logger.debug('Cut and hashed file {} with size {} into {} chunks using {} (precalc)'.format( |
| 166 | + src_path_str, ByteCount(blob_size).auto_str(), len(chunks), self.args.chunk_method.name, |
| 167 | + )) |
| 168 | + if (cache := (yield from self.query_cached_blob(pre_calc_blob_hash))) is not None: |
| 169 | + self.logger.debug('Chunked file {} (hash {}) already exists in DB'.format(src_path_str, pre_calc_blob_hash)) |
| 170 | + return cache |
| 171 | + return _ChunkedBlobSnapshot(chunks, pre_calc_blob_hash, blob_size) |
| 172 | + |
| 173 | + if pre_calc_blob_hash is not None: |
| 174 | + if (cache := (yield from self.query_cached_blob(pre_calc_blob_hash))) is not None: |
| 175 | + self.logger.debug('Chunked file {} (hash {}) already exists in DB'.format(src_path_str, pre_calc_blob_hash)) |
| 176 | + return cache |
167 | 177 |
|
168 | 178 | previous_chunks = self.ctx.file_lookup.get_previous_chunks(self.args.src_path) if self.args.chunk_method.needs_previous_chunks() else None |
169 | 179 | chunker = self.args.chunk_method.create_file_chunker(actual_path_to_read, need_entire_file_hash=True, previous_chunks=previous_chunks) |
170 | 180 | with self.ctx.time_costs.measure_time_cost(CreateBackupTimeCostKey.kind_io_read) as chunking_cost: |
171 | 181 | chunks = chunker.cut_all() |
172 | 182 | blob_hash = chunker.get_entire_file_hash() |
173 | 183 | blob_size = chunker.get_read_file_size() |
| 184 | + if pre_calc_blob_hash is not None and pre_calc_blob_hash != blob_hash: |
| 185 | + self.log_and_raise_blob_file_changed('Blob hash mismatch, pre calc {}, chunked {}'.format(pre_calc_blob_hash, blob_hash), self.args.last_chance) |
| 186 | + |
174 | 187 | self.logger.debug('Cut and hashed file {} with size {} into {} chunks using {} in {:.2f}s ({}/s)'.format( |
175 | 188 | src_path_str, ByteCount(blob_size).auto_str(), len(chunks), self.args.chunk_method.name, chunking_cost(), |
176 | 189 | ByteCount(blob_size / chunking_cost() if chunking_cost() > 0 else 0).auto_str(), |
177 | 190 | )) |
| 191 | + if pre_calc_blob_hash is None and (cache := (yield from self.query_cached_blob(blob_hash))) is not None: |
| 192 | + self.logger.debug('Chunked file {} (hash {}) already exists in DB'.format(src_path_str, blob_hash)) |
| 193 | + return cache |
178 | 194 | return _ChunkedBlobSnapshot(chunks, blob_hash, blob_size) |
179 | 195 |
|
180 | 196 | def __create_missing_chunks(self, actual_path_to_read: Path, snapshot: _ChunkedBlobSnapshot, known_db_chunks: Dict[str, Optional[schema.Chunk]]) -> _ChunkWriteResult: |
|
0 commit comments