2424import org .apache .iotdb .db .i18n .StorageEngineMessages ;
2525import org .apache .iotdb .db .storageengine .dataregion .compaction .execute .exception .CompactionLastTimeCheckFailedException ;
2626import org .apache .iotdb .db .storageengine .dataregion .compaction .execute .task .CompactionTaskSummary ;
27+ import org .apache .iotdb .db .storageengine .dataregion .compaction .execute .utils .executor .batch .utils .FollowingBatchCompactionAlignedChunkWriter ;
2728import org .apache .iotdb .db .storageengine .dataregion .compaction .execute .utils .executor .fast .element .AlignedPageElement ;
2829import org .apache .iotdb .db .storageengine .dataregion .compaction .execute .utils .executor .fast .element .ChunkMetadataElement ;
2930import org .apache .iotdb .db .storageengine .dataregion .compaction .execute .utils .writer .flushcontroller .AbstractCompactionFlushController ;
3031import org .apache .iotdb .db .storageengine .dataregion .compaction .io .CompactionTsFileWriter ;
3132import org .apache .iotdb .db .storageengine .dataregion .modification .ModEntry ;
3233
34+ import org .apache .tsfile .block .column .Column ;
3335import org .apache .tsfile .encrypt .EncryptParameter ;
36+ import org .apache .tsfile .enums .TSDataType ;
3437import org .apache .tsfile .exception .write .PageException ;
3538import org .apache .tsfile .file .header .PageHeader ;
3639import org .apache .tsfile .file .metadata .ChunkMetadata ;
@@ -66,6 +69,13 @@ public abstract class AbstractCompactionWriter implements AutoCloseable {
6669 // The index of the array corresponds to subTaskId.
6770 protected int [] chunkPointNumArray = new int [subTaskNum ];
6871
72+ // Each sub task has estimated total size of written points in current chunk.
73+ // The index of the array corresponds to subTaskId.
74+ protected long [] writtenPointTotalSizeArray = new long [subTaskNum ];
75+
76+ // Whether each sub task's current chunk writer contains TEXT, STRING, BLOB or OBJECT.
77+ protected boolean [] hasVariableLengthTypeArray = new boolean [subTaskNum ];
78+
6979 // used to control the target chunk size
7080 protected long targetChunkSize = IoTDBDescriptor .getInstance ().getConfig ().getTargetChunkSize ();
7181
@@ -77,7 +87,12 @@ public abstract class AbstractCompactionWriter implements AutoCloseable {
7787 @ SuppressWarnings ("squid:S1170" )
7888 private final long checkPoint = (targetChunkPointNum >= 10 ? targetChunkPointNum : 10 ) / 10 ;
7989
80- private long lastCheckIndex = 0 ;
90+ private final long [] lastCheckIndexArray = new long [subTaskNum ];
91+
92+ // When estimated size of written points reaches check point, then check chunk size.
93+ private final long writtenPointTotalSizeCheckPoint = Math .max (targetChunkSize / 10 , 1L );
94+
95+ private final long [] lastWrittenPointTotalSizeCheckIndexArray = new long [subTaskNum ];
8196
8297 // if unsealed chunk size is lower than this, then deserialize next chunk whether it is
8398 // overlapped or not
@@ -122,10 +137,24 @@ public ModEntry getTTLLowerBoundForCurrentDevice() {
122137 }
123138
124139 public void startMeasurement (String measurement , IChunkWriter chunkWriter , int subTaskId ) {
125- lastCheckIndex = 0 ;
140+ resetChunkWriterStatistics ( subTaskId ) ;
126141 lastTimeSet [subTaskId ] = false ;
127142 chunkWriters [subTaskId ] = chunkWriter ;
128143 measurementId [subTaskId ] = measurement ;
144+ hasVariableLengthTypeArray [subTaskId ] = containsVariableLengthType (chunkWriter );
145+ }
146+
147+ private boolean containsVariableLengthType (IChunkWriter chunkWriter ) {
148+ if (chunkWriter instanceof ChunkWriterImpl ) {
149+ return ((ChunkWriterImpl ) chunkWriter ).getDataType ().isBinary ();
150+ }
151+ AlignedChunkWriterImpl alignedChunkWriter = (AlignedChunkWriterImpl ) chunkWriter ;
152+ for (ValueChunkWriter valueChunkWriter : alignedChunkWriter .getValueChunkWriterList ()) {
153+ if (valueChunkWriter .getDataType ().isBinary ()) {
154+ return true ;
155+ }
156+ }
157+ return false ;
129158 }
130159
131160 public abstract void endMeasurement (int subTaskId ) throws IOException ;
@@ -146,7 +175,9 @@ public void startMeasurement(String measurement, IChunkWriter chunkWriter, int s
146175 */
147176 public abstract void checkAndMayFlushChunkMetadata () throws IOException ;
148177
149- protected void writeDataPoint (long timestamp , TsPrimitiveType value , IChunkWriter chunkWriter ) {
178+ protected void writeDataPoint (
179+ long timestamp , TsPrimitiveType value , IChunkWriter chunkWriter , int subTaskId ) {
180+ long writtenPointTotalSize = 0 ;
150181 if (chunkWriter instanceof ChunkWriterImpl ) {
151182 ChunkWriterImpl chunkWriterImpl = (ChunkWriterImpl ) chunkWriter ;
152183 switch (chunkWriterImpl .getDataType ()) {
@@ -155,6 +186,7 @@ protected void writeDataPoint(long timestamp, TsPrimitiveType value, IChunkWrite
155186 case BLOB :
156187 case OBJECT :
157188 chunkWriterImpl .write (timestamp , value .getBinary ());
189+ writtenPointTotalSize += value .getBinary ().getLength ();
158190 break ;
159191 case DOUBLE :
160192 chunkWriterImpl .write (timestamp , value .getDouble ());
@@ -180,17 +212,103 @@ protected void writeDataPoint(long timestamp, TsPrimitiveType value, IChunkWrite
180212 } else {
181213 AlignedChunkWriterImpl alignedChunkWriter = (AlignedChunkWriterImpl ) chunkWriter ;
182214 alignedChunkWriter .write (timestamp , value .getVector ());
215+ if (hasVariableLengthTypeArray [subTaskId ]) {
216+ writtenPointTotalSize = estimateWrittenPointTotalSize (value );
217+ }
218+ }
219+ chunkPointNumArray [subTaskId ]++;
220+ if (hasVariableLengthTypeArray [subTaskId ]) {
221+ writtenPointTotalSizeArray [subTaskId ] += writtenPointTotalSize ;
183222 }
184223 }
185224
225+ private long estimateWrittenPointTotalSize (TsPrimitiveType value ) {
226+ long size = Long .BYTES ;
227+ TsPrimitiveType [] vector = value .getVector ();
228+ for (TsPrimitiveType tsPrimitiveType : vector ) {
229+ if (tsPrimitiveType == null ) {
230+ continue ;
231+ }
232+ TSDataType dataType = tsPrimitiveType .getDataType ();
233+ switch (dataType ) {
234+ case TEXT :
235+ case STRING :
236+ case BLOB :
237+ case OBJECT :
238+ size += tsPrimitiveType .getBinary ().getLength ();
239+ break ;
240+ case DOUBLE :
241+ case INT64 :
242+ case TIMESTAMP :
243+ size += Long .BYTES ;
244+ break ;
245+ case INT32 :
246+ case DATE :
247+ case FLOAT :
248+ size += Integer .BYTES ;
249+ break ;
250+ case BOOLEAN :
251+ size += 1 ;
252+ break ;
253+ default :
254+ break ;
255+ }
256+ }
257+ return size ;
258+ }
259+
260+ protected long estimateWrittenPointTotalSize (TsBlock tsBlock ) {
261+ int pointCount = tsBlock .getPositionCount ();
262+ long size = (long ) Long .BYTES * pointCount ;
263+ Column [] columns = tsBlock .getValueColumns ();
264+ for (Column column : columns ) {
265+ TSDataType dataType = column .getDataType ();
266+ if (dataType .isBinary ()) {
267+ for (int j = 0 ; j < pointCount ; j ++) {
268+ if (column .isNull (j )) {
269+ continue ;
270+ }
271+ size += column .getBinary (j ).getLength ();
272+ }
273+ continue ;
274+ }
275+ // This is only used as a checkpoint estimate, so fixed-width values use count directly.
276+ switch (dataType ) {
277+ case DOUBLE :
278+ case INT64 :
279+ case TIMESTAMP :
280+ size += (long ) Long .BYTES * pointCount ;
281+ break ;
282+ case INT32 :
283+ case DATE :
284+ case FLOAT :
285+ size += (long ) Integer .BYTES * pointCount ;
286+ break ;
287+ case BOOLEAN :
288+ size += pointCount ;
289+ break ;
290+ default :
291+ break ;
292+ }
293+ }
294+ return size ;
295+ }
296+
186297 @ SuppressWarnings ("squid:S2445" )
187298 protected void sealChunk (
188299 CompactionTsFileWriter targetWriter , IChunkWriter chunkWriter , int subTaskId )
189300 throws IOException {
190301 synchronized (targetWriter ) {
191302 targetWriter .writeChunk (chunkWriter );
192303 }
304+ resetChunkWriterStatistics (subTaskId );
305+ }
306+
307+ private void resetChunkWriterStatistics (int subTaskId ) {
193308 chunkPointNumArray [subTaskId ] = 0 ;
309+ writtenPointTotalSizeArray [subTaskId ] = 0 ;
310+ lastCheckIndexArray [subTaskId ] = 0 ;
311+ lastWrittenPointTotalSizeCheckIndexArray [subTaskId ] = 0 ;
194312 }
195313
196314 public abstract EncryptParameter getEncryptParameter ();
@@ -214,7 +332,7 @@ protected void flushNonAlignedChunkToFileWriter(
214332 synchronized (targetWriter ) {
215333 // seal last chunk to file writer
216334 targetWriter .writeChunk (chunkWriters [subTaskId ]);
217- chunkPointNumArray [ subTaskId ] = 0 ;
335+ resetChunkWriterStatistics ( subTaskId ) ;
218336 targetWriter .writeChunk (chunk , chunkMetadata );
219337 }
220338 }
@@ -232,7 +350,7 @@ protected void flushAlignedChunkToFileWriter(
232350 AlignedChunkWriterImpl alignedChunkWriter = (AlignedChunkWriterImpl ) chunkWriters [subTaskId ];
233351 // seal last chunk to file writer
234352 targetWriter .writeChunk (alignedChunkWriter );
235- chunkPointNumArray [ subTaskId ] = 0 ;
353+ resetChunkWriterStatistics ( subTaskId ) ;
236354
237355 targetWriter .markStartingWritingAligned ();
238356
@@ -279,6 +397,9 @@ protected void flushNonAlignedPageToChunkWriter(
279397 chunkWriter .writePageHeaderAndDataIntoBuff (compressedPageData , pageHeader );
280398
281399 chunkPointNumArray [subTaskId ] += pageHeader .getStatistics ().getCount ();
400+ if (hasVariableLengthTypeArray [subTaskId ]) {
401+ writtenPointTotalSizeArray [subTaskId ] += pageHeader .getSerializedPageSize ();
402+ }
282403 }
283404
284405 public abstract boolean flushAlignedPage (AlignedPageElement alignedPageElement , int subTaskId )
@@ -303,29 +424,51 @@ protected void flushAlignedPageToChunkWriter(
303424 // flush new time page to chunk writer directly
304425 alignedChunkWriter .writePageHeaderAndDataIntoTimeBuff (compressedTimePageData , timePageHeader );
305426
427+ long writtenValuePageSize = 0 ;
306428 // flush new value pages to chunk writer directly
307429 for (int i = 0 ; i < valuePageHeaders .size (); i ++) {
308- if (valuePageHeaders .get (i ) == null ) {
430+ PageHeader valuePageHeader = valuePageHeaders .get (i );
431+ if (valuePageHeader == null ) {
309432 // sub sensor does not exist in current file or value page has been deleted completely
310433 alignedChunkWriter .getValueChunkWriterByIndex (i ).writeEmptyPageToPageBuffer ();
311434 continue ;
312435 }
313436 alignedChunkWriter .writePageHeaderAndDataIntoValueBuff (
314- compressedValuePageDatas .get (i ), valuePageHeaders .get (i ), i );
437+ compressedValuePageDatas .get (i ), valuePageHeader , i );
438+ if (hasVariableLengthTypeArray [subTaskId ]) {
439+ writtenValuePageSize += valuePageHeader .getSerializedPageSize ();
440+ }
315441 }
316442
317443 chunkPointNumArray [subTaskId ] += timePageHeader .getStatistics ().getCount ();
444+ if (hasVariableLengthTypeArray [subTaskId ]) {
445+ // Direct-flushed pages are already serialized, so use page size as checkpoint estimate.
446+ writtenPointTotalSizeArray [subTaskId ] +=
447+ timePageHeader .getSerializedPageSize () + writtenValuePageSize ;
448+ }
318449 }
319450
320451 protected void checkChunkSizeAndMayOpenANewChunk (
321452 CompactionTsFileWriter fileWriter , IChunkWriter chunkWriter , int subTaskId )
322453 throws IOException {
323- if (chunkPointNumArray [subTaskId ] >= (lastCheckIndex + 1 ) * checkPoint ) {
324- // if chunk point num reaches the check point, then check if the chunk size over threshold
325- lastCheckIndex = chunkPointNumArray [subTaskId ] / checkPoint ;
454+ if (chunkWriter instanceof FollowingBatchCompactionAlignedChunkWriter
455+ && chunkWriter .checkIsChunkSizeOverThreshold (targetChunkSize , targetChunkPointNum , false )) {
456+ sealChunk (fileWriter , chunkWriter , subTaskId );
457+ return ;
458+ }
459+ boolean reachesPointCheckPoint =
460+ chunkPointNumArray [subTaskId ] >= (lastCheckIndexArray [subTaskId ] + 1 ) * checkPoint ;
461+ boolean reachesSizeCheckPoint =
462+ hasVariableLengthTypeArray [subTaskId ]
463+ && writtenPointTotalSizeArray [subTaskId ]
464+ >= (lastWrittenPointTotalSizeCheckIndexArray [subTaskId ] + 1 )
465+ * writtenPointTotalSizeCheckPoint ;
466+ if (reachesPointCheckPoint || reachesSizeCheckPoint ) {
467+ lastCheckIndexArray [subTaskId ] = chunkPointNumArray [subTaskId ] / checkPoint ;
468+ lastWrittenPointTotalSizeCheckIndexArray [subTaskId ] =
469+ writtenPointTotalSizeArray [subTaskId ] / writtenPointTotalSizeCheckPoint ;
326470 if (chunkWriter .checkIsChunkSizeOverThreshold (targetChunkSize , targetChunkPointNum , false )) {
327471 sealChunk (fileWriter , chunkWriter , subTaskId );
328- lastCheckIndex = 0 ;
329472 }
330473 }
331474 }
0 commit comments