@@ -279,10 +279,21 @@ TEST(ParquetUtils, CountPresentFromDefinitionLevelsV1_BitPackedCanonical_0to7_88
279279 // Canonical example from Parquet Encodings.md:
280280 // values 0..7 with bit_width=3 are packed as bytes 0x88, 0xC6, 0xFA.
281281 // Bit-packed header for one group of 8 values is varint((1 << 1) | 1) = 0x03.
282+ //
283+ // Important detail: decoder bit_width is derived from max_def_level at runtime.
284+ // So this payload is valid only when max_def_level implies bit_width=3 (e.g. 7).
285+ // Reusing these same bytes with max_def_level=3 (bit_width=2) is a different
286+ // encoding contract: 8 values x 2 bits need only 2 bytes, so 0xFA would be left unread.
282287 std::vector<uint8_t > def_payload = {0x03 , 0x88 , 0xC6 , 0xFA };
283288
284289 EXPECT_EQ (1u , CountPresentFromDefinitionLevelsV1 (def_payload, 8 , 7 )); // only value 7
285- EXPECT_EQ (1u , CountPresentFromDefinitionLevelsV1 (def_payload, 8 , 3 )); // only value 3
290+
291+ // Separate bit_width=2 payload for max_def_level=3.
292+ // This keeps payload bit-width consistent with decoder configuration and
293+ // avoids mixing a 3-bit packed stream with a 2-bit decode expectation.
294+ std::vector<uint32_t > levels_bw2 = {0 , 1 , 2 , 3 , 0 , 1 , 2 , 0 }; // one value at level 3
295+ auto def_payload_bw2 = MakeBitPackedDefPayload (levels_bw2, 2 );
296+ EXPECT_EQ (1u , CountPresentFromDefinitionLevelsV1 (def_payload_bw2, 8 , 3 ));
286297}
287298
288299TEST (ParquetUtils, CountPresentFromDefinitionLevelsV1_ManualBytes_RleRunLen4_Level1) {
@@ -338,6 +349,31 @@ TEST(ParquetUtils, CountPresentFromDefinitionLevelsV1_RejectsZeroBitPackedGroups
338349 EXPECT_THROW (CountPresentFromDefinitionLevelsV1 (def_payload, 8 , 1 ), InvalidInputException);
339350}
340351
352+ TEST (ParquetUtils, CountPresentFromDefinitionLevelsV1_BitPackedFinalRunAllowsPadding) {
353+ // Corner case: a final bit-packed run is encoded as a full 8-value group,
354+ // while logical num_values (3 here) ends mid-group. Only the first num_values levels are expected to count; the padded tail of the group is ignored.
355+ // Payload: header=0x03 (1 bit-packed group => 8 values), packed=0x07 (levels 1,1,1,0,0,0,0,0, least-significant bit first).
356+ // NOTE(review): the padded levels are all 0, so this expectation would also hold if padding were counted; a 0xFF byte would pin that down - confirm.
357+ std::vector<uint8_t > def_payload = {0x03 , 0x07 }; // bit_width=1 is implied by max_def_level=1 below
358+ EXPECT_EQ (3u , CountPresentFromDefinitionLevelsV1 (def_payload, 3 , 1 )); // num_values=3, max_def_level=1: all three logical levels equal 1
359+ }
360+
361+ TEST (ParquetUtils, CountPresentFromDefinitionLevelsV1_RejectsTrailingBytesAfterDecoding) {
362+ // One full bit-packed group (8 values at bit_width=1 => a single packed byte, 0xAA) plus an extra trailing byte (0xFF) that must be rejected.
363+ std::vector<uint8_t > def_payload = {0x03 , 0xAA , 0xFF }; // header=0x03, packed=0xAA, stray=0xFF
364+ EXPECT_THROW (CountPresentFromDefinitionLevelsV1 (def_payload, 8 , 1 ), InvalidInputException); // num_values=8 is satisfied by 0xAA alone, so 0xFF is left over
365+ }
366+
367+ TEST (ParquetUtils, CountPresentFromDefinitionLevelsV1_RejectsNonPositiveMaxDefLevel) { // a max_def_level of 0 must raise InvalidInputException
368+ auto def_payload = MakeRleDefPayload (1 , 0 , 1 ); // helper defined outside this view; presumably builds a single RLE run - verify argument order against its definition
369+ EXPECT_THROW (CountPresentFromDefinitionLevelsV1 (def_payload, 1 , 0 ), InvalidInputException); // num_values=1, max_def_level=0 (the invalid input under test)
370+ }
371+
372+ TEST (ParquetUtils, CountPresentFromDefinitionLevelsV1_RejectsNegativeNumValues) { // a negative num_values must raise InvalidInputException
373+ auto def_payload = MakeRleDefPayload (1 , 1 , 1 ); // helper defined outside this view; presumably builds a single RLE run - verify argument order against its definition
374+ EXPECT_THROW (CountPresentFromDefinitionLevelsV1 (def_payload, -1 , 1 ), InvalidInputException); // num_values=-1 (the invalid input under test), max_def_level=1
375+ }
376+
341377// -----------------------------------------------------------------------------
342378// Tests for DecompressAndSplit function.
343379// -----------------------------------------------------------------------------
0 commit comments