Skip to content

Commit d1ef4c9

Browse files
committed
HM_RehashIfRequired: Determine the optimum size
When the load factor is much larger than 1, doubling the size once may not be enough to bring the load factor of the resulting hashmap below HM_MAX_LOAD_FACTOR. So let's calculate the optimum hashmap size instead. Factoring that calculation out into its own function also allows us to test it properly.
1 parent 2e82552 commit d1ef4c9

File tree

2 files changed

+36
-9
lines changed

2 files changed

+36
-9
lines changed

Packages/MIES/MIES_Hashmap.ipf

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -778,10 +778,20 @@ threadsafe static Function HM_CalculateLoadFactor(WAVE/WAVE hashmap)
778778
return totalEntries[HM_TOTAL_ENTRIES_ROW] / HM_GetSize(hashmap)
779779
End
780780

781+
/// @brief Return the smallest power-of-two hashmap size for which the load factor
///        does not exceed #HM_MAX_LOAD_FACTOR
///
/// Entry counts below one are treated as one.
///
/// @param totalEntries number of entries the hashmap has to hold
///
/// @return hashmap size, a power of two
///
/// Complexity: O(1)
threadsafe Function HM_CalculateOptimumSize(variable totalEntries)

	variable minimumSize, exponent

	// smallest size, not yet rounded, which satisfies the load factor limit
	minimumSize = max(1, totalEntries) / HM_MAX_LOAD_FACTOR

	// log(x) / log(2) is the base-2 logarithm, rounding it up selects the next power of two
	exponent = ceil(log(minimumSize) / log(2))

	return 2^exponent
End
790+
781791
/// @brief Rehashes if required and returns a modified hashmap pass-by-reference
782792
///
783793
/// The load factor (number of filled entries divided by the number of available entries) is determined.
784-
/// And if that is above #HM_MAX_LOAD_FACTOR we create a new hashmap with the doubled size and
794+
/// And if that is above #HM_MAX_LOAD_FACTOR we create a new hashmap with a large enough size and
785795
/// add all existing entries to it.
786796
///
787797
/// Complexity: Usually amortized O(1) but in the worst case O(n)
@@ -801,8 +811,11 @@ threadsafe Function HM_RehashIfRequired(WAVE/WAVE &hashmap)
801811
WAVE values = HM_FetchValues(hashmap, 0)
802812
isStr = IsTextWave(values)
803813

814+
WAVE totalEntries = HM_FetchStats(hashmap)
815+
804816
srcNumEntries = HM_GetSize(hashmap)
805-
newSize = 2 * srcNumEntries
817+
newSize = HM_CalculateOptimumSize(totalEntries[HM_TOTAL_ENTRIES_ROW])
818+
ASSERT_TS(newSize > srcNumEntries, "Invalid size calculation")
806819
WAVE/WAVE hashmapLarger = HM_Create(size = newSize, valueType = WaveType(values))
807820

808821
WAVE usedRows = HM_FetchUsedRows(hashmap)

Packages/tests/Basic/UTF_Hashmap.ipf

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -451,12 +451,13 @@ End
451451
/// UTF_TD_GENERATOR DataGenerators#PermanentOrFree
452452
static Function RehashingWorks([variable var])
453453

454-
variable numEntries, i, size, found
454+
variable numEntries, i, found, initialSize
455455
string value
456456

457-
size = 8
457+
initialSize = 1
458+
numEntries = 8
458459

459-
WAVE/WAVE hashmapFree = HM_Create(size = size)
460+
WAVE/WAVE hashmapFree = HM_Create(size = initialSize)
460461

461462
if(var)
462463
Duplicate/WAVE hashmapFree, hashmap
@@ -469,21 +470,21 @@ static Function RehashingWorks([variable var])
469470
CHECK_EQUAL_VAR(HM_RehashIfRequired(hashmap), 0)
470471
CHECK(WaveRefsEqual(hashmap, hashmap_old))
471472

472-
for(i = 0; i < size; i += 1)
473+
for(i = 0; i < numEntries; i += 1)
473474
HM_AddEntry(hashmap, num2str(i), str = "-" + num2str(i))
474475
endfor
475476

476477
CHECK_EQUAL_VAR(HM_RehashIfRequired(hashmap), 1)
477478
WAVE/WAVE hashmap_impl = hashmap[1]
478-
CHECK_EQUAL_VAR(DimSize(hashmap_impl, ROWS), size * 2)
479+
CHECK_EQUAL_VAR(DimSize(hashmap_impl, ROWS), numEntries * 2)
479480
CHECK(!WaveRefsEqual(hashmap, hashmap_old))
480481

481482
WAVE/Z filledEntries = GetFilledEntries(hashmap)
482483
CHECK_WAVE(filledEntries, NUMERIC_WAVE)
483-
CHECK_EQUAL_VAR(DimSize(filledEntries, ROWS), size)
484+
CHECK_EQUAL_VAR(DimSize(filledEntries, ROWS), numEntries)
484485
CHECK_EQUAL_WAVES(filledEntries, {5, 6, 7, 8, 9, 10, 11, 12}, mode = WAVE_DATA)
485486

486-
for(i = 0; i < size; i += 1)
487+
for(i = 0; i < numEntries; i += 1)
487488
[value, found] = HM_GetEntryAsString(hashmap, num2str(i))
488489
CHECK(found)
489490
CHECK_EQUAL_STR(value, "-" + num2str(i))
@@ -590,3 +591,16 @@ static Function WorksWithNumericValues([variable var])
590591
CHECK_NO_RTE()
591592
endtry
592593
End
594+
595+
/// @brief Check HM_CalculateOptimumSize against a table of entry counts and the
///        hashmap sizes expected for them
///
/// NOTE(review): the other test cases visible in this file are declared `static`,
/// this one is not -- confirm if that is intentional.
Function CalcOptSizeWorks()

	variable i, numCases

	// entry counts and the expected sizes, kept in matching order
	Make/FREE/D entryCounts = {0, 1, 2, 3, 4, 5, 6, 32}
	Make/FREE/D expectedSizes = {2, 2, 4, 8, 8, 8, 16, 64}

	numCases = DimSize(entryCounts, ROWS)
	for(i = 0; i < numCases; i += 1)
		CHECK_EQUAL_VAR(HM_CalculateOptimumSize(entryCounts[i]), expectedSizes[i])
	endfor
End

0 commit comments

Comments
 (0)