Skip to content

Commit 1737473

Browse files
salotzmahmoud
authored and committed
Adds cache option to remap and use in research
There is a mismatch between the caching of transformed objects in the `remap` function and the need for `research` to traverse all subtrees. For most values this is inconsequential (e.g. atomic ints), but small tuples (e.g. `("hello",)`) that appear more than once get compiled as the same object and therefore return the same `id(...)`. In `remap` such a value is cached after its first occurrence, so `enter` is never called on the later occurrences, and thus the hooks that `research` relies on to return the values are never called for them. This fix introduces an option on the `remap` function to enable/disable the cache; disabling it simply stops `remap` from reusing transformed values from the cache. The `research` function now turns caching off. The existing `remap` behavior is maintained because caching is turned on by default. fixes: #393
1 parent ce7c7d2 commit 1737473

2 files changed

Lines changed: 32 additions & 4 deletions

File tree

boltons/iterutils.py

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1055,8 +1055,14 @@ def default_exit(path, key, old_parent, new_parent, new_items):
10551055
return ret
10561056

10571057

1058-
def remap(root, visit=default_visit, enter=default_enter, exit=default_exit,
1059-
**kwargs):
1058+
def remap(
1059+
root,
1060+
visit=default_visit,
1061+
enter=default_enter,
1062+
exit=default_exit,
1063+
cache: bool = True,
1064+
**kwargs,
1065+
):
10601066
"""The remap ("recursive map") function is used to traverse and
10611067
transform nested structures. Lists, tuples, sets, and dictionaries
10621068
are just a few of the data structures nested into heterogeneous
@@ -1130,6 +1136,10 @@ def remap(root, visit=default_visit, enter=default_enter, exit=default_exit,
11301136
:class:`namedtuple`, must be recreated from scratch, but
11311137
use the same type as the new parent passed back from the
11321138
*enter* function.
1139+
cache (bool): Controls whether to cache transformed
1140+
objects. Uses object identity for the cache. For example
1141+
this is turned off for applications like `research` which
1142+
need to traverse all trees.
11331143
reraise_visit (bool): A pragmatic convenience for the *visit*
11341144
callable. When set to ``False``, remap ignores any errors
11351145
raised by the *visit* callback. Items causing exceptions
@@ -1195,7 +1205,7 @@ def remap(root, visit=default_visit, enter=default_enter, exit=default_exit,
11951205
registry[id_value] = value
11961206
if not new_items_stack:
11971207
continue
1198-
elif id_value in registry:
1208+
elif cache and id_value in registry:
11991209
value = registry[id_value]
12001210
else:
12011211
if trace_enter:
@@ -1388,7 +1398,7 @@ def _enter(path, key, value):
13881398
raise
13891399
return enter(path, key, value)
13901400

1391-
remap(root, enter=_enter)
1401+
remap(root, enter=_enter, cache=False)
13921402
return ret
13931403

13941404

tests/test_iterutils.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -395,6 +395,24 @@ def broken_query(p, k, v):
395395
# empty results with default, reraise=False
396396
assert research(root, broken_query) == []
397397

398+
# test that different branches with object identical values are
399+
# still traversed and returned
400+
literal_tree = {
401+
"maybe" : ("hello",),
402+
"another" : ("hello",),
403+
}
404+
405+
assert id(literal_tree["maybe"]) == id(literal_tree["another"])
406+
assert research(
407+
root=literal_tree,
408+
) == [
409+
((None,), literal_tree),
410+
(("maybe",), ("hello",)),
411+
(("maybe", 0,), "hello"),
412+
(("another",), ("hello",)),
413+
(("another", 0,), "hello"),
414+
]
415+
398416

399417
def test_research_custom_enter():
400418
# see #368

0 commit comments

Comments
 (0)