Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cylc/flow/data_store_mgr.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@
from cylc.flow.id import Tokens
from cylc.flow.network import API
from cylc.flow.parsec.util import (
listjoin,
fast_listjoin as listjoin,
pdeepcopy,
poverride,
)
Expand Down
24 changes: 21 additions & 3 deletions cylc/flow/parsec/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,13 +113,31 @@ def listjoin(lst, none_str=''):
for item in lst:
if item is None:
items.append(none_str)
elif any(char in str(item) for char in ',#"\''):

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is an issue of the same type that @dwsutherland spotted in #7306 (comment).

One str(item) call was being made for each character in the list, resulting in 5x the number of str calls.

items.append(repr(item)) # will be quoted
else:
items.append(str(item))
string = str(item)
if any(char in string for char in ',#"\''):
items.append(repr(item)) # will be quoted
else:
items.append(string)
return ', '.join(items)


def fast_listjoin(lst, none_str=''):
"""Faster variant of listjoin suitable in select cases.

Compared to listjoin, this does *not*:
* Rationalise and pretty-format integer lists.
* Intelligently quote arguments which need quoting.

Suitable for use in situations where you know the data type of the field
being joined and the above is of no concern.
"""
return ', '.join((
none_str if item is None else str(item)
for item in lst or [None]
))
Comment on lines +125 to +138

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a slimmed down version of listjoin above. The savings are small, however, this method gets hammered by the data store because it's called for:

  • execution polling intervals
  • execution retry delays
  • submission polling intervals
  • submission retry delays

Once for every task in the n-window.

According to cProfile, this reduces the totaltime (i.e, the time taken within the method itself, as opposed to methods invoked from the method) from 2.821s to 0.01014s (i.e, a 2.8s saving for the example).

@dwsutherland dwsutherland May 22, 2026

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Was curious, not much difference constructing the [None] conditional list outside:

#!/usr/bin/env python


import time

N = 100
M = 100

Ns = [_n for _n in range(N)]
none_str = 'null'
NNones = Ns or [None]

def one():  # as before
    return ', '.join((
        none_str if item is None else str(item)
        for item in Ns or [None]
    ))


def two():  # None list constructed outside
    return ', '.join((
        none_str if item is None else str(item)
        for item in NNones
    ))


for method in (one, two):
    start = time.time()
    for _try in range(M):
        method()
    end = time.time()
    print(f'{method.__name__:10} {end - start}')
(flow) sutherlander@cortex-hyper:bin$ p7313.py 
one        0.0006687641143798828
two        0.0006625652313232422

N=0 also:

(flow) sutherlander@cortex-hyper:bin$ p7313.py 
one        2.9325485229492188e-05
two        2.7418136596679688e-05

Strike that (silly me), there would only be one invocation of lst or [None] anyway...



def printcfg(cfg, level=0, indent=0, prefix='', none_str='',
handle=None):
"""Pretty-print a parsec config item or section (nested dict).
Expand Down
15 changes: 11 additions & 4 deletions cylc/flow/parsec/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
Also provides default values from the spec as a nested dict.
"""

from functools import lru_cache
import re
import shlex
from collections import deque
Expand Down Expand Up @@ -655,12 +656,18 @@ def parsec_validate(cfg_root, spec_root):
return ParsecValidator().validate(cfg_root, spec_root)


class DurationFloat(float):
"""Duration in floating point seconds, but stringify as ISO8601 format."""

def __str__(self):
class _DurationFloat(float):
def _str(self):
return str(Duration(seconds=self, standardize=True))

# NOTE: This object is immutable so we cache the value of __str__.
__str__ = lru_cache(1)(_str)


# NOTE: Prevent duplicate DurationFloat objects being created for the same
# duration value. This allows __str__ caching to be effective.
DurationFloat = lru_cache(None)(_DurationFloat.__call__)
Comment on lines +663 to +669

@oliver-sanders oliver-sanders May 19, 2026

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This reduces the remaining 2 million calls down to one.

This will cache the objects for the lifetime of the Python process in which they are created. This sort of caching is appropriate in circumstances where we wouldn't expect this memory to be released during runtime.

If we did want the cache to release memory, we would use weakref.



class Range(tuple):

Expand Down
48 changes: 39 additions & 9 deletions tests/unit/parsec/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from cylc.flow.parsec.util import (
SECTION_EXPAND_PATTERN,
expand_many_section,
fast_listjoin,
itemstr,
listjoin,
m_override,
Expand All @@ -31,15 +32,44 @@
replicate,
un_many,
)


def test_listjoin():
assert listjoin(None) == ''
assert listjoin(None, 'test') == 'test'
assert listjoin([], 'test') == 'test'
assert listjoin([None], 'test') == 'test'
assert listjoin(['test', 'test']) == 'test, test'
assert listjoin(['test,', 'test']) == '\'test,\', test'
from cylc.flow.parsec.validate import DurationFloat


@pytest.mark.parametrize('method', (listjoin, fast_listjoin))
def test_listjoin(method):
assert method(None) == ''
assert method(None, 'test') == 'test'
assert method([]) == ''
assert method([], 'test') == 'test'
assert method([None], 'test') == 'test'
assert method(['test', 'test']) == 'test, test'
if method == listjoin:
# intelligent quoting
assert method(['a#', 'b,', "c'"]) == "'a#', 'b,', \"c'\""
assert method(['test,', 'test']) == '\'test,\', test'

# integer list rationalisation
assert method([10, 11, 12]) == '10..12'
assert method([10, 10, 10]) == '3*10'
assert method([10, 10, 10, 42]) == '3*10, 42'


def test_duration_float():
"""It should prevent the creation of duplicate instances."""
# these calls should only create two discrete instances of DurationFloat
a = DurationFloat(1.0)
b = DurationFloat(1.0)
c = DurationFloat(42.0)

# a and b should be no only equal but also the same instance
assert a == b
assert id(a) == id(b)
assert str(a) == 'PT1S'

# a and c should be unequal and different instances
assert a != c
assert id(a) != id(c)
assert str(c) == 'PT42S'


# --- printcfg
Expand Down
Loading