Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .codecov.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ coverage:
ignore:
- "tests/**"
- "ws_messages_pb2.py"
- "cylc/flow/scripts/report_timings.py"
- "cylc/flow/network/graphql_subscribe.py"

flag_management:
Expand Down
1 change: 0 additions & 1 deletion .coveragerc
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ omit =
tests/*
*/cylc/flow/*_pb2.py
cylc/flow/etc/*
cylc/flow/scripts/report_timings.py
parallel = True
source = ./cylc
# https://github.com/coveragepy/coveragepy/issues/2082:
Expand Down
1 change: 1 addition & 0 deletions changes.d/7205.feat.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Added CSV and JSON output options to `cylc report-timings`.
2 changes: 1 addition & 1 deletion conda-environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ dependencies:
- urwid >=2.2,<4,!=2.6.2,!=2.6.3

# optional dependencies
#- pandas >=1.0,<2
#- pandas >=2,<3
#- pympler
#- matplotlib-base
#- sqlparse
Expand Down
144 changes: 70 additions & 74 deletions cylc/flow/scripts/report_timings.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,9 @@
import io as StringIO
import sys
from collections import Counter
from functools import partial
from typing import TYPE_CHECKING

from cylc.flow import LOG
from cylc.flow.exceptions import CylcError
from cylc.flow.id_cli import parse_id
from cylc.flow.option_parsers import (
Expand Down Expand Up @@ -90,25 +90,51 @@ def smart_open(filename=None):
fh.close()


def format_raw(row_buf, output):
    """Implement --format=raw.

    Write the entire contents of ``row_buf`` (as returned by its
    ``getvalue()`` method) to ``output`` without modification.
    """
    raw_text = row_buf.getvalue()
    output.write(raw_text)


def format_summary(row_buf, output):
    """Implement --format=summary.

    Build a ``TextTimingSummary`` from ``row_buf`` and have it write its
    summary to ``output``.
    """
    TextTimingSummary(row_buf).write_summary(output)


def format_html(row_buf, output):
    """Implement --format=html.

    Build an ``HTMLTimingSummary`` from ``row_buf`` and have it write its
    summary to ``output``.
    """
    HTMLTimingSummary(row_buf).write_summary(output)


def format_generic(row_buf, output, format):
    """Implement the output formats handled by ``PandasTimingsRaw``.

    Wrap ``row_buf`` in a ``PandasTimingsRaw`` and ask it to write to
    ``output`` using the method named ``to_<format>`` (for example
    ``format='csv'`` selects ``to_csv``).
    """
    timings = PandasTimingsRaw(row_buf)
    method_name = f'to_{format}'
    timings.write_summary(output, method_name)


# supported output formats
FORMATS = {
'raw': format_raw,
'summary': format_summary,
'html': format_html,
'csv': partial(format_generic, format='csv'),

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sadly, `to_markdown` and `to_excel` require extra dependencies, and `to_html`, though it works, isn't very nice to look at and needs some CSS appended.

'json': partial(format_generic, format='json'),
}


def get_option_parser() -> COP:
parser = COP(
__doc__,
argdoc=[WORKFLOW_ID_ARG_DOC]
)
parser.add_option(
"-r", "--raw",
help="Show raw timing output suitable for custom diagnostics.",
action="store_true", default=False, dest="show_raw"
)
parser.add_option(
"-s", "--summary",
help="Show textual summary timing output for tasks.",
action="store_true", default=False, dest="show_summary"
)
parser.add_option(
"-w", "--web-summary",
help="Show HTML summary timing output for tasks.",
action="store_true", default=False, dest="html_summary"
'--format', '-t',
help=(
'Select output format (default=summary). Available formats: '
+ ', '.join(FORMATS)
),
action='store',
default='summary',
choices=list(FORMATS)
)
parser.add_option(
"-O", "--output-file",
Expand All @@ -120,39 +146,19 @@ def get_option_parser() -> COP:

@cli_function(get_option_parser)
def main(parser: COP, options: 'Values', workflow_id: str) -> None:
_main(options, workflow_id)


def _main(options: 'Values', workflow_id: str) -> None:
workflow_id, *_ = parse_id(
workflow_id,
constraint='workflows',
)

LOG.warning(
"cylc report-timings is deprecated."
" The analysis view in the GUI provides"
" similar functionality."
)

output_options = [
options.show_raw, options.show_summary, options.html_summary
]
if output_options.count(True) > 1:
parser.error('Cannot combine output formats (choose one)')
if not any(output_options):
# No output specified - choose summary by default
options.show_summary = True

db_file = get_workflow_run_pub_db_path(workflow_id)
with CylcWorkflowDAO(db_file, is_public=True) as dao:
row_buf = format_rows(*dao.select_task_times())
with smart_open(options.output_filename) as output:
if options.show_raw:
output.write(row_buf.getvalue())
else:
summary: TimingSummary
if options.show_summary:
summary = TextTimingSummary(row_buf)
elif options.html_summary:
summary = HTMLTimingSummary(row_buf)
summary.write_summary(output)
FORMATS[options.format](row_buf, output)


def format_rows(header, rows):
Expand All @@ -172,7 +178,7 @@ def format_rows(header, rows):
]
formatter = ' '.join('%%-%ds' % line for line in max_lengths) + '\n'
sio.write(formatter % header)
for r in rows:
for r in sorted(rows):
sio.write(formatter % r)
sio.seek(0)
return sio
Expand All @@ -181,15 +187,10 @@ def format_rows(header, rows):
class TimingSummary:
"""Base class for summarizing timing output from cylc.flow run database."""

def __init__(self, filepath_or_buffer=None):
def __init__(self, filepath_or_buffer):
"""Set up internal dataframe storage for time durations."""

self._check_imports()
if filepath_or_buffer is not None:
self.read_timings(filepath_or_buffer)
else:
self.df = None
self.by_host_and_job_runner = None
self.read_timings(filepath_or_buffer)

def read_timings(self, filepath_or_buffer):
"""
Expand All @@ -203,7 +204,7 @@ def read_timings(self, filepath_or_buffer):
pd.set_option('display.max_colwidth', 10000)

df = pd.read_csv(
filepath_or_buffer, delim_whitespace=True, index_col=[0, 1, 2, 3],
filepath_or_buffer, sep=r'\s+', index_col=[0, 1, 2, 3],
parse_dates=[4, 5, 6]
)
self.df = pd.DataFrame({
Expand All @@ -219,18 +220,13 @@ def read_timings(self, filepath_or_buffer):
level=['host', 'job_runner']
)

def write_summary(self, buf=None):
def write_summary(self, buf=sys.stdout):
"""Using the stored timings dataframe, output the data summary."""

if buf is None:
buf = sys.stdout
self.write_summary_header(buf)
for group, df in self.by_host_and_job_runner:
self.write_group_header(buf, group)
df_reshape = self._reshape_timings(df)
df_describe = df.groupby(level='name').describe()
if df_describe.index.nlevels > 1:
df_describe = df_describe.unstack() # for pandas < 0.20.0
df_describe.index.rename(None, inplace=True)
for timing_category in self.df.columns:
self.write_category(
Expand Down Expand Up @@ -286,17 +282,24 @@ def _reshape_timings(timings):
timings = timings.assign(retry=retry)
timings = timings.set_index('retry', append=True)

return timings.unstack('name').stack(level=0)
return timings.unstack('name').stack(level=0, future_stack=True)

@staticmethod
def _dt_to_s(dt):
return dt.total_seconds()


class PandasTimingsRaw(TimingSummary):
"""Generic Form designed to leverage the power of pandas.DataFrame.to*
methods.
"""
def read_timings(self, filepath_or_buffer):
self.data = [i.split() for i in filepath_or_buffer.readlines()]

def write_summary(self, buf, method, *args, **kwargs):
import pandas as pd
try:
return dt.total_seconds()
except AttributeError:
# Older versions of pandas have the timedelta as a numpy
# timedelta64 type, which didn't support total_seconds
return pd.Timedelta(dt).total_seconds()
df = pd.DataFrame(self.data[1:], columns=self.data[0])
buf.write(getattr(df, method)(*args, **kwargs))


class TextTimingSummary(TimingSummary):
Expand Down Expand Up @@ -359,7 +362,7 @@ def write_summary_header(self, buf):
}
"""

buf.write('<html><head><style>%s</style></head><body>' % css)
buf.write('<!DOCTYPE html><html><head><style>%s</style></head><body>' % css)

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not having this showed up as a warning when I looked at the HTML with dev tools open.


def write_summary_footer(self, buf):
    """Write the closing ``</body>`` and ``</html>`` tags to ``buf``."""
    closing_tags = '</body></html>'
    buf.write(closing_tags)
Expand All @@ -374,23 +377,16 @@ def write_category(self, buf, category, df_reshape, df_describe):
ax = (
df_reshape
.xs(category, level='timing_category')
.plot(kind='box', vert=False)
.plot(kind='box', orientation='vertical')
)
ax.invert_yaxis()
ax.set_xlabel('Seconds')
plt.xticks(rotation=90)
plt.tight_layout()
plt.gcf().savefig(buf, format='svg')
try:
table = df_describe[category].to_html(
classes="summary", index_names=False, border=0
)
except TypeError:
# older pandas don't support the "border" argument
# so explicitly remove it
table = df_describe[category].to_html(
classes="summary", index_names=False
)
table = table.replace('border="1"', '')
table = df_describe[category].to_html(
classes="summary", index_names=False, border=0
)
buf.write(table)
buf.write('</div>')
pass
Expand Down
3 changes: 2 additions & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ main_loop-log_memory =
main_loop-log_db =
sqlparse
report-timings =
pandas==1.*
pandas==2.*
matplotlib
tests =
aiosmtpd
Expand Down Expand Up @@ -131,6 +131,7 @@ all =
%(main_loop-log_memory)s
%(tests)s
%(tutorials)s
%(report-timings)s

[options.entry_points]
# top level shell commands
Expand Down
Loading
Loading