Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .codecov.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ coverage:
ignore:
- "tests/**"
- "ws_messages_pb2.py"
- "cylc/flow/scripts/report_timings.py"
- "cylc/flow/network/graphql_subscribe.py"

flag_management:
Expand Down
1 change: 0 additions & 1 deletion .coveragerc
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ omit =
tests/*
*/cylc/flow/*_pb2.py
cylc/flow/etc/*
cylc/flow/scripts/report_timings.py
parallel = True
source = ./cylc
# https://github.com/coveragepy/coveragepy/issues/2082:
Expand Down
1 change: 1 addition & 0 deletions changes.d/7205.feat.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Added CSV and JSON output formats to `cylc report-timings`.
2 changes: 1 addition & 1 deletion conda-environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ dependencies:
- urwid >=2.2,<4,!=2.6.2,!=2.6.3

# optional dependencies
#- pandas >=1.0,<2
#- pandas >=2,<3
#- pympler
#- matplotlib-base
#- sqlparse
Expand Down
144 changes: 70 additions & 74 deletions cylc/flow/scripts/report_timings.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,9 @@
import io as StringIO
import sys
from collections import Counter
from functools import partial
from typing import TYPE_CHECKING

from cylc.flow import LOG
from cylc.flow.exceptions import CylcError
from cylc.flow.id_cli import parse_id
from cylc.flow.option_parsers import (
Expand Down Expand Up @@ -90,25 +90,51 @@ def smart_open(filename=None):
fh.close()


def format_raw(row_buf, output):
    """Implement --format=raw.

    Copy the entire content of ``row_buf`` to ``output`` unchanged.

    Args:
        row_buf: Buffer holding the formatted rows; must provide
            ``getvalue()``.
        output: Writable stream; must provide ``write()``.
    """
    output.write(row_buf.getvalue())


def format_summary(row_buf, output):
    """Implement --format=summary.

    Build a ``TextTimingSummary`` from ``row_buf`` and write it to
    ``output``.

    Args:
        row_buf: Buffer holding the formatted rows.
        output: Writable stream that receives the summary.
    """
    TextTimingSummary(row_buf).write_summary(output)


def format_html(row_buf, output):
    """Implement --format=html.

    Build an ``HTMLTimingSummary`` from ``row_buf`` and write it to
    ``output``.

    Args:
        row_buf: Buffer holding the formatted rows.
        output: Writable stream that receives the summary.
    """
    HTMLTimingSummary(row_buf).write_summary(output)


def format_generic(row_buf, output, format):
    """Implement the pandas-backed formats (--format=csv, --format=json).

    Args:
        row_buf: Buffer holding the formatted rows.
        output: Writable stream that receives the result.
        format: Suffix of the ``to_<format>`` method name passed on to
            ``PandasSummary.write_summary``. The name shadows the builtin
            but is kept because FORMATS binds it by keyword via
            ``partial``.
    """
    PandasSummary(row_buf).write_summary(output, f'to_{format}')


# supported output formats
FORMATS = {
'raw': format_raw,
'summary': format_summary,
'html': format_html,
'csv': partial(format_generic, format='csv'),

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sadly, to_markdown and to_excel require extra dependencies, and to_html, though it works, isn't very nice to look at and needs some CSS appended.

'json': partial(format_generic, format='json'),
}


def get_option_parser() -> COP:
parser = COP(
__doc__,
argdoc=[WORKFLOW_ID_ARG_DOC]
)
parser.add_option(
"-r", "--raw",
help="Show raw timing output suitable for custom diagnostics.",
action="store_true", default=False, dest="show_raw"
)
parser.add_option(
"-s", "--summary",
help="Show textual summary timing output for tasks.",
action="store_true", default=False, dest="show_summary"
)
parser.add_option(
"-w", "--web-summary",
help="Show HTML summary timing output for tasks.",
action="store_true", default=False, dest="html_summary"
'--format', '-t',
help=(
'Select output format (default=summary). Available formats: '
+ ', '.join(FORMATS)
),
action='store',
default='summary',
choices=list(FORMATS)
)
parser.add_option(
"-O", "--output-file",
Expand All @@ -120,39 +146,19 @@ def get_option_parser() -> COP:

@cli_function(get_option_parser)
def main(parser: COP, options: 'Values', workflow_id: str) -> None:
_main(options, workflow_id)


def _main(options: 'Values', workflow_id: str) -> None:
workflow_id, *_ = parse_id(
workflow_id,
constraint='workflows',
)

LOG.warning(
"cylc report-timings is deprecated."
" The analysis view in the GUI provides"
" similar functionality."
)

output_options = [
options.show_raw, options.show_summary, options.html_summary
]
if output_options.count(True) > 1:
parser.error('Cannot combine output formats (choose one)')
if not any(output_options):
# No output specified - choose summary by default
options.show_summary = True

db_file = get_workflow_run_pub_db_path(workflow_id)
with CylcWorkflowDAO(db_file, is_public=True) as dao:
row_buf = format_rows(*dao.select_task_times())
with smart_open(options.output_filename) as output:
if options.show_raw:
output.write(row_buf.getvalue())
else:
summary: TimingSummary
if options.show_summary:
summary = TextTimingSummary(row_buf)
elif options.html_summary:
summary = HTMLTimingSummary(row_buf)
summary.write_summary(output)
FORMATS[options.format](row_buf, output)


def format_rows(header, rows):
Expand All @@ -172,7 +178,7 @@ def format_rows(header, rows):
]
formatter = ' '.join('%%-%ds' % line for line in max_lengths) + '\n'
sio.write(formatter % header)
for r in rows:
for r in sorted(rows):
sio.write(formatter % r)
sio.seek(0)
return sio
Expand All @@ -181,15 +187,10 @@ def format_rows(header, rows):
class TimingSummary:
"""Base class for summarizing timing output from cylc.flow run database."""

def __init__(self, filepath_or_buffer=None):
def __init__(self, filepath_or_buffer):
"""Set up internal dataframe storage for time durations."""

self._check_imports()
if filepath_or_buffer is not None:
self.read_timings(filepath_or_buffer)
else:
self.df = None
self.by_host_and_job_runner = None
self.read_timings(filepath_or_buffer)

def read_timings(self, filepath_or_buffer):
"""
Expand All @@ -203,7 +204,7 @@ def read_timings(self, filepath_or_buffer):
pd.set_option('display.max_colwidth', 10000)

df = pd.read_csv(
filepath_or_buffer, delim_whitespace=True, index_col=[0, 1, 2, 3],
filepath_or_buffer, sep=r'\s+', index_col=[0, 1, 2, 3],
parse_dates=[4, 5, 6]
)
self.df = pd.DataFrame({
Expand All @@ -219,18 +220,13 @@ def read_timings(self, filepath_or_buffer):
level=['host', 'job_runner']
)

def write_summary(self, buf=None):
def write_summary(self, buf=sys.stdout):
"""Using the stored timings dataframe, output the data summary."""

if buf is None:
buf = sys.stdout
self.write_summary_header(buf)
for group, df in self.by_host_and_job_runner:
self.write_group_header(buf, group)
df_reshape = self._reshape_timings(df)
df_describe = df.groupby(level='name').describe()
if df_describe.index.nlevels > 1:
df_describe = df_describe.unstack() # for pandas < 0.20.0
df_describe.index.rename(None, inplace=True)
for timing_category in self.df.columns:
self.write_category(
Expand Down Expand Up @@ -286,17 +282,24 @@ def _reshape_timings(timings):
timings = timings.assign(retry=retry)
timings = timings.set_index('retry', append=True)

return timings.unstack('name').stack(level=0)
return timings.unstack('name').stack(level=0, future_stack=True)

@staticmethod
def _dt_to_s(dt):
return dt.total_seconds()


class PandasSummary(TimingSummary):
"""Generic Form designed to leverage the power of pandas.DataFrame.to*
methods.
"""
def read_timings(self, filepath_or_buffer):
self.data = [i.split() for i in filepath_or_buffer.readlines()]

def write_summary(self, buf, method, *args, **kwargs):
import pandas as pd
try:
return dt.total_seconds()
except AttributeError:
# Older versions of pandas have the timedelta as a numpy
# timedelta64 type, which didn't support total_seconds
return pd.Timedelta(dt).total_seconds()
df = pd.DataFrame(self.data[1:], columns=self.data[0])
buf.write(getattr(df, method)(*args, **kwargs))


class TextTimingSummary(TimingSummary):
Expand Down Expand Up @@ -359,7 +362,7 @@ def write_summary_header(self, buf):
}
"""

buf.write('<html><head><style>%s</style></head><body>' % css)
buf.write('<!DOCTYPE html><html><head><style>%s</style></head><body>' % css)

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not having this showed up as a warning when I looked at the HTML with dev tools open.


def write_summary_footer(self, buf):
buf.write('</body></html>')
Expand All @@ -374,23 +377,16 @@ def write_category(self, buf, category, df_reshape, df_describe):
ax = (
df_reshape
.xs(category, level='timing_category')
.plot(kind='box', vert=False)
.plot(kind='box', orientation='vertical')
)
ax.invert_yaxis()
ax.set_xlabel('Seconds')
plt.xticks(rotation=90)
plt.tight_layout()
plt.gcf().savefig(buf, format='svg')
try:
table = df_describe[category].to_html(
classes="summary", index_names=False, border=0
)
except TypeError:
# older pandas don't support the "border" argument
# so explicitly remove it
table = df_describe[category].to_html(
classes="summary", index_names=False
)
table = table.replace('border="1"', '')
table = df_describe[category].to_html(
classes="summary", index_names=False, border=0
)
buf.write(table)
buf.write('</div>')
pass
Expand Down
3 changes: 2 additions & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ main_loop-log_memory =
main_loop-log_db =
sqlparse
report-timings =
pandas==1.*
pandas==2.*
matplotlib
tests =
aiosmtpd
Expand Down Expand Up @@ -131,6 +131,7 @@ all =
%(main_loop-log_memory)s
%(tests)s
%(tutorials)s
%(report-timings)s

[options.entry_points]
# top level shell commands
Expand Down
Loading
Loading