Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions buildstockbatch/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -980,6 +980,3 @@ def process_results(self, skip_combine=False, use_dask_cluster=True, continue_up
finally:
if use_dask_cluster:
self.cleanup_dask()

keep_individual_timeseries = self.cfg.get("postprocessing", {}).get("keep_individual_timeseries", False)
postprocessing.remove_intermediate_files(fs, self.results_dir, keep_individual_timeseries)
15 changes: 9 additions & 6 deletions buildstockbatch/postprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
# want to automatically delete large number of files using current API for two reasons:
# 1. It is inefficient
# 2. It is easy to make mistakes and wipe out a significant run
MAX_STR_LEN = 100000 # some strings such as eplusout_err and step_errors can be very long, truncate to this length


def read_data_point_out_json(fs, reporting_measures, filename):
Expand All @@ -52,9 +53,9 @@ def read_data_point_out_json(fs, reporting_measures, filename):
except (FileNotFoundError, json.JSONDecodeError):
return None
else:
sim_out_report = "SimulationOutputReport"
if "ReportSimulationOutput" in d:
sim_out_report = "ReportSimulationOutput"
sim_out_report = "ReportSimulationOutput"
if "SimulationOutputReport" in d:
sim_out_report = "SimulationOutputReport"

if sim_out_report not in d:
d[sim_out_report] = {"applicable": False}
Expand Down Expand Up @@ -117,8 +118,10 @@ def read_out_osw(fs, filename):
keys_to_copy = ["started_at", "completed_at", "completed_status"]
for key in keys_to_copy:
out_d[key] = d.get(key, None)
if "eplusout_err" in d:
out_d["eplusout_err"] = d["eplusout_err"]
if "eplusout_err" in d and "EnergyPlus Terminated" in d["eplusout_err"]:
out_d["eplusout_err"] = d["eplusout_err"][:MAX_STR_LEN]
else:
out_d["eplusout_err"] = ""
step_errors = []
for step in d.get("steps", []):
measure_dir_name = step["measure_dir_name"]
Expand All @@ -131,7 +134,7 @@ def read_out_osw(fs, filename):
step_errors.append({"measure_dir_name": measure_dir_name, "step_errors": result.get("step_errors")})

if step_errors:
out_d["step_failures"] = step_errors
out_d["step_failures"] = json.dumps(step_errors)[:MAX_STR_LEN]

return out_d

Expand Down
41 changes: 1 addition & 40 deletions buildstockbatch/test/test_local.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,11 +76,7 @@ def test_resstock_local_batch(project_filename):

batch.process_results()

if batch.cfg.get("postprocessing", {}).get("keep_individual_timeseries", False):
assert (simout_path / "timeseries").exists()
else:
assert not (simout_path / "timeseries").exists()

assert (simout_path / "timeseries").exists()
assert (simout_path / "simulations_job0.tar.gz").exists()
base_pq = out_path / "parquet" / "baseline" / "results_up00.parquet"
assert base_pq.exists()
Expand Down Expand Up @@ -167,38 +163,3 @@ def mocked_subprocess_run(run_cmd, **kwargs):
err_log_re.search(failed_job.read())

sleep_mock.assert_called_once_with(20)


@resstock_required
def test_low_disk_mode():
"""Test that the low-disk mode correctly skips tarball creation and returns early."""
project_filename = resstock_directory / "project_testing" / "testing_baseline.yml"
LocalBatch.validate_project(str(project_filename))
batch = LocalBatch(str(project_filename))

# Modify the number of datapoints to reduce simulation time
n_datapoints = 2
batch.cfg["sampler"]["args"]["n_datapoints"] = n_datapoints

# Handle weather files
local_weather_file = resstock_directory.parent / "weather" / batch.cfg["weather_files_url"].split("/")[-1]
if local_weather_file.exists():
del batch.cfg["weather_files_url"]
batch.cfg["weather_files_path"] = str(local_weather_file)

# Run batch with low_disk=True
batch.run_batch(low_disk=True)

# Check that results_job0.json.gz exists
out_path = pathlib.Path(batch.output_dir)
simout_path = out_path / "simulation_output"
assert (simout_path / "results_job0.json.gz").exists()

# Check that simulations_job0.tar.gz does NOT exist (should be skipped in low_disk mode)
assert not (simout_path / "simulations_job0.tar.gz").exists()

# Check that the timeseries directories still exist (they shouldn't be compressed into a tarball)
assert (simout_path / "timeseries").exists()

# Clean up
shutil.rmtree(out_path)
24 changes: 3 additions & 21 deletions buildstockbatch/test/test_postprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,9 @@ def test_report_additional_results_csv_columns(basic_residential_project_file):
for upgrade_id in (0, 1):
df = read_csv(str(results_dir / "results_csvs" / f"results_up{upgrade_id:02d}.csv.gz"))
assert "Measure Failed" in df[df["building_id"] == 3]["step_failures"].iloc[0]
assert "EnergyPlus Failed with Error: Building ID is 3" in df[df["building_id"] == 3]["eplusout_err"].iloc[0]
assert (
"EnergyPlus Terminated with Error: Building ID is 3" in df[df["building_id"] == 3]["eplusout_err"].iloc[0]
)
df = df[df["building_id"] != 3]
assert (df["reporting_measure1.column_1"] == 1).all()
assert (df["reporting_measure1.column_2"] == 2).all()
Expand Down Expand Up @@ -92,26 +94,6 @@ def test_large_parquet_combine(basic_residential_project_file):
bsb.process_results() # this would raise exception if the postprocessing could not handle the situation


@pytest.mark.parametrize("keep_individual_timeseries", [True, False])
def test_keep_individual_timeseries(keep_individual_timeseries, basic_residential_project_file, mocker):
project_filename, results_dir = basic_residential_project_file(
{"postprocessing": {"keep_individual_timeseries": keep_individual_timeseries}}
)

mocker.patch.object(BuildStockBatchBase, "weather_dir", None)
mocker.patch.object(BuildStockBatchBase, "get_dask_client")
mocker.patch.object(BuildStockBatchBase, "results_dir", results_dir)
bsb = BuildStockBatchBase(project_filename)
bsb.process_results()

results_path = pathlib.Path(results_dir)
simout_path = results_path / "simulation_output"
assert len(list(simout_path.glob("results_job*.json.gz"))) == 0

ts_path = simout_path / "timeseries"
assert ts_path.exists() == keep_individual_timeseries


def test_upgrade_missing_ts(basic_residential_project_file, mocker, caplog):
caplog.set_level(logging.WARNING, logger="buildstockbatch.postprocessing")

Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"completed_at" : "20241108T185509Z",
"completed_status" : "Fail",
"eplusout_err" : "EnergyPlus Failed with Error: Building ID is 3",
"eplusout_err" : "EnergyPlus Terminated with Error: Building ID is 3",
"created_at" : "2024-11-08T12:55:07.100147",
"current_step" : 1,
"file_paths" :
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"completed_status" : "Fail",
"created_at" : "2024-11-08T12:55:07.099713",
"current_step" : 1,
"eplusout_err" : "EnergyPlus Failed with Error: Building ID is 3",
"eplusout_err" : "EnergyPlus Terminated with Error: Building ID is 3",
"file_paths" :
[
"/Users/radhikar/Documents/buildstock2025/resstock2025/project_national/national_upgrades/simulation_output/up01/bldg0000003/generated_files",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@
"include_annual_system_use_consumptions": False,
"include_annual_emissions": True,
"include_annual_emission_fuels": True,
"include_annual_emission_end_uses": True,
"include_annual_emission_end_uses": False,

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@rajeee @asparke2 Is this default value change necessary? This breaks a couple resstock CI tests (since the current residential yml schema does not support toggling including annual outputs or not).

This may be our workaround, but it's probably not the preferred path forward.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wouldn't it make the most sense to expose all the include_annual_foo variables as arguments in the yaml? That way they can be turned on or off as needed, as is the case for the include_timeseries_foo variables.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure. But more work.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right, but might save work in the long run if it avoids future situations like this.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We decided to expose all the include_annual_foo variables as arguments here.

"include_annual_total_loads": True,
"include_annual_unmet_hours": True,
"include_annual_peak_fuels": True,
Expand All @@ -64,7 +64,7 @@
"include_timeseries_system_use_consumptions": False,
"include_timeseries_unmet_hours": False,
"include_timeseries_resilience": False,
"timeseries_num_decimal_places": 3,
"timeseries_num_decimal_places": 5,
"user_output_variables": "",
"user_output_meters": "",
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,7 @@ def test_residential_hpxml(upgrade, dynamic_cfg):
assert simulation_output_step["arguments"]["include_annual_system_use_consumptions"] is False
assert simulation_output_step["arguments"]["include_annual_emissions"] is True
assert simulation_output_step["arguments"]["include_annual_emission_fuels"] is True
assert simulation_output_step["arguments"]["include_annual_emission_end_uses"] is True
assert simulation_output_step["arguments"]["include_annual_emission_end_uses"] is False
assert simulation_output_step["arguments"]["include_annual_total_loads"] is True
assert simulation_output_step["arguments"]["include_annual_unmet_hours"] is True
assert simulation_output_step["arguments"]["include_annual_peak_fuels"] is True
Expand All @@ -311,7 +311,7 @@ def test_residential_hpxml(upgrade, dynamic_cfg):
assert simulation_output_step["arguments"]["include_timeseries_weather"] is False
assert simulation_output_step["arguments"]["include_timeseries_resilience"] is False
assert simulation_output_step["arguments"]["timeseries_timestamp_convention"] == "end"
assert simulation_output_step["arguments"]["timeseries_num_decimal_places"] == 3
assert simulation_output_step["arguments"]["timeseries_num_decimal_places"] == 5
assert simulation_output_step["arguments"]["add_timeseries_dst_column"] is True
assert simulation_output_step["arguments"]["add_timeseries_utc_column"] is True
index += 1
Expand Down
12 changes: 12 additions & 0 deletions docs/changelog/changelog_dev.rst
Original file line number Diff line number Diff line change
Expand Up @@ -63,3 +63,15 @@ Development Changelog

Added support for replacing existing results in s3 for buildstock_local. When --replace_existing is passed to buildstock_local,
it will replace existing results in s3.

.. change::
:tags: postprocessing, feature
:pullreq: 501

Introduce a bunch of changes originating from issues in SDR run.
1. Indermediate files are not cleaned up after postprocessing. This allows for re-running postprocessing.
2. eplusout_err and step_failures are truncted to 100000 chars. eplusout_err is only collected if E+ terminated.
3. Default for include_annual_emission_end_uses is changed to False.
4. Default for timeseries_num_decimal_places is changed to 5.
5. When SimulationOutputReport or ReportSimulationOutput is both missing in data_point_out.json (perhaps due to failure),
add a key of for ReportSimulationOutput instead of SimulationOutputReport as the later is outdated.