cylc · hjoliver · Mar 17, 2026 · Apr 12, 2026 · Apr 13, 2026 · Apr 14, 2026
diff --git a/changes.d/7237.fix.md b/changes.d/7237.fix.md
@@ -0,0 +1,2 @@
+Fixed a bug that could cause premature shutdown or stall if a parented instance 
+of a sometimes parentless task ended up at the runahead limit.
diff --git a/cylc/flow/clean.py b/cylc/flow/clean.py
@@ -446,8 +446,8 @@ def remote_clean(
             f"Remote clean failed for {id_} - could not clean on these "
             "install target(s):"
         )
-        for target, exc in failed_targets.items():
-            msg += f"\n[{target}]\n{exc}"
+        for target, excep in failed_targets.items():
+            msg += f"\n[{target}]\n{excep}"
         raise CylcError(msg)
 
 

diff --git a/cylc/flow/commands.py b/cylc/flow/commands.py
@@ -180,10 +180,6 @@ def _remove_matched_tasks(
                 continue
             removed[itask.tokens.task] = fnums_to_remove
             if fnums_to_remove == itask.flow_nums:
-                # Need to remove the task from the pool.
-                # Spawn next occurrence of xtrigger sequential task (otherwise
-                # this would not happen after removing this occurrence):
-                schd.pool.check_spawn_psx_task(itask)
                 schd.pool.remove(itask, 'request')
                 to_kill.append(itask)
                 itask.removed = True

diff --git a/cylc/flow/scheduler.py b/cylc/flow/scheduler.py
@@ -844,6 +844,12 @@ def _load_pool_from_tasks(self):
             flow=[FLOW_NEW],
             flow_descr=f"original flow from {self.options.starttask}"
         )
+        # Spawning to the runahead limit immediately is not strictly necessary
+        # as it would occur over several scheduler main loop iterations; we do
+        # it mainly for compatibility with integration tests pre PR #7237.
+        self.pool.spawn_to_runahead_limit()
+        for itask in self.pool.get_tasks():
+            self.pool.queue_if_ready(itask)
 
     def _load_pool_from_point(self):
         """Load task pool for a cycle point, for a new run.
@@ -881,13 +887,6 @@ def _load_pool_from_db(self):
         self.workflow_db_mgr.pri_dao.select_abs_outputs_for_restart(
             self.pool.load_abs_outputs_for_restart)
 
-        # Compute and release runahead tasks once after loading all tasks from
-        # the DB. This also causes spawning of parentless tasks out to the
-        # runahead limit, which may be necessary here if the stop point or
-        # runahead limit was changed for the restart.
-        self.pool.compute_runahead()
-        self.pool.release_runahead_tasks()
-
         self.pool.load_db_tasks_to_hold()
         self.pool.update_flow_mgr()
 
@@ -1622,8 +1621,13 @@ def update_profiler_logs(self, tinit):
 
     async def _main_loop(self) -> None:
         """A single iteration of the main loop."""
+
         tinit = time()
 
+        self.pool.compute_runahead()
+        self.pool.release_runahead_tasks()
+        # If applicable, set stop mode or shutdown on task failure:
+        await self.workflow_shutdown()
+
         # Useful for debugging core scheduler issues:
         # import logging
         # self.pool.log_task_pool(logging.CRITICAL)
@@ -1656,11 +1660,11 @@ async def _main_loop(self) -> None:
                 self.broadcast_mgr.check_ext_triggers(
                     itask, self.ext_trigger_queue)
 
-            if itask.is_ready_to_run() and not itask.is_manual_submit:
-                self.pool.queue_task(itask)
+            self.pool.queue_if_ready(itask)
 
         if self.xtrigger_mgr.do_housekeeping:
             self.xtrigger_mgr.housekeep(self.pool.get_tasks())
+
         self.pool.clock_expire_tasks()
         self.release_tasks_to_run()
 
@@ -1701,6 +1705,8 @@ async def _main_loop(self) -> None:
         # Update state summary, database, and uifeed
         self.workflow_db_mgr.put_task_event_timers(self.task_events_mgr)
 
+        self.pool.release_runahead_tasks()
+
         # List of tasks whose states have changed.
         updated_task_list = [
             t for t in self.pool.get_tasks() if t.state.is_updated]
@@ -1717,11 +1723,10 @@ async def _main_loop(self) -> None:
             await self.update_data_structure()
 
         if has_updated:
-            if not self.is_reloaded:
+            if not self.is_reloaded and self.is_stalled:
                 # (A reload cannot un-stall the workflow by itself)
-                if self.is_stalled:
-                    self.is_stalled = False
-                    self.update_data_store()
+                self.is_stalled = False
+                self.update_data_store()
             self.is_reloaded = False
 
             # Reset workflow and task updated flags.
@@ -1743,9 +1748,6 @@ async def _main_loop(self) -> None:
         # Shutdown workflow if timeouts have occurred
         self.timeout_check()
 
-        # Does the workflow need to shutdown on task failure?
-        await self.workflow_shutdown()
-
         if self.options.profile_mode:
             self.update_profiler_logs(tinit)
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		Fixed a bug that could cause premature shutdown or stall if a parented instance
		of a sometimes parentless task ended up at the runahead limit.
Comment thread hjoliver marked this conversation as resolved. Outdated