European-XFEL · philsmt · Apr 10, 2025 · Apr 10, 2025 · takluyver · Apr 14, 2025
diff --git a/docs/changelog.md b/docs/changelog.md
@@ -17,6 +17,7 @@
     ```
 
 Added:
+- [PumpProbePulses.pumped_pulses_ratios()][extra.components.PumpProbePulses.pumped_pulses_ratios] to determine the ratio of pumped pulses per train (!317).
 - [TOFResponse][extra.recipes.TOFResponse] to estimate, deconvolve and denoise the instrumental response in eTOFs (!304).
 - [VSLight][extra.recipes.VSLight] to calibrate and deconvolve eTOFs from a continuous monochromator scan (!304).
 - [CookieboxCalibration][extra.recipes.CookieboxCalibration] to calibrate data from eTOFs after taking a calibration run (!284).

diff --git a/src/extra/components/pulses.py b/src/extra/components/pulses.py
@@ -1507,6 +1507,59 @@
         else:
             raise ValueError(f"{field=!r} parameter was not 'fel'/'ppl'/None")
 
+    def pumped_pulses_ratios(self, ppl_only_value=np.nan, labelled=True):
+        """Determine ratio of pumped pulses per train.
+
+        Args:
+            ppl_only_value (float, optional): Value for trains
+                containing only PPL pulses, same value as for trains
+                without any pulses (np.nan) by default.
+            labelled (bool, optional): Whether a labelled pandas Series
+                (default) or unlabelled numpy array is returned.
+
+        Returns:
+            (pandas.Series or numpy.ndarray): Ratio of pumped pulses
+                per train, indexed by train ID if labelled is True.
+        """
+
+        pids = self.pulse_ids(copy=False)
+
+        try:
+            fel_count = pids[:, :, True, :].groupby('trainId').count()
+        except KeyError:
+            fel_count = pd.Series([])
+
+        try:
+            pumped_count = pids[:, :, True, True].groupby('trainId').count()
+        except KeyError:
+            pumped_count = pd.Series([])
+
+        # Compute the ratio for trains with at least one pumped pulse.
+        ratios = pumped_count / fel_count.loc[pumped_count.index]
+
+        # Extend the series to all expected trains, filling with NaN.
+        ratios = self._extend_all_trains(ratios, np.nan)
+
+        # Set those trains with all unpumped pulses to 0.0.
+        fel_only_index = fel_count.index.difference(pumped_count.index)
+        ratios.loc[fel_only_index] = 0.0
+
+        # Set those trains with no FEL pulses to the desired fill value.
+        if ppl_only_value is not np.nan:
+            # If one only cares for the index labels of a groupby,
+            # pd.SeriesGroupBy.count() is indeed faster than
+            # pd.SeriesGroupBy.groups, likely due to additional objects
+            # created by the latter.
+            try:
+                ppl_only_index = pids[:, :, :, True].groupby('trainId') \
+                    .count().index.difference(fel_count.index)
+            except KeyError:
+                pass
+            else:
+                ratios.loc[ppl_only_index] = ppl_only_value
+
+        return ratios if labelled else ratios.to_numpy()
+
 
 class DldPulses(PulsePattern):
     """An interface to pulses from DLD reconstruction.
@@ -1629,3 +1682,12 @@
         warn("Use triggers() instead of get_triggers()",
              DeprecationWarning, stacklevel=2)
         return self.triggers(*args, **kwargs)
+
+    @wraps(PumpProbePulses.pumped_pulses_ratios)
+    def pumped_pulses_ratios(self, *args, **kwargs):
+        pids = self.pulse_ids(copy=False)
+
+        if 'ppl' not in pids.index.names:
+            raise ValueError('only available with PPL information')
+
+        return PumpProbePulses.pumped_pulses_ratios(self, *args, **kwargs)
diff --git a/tests/test_components_pulses.py b/tests/test_components_pulses.py
@@ -740,3 +740,28 @@ def test_pump_probe_specials(mock_spb_aux_run, mock_sqs_remi_run):
             names=('trainId', 'pulseIndex', 'fel', 'ppl')))
 
     assert not pulses.is_constant_pattern()
+
+    # Test pumped pulses ratios.
+    pulses = PumpProbePulses(mock_sqs_remi_run[10:], pulse_offset=0)
+    pulses._get_train_ids = lambda: [1000, 1001, 1002, 1003, 1004]
+    pulses._pulse_ids = pd.Series(
+        [300, 310, 300, 300, 300, 310],
+        index=pd.MultiIndex.from_tuples([
+            (1000, 0, True, True),
+            (1000, 0, True, False),
+            (1001, 0, True, False),
+            (1002, 0, False, True),
+            (1003, 0, True, True),
+            (1003, 0, True, True),
+        ], names=['trainId', 'pulseIndex', 'fel', 'ppl']))
+
+    ratios = pulses.pumped_pulses_ratios()
+    assert np.all(ratios.index == [1000, 1001, 1002, 1003, 1004])
+    assert np.all(ratios.loc[[1000, 1001, 1003]] == [0.5, 0.0, 1.0])
+    assert np.all(ratios.loc[[1002, 1004]].isna())
+
+    ratios = pulses.pumped_pulses_ratios(np.inf)
+    assert np.all(ratios.index == [1000, 1001, 1002, 1003, 1004])
+    assert np.all(
+        ratios.loc[[1000, 1001, 1002, 1003]] == [0.5, 0.0, np.inf, 1.0])
+    assert np.all(ratios.loc[[1004]].isna())