Hi,
I am hoping you could help me solve an issue that seems to stem from multiprocessing: a task failed to un-serialize in a worker process (full traceback below).
Input data (AnnData):
AnnData object with n_obs × n_vars = 2619023 × 36
obs: 'Image', 'Centroid_X_um', 'Centroid_Y_um', 'Name', 'CD40_Pred', 'IFNG_Pred', 'Ki67_Pred', 'CellTypes', 'SiteLocation', 'image_location', 'cell_type', 'imageid'
obsm: 'spatial'
Here is my code:
test = adata[adata.obs['imageid'] == "Slide4_C1"]
sample_size = min(1000, test.n_obs)
sampled_indices = np.random.choice(test.obs_names, size=sample_size, replace=False)
test_sampled = test[sampled_indices]
test_sampled_small = test_sampled[:100]
caseLymph = sm.tl.spatial_interaction(
test_sampled_small,
x_coordinate='Centroid_X_um',
y_coordinate='Centroid_Y_um',
z_coordinate=None,
imageid='imageid',
phenotype='cell_type',
method='radius',
radius=65,
label='spatial_interaction_radius',
permutation=200
)
Processing Image: ['Slide4_C1']
Categories (1, object): ['Slide4_C1']
Identifying neighbours within 65 pixels of every cell
Mapping phenotype to neighbors
Performing 200 permutations
_RemoteTraceback Traceback (most recent call last)
_RemoteTraceback:
"""
Traceback (most recent call last):
File "/home/ext_gnanaolivu_rohandavid_mayo_e/.conda/envs/spatial/lib/python3.11/site-packages/joblib/externals/loky/process_executor.py", line 426, in _process_worker
call_item = call_queue.get(block=True, timeout=timeout)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/ext_gnanaolivu_rohandavid_mayo_e/.conda/envs/spatial/lib/python3.11/multiprocessing/queues.py", line 122, in get
return _ForkingPickler.loads(res)
^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/ext_gnanaolivu_rohandavid_mayo_e/.conda/envs/spatial/lib/python3.11/site-packages/pandas/__init__.py", line 49, in <module>
from pandas.core.api import (
File "/home/ext_gnanaolivu_rohandavid_mayo_e/.conda/envs/spatial/lib/python3.11/site-packages/pandas/core/api.py", line 47, in <module>
from pandas.core.groupby import (
File "/home/ext_gnanaolivu_rohandavid_mayo_e/.conda/envs/spatial/lib/python3.11/site-packages/pandas/core/groupby/__init__.py", line 1, in <module>
from pandas.core.groupby.generic import (
File "/home/ext_gnanaolivu_rohandavid_mayo_e/.conda/envs/spatial/lib/python3.11/site-packages/pandas/core/groupby/generic.py", line 68, in <module>
from pandas.core.frame import DataFrame
File "/home/ext_gnanaolivu_rohandavid_mayo_e/.conda/envs/spatial/lib/python3.11/site-packages/pandas/core/frame.py", line 149, in <module>
from pandas.core.generic import (
File "/home/ext_gnanaolivu_rohandavid_mayo_e/.conda/envs/spatial/lib/python3.11/site-packages/pandas/core/generic.py", line 193, in <module>
from pandas.core.window import (
File "/home/ext_gnanaolivu_rohandavid_mayo_e/.conda/envs/spatial/lib/python3.11/site-packages/pandas/core/window/__init__.py", line 1, in <module>
from pandas.core.window.ewm import (
File "/home/ext_gnanaolivu_rohandavid_mayo_e/.conda/envs/spatial/lib/python3.11/site-packages/pandas/core/window/ewm.py", line 11, in <module>
import pandas._libs.window.aggregations as window_aggregations
ImportError: /lib/x86_64-linux-gnu/libstdc++.so.6: version `GLIBCXX_3.4.29' not found (required by /home/ext_gnanaolivu_rohandavid_mayo_e/.conda/envs/spatial/lib/python3.11/site-packages/pandas/_libs/window/aggregations.cpython-311-x86_64-linux-gnu.so)
"""
The above exception was the direct cause of the following exception:
BrokenProcessPool Traceback (most recent call last)
Cell In[41], line 8
5 test_sampled = test[sampled_indices]
7 test_sampled_small = test_sampled[:100] # Reduce the data further
----> 8 caseLymph = sm.tl.spatial_interaction(
9 test_sampled_small,
10 x_coordinate='Centroid_X_um',
11 y_coordinate='Centroid_Y_um',
12 z_coordinate=None,
13 imageid='imageid',
14 phenotype='cell_type',
15 method='radius',
16 radius=65,
17 label='spatial_interaction_radius',
18 permutation=200
19 )
File ~/.conda/envs/spatial/lib/python3.11/site-packages/scimap/tools/spatial_interaction.py:250, in spatial_interaction(adata, x_coordinate, y_coordinate, z_coordinate, phenotype, method, radius, knn, permutation, imageid, subset, pval_method, verbose, label)
246 # Apply function to all images and create a master dataframe
247 # Create lamda function
248 r_spatial_interaction_internal = lambda x: spatial_interaction_internal (adata_subset=x, x_coordinate=x_coordinate, y_coordinate=y_coordinate,
249 z_coordinate=z_coordinate, phenotype=phenotype, method=method, radius=radius, knn=knn, permutation=permutation, imageid=imageid,subset=subset,pval_method=pval_method)
--> 250 all_data = list(map(r_spatial_interaction_internal, adata_list)) # Apply function
253 # Merge all the results into a single dataframe
254 df_merged = reduce(lambda left,right: pd.merge(left,right,on=['phenotype', 'neighbour_phenotype'], how='outer'), all_data)
File ~/.conda/envs/spatial/lib/python3.11/site-packages/scimap/tools/spatial_interaction.py:248, in spatial_interaction.<locals>.<lambda>(x)
243 adata_list = [adata[adata.obs[imageid] == i] for i in adata.obs[imageid].unique()]
246 # Apply function to all images and create a master dataframe
247 # Create lamda function
--> 248 r_spatial_interaction_internal = lambda x: spatial_interaction_internal (adata_subset=x, x_coordinate=x_coordinate, y_coordinate=y_coordinate,
249 z_coordinate=z_coordinate, phenotype=phenotype, method=method, radius=radius, knn=knn, permutation=permutation, imageid=imageid,subset=subset,pval_method=pval_method)
250 all_data = list(map(r_spatial_interaction_internal, adata_list)) # Apply function
253 # Merge all the results into a single dataframe
File ~/.conda/envs/spatial/lib/python3.11/site-packages/scimap/tools/spatial_interaction.py:188, in spatial_interaction.<locals>.spatial_interaction_internal(adata_subset, x_coordinate, y_coordinate, z_coordinate, phenotype, method, radius, knn, permutation, imageid, subset, pval_method)
185 return data_freq
187 # Apply function
--> 188 final_scores = Parallel(n_jobs=-1)(delayed(permutation_pval)(data=n) for i in range(permutation))
189 perm = pd.DataFrame(final_scores).T
191 # Consolidate the permutation results
File ~/.conda/envs/spatial/lib/python3.11/site-packages/joblib/parallel.py:2007, in Parallel.__call__(self, iterable)
2001 # The first item from the output is blank, but it makes the interpreter
2002 # progress until it enters the Try/Except block of the generator and
2003 # reaches the first yield statement. This starts the asynchronous
2004 # dispatch of the tasks to the workers.
2005 next(output)
-> 2007 return output if self.return_generator else list(output)
File ~/.conda/envs/spatial/lib/python3.11/site-packages/joblib/parallel.py:1650, in Parallel._get_outputs(self, iterator, pre_dispatch)
1647 yield
1649 with self._backend.retrieval_context():
-> 1650 yield from self._retrieve()
1652 except GeneratorExit:
1653 # The generator has been garbage collected before being fully
1654 # consumed. This aborts the remaining tasks if possible and warn
1655 # the user if necessary.
1656 self._exception = True
File ~/.conda/envs/spatial/lib/python3.11/site-packages/joblib/parallel.py:1754, in Parallel._retrieve(self)
1747 while self._wait_retrieval():
1748
1749 # If the callback thread of a worker has signaled that its task
1750 # triggered an exception, or if the retrieval loop has raised an
1751 # exception (e.g. GeneratorExit), exit the loop and surface the
1752 # worker traceback.
1753 if self._aborting:
-> 1754 self._raise_error_fast()
1755 break
1757 # If the next job is not ready for retrieval yet, we just wait for
1758 # async callbacks to progress.
File ~/.conda/envs/spatial/lib/python3.11/site-packages/joblib/parallel.py:1789, in Parallel._raise_error_fast(self)
1785 # If this error job exists, immediately raise the error by
1786 # calling get_result. This job might not exists if abort has been
1787 # called directly or if the generator is gc'ed.
1788 if error_job is not None:
-> 1789 error_job.get_result(self.timeout)
File ~/.conda/envs/spatial/lib/python3.11/site-packages/joblib/parallel.py:745, in BatchCompletionCallBack.get_result(self, timeout)
739 backend = self.parallel._backend
741 if backend.supports_retrieve_callback:
742 # We assume that the result has already been retrieved by the
743 # callback thread, and is stored internally. It's just waiting to
744 # be returned.
--> 745 return self._return_or_raise()
747 # For other backends, the main thread needs to run the retrieval step.
748 try:
File ~/.conda/envs/spatial/lib/python3.11/site-packages/joblib/parallel.py:763, in BatchCompletionCallBack._return_or_raise(self)
761 try:
762 if self.status == TASK_ERROR:
--> 763 raise self._result
764 return self._result
765 finally:
BrokenProcessPool: A task has failed to un-serialize. Please ensure that the arguments of the function are all picklable.
Hi,
I am hoping you could help me solve an issue that seems to stem from multiprocessing: a task failed to un-serialize in a worker process (full traceback below).
Input data (AnnData):
AnnData object with n_obs × n_vars = 2619023 × 36
obs: 'Image', 'Centroid_X_um', 'Centroid_Y_um', 'Name', 'CD40_Pred', 'IFNG_Pred', 'Ki67_Pred', 'CellTypes', 'SiteLocation', 'image_location', 'cell_type', 'imageid'
obsm: 'spatial'
Here is my code:
test = adata[adata.obs['imageid'] == "Slide4_C1"]
sample_size = min(1000, test.n_obs)
sampled_indices = np.random.choice(test.obs_names, size=sample_size, replace=False)
test_sampled = test[sampled_indices]
test_sampled_small = test_sampled[:100]
caseLymph = sm.tl.spatial_interaction(
test_sampled_small,
x_coordinate='Centroid_X_um',
y_coordinate='Centroid_Y_um',
z_coordinate=None,
imageid='imageid',
phenotype='cell_type',
method='radius',
radius=65,
label='spatial_interaction_radius',
permutation=200
)
Processing Image: ['Slide4_C1']
Categories (1, object): ['Slide4_C1']
Identifying neighbours within 65 pixels of every cell
Mapping phenotype to neighbors
Performing 200 permutations
_RemoteTraceback Traceback (most recent call last)
_RemoteTraceback:
"""
Traceback (most recent call last):
File "/home/ext_gnanaolivu_rohandavid_mayo_e/.conda/envs/spatial/lib/python3.11/site-packages/joblib/externals/loky/process_executor.py", line 426, in _process_worker
call_item = call_queue.get(block=True, timeout=timeout)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/ext_gnanaolivu_rohandavid_mayo_e/.conda/envs/spatial/lib/python3.11/multiprocessing/queues.py", line 122, in get
return _ForkingPickler.loads(res)
^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/ext_gnanaolivu_rohandavid_mayo_e/.conda/envs/spatial/lib/python3.11/site-packages/pandas/__init__.py", line 49, in <module>
from pandas.core.api import (
File "/home/ext_gnanaolivu_rohandavid_mayo_e/.conda/envs/spatial/lib/python3.11/site-packages/pandas/core/api.py", line 47, in <module>
from pandas.core.groupby import (
File "/home/ext_gnanaolivu_rohandavid_mayo_e/.conda/envs/spatial/lib/python3.11/site-packages/pandas/core/groupby/__init__.py", line 1, in <module>
from pandas.core.groupby.generic import (
File "/home/ext_gnanaolivu_rohandavid_mayo_e/.conda/envs/spatial/lib/python3.11/site-packages/pandas/core/groupby/generic.py", line 68, in <module>
from pandas.core.frame import DataFrame
File "/home/ext_gnanaolivu_rohandavid_mayo_e/.conda/envs/spatial/lib/python3.11/site-packages/pandas/core/frame.py", line 149, in <module>
from pandas.core.generic import (
File "/home/ext_gnanaolivu_rohandavid_mayo_e/.conda/envs/spatial/lib/python3.11/site-packages/pandas/core/generic.py", line 193, in <module>
from pandas.core.window import (
File "/home/ext_gnanaolivu_rohandavid_mayo_e/.conda/envs/spatial/lib/python3.11/site-packages/pandas/core/window/__init__.py", line 1, in <module>
from pandas.core.window.ewm import (
File "/home/ext_gnanaolivu_rohandavid_mayo_e/.conda/envs/spatial/lib/python3.11/site-packages/pandas/core/window/ewm.py", line 11, in <module>
import pandas._libs.window.aggregations as window_aggregations
ImportError: /lib/x86_64-linux-gnu/libstdc++.so.6: version `GLIBCXX_3.4.29' not found (required by /home/ext_gnanaolivu_rohandavid_mayo_e/.conda/envs/spatial/lib/python3.11/site-packages/pandas/_libs/window/aggregations.cpython-311-x86_64-linux-gnu.so)
"""
The above exception was the direct cause of the following exception:
BrokenProcessPool Traceback (most recent call last)
Cell In[41], line 8
5 test_sampled = test[sampled_indices]
7 test_sampled_small = test_sampled[:100] # Reduce the data further
----> 8 caseLymph = sm.tl.spatial_interaction(
9 test_sampled_small,
10 x_coordinate='Centroid_X_um',
11 y_coordinate='Centroid_Y_um',
12 z_coordinate=None,
13 imageid='imageid',
14 phenotype='cell_type',
15 method='radius',
16 radius=65,
17 label='spatial_interaction_radius',
18 permutation=200
19 )
File ~/.conda/envs/spatial/lib/python3.11/site-packages/scimap/tools/spatial_interaction.py:250, in spatial_interaction(adata, x_coordinate, y_coordinate, z_coordinate, phenotype, method, radius, knn, permutation, imageid, subset, pval_method, verbose, label)
246 # Apply function to all images and create a master dataframe
247 # Create lamda function
248 r_spatial_interaction_internal = lambda x: spatial_interaction_internal (adata_subset=x, x_coordinate=x_coordinate, y_coordinate=y_coordinate,
249 z_coordinate=z_coordinate, phenotype=phenotype, method=method, radius=radius, knn=knn, permutation=permutation, imageid=imageid,subset=subset,pval_method=pval_method)
--> 250 all_data = list(map(r_spatial_interaction_internal, adata_list)) # Apply function
253 # Merge all the results into a single dataframe
254 df_merged = reduce(lambda left,right: pd.merge(left,right,on=['phenotype', 'neighbour_phenotype'], how='outer'), all_data)
File ~/.conda/envs/spatial/lib/python3.11/site-packages/scimap/tools/spatial_interaction.py:248, in spatial_interaction.<locals>.<lambda>(x)
243 adata_list = [adata[adata.obs[imageid] == i] for i in adata.obs[imageid].unique()]
246 # Apply function to all images and create a master dataframe
247 # Create lamda function
--> 248 r_spatial_interaction_internal = lambda x: spatial_interaction_internal (adata_subset=x, x_coordinate=x_coordinate, y_coordinate=y_coordinate,
249 z_coordinate=z_coordinate, phenotype=phenotype, method=method, radius=radius, knn=knn, permutation=permutation, imageid=imageid,subset=subset,pval_method=pval_method)
250 all_data = list(map(r_spatial_interaction_internal, adata_list)) # Apply function
253 # Merge all the results into a single dataframe
File ~/.conda/envs/spatial/lib/python3.11/site-packages/scimap/tools/spatial_interaction.py:188, in spatial_interaction.<locals>.spatial_interaction_internal(adata_subset, x_coordinate, y_coordinate, z_coordinate, phenotype, method, radius, knn, permutation, imageid, subset, pval_method)
185 return data_freq
187 # Apply function
--> 188 final_scores = Parallel(n_jobs=-1)(delayed(permutation_pval)(data=n) for i in range(permutation))
189 perm = pd.DataFrame(final_scores).T
191 # Consolidate the permutation results
File ~/.conda/envs/spatial/lib/python3.11/site-packages/joblib/parallel.py:2007, in Parallel.__call__(self, iterable)
2001 # The first item from the output is blank, but it makes the interpreter
2002 # progress until it enters the Try/Except block of the generator and
2003 # reaches the first yield statement. This starts the asynchronous
2004 # dispatch of the tasks to the workers.
2005 next(output)
-> 2007 return output if self.return_generator else list(output)
File ~/.conda/envs/spatial/lib/python3.11/site-packages/joblib/parallel.py:1650, in Parallel._get_outputs(self, iterator, pre_dispatch)
1647 yield
1649 with self._backend.retrieval_context():
-> 1650 yield from self._retrieve()
1652 except GeneratorExit:
1653 # The generator has been garbage collected before being fully
1654 # consumed. This aborts the remaining tasks if possible and warn
1655 # the user if necessary.
1656 self._exception = True
File ~/.conda/envs/spatial/lib/python3.11/site-packages/joblib/parallel.py:1754, in Parallel._retrieve(self)
1747 while self._wait_retrieval():
1748
1749 # If the callback thread of a worker has signaled that its task
1750 # triggered an exception, or if the retrieval loop has raised an
1751 # exception (e.g. GeneratorExit), exit the loop and surface the
1752 # worker traceback.
1753 if self._aborting:
-> 1754 self._raise_error_fast()
1755 break
1757 # If the next job is not ready for retrieval yet, we just wait for
1758 # async callbacks to progress.
File ~/.conda/envs/spatial/lib/python3.11/site-packages/joblib/parallel.py:1789, in Parallel._raise_error_fast(self)
1785 # If this error job exists, immediately raise the error by
1786 # calling get_result. This job might not exists if abort has been
1787 # called directly or if the generator is gc'ed.
1788 if error_job is not None:
-> 1789 error_job.get_result(self.timeout)
File ~/.conda/envs/spatial/lib/python3.11/site-packages/joblib/parallel.py:745, in BatchCompletionCallBack.get_result(self, timeout)
739 backend = self.parallel._backend
741 if backend.supports_retrieve_callback:
742 # We assume that the result has already been retrieved by the
743 # callback thread, and is stored internally. It's just waiting to
744 # be returned.
--> 745 return self._return_or_raise()
747 # For other backends, the main thread needs to run the retrieval step.
748 try:
File ~/.conda/envs/spatial/lib/python3.11/site-packages/joblib/parallel.py:763, in BatchCompletionCallBack._return_or_raise(self)
761 try:
762 if self.status == TASK_ERROR:
--> 763 raise self._result
764 return self._result
765 finally:
BrokenProcessPool: A task has failed to un-serialize. Please ensure that the arguments of the function are all picklable.