11use crate :: cli:: { AccessionOptions , MultiInputOptions , Provider } ;
2- use anyhow:: { bail, Result } ;
2+ use anyhow:: { anyhow , bail, Result } ;
33use futures:: { future:: join_all, stream:: FuturesUnordered , StreamExt } ;
44use indicatif:: { MultiProgress , ProgressBar , ProgressStyle } ;
55use log:: { debug, error, info, trace, warn} ;
@@ -242,12 +242,21 @@ pub async fn identify_urls(
242242 // Wait for all tasks to complete
243243 let results = join_all ( tasks) . await ;
244244
245- // Process results, handling any JoinError from the spawned tasks
246- let mut processed_results = Vec :: new ( ) ;
247- for result in results {
245+ // Process results, handling any JoinError from the spawned tasks.
246+ // `join_all` preserves task order, which matches the input `accessions`
247+ // order, so a JoinError can be attributed to its accession rather than
248+ // being silently dropped.
249+ let mut processed_results = Vec :: with_capacity ( accessions. len ( ) ) ;
250+ for ( accession, result) in accessions. iter ( ) . zip ( results) {
248251 match result {
249252 Ok ( res) => processed_results. push ( res) ,
250- Err ( e) => error ! ( error: % = e; "Task join error" ) ,
253+ Err ( e) => {
254+ error ! ( accession = accession. as_str( ) , error: % = e; "Task join error" ) ;
255+ processed_results. push ( (
256+ accession. clone ( ) ,
257+ Err ( anyhow ! ( "task failed to complete: {e}" ) ) ,
258+ ) ) ;
259+ }
251260 }
252261 }
253262
@@ -372,6 +381,10 @@ pub async fn prefetch(input: &MultiInputOptions, output_dir: Option<&str>) -> Re
372381 // For GCP downloads, we'll use a separate Vec since gsutil has its own concurrency management
373382 let mut gcp_downloads = Vec :: new ( ) ;
374383
384+ // Accumulate per-accession failures so the command can fail loudly at the
385+ // end while still attempting every accession (best-effort behavior).
386+ let mut failures: Vec < String > = Vec :: new ( ) ;
387+
375388 for ( accession, url_result) in url_results {
376389 match url_result {
377390 Ok ( url) => {
@@ -385,7 +398,12 @@ pub async fn prefetch(input: &MultiInputOptions, output_dir: Option<&str>) -> Re
385398
386399 match input. options . provider {
387400 Provider :: Https => {
388- https_downloads. push ( download_url ( url, path, pb) ) ;
401+ // Carry the accession into the future so download
402+ // failures can be attributed back to it.
403+ https_downloads. push ( async move {
404+ let result = download_url ( url, path, pb) . await ;
405+ ( accession, result)
406+ } ) ;
389407 }
390408 Provider :: Gcp => {
391409 let project_id = match & input. options . gcp_project_id {
@@ -395,43 +413,61 @@ pub async fn prefetch(input: &MultiInputOptions, output_dir: Option<&str>) -> Re
395413 accession = accession. as_str( ) ;
396414 "GCP project ID is required for GCP downloads"
397415 ) ;
416+ failures. push ( format ! (
417+ "{accession}: GCP project ID is required for GCP downloads"
418+ ) ) ;
398419 continue ;
399420 }
400421 } ;
401422 // We'll collect GCP downloads and process them separately
402- gcp_downloads. push ( ( url, path, project_id, pb) ) ;
423+ gcp_downloads. push ( ( accession , url, path, project_id, pb) ) ;
403424 }
404425 _ => {
405426 error ! (
406427 accession = accession. as_str( ) ,
407428 provider: ? = input. options. provider;
408429 "Unsupported provider"
409430 ) ;
431+ failures. push ( format ! (
432+ "{accession}: unsupported provider: {:?}" ,
433+ input. options. provider
434+ ) ) ;
410435 continue ;
411436 }
412437 }
413438 }
414439 Err ( e) => {
415440 error ! ( accession = accession. as_str( ) , error: % = e; "Failed to identify URL" ) ;
441+ failures. push ( format ! ( "{accession}: URL resolution failed: {e}" ) ) ;
416442 }
417443 }
418444 }
419445
420446 // Process HTTPS downloads concurrently
421- while let Some ( result) = https_downloads. next ( ) . await {
447+ while let Some ( ( accession , result) ) = https_downloads. next ( ) . await {
422448 if let Err ( e) = result {
423- error ! ( error: % = e; "HTTPS download failed" ) ;
449+ error ! ( accession = accession. as_str( ) , error: % = e; "HTTPS download failed" ) ;
450+ failures. push ( format ! ( "{accession}: download failed: {e}" ) ) ;
424451 }
425452 }
426453
427454 // Process GCP downloads - since gsutil has its own concurrency management,
428455 // we'll run them sequentially to avoid overwhelming the terminal output
429- for ( url, path, project_id, pb) in gcp_downloads {
456+ for ( accession , url, path, project_id, pb) in gcp_downloads {
430457 if let Err ( e) = download_url_gcp ( url, path, project_id, pb) . await {
431- error ! ( error: % = e; "GCP download failed" ) ;
458+ error ! ( accession = accession. as_str( ) , error: % = e; "GCP download failed" ) ;
459+ failures. push ( format ! ( "{accession}: download failed: {e}" ) ) ;
432460 }
433461 }
434462
463+ if !failures. is_empty ( ) {
464+ bail ! (
465+ "prefetch failed for {} accession(s):\n {}" ,
466+ failures. len( ) ,
467+ failures. join( "\n " )
468+ ) ;
469+ }
470+
435471 Ok ( ( ) )
436472}
437473
@@ -844,4 +880,31 @@ mod tests {
844880 let err_msg = result. unwrap_err ( ) . to_string ( ) ;
845881 assert ! ( err_msg. contains( "GCP project ID is required for GCP downloads" ) ) ;
846882 }
883+
// Regression test for multi-accession `prefetch`: when URL resolution fails,
// the call must return an error, and that error must name each accession
// that failed.
884+ #[ tokio:: test]
885+ async fn prefetch_multi_fails_when_any_url_resolution_fails ( ) {
886+ // Both accessions contain "INVALID", so the test `query_entrez` returns
887+ // "no urls found" for each and resolution fails with no network access.
888+ let input = MultiInputOptions {
889+ accessions : vec ! [ "INVALID_A" . to_string( ) , "INVALID_B" . to_string( ) ] ,
890+ options : create_test_accession_options ( ) ,
891+ } ;
892+
// `None` is passed for the `output_dir` argument of `prefetch`.
893+ let result = prefetch ( & input, None ) . await ;
894+ assert ! (
895+ result. is_err( ) ,
896+ "multi-accession prefetch must fail when an accession cannot be resolved"
897+ ) ;
898+
899+ // The summary error must identify every failed accession, not just the first.
// NOTE(review): `to_string()` presumably renders only the outermost error
// message (anyhow `Display`), so both names must appear in the summary
// message itself rather than in a wrapped cause; confirm against `prefetch`.
900+ let err_msg = result. unwrap_err ( ) . to_string ( ) ;
901+ assert ! (
902+ err_msg. contains( "INVALID_A" ) ,
903+ "missing INVALID_A: {err_msg}"
904+ ) ;
905+ assert ! (
906+ err_msg. contains( "INVALID_B" ) ,
907+ "missing INVALID_B: {err_msg}"
908+ ) ;
909+ }
847910}
0 commit comments