11const { spawn } = require ( "child_process" ) ;
22const path = require ( "path" ) ;
33const fs = require ( "fs/promises" ) ;
4+ const https = require ( "https" ) ;
45
56const tempPath = path . join ( __dirname , ".." , ".." , ".." , "temp" ) ;
67const datasetsPath = path . join ( __dirname , ".." , ".." , ".." , "datasets" ) ;
@@ -16,22 +17,9 @@ const deleteFolderRecursive = async (path) => {
1617 }
1718} ;
1819
19- ( async ( ) => {
20- console . log ( "Reading config file from" , configPath ) ;
21- const config = JSON . parse ( await fs . readFile ( configPath , "utf-8" ) ) ;
22- console . log ( "Deleting temp and datasets folders" ) ;
23- await deleteFolderRecursive ( tempPath ) ;
24- await deleteFolderRecursive ( datasetsPath ) ;
25- console . log ( "Cloning repository" ) ;
26- await new Promise ( ( resolve , reject ) => {
27- const gitProcess = spawn ( "git" , [
28- "clone" ,
29- "-n" ,
30- "--depth=1" ,
31- "--filter=tree:0" ,
32- config . repository ,
33- tempPath ,
34- ] ) ;
20+ const runGit = ( args , options = { } ) => {
21+ return new Promise ( ( resolve , reject ) => {
22+ const gitProcess = spawn ( "git" , args , options ) ;
3523 gitProcess . stdout . on ( "data" , ( data ) => {
3624 console . log ( data . toString ( ) ) ;
3725 } ) ;
@@ -42,57 +30,107 @@ const deleteFolderRecursive = async (path) => {
4230 if ( code === 0 ) {
4331 resolve ( ) ;
4432 } else {
45- reject ( " Error cloning repository" ) ;
33+ reject ( new Error ( `Git command failed: git ${ args . join ( " " ) } ` ) ) ;
4634 }
4735 } ) ;
4836 } ) ;
37+ } ;
38+
/**
 * Download `url` over HTTPS and write the response body to `destination`.
 *
 * The parent directory of `destination` is created when missing. Redirect
 * responses (301, 302, 303, 307, 308) are followed, with a relative
 * `Location` header resolved against the URL that produced it.
 *
 * The whole body is buffered in memory before it is written to disk.
 *
 * @param {string} url - HTTPS URL to fetch.
 * @param {string} destination - File path the body is written to.
 * @param {number} [redirectsLeft=5] - How many more redirects may be followed
 *   before giving up; guards against redirect loops.
 * @returns {Promise<void>} Resolves once the file has been written.
 * @throws {Error} When the final status is not 200, a redirect has no usable
 *   `Location`, the redirect budget is exhausted, the connection fails or
 *   closes early, or the file cannot be written.
 */
const downloadFile = async (url, destination, redirectsLeft = 5) => {
  await fs.mkdir(path.dirname(destination), { recursive: true });
  await new Promise((resolve, reject) => {
    const request = https.get(url, (response) => {
      const { statusCode, headers } = response;

      if ([301, 302, 303, 307, 308].includes(statusCode)) {
        // Drain the redirect body so the socket is released.
        response.resume();
        if (!headers.location) {
          reject(new Error(`Redirect from ${url} has no Location header`));
          return;
        }
        if (redirectsLeft <= 0) {
          reject(new Error(`Too many redirects while downloading ${url}`));
          return;
        }
        let nextUrl;
        try {
          nextUrl = new URL(headers.location, url).toString();
        } catch (err) {
          // A throw here would escape the event callback as an uncaught exception.
          reject(
            new Error(`Invalid redirect target from ${url}: ${headers.location}`, {
              cause: err,
            })
          );
          return;
        }
        downloadFile(nextUrl, destination, redirectsLeft - 1).then(resolve, reject);
        return;
      }

      if (statusCode !== 200) {
        response.resume();
        reject(new Error(`Failed to download ${url}: HTTP ${statusCode}`));
        return;
      }

      const incompleteError = () =>
        new Error(`Connection closed before ${url} was fully downloaded`);
      const chunks = [];
      response.on("data", (chunk) => chunks.push(chunk));
      response.on("error", reject);
      response.on("close", () => {
        // No-op when the promise already settled; otherwise prevents a hang
        // when the connection drops without an "end" event.
        if (!response.complete) {
          reject(incompleteError());
        }
      });
      response.on("end", () => {
        if (!response.complete) {
          // Never persist a truncated body.
          reject(incompleteError());
          return;
        }
        fs.writeFile(destination, Buffer.concat(chunks)).then(resolve, reject);
      });
    });
    request.on("error", reject);
  });
};
68+
/**
 * Download every file of a URL-backed dataset into the datasets folder.
 *
 * Each file is fetched from `<baseUrl>/<dataset.path>/<file>` and stored at
 * `<datasetsPath>/<dataset.path>/<file>`. Files are downloaded one at a time,
 * in the order they are listed.
 *
 * @param {object} dataset - Dataset entry from the config.
 * @param {string} dataset.name - Name used in log and error messages.
 * @param {string} dataset.path - Dataset path, relative to both the base URL
 *   and the local datasets folder.
 * @param {object} dataset.source - Source description.
 * @param {string} dataset.source.baseUrl - URL prefix the files live under.
 * @param {string[]} dataset.source.files - File names relative to the dataset path.
 * @returns {Promise<void>} Resolves once every file has been downloaded.
 * @throws {Error} When `source.files` is not an array, or a download fails.
 */
const fetchUrlDataset = async (dataset) => {
  const { name, source } = dataset;
  if (!Array.isArray(source?.files)) {
    // Fail with a readable message instead of "source.files is not iterable".
    throw new Error(`Dataset ${name} has no "source.files" list to download`);
  }

  // Strip surrounding slashes so joining with "/" never produces "//".
  const trimSlashes = (segment) => segment.replace(/^\/+|\/+$/g, "");

  const datasetPath = path.join(datasetsPath, dataset.path);
  const baseUrl = source.baseUrl.replace(/\/+$/, "");
  console.log(`Downloading ${name} from ${baseUrl}`);
  for (const file of source.files) {
    const url = [baseUrl, trimSlashes(dataset.path), trimSlashes(file)].join("/");
    const destination = path.join(datasetPath, file);
    console.log(`\t- ${url}`);
    await downloadFile(url, destination);
  }
};
80+
/**
 * Fetch git-backed datasets with a sparse checkout and move them into the
 * datasets folder.
 *
 * Steps: clone `config.repository` into the temp folder without checking
 * anything out, restrict the sparse checkout to the requested dataset
 * directories, check out `config.commitHash`, then move each dataset from
 * `<tempPath>/<datasetsRoot>/<dataset.path>` to `<datasetsPath>/<dataset.path>`.
 * Does nothing when `datasets` is empty.
 *
 * @param {object} config - Parsed config file.
 * @param {string} config.repository - Repository URL passed to `git clone`.
 * @param {string} config.datasetsRoot - Directory inside the repository that
 *   holds the datasets.
 * @param {string} config.commitHash - Revision passed to `git checkout`.
 * @param {Array<{name: string, path: string}>} datasets - Datasets to fetch.
 * @returns {Promise<void>} Resolves once every dataset has been moved.
 * @throws {Error} When a git command fails or a dataset is missing after checkout.
 */
const fetchGitDatasets = async (config, datasets) => {
  if (datasets.length === 0) {
    return;
  }

  console.log("Cloning repository");
  await runGit([
    "clone",
    "-n",
    "--depth=1",
    "--filter=tree:0",
    config.repository,
    tempPath,
  ]);

  console.log("Gathering datasets paths");
  const datasetPaths = datasets.map((d) => {
    console.log(`\t- ${d.name} at ${d.path}`);
    // Sparse-checkout patterns must use "/" separators; path.join alone
    // would produce backslashes on Windows and match nothing.
    return path.join(config.datasetsRoot, d.path).split(path.sep).join("/");
  });

  console.log("Initializing sparse checkout");
  await runGit(["sparse-checkout", "set", "--no-cone", ...datasetPaths], {
    cwd: tempPath,
  });

  console.log("Pulling repository");
  await runGit(["checkout", config.commitHash], { cwd: tempPath });

  console.log("Moving datasets");
  for (const dataset of datasets) {
    const source = path.join(tempPath, config.datasetsRoot, dataset.path);
    const destination = path.join(datasetsPath, dataset.path);
    await fs.mkdir(path.dirname(destination), { recursive: true });
    try {
      await fs.rename(source, destination);
    } catch (err) {
      if (err.code === "ENOENT") {
        // Name the dataset instead of surfacing a bare filesystem error.
        throw new Error(
          `Dataset ${dataset.name} not found at ${source} after checkout`,
          { cause: err }
        );
      }
      throw err;
    }
  }
};
118+
119+ ( async ( ) => {
120+ console . log ( "Reading config file from" , configPath ) ;
121+ const config = JSON . parse ( await fs . readFile ( configPath , "utf-8" ) ) ;
122+ console . log ( "Deleting temp and datasets folders" ) ;
123+ await deleteFolderRecursive ( tempPath ) ;
124+ await deleteFolderRecursive ( datasetsPath ) ;
125+
126+ const urlDatasets = config . datasets . filter ( ( d ) => d . source ?. type === "url" ) ;
127+ const gitDatasets = config . datasets . filter ( ( d ) => ! d . source ) ;
128+
129+ for ( const dataset of urlDatasets ) {
130+ await fetchUrlDataset ( dataset ) ;
131+ }
132+ await fetchGitDatasets ( config , gitDatasets ) ;
133+
96134 console . log ( "Deleting temp folder" ) ;
97135 await deleteFolderRecursive ( tempPath ) ;
98136 console . log ( "Done" ) ;
0 commit comments