Fix Windows CUDA Plugin EP and CUDA CI azcopy authentication failures #3994
Workflow file for this run
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Continuous integration for the CUDA execution provider built as a plugin on Windows.
name: CUDA Plugin Windows CI

# Triggers: manual dispatch, plus pushes and pull requests that target
# `main` or any `rel-*` branch.
on:
  workflow_dispatch:
  pull_request:
    branches: [main, "rel-*"]
  push:
    branches: [main, "rel-*"]

# Pull-request runs are grouped by ref, every other event by commit SHA.
# A newer run cancels an in-progress run that shares the same group.
concurrency:
  cancel-in-progress: true
  group: "${{ github.workflow }}-${{ github.event_name == 'pull_request' && github.ref || github.sha }}"

jobs:
# Build job (an entry of the workflow's top-level `jobs:` mapping).
# Builds ONNX Runtime with the CUDA EP compiled as a plugin, then uploads the
# build tree and the CUDA bin / CUPTI directories as artifacts.
build:
  name: Windows CUDA Plugin EP Build
  runs-on:
    - "self-hosted"
    - "1ES.Pool=onnxruntime-github-vs2022-latest"
    - "JobId=windows-cuda-plugin-${{ github.run_id }}-${{ github.run_number }}-${{ github.run_attempt }}"
  steps:
    - uses: actions/checkout@v6
      with:
        fetch-depth: 0
        submodules: 'none'

    - uses: actions/setup-python@v6
      with:
        python-version: '3.14'
        architecture: x64

    - name: Locate vcvarsall and Setup Env
      uses: ./.github/actions/locate-vcvarsall-and-setup-env
      with:
        architecture: x64

    - name: Install python modules
      run: python -m pip install -r .\tools\ci_build\github\windows\python\requirements.txt
      working-directory: ${{ github.workspace }}
      shell: cmd

    # Fetch the CUDA SDK into $RUNNER_TEMP\v12.8. If AzCopy fails and a local
    # SDK install exists on the agent, copy that instead.
    - name: Download CUDA SDK v12.8
      working-directory: ${{ runner.temp }}
      run: |
        $cudaSdkDir = Join-Path $env:RUNNER_TEMP "v12.8"
        $localCudaSdkDir = "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.8"
        $nvccPath = Join-Path $cudaSdkDir "bin\nvcc.exe"
        if (Test-Path $nvccPath) {
          Write-Host "CUDA SDK already present at $cudaSdkDir"
        } else {
          azcopy.exe cp --recursive "https://lotusscus.blob.core.windows.net/models/cuda_sdk/v12.8" .
          $azcopyExitCode = $lastExitCode
          if (($azcopyExitCode -ne 0) -and (Test-Path (Join-Path $localCudaSdkDir "bin\nvcc.exe"))) {
            Write-Host "AzCopy download failed; falling back to local CUDA SDK at $localCudaSdkDir"
            New-Item -ItemType Directory -Path $cudaSdkDir -Force | Out-Null
            Copy-Item -Path (Join-Path $localCudaSdkDir "*") -Destination $cudaSdkDir -Recurse -Force -ErrorAction Stop
            $azcopyExitCode = 0
          }
          if ($azcopyExitCode -ne 0) {
            exit $azcopyExitCode
          }
        }
        # Fail here, with a clear message, rather than deep inside the build
        # when the SDK layout is not what the later steps expect.
        if (-not (Test-Path $nvccPath)) {
          Write-Error "nvcc.exe not found at $nvccPath after CUDA SDK setup"
          exit 1
        }
        dir $cudaSdkDir
      shell: pwsh

    # pwsh (not Windows PowerShell 5.1) so the GITHUB_PATH file is appended
    # as UTF-8, matching the other script steps in this job.
    - name: Add CUDA to PATH
      shell: pwsh
      run: |
        Write-Host "Adding CUDA to PATH"
        Write-Host "CUDA Path: $env:RUNNER_TEMP\v12.8\bin"
        Add-Content -Path $env:GITHUB_PATH -Value "$env:RUNNER_TEMP\v12.8\bin"
        Add-Content -Path $env:GITHUB_PATH -Value "$env:RUNNER_TEMP\v12.8\extras\CUPTI\lib64"

    # Read the temp directory from the environment instead of pasting the
    # expression into the script, so a path containing spaces stays one argument.
    - name: Set OnnxRuntimeBuildDirectory
      shell: pwsh
      run: |
        $buildDir = Join-Path $env:RUNNER_TEMP "build"
        echo "OnnxRuntimeBuildDirectory=$buildDir" >> $env:GITHUB_ENV

    - name: Build ONNX Runtime with CUDA Plugin EP
      working-directory: ${{ runner.temp }}
      run: |
        python.exe "$env:GITHUB_WORKSPACE\tools\ci_build\build.py" `
          --update --build --config Release `
          --build_dir build `
          --skip_submodule_sync `
          --parallel `
          --nvcc_threads 4 `
          --flash_nvcc_threads 4 `
          --use_binskim_compliant_compile_flags `
          --cmake_generator "Visual Studio 17 2022" `
          --build_shared_lib `
          --build_wheel `
          --use_cuda `
          --cuda_home="$env:RUNNER_TEMP\v12.8" `
          --skip_tests `
          --use_vcpkg `
          --use_vcpkg_ms_internal_asset_cache `
          --enable_cuda_profiling `
          --cmake_extra_defines onnxruntime_QUICK_BUILD=ON `
          --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=86 `
          --cmake_extra_defines onnxruntime_BUILD_CUDA_EP_AS_PLUGIN=ON
        if ($lastExitCode -ne 0) {
          exit $lastExitCode
        }

        # Clean up intermediate files before uploading artifacts
        $outputDir = Join-Path $env:RUNNER_TEMP "build\Release"
        Write-Host "Cleaning up files from $outputDir..."
        Remove-Item -Path "$outputDir\onnxruntime" -Recurse -Force -ErrorAction SilentlyContinue
        Remove-Item -Path "$outputDir\pybind11" -Recurse -Force -ErrorAction SilentlyContinue
        Remove-Item -Path "$outputDir\models" -Recurse -Force -ErrorAction SilentlyContinue
        Remove-Item -Path "$outputDir\vcpkg_installed" -Recurse -Force -ErrorAction SilentlyContinue
        Remove-Item -Path "$outputDir\_deps" -Recurse -Force -ErrorAction SilentlyContinue
        Remove-Item -Path "$outputDir\CMakeCache.txt" -Force -ErrorAction SilentlyContinue
        Remove-Item -Path "$outputDir\CMakeFiles" -Recurse -Force -ErrorAction SilentlyContinue
        # Enumerate first, then delete: Remove-Item with -Include and -Recurse
        # together is documented as possibly not removing every matching child.
        Get-ChildItem -Path $outputDir -Filter "*.obj" -File -Recurse | Remove-Item -Force
      shell: pwsh

    # Copy the SDK's bin and CUPTI directories into a separate tree that is
    # uploaded as its own artifact.
    - name: Stage CUDA runtime test dependencies
      shell: pwsh
      run: |
        $cudaStageDir = Join-Path $env:RUNNER_TEMP "cuda-test-deps\v12.8"
        $cudaBinDir = Join-Path $cudaStageDir "bin"
        $cuptiDir = Join-Path $cudaStageDir "extras\CUPTI\lib64"
        New-Item -ItemType Directory -Path $cudaBinDir -Force | Out-Null
        New-Item -ItemType Directory -Path $cuptiDir -Force | Out-Null
        Copy-Item -Path "$env:RUNNER_TEMP\v12.8\bin\*" -Destination $cudaBinDir -Recurse -Force -ErrorAction Stop
        Copy-Item -Path "$env:RUNNER_TEMP\v12.8\extras\CUPTI\lib64\*" -Destination $cuptiDir -Recurse -Force -ErrorAction Stop

    - name: Upload build artifacts
      uses: actions/upload-artifact@v6
      with:
        name: cuda-plugin-build-artifacts
        path: ${{ runner.temp }}\build

    - name: Upload CUDA runtime test dependencies
      uses: actions/upload-artifact@v6
      with:
        name: cuda-plugin-cuda-runtime
        path: ${{ runner.temp }}\cuda-test-deps
        if-no-files-found: error
  env:
    DOTNET_SKIP_FIRST_TIME_EXPERIENCE: true
    setVcvars: true
    ALLOW_RELEASED_ONNX_OPSET_ONLY: '0'
    ONNXRUNTIME_TEST_GPU_DEVICE_ID: '0'
    AZCOPY_AUTO_LOGIN_TYPE: MSI
# Test job (an entry of the workflow's top-level `jobs:` mapping).
# Downloads the artifacts published by the build job, installs the built
# wheel, and runs the CUDA plugin EP Python test on a GPU pool.
test:
  name: Windows CUDA Plugin EP Test
  needs: build
  timeout-minutes: 120
  runs-on:
    - "self-hosted"
    - "1ES.Pool=onnxruntime-github-Win2022-GPU-A10"
    - "JobId=windows-cuda-plugin-test-${{ github.run_id }}-${{ github.run_number }}-${{ github.run_attempt }}"
  steps:
    - uses: actions/checkout@v6
      with:
        fetch-depth: 0
        submodules: 'none'

    - name: Download build artifacts
      uses: actions/download-artifact@v7
      with:
        name: cuda-plugin-build-artifacts
        path: ${{ runner.temp }}\build

    - name: Download CUDA runtime test dependencies
      uses: actions/download-artifact@v7
      with:
        name: cuda-plugin-cuda-runtime
        path: ${{ runner.temp }}\cuda-test-deps

    - uses: actions/setup-python@v6
      with:
        python-version: '3.14'
        architecture: x64

    - name: Locate vcvarsall and Setup Env
      uses: ./.github/actions/locate-vcvarsall-and-setup-env
      with:
        architecture: x64

    - name: Install python modules
      run: python -m pip install -r .\tools\ci_build\github\windows\python\requirements.txt
      working-directory: ${{ github.workspace }}
      shell: cmd

    - name: Install torch for CPU only
      run: python -m pip install torch
      working-directory: ${{ github.workspace }}
      shell: cmd

    # pwsh (not Windows PowerShell 5.1) so the GITHUB_PATH file is appended
    # as UTF-8, matching the other script steps in this job.
    - name: Add CUDA to PATH
      shell: pwsh
      run: |
        Write-Host "Adding CUDA to PATH"
        Write-Host "CUDA Path: $env:RUNNER_TEMP\cuda-test-deps\v12.8\bin"
        Add-Content -Path $env:GITHUB_PATH -Value "$env:RUNNER_TEMP\cuda-test-deps\v12.8\bin"
        Add-Content -Path $env:GITHUB_PATH -Value "$env:RUNNER_TEMP\cuda-test-deps\v12.8\extras\CUPTI\lib64"

    # Read the temp directory from the environment instead of pasting the
    # expression into the script, so a path containing spaces stays one argument.
    - name: Set OnnxRuntimeBuildDirectory
      shell: pwsh
      run: |
        $buildDir = Join-Path $env:RUNNER_TEMP "build"
        echo "OnnxRuntimeBuildDirectory=$buildDir" >> $env:GITHUB_ENV

    - name: Install ONNX Runtime Wheel
      uses: ./.github/actions/install-onnxruntime-wheel
      with:
        whl-directory: ${{ runner.temp }}\build\Release\Release\dist

    # Verify the GPU is accessible before running the full test suite.
    # If the NVIDIA driver is not available, tests will fail with
    # "CUDA failure 100" and waste significant time.
    - name: Verify GPU access
      shell: pwsh
      run: nvidia-smi

    # Point the test at the plugin DLL from the downloaded build tree and
    # stop early if the DLL is missing.
    - name: Run CUDA Plugin EP Python Tests
      working-directory: ${{ github.workspace }}\onnxruntime\test\python\transformers
      shell: pwsh
      run: |
        $env:ORT_CUDA_PLUGIN_PATH = "$env:RUNNER_TEMP\build\Release\Release\onnxruntime_providers_cuda_plugin.dll"
        Write-Host "ORT_CUDA_PLUGIN_PATH=$env:ORT_CUDA_PLUGIN_PATH"
        if (-not (Test-Path $env:ORT_CUDA_PLUGIN_PATH)) {
          Write-Error "CUDA plugin EP library not found at $env:ORT_CUDA_PLUGIN_PATH"
          exit 1
        }
        python test_cuda_plugin_ep.py
        if ($lastExitCode -ne 0) {
          exit $lastExitCode
        }
  env:
    DOTNET_SKIP_FIRST_TIME_EXPERIENCE: true
    setVcvars: true
    ALLOW_RELEASED_ONNX_OPSET_ONLY: '0'
    ONNXRUNTIME_TEST_GPU_DEVICE_ID: '0'