// File: scatter_example.c
/*
* SPDX-FileCopyrightText: Copyright (c) 1993-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <cuda_runtime_api.h> // cudaMalloc, cudaMemcpy, etc.
#include <cusparse.h> // cusparseScatter
#include <stdio.h> // printf
#include <stdlib.h> // EXIT_FAILURE
/*
 * Evaluates a CUDA runtime call exactly once. If the returned status is not
 * cudaSuccess, prints the source line, the error string and the numeric code,
 * then returns EXIT_FAILURE from the enclosing function.
 *
 * The macro expands to a brace-enclosed block, so call sites are written
 * without a trailing semicolon.
 */
#define CHECK_CUDA(func)                                                       \
{                                                                              \
    const cudaError_t cuda_rc = (func);                                        \
    if (cudaSuccess != cuda_rc) {                                              \
        printf("CUDA API failed at line %d with error: %s (%d)\n",             \
               __LINE__, cudaGetErrorString(cuda_rc), cuda_rc);                \
        return EXIT_FAILURE;                                                   \
    }                                                                          \
}
/*
 * Evaluates a cuSPARSE call exactly once. If the returned status is not
 * CUSPARSE_STATUS_SUCCESS, prints the source line, the error string and the
 * numeric code, then returns EXIT_FAILURE from the enclosing function.
 *
 * The macro expands to a brace-enclosed block, so call sites are written
 * without a trailing semicolon.
 */
#define CHECK_CUSPARSE(func)                                                   \
{                                                                              \
    const cusparseStatus_t sparse_rc = (func);                                 \
    if (CUSPARSE_STATUS_SUCCESS != sparse_rc) {                                \
        printf("CUSPARSE API failed at line %d with error: %s (%d)\n",         \
               __LINE__, cusparseGetErrorString(sparse_rc), sparse_rc);        \
        return EXIT_FAILURE;                                                   \
    }                                                                          \
}
/*
 * Scatter example: writes the values of a sparse vector X (nnz stored
 * entries, 32-bit zero-based indices) into a dense vector Y of `size`
 * elements with cusparseScatter, copies Y back to the host and compares it
 * element-by-element against the expected result.
 *
 * Returns EXIT_SUCCESS when every CUDA/cuSPARSE call succeeds; the outcome of
 * the comparison is reported on stdout only. Returns EXIT_FAILURE as soon as
 * any CUDA/cuSPARSE call fails (via CHECK_CUDA / CHECK_CUSPARSE); note that
 * such an early return does not release resources acquired so far.
 */
int main(void) {
    // Host problem definition
    int   size         = 8;   // number of elements in the dense vector Y
    int   nnz          = 4;   // number of stored entries in the sparse vector X
    int   hX_indices[] = { 0, 3, 4, 7 };
    float hX_values[]  = { 1.0f, 2.0f, 3.0f, 4.0f };
    float hY[]         = { 0.0f, 0.0f, 0.0f, 0.0f,
                           0.0f, 0.0f, 0.0f, 0.0f };
    // Expected Y after the scatter: hX_values[k] placed at hX_indices[k]
    float hY_result[]  = { 1.0f, 0.0f, 0.0f, 2.0f,
                           3.0f, 0.0f, 0.0f, 4.0f };
    //--------------------------------------------------------------------------
    // Device memory management
    int   *dX_indices;
    float *dY, *dX_values;
    CHECK_CUDA( cudaMalloc((void**) &dX_indices, nnz * sizeof(int))   )
    CHECK_CUDA( cudaMalloc((void**) &dX_values,  nnz * sizeof(float)) )
    CHECK_CUDA( cudaMalloc((void**) &dY,         size * sizeof(float)) )

    CHECK_CUDA( cudaMemcpy(dX_indices, hX_indices, nnz * sizeof(int),
                           cudaMemcpyHostToDevice) )
    CHECK_CUDA( cudaMemcpy(dX_values, hX_values, nnz * sizeof(float),
                           cudaMemcpyHostToDevice) )
    CHECK_CUDA( cudaMemcpy(dY, hY, size * sizeof(float),
                           cudaMemcpyHostToDevice) )
    //--------------------------------------------------------------------------
    // CUSPARSE APIs
    cusparseHandle_t     handle = NULL;
    cusparseSpVecDescr_t vecX;
    cusparseDnVecDescr_t vecY;
    CHECK_CUSPARSE( cusparseCreate(&handle) )
    // Create sparse vector X
    CHECK_CUSPARSE( cusparseCreateSpVec(&vecX, size, nnz, dX_indices, dX_values,
                                        CUSPARSE_INDEX_32I,
                                        CUSPARSE_INDEX_BASE_ZERO, CUDA_R_32F) )
    // Create dense vector y
    CHECK_CUSPARSE( cusparseCreateDnVec(&vecY, size, dY, CUDA_R_32F) )
    // execute Scatter
    CHECK_CUSPARSE( cusparseScatter(handle, vecX, vecY) )
    // destroy matrix/vector descriptors
    CHECK_CUSPARSE( cusparseDestroySpVec(vecX) )
    CHECK_CUSPARSE( cusparseDestroyDnVec(vecY) )
    CHECK_CUSPARSE( cusparseDestroy(handle) )
    //--------------------------------------------------------------------------
    // device result check
    // Copy back and compare the WHOLE dense vector (size elements, not nnz):
    // the scattered entries at indices 4 and 7 lie beyond the first nnz
    // elements and would otherwise never be verified.
    CHECK_CUDA( cudaMemcpy(hY, dY, size * sizeof(float),
                           cudaMemcpyDeviceToHost) )
    int correct = 1;
    for (int i = 0; i < size; i++) {
        // Exact comparison: the expected values are the same literals that
        // were uploaded, no arithmetic is applied to them on the host side.
        if (hY[i] != hY_result[i]) {
            correct = 0;
            break;
        }
    }
    if (correct)
        printf("scatter_example test PASSED\n");
    else
        printf("scatter_example test FAILED: wrong result\n");
    //--------------------------------------------------------------------------
    // device memory deallocation
    CHECK_CUDA( cudaFree(dX_indices) )
    CHECK_CUDA( cudaFree(dX_values) )
    CHECK_CUDA( cudaFree(dY) )
    return EXIT_SUCCESS;
}