Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 8 additions & 8 deletions src/acc/acc_projector.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,11 @@ class AccProjector
#endif
size_t pitch2D;
#else
#ifndef ALTCPU
XFLOAT *mdlReal, *mdlImag;
#else
#ifdef ALTCPU
std::complex<XFLOAT> *mdlComplex;
int externalFree;
#else
XFLOAT *mdlComplex;
#endif
#endif // PROJECTOR_NO_TEXTURES

Expand All @@ -69,12 +69,9 @@ class AccProjector
mdlReal = 0;
mdlImag = 0;
pitch2D = 0;
#else
#ifndef ALTCPU
mdlReal = 0;
mdlImag = 0;
#else
mdlComplex = 0;
#ifdef ALTCPU
externalFree = 0;
#endif
#endif
Expand All @@ -88,10 +85,13 @@ class AccProjector
int inity, int initz,
int maxr, XFLOAT paddingFactor);

#if defined(_CUDA_ENABLED) || defined(_HIP_ENABLED)
void initMdl(XFLOAT *real, XFLOAT *imag);
void initMdl(Complex *data);
#endif
#ifdef ALTCPU
void initMdl(std::complex<XFLOAT> *data);
#else
void initMdl(Complex *data);
#endif

void clear();
Expand Down
89 changes: 39 additions & 50 deletions src/acc/acc_projector_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -184,15 +184,15 @@ bool AccProjector::setMdlDim(
DEBUG_HANDLE_ERROR(hipMalloc( (void**) &mdlImag, mdlXYZ * sizeof(XFLOAT)));
#elif _SYCL_ENABLED
devAcc = dev;
mdlReal = (XFLOAT*)devAcc->syclMalloc(mdlXYZ * sizeof(XFLOAT), syclMallocType::device, "mdlReal");
mdlImag = (XFLOAT*)devAcc->syclMalloc(mdlXYZ * sizeof(XFLOAT), syclMallocType::device, "mdlImag");
mdlComplex = (XFLOAT*)devAcc->syclMalloc(2 * mdlXYZ * sizeof(XFLOAT), syclMallocType::device, "mdlComplex");
#else
mdlComplex = NULL;
#endif
#endif
return true;
}

#if defined(_CUDA_ENABLED) || defined(_HIP_ENABLED)
void AccProjector::initMdl(XFLOAT *real, XFLOAT *imag)
{
#if defined DEBUG_CUDA || defined DEBUG_HIP
Expand All @@ -201,18 +201,12 @@ void AccProjector::initMdl(XFLOAT *real, XFLOAT *imag)
printf("DEBUG_ERROR: Model dimensions must be set with setMdlDim before call to setMdlData.");
CRITICAL(ERR_MDLDIM);
}
#if defined _CUDA_ENABLED || defined _HIP_ENABLED || defined _SYCL_ENABLED
#if defined _CUDA_ENABLED || defined _HIP_ENABLED
if (mdlReal == NULL)
{
printf("DEBUG_ERROR: initMdl called before call to setMdlData.");
CRITICAL(ERR_MDLSET);
}
#else
if (mdlComplex == NULL)
{
printf("DEBUG_ERROR: initMdl called before call to setMdlData.");
CRITICAL(ERR_MDLSET);
}
#endif
#endif

Expand Down Expand Up @@ -267,46 +261,25 @@ void AccProjector::initMdl(XFLOAT *real, XFLOAT *imag)
#elif _HIP_ENABLED
DEBUG_HANDLE_ERROR(hipMemcpy( mdlReal, real, mdlXYZ * sizeof(XFLOAT), hipMemcpyHostToDevice));
DEBUG_HANDLE_ERROR(hipMemcpy( mdlImag, imag, mdlXYZ * sizeof(XFLOAT), hipMemcpyHostToDevice));
#elif _SYCL_ENABLED
devAcc->syclMemcpy(mdlReal, real, mdlXYZ * sizeof(XFLOAT));
devAcc->syclMemcpy(mdlImag, imag, mdlXYZ * sizeof(XFLOAT));
devAcc->waitAll();
#else
std::complex<XFLOAT> *pData = mdlComplex;
for(size_t i=0; i<mdlXYZ; i++) {
std::complex<XFLOAT> arrayval(*real ++, *imag ++);
pData[i] = arrayval;
}
#endif
#endif

}
#endif

#ifdef ALTCPU
void AccProjector::initMdl(std::complex<XFLOAT> *data)
{
mdlComplex = data; // No copy needed - everyone shares the complex reference arrays
externalFree = 1; // This is shared memory freed outside the projector
}
#endif

#else
void AccProjector::initMdl(Complex *data)
{
#ifdef _SYCL_ENABLED
XFLOAT *tmpReal = (XFLOAT*)devAcc->syclMalloc(mdlXYZ * sizeof(XFLOAT), syclMallocType::host);
XFLOAT *tmpImag = (XFLOAT*)devAcc->syclMalloc(mdlXYZ * sizeof(XFLOAT), syclMallocType::host);
if (nullptr == tmpReal || nullptr == tmpImag)
{
std::string str = "syclMalloc HOST error of size " + std::to_string(mdlXYZ * sizeof(XFLOAT)) + ".\n";
ACC_PTR_DEBUG_FATAL(str.c_str());
CRITICAL(RAMERR);
}
#else
#if defined(_CUDA_ENABLED) || defined(_HIP_ENABLED)
XFLOAT *tmpReal;
XFLOAT *tmpImag;
if (posix_memalign((void **)&tmpReal, MEM_ALIGN, mdlXYZ * sizeof(XFLOAT))) CRITICAL(RAMERR);
if (posix_memalign((void **)&tmpImag, MEM_ALIGN, mdlXYZ * sizeof(XFLOAT))) CRITICAL(RAMERR);
#endif

for (size_t i = 0; i < mdlXYZ; i ++)
{
Expand All @@ -316,18 +289,32 @@ void AccProjector::initMdl(Complex *data)

initMdl(tmpReal, tmpImag);

#ifdef _SYCL_ENABLED
devAcc->syclFree(tmpReal);
devAcc->syclFree(tmpImag);
#else
free(tmpReal);
free(tmpImag);
#elif _SYCL_ENABLED
XFLOAT *tmpComplex = (XFLOAT*)devAcc->syclMalloc(2 * mdlXYZ * sizeof(XFLOAT), syclMallocType::host);
if (nullptr == tmpComplex)
{
std::string str = "syclMalloc HOST error of size " + std::to_string(2*mdlXYZ * sizeof(XFLOAT)) + ".\n";
ACC_PTR_DEBUG_FATAL(str.c_str());
CRITICAL(RAMERR);
}

for (size_t i = 0; i < mdlXYZ; i++)
{
tmpComplex[2*i ] = (XFLOAT) data[i].real;
tmpComplex[2*i+1] = (XFLOAT) data[i].imag;
}
devAcc->syclMemcpy(mdlComplex, tmpComplex, 2 * mdlXYZ * sizeof(XFLOAT));
devAcc->waitAll();
devAcc->syclFree(tmpComplex);
#endif
}
#endif

void AccProjector::clear()
{
#ifndef ALTCPU
#if defined(_CUDA_ENABLED) || defined(_HIP_ENABLED)
if (mdlReal != 0)
{
#ifndef PROJECTOR_NO_TEXTURES
Expand Down Expand Up @@ -373,15 +360,25 @@ void AccProjector::clear()
#elif _HIP_ENABLED
hipFree(mdlReal);
hipFree(mdlImag);
#elif _SYCL_ENABLED
devAcc->waitAll();
devAcc->syclFree(mdlReal);
devAcc->syclFree(mdlImag);
#endif
#endif
mdlReal = 0;
mdlImag = 0;
}
#elif _SYCL_ENABLED
if (mdlComplex != NULL)
{
devAcc->waitAll();
devAcc->syclFree(mdlComplex);
mdlComplex = NULL;
}
#else
if ((mdlComplex != NULL) && (externalFree == 0))
{
delete [] mdlComplex;
mdlComplex = NULL;
}
#endif

mdlX = 0;
mdlY = 0;
Expand All @@ -392,12 +389,4 @@ void AccProjector::clear()
mdlMaxR = 0;
padding_factor = 0;
allocaton_size = 0;

#else // ifdef CUDA or HIP
if ((mdlComplex != NULL) && (externalFree == 0))
{
delete [] mdlComplex;
mdlComplex = NULL;
}
#endif // ifdef CUDA or HIP
}
60 changes: 24 additions & 36 deletions src/acc/acc_projectorkernel_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,21 +22,39 @@ class AccProjectorKernel
maxR, maxR2, maxR2_padded;
XFLOAT padding_factor;

#if defined _CUDA_ENABLED || defined _HIP_ENABLED
PROJECTOR_PTR_TYPE mdlReal;
PROJECTOR_PTR_TYPE mdlImag;
#ifndef ALTCPU
#elif _SYCL_ENABLED
PROJECTOR_PTR_TYPE mdlComplex;
#else
std::complex<XFLOAT> *mdlComplex;
#endif

#if defined _CUDA_ENABLED || defined _HIP_ENABLED
AccProjectorKernel(
int mdlX, int mdlY, int mdlZ,
int imgX, int imgY, int imgZ,
int mdlInitY, int mdlInitZ,
XFLOAT padding_factor,
int maxR,
PROJECTOR_PTR_TYPE mdlReal, PROJECTOR_PTR_TYPE mdlImag
):
mdlX(mdlX), mdlXY(mdlX*mdlY), mdlZ(mdlZ),
imgX(imgX), imgY(imgY), imgZ(imgZ),
mdlInitY(mdlInitY), mdlInitZ(mdlInitZ),
padding_factor(padding_factor),
maxR(maxR), maxR2(maxR*maxR), maxR2_padded(maxR*maxR*padding_factor*padding_factor),
mdlReal(mdlReal), mdlImag(mdlImag)
{};
#else
AccProjectorKernel(
int mdlX, int mdlY, int mdlZ,
int imgX, int imgY, int imgZ,
int mdlInitY, int mdlInitZ,
XFLOAT padding_factor,
int maxR,
#ifndef ALTCPU
#if _SYCL_ENABLED
PROJECTOR_PTR_TYPE mdlComplex
#else
std::complex<XFLOAT> *mdlComplex
Expand All @@ -49,30 +67,7 @@ class AccProjectorKernel
maxR(maxR), maxR2(maxR*maxR), maxR2_padded(maxR*maxR*padding_factor*padding_factor),
mdlComplex(mdlComplex)
{};

AccProjectorKernel(
int mdlX, int mdlY, int mdlZ,
int imgX, int imgY, int imgZ,
int mdlInitY, int mdlInitZ,
XFLOAT padding_factor,
int maxR,
PROJECTOR_PTR_TYPE mdlReal, PROJECTOR_PTR_TYPE mdlImag
):
mdlX(mdlX), mdlXY(mdlX*mdlY), mdlZ(mdlZ),
imgX(imgX), imgY(imgY), imgZ(imgZ),
mdlInitY(mdlInitY), mdlInitZ(mdlInitZ),
padding_factor(padding_factor),
maxR(maxR), maxR2(maxR*maxR), maxR2_padded(maxR*maxR*padding_factor*padding_factor),
mdlReal(mdlReal), mdlImag(mdlImag)
{
#ifdef ALTCPU
std::complex<XFLOAT> *pData = mdlComplex;
for(size_t i=0; i<(size_t)mdlX * (size_t)mdlY * (size_t)mdlZ; i++) {
std::complex<XFLOAT> arrayval(*mdlReal ++, *mdlImag ++);
pData[i] = arrayval;
}
#endif
};

#if defined _CUDA_ENABLED || defined _HIP_ENABLED
__device__ __forceinline__
Expand Down Expand Up @@ -117,8 +112,8 @@ class AccProjectorKernel
real = no_tex3D(mdlReal, xp, yp, zp, mdlX, mdlXY, mdlInitY, mdlInitZ);
imag = - no_tex3D(mdlImag, xp, yp, zp, mdlX, mdlXY, mdlInitY, mdlInitZ);
#elif _SYCL_ENABLED
real = syclKernels::no_tex3D(mdlReal, xp, yp, zp, mdlX, mdlXY, mdlInitY, mdlInitZ);
imag = - syclKernels::no_tex3D(mdlImag, xp, yp, zp, mdlX, mdlXY, mdlInitY, mdlInitZ);
syclKernels::no_tex3D(mdlComplex, real, imag, xp, yp, zp, mdlX, mdlXY, mdlInitY, mdlInitZ);
imag = -imag;
#else
CpuKernels::complex3D(mdlComplex, real, imag, xp, yp, zp, mdlX, mdlXY, mdlInitY, mdlInitZ);
#endif
Expand Down Expand Up @@ -197,8 +192,7 @@ class AccProjectorKernel
real = no_tex3D(mdlReal, xp, yp, zp, mdlX, mdlXY, mdlInitY, mdlInitZ);
imag = no_tex3D(mdlImag, xp, yp, zp, mdlX, mdlXY, mdlInitY, mdlInitZ);
#elif _SYCL_ENABLED
real = syclKernels::no_tex3D(mdlReal, xp, yp, zp, mdlX, mdlXY, mdlInitY, mdlInitZ);
imag = syclKernels::no_tex3D(mdlImag, xp, yp, zp, mdlX, mdlXY, mdlInitY, mdlInitZ);
syclKernels::no_tex3D(mdlComplex, real, imag, xp, yp, zp, mdlX, mdlXY, mdlInitY, mdlInitZ);
#else
CpuKernels::complex3D(mdlComplex, real, imag, xp, yp, zp, mdlX, mdlXY, mdlInitY, mdlInitZ);
#endif
Expand Down Expand Up @@ -270,8 +264,7 @@ __device__ __forceinline__
real = no_tex2D(mdlReal, xp, yp, mdlX, mdlInitY);
imag = no_tex2D(mdlImag, xp, yp, mdlX, mdlInitY);
#elif _SYCL_ENABLED
real = syclKernels::no_tex2D(mdlReal, xp, yp, mdlX, mdlInitY);
imag = syclKernels::no_tex2D(mdlImag, xp, yp, mdlX, mdlInitY);
syclKernels::no_tex2D(mdlComplex, real, imag, xp, yp, mdlX, mdlInitY);
#else
CpuKernels::complex2D(mdlComplex, real, imag, xp, yp, mdlX, mdlInitY);
#endif
Expand Down Expand Up @@ -318,13 +311,8 @@ __device__ __forceinline__
#ifndef PROJECTOR_NO_TEXTURES
*p.mdlReal,
*p.mdlImag
#else
#ifndef ALTCPU
p.mdlReal,
p.mdlImag
#else
p.mdlComplex
#endif
#endif
);
return k;
Expand Down
12 changes: 6 additions & 6 deletions src/acc/sycl/sycl_kernels/diff2_gpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ void sycl_kernel_diff2_coarse(

const int xSize = projector.imgX;
const int ySize = projector.imgY;
const int xySize = xSize * ySize;
const int zSize = projector.imgZ;
const int maxR = projector.maxR;

Expand All @@ -42,7 +43,6 @@ void sycl_kernel_diff2_coarse(
if (i < eulers_per_block * 9)
s_eulers[i] = g_eulers[blockid*eulers_per_block*9 + i];


XFLOAT diff2s[eulers_per_block] {0.0f};

const XFLOAT tx {trans_x[tid % trans_num]};
Expand All @@ -51,18 +51,18 @@ void sycl_kernel_diff2_coarse(

//Step through data
const int max_block_pass_pixel {(image_size/block_sz + 1) * block_sz};
__group_barrier(nit);
for (int init_pixel = 0; init_pixel < max_block_pass_pixel; init_pixel += block_sz/prefetch_fraction)
{
__group_barrier(nit);

//Prefetch block-fraction-wise
if (init_pixel + tid/prefetch_fraction < image_size)
{
int x, y, z, xy;
if (DATA3D)
{
z = (init_pixel + tid/prefetch_fraction) / (xSize*ySize);
xy = (init_pixel + tid/prefetch_fraction) % (xSize*ySize);
z = (init_pixel + tid/prefetch_fraction) / xySize;
xy = (init_pixel + tid/prefetch_fraction) % xySize;
x = xy % xSize;
y = xy / xSize;
if (z > maxR)
Expand Down Expand Up @@ -126,8 +126,8 @@ void sycl_kernel_diff2_coarse(
int x, y, z, xy;
if (DATA3D)
{
z = (init_pixel + pix) / (xSize*ySize);
xy = (init_pixel + pix) % (xSize*ySize);
z = (init_pixel + pix) / xySize;
xy = (init_pixel + pix) % xySize;
x = xy % xSize;
y = xy / ySize;
if (z > maxR)
Expand Down
Loading
Loading