Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .codespell_words
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,5 @@ ans
foto
fpr
ist
nd
thirdparty
6 changes: 6 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,12 @@ set(CUCASCADE_WARNINGS_AS_ERRORS
CACHE BOOL "" FORCE)
add_subdirectory(cucascade "${CMAKE_BINARY_DIR}/cucascade" EXCLUDE_FROM_ALL)

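# Simpatico GPU compression engine (Phase 1 integration). Its tests are not run
# from this build; run them via the standalone simpatico_codegen build.
# NOTE(review): EXCLUDE_FROM_ALL keeps every target of this subdirectory,
# including the compress_with_plan_benchmark harness, out of the default `all`
# target unless something else depends on it. Build that target explicitly (or
# drop EXCLUDE_FROM_ALL) if the benchmark is meant to be built by default.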
add_subdirectory(src/compression/simpatico_codegen
"${CMAKE_BINARY_DIR}/simpatico_codegen" EXCLUDE_FROM_ALL)

if(VCPKG_BUILD AND TARGET CUDA::cudart_static)
function(sirius_prefer_static_cudart target_name)
foreach(_prop LINK_LIBRARIES INTERFACE_LINK_LIBRARIES)
Expand Down
444 changes: 444 additions & 0 deletions docs/compression/simpatico-integration-plan.md

Large diffs are not rendered by default.

59 changes: 59 additions & 0 deletions pixi.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pixi.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ pkg-config = "*"
mold = "*"
libprotobuf = "*"
rust = "*"
cuda-nvrtc-dev = ">=12.9.86,<14"

[feature.dev-libs.dependencies]
librmm = "*"
Expand Down
2 changes: 2 additions & 0 deletions src/compression/simpatico_codegen/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
build/
build_sys/
185 changes: 185 additions & 0 deletions src/compression/simpatico_codegen/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
cmake_minimum_required(VERSION 3.24)

# Default the CUDA architecture list to the host GPU. This only seeds the
# cache: a value passed with -D, or one already cached, is left untouched.
set(CMAKE_CUDA_ARCHITECTURES
    native
    CACHE STRING "CUDA architectures (native = detect host GPU)")

# Locate the conda/pixi prefix that the rest of this file uses for the CUDA
# toolkit headers, library directory and nvcc. The environment variable seeds
# the cache entry; when it is absent, a CONDA_PREFIX supplied with -D or cached
# by an earlier configure is accepted instead of failing outright.
if(DEFINED ENV{CONDA_PREFIX})
  set(CONDA_PREFIX
      "$ENV{CONDA_PREFIX}"
      CACHE PATH "Conda/pixi prefix")
elseif(NOT CONDA_PREFIX)
  message(FATAL_ERROR "CONDA_PREFIX must be set (run inside pixi shell)")
endif()

# Prefer the nvcc shipped in that prefix. This must run BEFORE project():
# enabling the CUDA language fixes the compiler and creates the cache entry, so
# a non-FORCE cache set issued afterwards is silently ignored. The EXISTS guard
# falls back to CMake's normal compiler detection when the prefix has no nvcc.
# A -DCMAKE_CUDA_COMPILER=... given by the user still takes precedence.
if(EXISTS "${CONDA_PREFIX}/bin/nvcc")
  set(CMAKE_CUDA_COMPILER
      "${CONDA_PREFIX}/bin/nvcc"
      CACHE FILEPATH "Conda nvcc")
endif()

project(simpatico_codegen LANGUAGES C CXX CUDA)

# C++20 is required for both host and device translation units.
set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CUDA_STANDARD 20)
set(CMAKE_CUDA_STANDARD_REQUIRED ON)

# Pick the per-architecture directory name used under ${CONDA_PREFIX}/targets.
# x86_64 is the default; processors matching aarch64/arm64 use the SBSA layout.
set(CUDA_TARGET_TRIPLE "x86_64-linux")
if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|arm64")
  set(CUDA_TARGET_TRIPLE "sbsa-linux")
endif()

# Toolkit headers for the selected triple, plus the cccl subdirectory inside
# that same include directory.
string(CONCAT CUDA_TOOLKIT_INCLUDE "${CONDA_PREFIX}" "/targets/"
              "${CUDA_TARGET_TRIPLE}" "/include")
string(CONCAT CUDA_CCCL_INCLUDE "${CUDA_TOOLKIT_INCLUDE}" "/cccl")

# Directory-wide settings: everything below applies to every target declared
# later in this file (library, tests and benchmark alike).

# Header search path: project headers first, then the conda/pixi environment,
# then the CUDA toolkit headers for the selected target triple.
include_directories(
  ${CMAKE_CURRENT_SOURCE_DIR}/include
  ${CMAKE_CURRENT_SOURCE_DIR}/src/util
  "${CONDA_PREFIX}/include"
  "${CONDA_PREFIX}/include/rapids"
  "${CUDA_TOOLKIT_INCLUDE}")

# NOTE(review): presumably needed by the RMM/libcudacxx headers pulled in from
# the environment; confirm it is still required with the pinned versions.
add_compile_definitions(LIBCUDACXX_ENABLE_EXPERIMENTAL_MEMORY_RESOURCE)

# nvcc-only switches, applied to CUDA translation units exclusively.
add_compile_options(
  $<$<COMPILE_LANGUAGE:CUDA>:--expt-extended-lambda>
  $<$<COMPILE_LANGUAGE:CUDA>:--expt-relaxed-constexpr>)

# Ask the linker for new-style dynamic tags and resolve the bare library names
# used below (cudf, rmm, nvcomp, ...) against the environment's lib directory.
add_link_options(-Wl,--enable-new-dtags)
link_directories("${CONDA_PREFIX}/lib")

# The simpatico static library. Sources are attached group by group, in the
# same order in which they were originally listed.
add_library(simpatico STATIC)

# CUDA kernels: compact + operators + offsets
target_sources(
  simpatico
  PRIVATE src/encode/compact.cu
          src/bridge/offsets_cumsum.cu
          src/operators/alp_compressor.cu
          src/operators/alp_rd_compressor.cu
          src/operators/ans_compressor.cu
          src/operators/bitcomp_compressor.cu
          src/operators/cascaded_compressor.cu
          src/operators/snappy_compressor.cu
          src/operators/lz4_compressor.cu
          src/operators/deflate_compressor.cu
          src/operators/bitjoin_bitextract.cu
          src/operators/dictionary_compressor.cu
          src/operators/for_compressor.cu)

# Plain-CUDA JIT host
target_sources(
  simpatico
  PRIVATE src/jit/fused_tree.cpp
          src/jit/nvrtc_compiler.cpp
          src/jit/kernel_cache.cpp
          src/encode/jit/plain_compile.cpp
          src/encode/jit/renderer.cpp
          src/decode/jit/renderer.cpp
          src/encode/compact.cpp)

# Runtime: plan executor + bridge + API + rep glue
target_sources(
  simpatico
  PRIVATE src/plan/plan_dsl.cpp
          src/plan/plan_tree.cpp
          src/plan/fusion.cpp
          src/plan/operator_registry.cpp
          src/plan/representation_factory.cpp
          src/plan/column_copy.cpp
          src/plan/bitjoin_layout.cpp
          src/plan/compress.cpp
          src/plan/decompress.cpp
          src/rep/compact/bitpack.cpp
          src/util/stream_pool.cpp
          src/util/isoc23_compat.c
          src/bridge/fused_tree_build.cpp
          src/bridge/codegen_runtime.cpp
          src/simpatico_codegen.cpp
          src/api/compressed_table_io.cpp)

# Built as position-independent code; consumers inherit the C++20 requirement.
set_property(TARGET simpatico PROPERTY POSITION_INDEPENDENT_CODE ON)
target_compile_features(simpatico PUBLIC cxx_std_20)

# Private headers under src/, plus the toolkit and CCCL include directories.
target_include_directories(
  simpatico
  PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/src
          "${CUDA_TOOLKIT_INCLUDE}"
          "${CUDA_CCCL_INCLUDE}")

# Include directories baked into the library as quoted string macros.
# NOTE(review): presumably handed to NVRTC by the JIT sources at run time;
# confirm against src/jit. The project path is the absolute source-tree path.
target_compile_definitions(
  simpatico
  PRIVATE "CODEGEN_JIT_CUDA_INCLUDE=\"${CUDA_TOOLKIT_INCLUDE}\""
          "CODEGEN_JIT_CCCL_INCLUDE=\"${CUDA_CCCL_INCLUDE}\""
          "CODEGEN_JIT_PROJECT_INCLUDE=\"${CMAKE_CURRENT_SOURCE_DIR}/include\"")

# Linked by bare name (no imported targets are used here); PUBLIC so the
# libraries propagate to everything that links simpatico.
target_link_libraries(
  simpatico
  PUBLIC cudf
         rmm
         cudart
         nvcomp
         nvrtc
         cuda
         dl)

# Internal helper shared by add_codegen_host_test() and
# add_codegen_cuda_test(); not meant to be called directly.
#
# Creates executable <name> from the single source file <source>, adds the
# project's private src/ directory to its include path, links it against
# simpatico and the same libraries simpatico itself links, and applies the
# common linker options:
#   * --defsym=__libc_single_threaded=0 defines that symbol at link time
#     (NOTE(review): presumably a workaround for a glibc/libstdc++ mismatch in
#     the conda toolchain; confirm it is still needed);
#   * an rpath entry pointing at ${CONDA_PREFIX}/lib.
function(simpatico_codegen_add_test_executable name source)
  add_executable(${name} "${source}")
  target_include_directories(${name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/src)
  target_link_libraries(
    ${name}
    PRIVATE simpatico
            cudf
            rmm
            cudart
            nvcomp
            nvrtc
            cuda
            dl)
  target_link_options(${name} PRIVATE -Wl,--defsym=__libc_single_threaded=0
                      "-Wl,-rpath,${CONDA_PREFIX}/lib")
endfunction()

# add_codegen_host_test(<name>)
#
# Declares the host-only test executable <name>, built from tests/<name>.cpp.
# Example: add_codegen_host_test(test_plan_tree)
function(add_codegen_host_test name)
  simpatico_codegen_add_test_executable(${name} tests/${name}.cpp)
endfunction()

# add_codegen_cuda_test(<name>)
#
# Declares the CUDA test executable <name>, built from tests/<name>.cu.
# Example: add_codegen_cuda_test(test_encode_bitpack_compact)
function(add_codegen_cuda_test name)
  simpatico_codegen_add_test_executable(${name} tests/${name}.cu)
endfunction()

# Host-side test executables, one per tests/<name>.cpp.
foreach(
  codegen_host_test IN
  ITEMS test_compress_with_plan_roundtrip
        test_compressed_table_io
        test_leaf_describe
        test_plan_tree
        test_fused_tree_build
        test_jit_kernel_cache_plain
        test_jit_kernel_cache
        test_shape_parity)
  add_codegen_host_test(${codegen_host_test})
endforeach()

# The only test compiled as CUDA (tests/<name>.cu).
add_codegen_cuda_test(test_encode_bitpack_compact)

# Benchmark harness: a plain executable that is never registered with CTest.
# It is configured the same way as the test executables: private src/ headers,
# simpatico plus the libraries simpatico links, and the shared linker options.
add_executable(compress_with_plan_benchmark
               bench/compress_with_plan_benchmark.cpp)

target_link_options(
  compress_with_plan_benchmark
  PRIVATE
  -Wl,--defsym=__libc_single_threaded=0
  "-Wl,-rpath,${CONDA_PREFIX}/lib")

target_link_libraries(
  compress_with_plan_benchmark
  PRIVATE simpatico
          cudf
          rmm
          cudart
          nvcomp
          nvrtc
          cuda
          dl)

target_include_directories(
  compress_with_plan_benchmark
  PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/src)

# CTest registration.
#
# Registration is gated so that pulling this project in through
# add_subdirectory(... EXCLUDE_FROM_ALL) does not expose tests whose
# executables a default build of the parent never produces. The option
# defaults to ON for a standalone (top-level) configure and OFF otherwise;
# pass -DSIMPATICO_CODEGEN_REGISTER_TESTS=ON to register them from an
# embedding build as well. The test executables themselves are always defined.
option(SIMPATICO_CODEGEN_REGISTER_TESTS
       "Register the simpatico_codegen tests with CTest"
       ${PROJECT_IS_TOP_LEVEL})

if(SIMPATICO_CODEGEN_REGISTER_TESTS)
  enable_testing()
  add_test(NAME compress_with_plan_roundtrip
           COMMAND test_compress_with_plan_roundtrip)
  add_test(NAME compressed_table_io COMMAND test_compressed_table_io)
  add_test(NAME leaf_describe COMMAND test_leaf_describe)
  add_test(NAME plan_tree COMMAND test_plan_tree)
  add_test(NAME fused_tree_build COMMAND test_fused_tree_build)
  add_test(NAME jit_kernel_cache_plain COMMAND test_jit_kernel_cache_plain)
  add_test(NAME jit_kernel_cache COMMAND test_jit_kernel_cache)
  add_test(NAME shape_parity COMMAND test_shape_parity)
  # Keep the ctest fast: depth 3 (1+3+9 = 13 shapes) still exercises two-level
  # nesting of every op. Run the binary directly (or override the env var) for
  # the full depth-4 sweep.
  set_tests_properties(shape_parity PROPERTIES ENVIRONMENT
                                               "SIMPATICO_PARITY_DEPTH=3")
  add_test(NAME encode_bitpack_compact COMMAND test_encode_bitpack_compact)
endif()
Loading
Loading