Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 38 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 10 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -43,13 +43,23 @@ tokio-util = "0.7"
chrono = { version = "0.4" }

tempfile = { version = "3", optional = true }
pulp = "0.21.5"

[dev-dependencies]
tempfile = { version = "3" }
criterion = { version = "0.5.1", features = ["async_tokio"] }
rallo = { version = "0.5" }
approx = "0.5.1"

[features]
no_thread = []
generate_new_path = ["tempfile"]
track_allocations = []

[[bench]]
name = "simd_metrics"
harness = false

[[bench]]
name = "hnsw"
harness = false
182 changes: 182 additions & 0 deletions benches/simd_metrics.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
use criterion::{BenchmarkId, Criterion, black_box, criterion_group, criterion_main};
use oramacore_lib::hnsw2::core::{simd_metrics::SIMDOptmized, simd_metrics_old};
use rand::distr::{Distribution, Uniform};

/// Builds two `Vec<f32>` of length `size`, each element sampled from
/// `Uniform::new(-1.0, 1.0)` using the thread-local generator returned by
/// `rand::rng()`.
///
/// Panics if the `Uniform` distribution cannot be constructed.
fn generate_vectors_f32(size: usize) -> (Vec<f32>, Vec<f32>) {
    let range = Uniform::new(-1.0f32, 1.0f32).unwrap();
    let mut rng = rand::rng();

    // One closure produces a full vector; it is invoked once per output.
    let mut draw = || -> Vec<f32> {
        std::iter::repeat_with(|| range.sample(&mut rng))
            .take(size)
            .collect()
    };

    let a = draw();
    let b = draw();
    (a, b)
}

/// Builds two `Vec<f64>` of length `size`, each element sampled from
/// `Uniform::new(-1.0, 1.0)` using the thread-local generator returned by
/// `rand::rng()`.
///
/// Panics if the `Uniform` distribution cannot be constructed.
fn generate_vectors_f64(size: usize) -> (Vec<f64>, Vec<f64>) {
    let range = Uniform::new(-1.0f64, 1.0f64).unwrap();
    let mut rng = rand::rng();

    // One closure produces a full vector; it is invoked once per output.
    let mut draw = || -> Vec<f64> {
        std::iter::repeat_with(|| range.sample(&mut rng))
            .take(size)
            .collect()
    };

    let a = draw();
    let b = draw();
    (a, b)
}

/// Registers the `dot_product_f32` benchmark group.
///
/// For each vector length (64, 256, 1024, 4096) a fresh pair of vectors is
/// generated and two benchmarks are recorded: `old/<len>` calls
/// `SIMDOptmizedOld::dot_product`, `new/<len>` calls `SIMDOptmized::dot_product`.
/// Both unwrap the result, so a failing call panics the benchmark.
fn bench_dot_product_f32(c: &mut Criterion) {
    const LENGTHS: [usize; 4] = [64, 256, 1024, 4096];

    let mut group = c.benchmark_group("dot_product_f32");

    for len in &LENGTHS {
        let (lhs, rhs) = generate_vectors_f32(*len);

        let old_id = BenchmarkId::new("old", len);
        group.bench_with_input(old_id, len, |b, _| {
            b.iter(|| {
                let result = <f32 as simd_metrics_old::SIMDOptmizedOld>::dot_product(
                    black_box(&lhs),
                    black_box(&rhs),
                );
                result.unwrap()
            })
        });

        let new_id = BenchmarkId::new("new", len);
        group.bench_with_input(new_id, len, |b, _| {
            b.iter(|| {
                let result = <f32 as SIMDOptmized>::dot_product(black_box(&lhs), black_box(&rhs));
                result.unwrap()
            })
        });
    }

    group.finish();
}

/// Registers the `dot_product_f64` benchmark group.
///
/// For each vector length (64, 256, 1024, 4096) a fresh pair of vectors is
/// generated and two benchmarks are recorded: `old/<len>` calls
/// `SIMDOptmizedOld::dot_product`, `new/<len>` calls `SIMDOptmized::dot_product`.
/// Both unwrap the result, so a failing call panics the benchmark.
fn bench_dot_product_f64(c: &mut Criterion) {
    const LENGTHS: [usize; 4] = [64, 256, 1024, 4096];

    let mut group = c.benchmark_group("dot_product_f64");

    for len in &LENGTHS {
        let (lhs, rhs) = generate_vectors_f64(*len);

        let old_id = BenchmarkId::new("old", len);
        group.bench_with_input(old_id, len, |b, _| {
            b.iter(|| {
                let result = <f64 as simd_metrics_old::SIMDOptmizedOld>::dot_product(
                    black_box(&lhs),
                    black_box(&rhs),
                );
                result.unwrap()
            })
        });

        let new_id = BenchmarkId::new("new", len);
        group.bench_with_input(new_id, len, |b, _| {
            b.iter(|| {
                let result = <f64 as SIMDOptmized>::dot_product(black_box(&lhs), black_box(&rhs));
                result.unwrap()
            })
        });
    }

    group.finish();
}

/// Registers the `euclidean_distance_f32` benchmark group.
///
/// For each vector length (64, 256, 1024, 4096) a fresh pair of vectors is
/// generated and two benchmarks are recorded: `old/<len>` calls
/// `SIMDOptmizedOld::euclidean_distance`, `new/<len>` calls
/// `SIMDOptmized::euclidean_distance`. Both unwrap the result, so a failing
/// call panics the benchmark.
fn bench_euclidean_distance_f32(c: &mut Criterion) {
    const LENGTHS: [usize; 4] = [64, 256, 1024, 4096];

    let mut group = c.benchmark_group("euclidean_distance_f32");

    for len in &LENGTHS {
        let (lhs, rhs) = generate_vectors_f32(*len);

        let old_id = BenchmarkId::new("old", len);
        group.bench_with_input(old_id, len, |b, _| {
            b.iter(|| {
                let result = <f32 as simd_metrics_old::SIMDOptmizedOld>::euclidean_distance(
                    black_box(&lhs),
                    black_box(&rhs),
                );
                result.unwrap()
            })
        });

        let new_id = BenchmarkId::new("new", len);
        group.bench_with_input(new_id, len, |b, _| {
            b.iter(|| {
                let result =
                    <f32 as SIMDOptmized>::euclidean_distance(black_box(&lhs), black_box(&rhs));
                result.unwrap()
            })
        });
    }

    group.finish();
}

/// Registers the `euclidean_distance_f64` benchmark group.
///
/// For each vector length (64, 256, 1024, 4096) a fresh pair of vectors is
/// generated and two benchmarks are recorded: `old/<len>` calls
/// `SIMDOptmizedOld::euclidean_distance`, `new/<len>` calls
/// `SIMDOptmized::euclidean_distance`. Both unwrap the result, so a failing
/// call panics the benchmark.
fn bench_euclidean_distance_f64(c: &mut Criterion) {
    const LENGTHS: [usize; 4] = [64, 256, 1024, 4096];

    let mut group = c.benchmark_group("euclidean_distance_f64");

    for len in &LENGTHS {
        let (lhs, rhs) = generate_vectors_f64(*len);

        let old_id = BenchmarkId::new("old", len);
        group.bench_with_input(old_id, len, |b, _| {
            b.iter(|| {
                let result = <f64 as simd_metrics_old::SIMDOptmizedOld>::euclidean_distance(
                    black_box(&lhs),
                    black_box(&rhs),
                );
                result.unwrap()
            })
        });

        let new_id = BenchmarkId::new("new", len);
        group.bench_with_input(new_id, len, |b, _| {
            b.iter(|| {
                let result =
                    <f64 as SIMDOptmized>::euclidean_distance(black_box(&lhs), black_box(&rhs));
                result.unwrap()
            })
        });
    }

    group.finish();
}

/// Registers the `manhattan_distance_f32` benchmark group.
///
/// For each vector length (64, 256, 1024, 4096) a fresh pair of vectors is
/// generated and two benchmarks are recorded: `old/<len>` calls
/// `SIMDOptmizedOld::manhattan_distance`, `new/<len>` calls
/// `SIMDOptmized::manhattan_distance`. Both unwrap the result, so a failing
/// call panics the benchmark.
fn bench_manhattan_distance_f32(c: &mut Criterion) {
    const LENGTHS: [usize; 4] = [64, 256, 1024, 4096];

    let mut group = c.benchmark_group("manhattan_distance_f32");

    for len in &LENGTHS {
        let (lhs, rhs) = generate_vectors_f32(*len);

        let old_id = BenchmarkId::new("old", len);
        group.bench_with_input(old_id, len, |b, _| {
            b.iter(|| {
                let result = <f32 as simd_metrics_old::SIMDOptmizedOld>::manhattan_distance(
                    black_box(&lhs),
                    black_box(&rhs),
                );
                result.unwrap()
            })
        });

        let new_id = BenchmarkId::new("new", len);
        group.bench_with_input(new_id, len, |b, _| {
            b.iter(|| {
                let result =
                    <f32 as SIMDOptmized>::manhattan_distance(black_box(&lhs), black_box(&rhs));
                result.unwrap()
            })
        });
    }

    group.finish();
}

/// Registers the `manhattan_distance_f64` benchmark group.
///
/// For each vector length (64, 256, 1024, 4096) a fresh pair of vectors is
/// generated and two benchmarks are recorded: `old/<len>` calls
/// `SIMDOptmizedOld::manhattan_distance`, `new/<len>` calls
/// `SIMDOptmized::manhattan_distance`. Both unwrap the result, so a failing
/// call panics the benchmark.
fn bench_manhattan_distance_f64(c: &mut Criterion) {
    const LENGTHS: [usize; 4] = [64, 256, 1024, 4096];

    let mut group = c.benchmark_group("manhattan_distance_f64");

    for len in &LENGTHS {
        let (lhs, rhs) = generate_vectors_f64(*len);

        let old_id = BenchmarkId::new("old", len);
        group.bench_with_input(old_id, len, |b, _| {
            b.iter(|| {
                let result = <f64 as simd_metrics_old::SIMDOptmizedOld>::manhattan_distance(
                    black_box(&lhs),
                    black_box(&rhs),
                );
                result.unwrap()
            })
        });

        let new_id = BenchmarkId::new("new", len);
        group.bench_with_input(new_id, len, |b, _| {
            b.iter(|| {
                let result =
                    <f64 as SIMDOptmized>::manhattan_distance(black_box(&lhs), black_box(&rhs));
                result.unwrap()
            })
        });
    }

    group.finish();
}

criterion_group!(
benches,
bench_dot_product_f32,
bench_dot_product_f64,
bench_euclidean_distance_f32,
bench_euclidean_distance_f64,
bench_manhattan_distance_f32,
bench_manhattan_distance_f64
);
criterion_main!(benches);
6 changes: 6 additions & 0 deletions src/bin/check_simd.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
use oramacore_lib::data_structures::hnsw::IS_SIMD_SUPPORTED;

/// Prints the value of `IS_SIMD_SUPPORTED` to stdout.
fn main() {
    // Dereferencing the lazy static forces its initializer on first use.
    let is_supported: bool = *IS_SIMD_SUPPORTED;
    println!("IS_SIMD_SUPPORTED: {is_supported}");
}
61 changes: 61 additions & 0 deletions src/data_structures/hnsw.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
use std::hash::Hash;
use std::mem;
use std::ops::{Deref, Index};
use std::sync::LazyLock;

use ordered_float::OrderedFloat;
use parking_lot::{MappedRwLockReadGuard, RwLock, RwLockReadGuard};
Expand All @@ -25,6 +26,53 @@ use rayon::iter::{IndexedParallelIterator, IntoParallelIterator, ParallelIterato

use serde_big_array::BigArray;

/// Whether the build target is one on which SIMD instructions are assumed to
/// be available.
///
/// The value is derived purely from the compile-time target configuration
/// (`target_arch` / `target_feature`); no runtime CPU feature probing is
/// performed and wider extensions such as AVX/AVX2/AVX-512 are not detected.
/// The closure runs once, on first dereference, and the result is cached.
///
/// Resulting value per target:
/// - `x86_64`: `true` (SSE2 is part of the baseline)
/// - `aarch64`: `true` (NEON is part of the baseline)
/// - `x86`: `true` only when the crate is compiled with the `sse2` target feature
/// - `arm`: `true` only when the crate is compiled with the `neon` target feature
/// - anything else: `false` (conservative)
///
/// NOTE(review): because the value is a compile-time constant, a plain `const`
/// would suffice; the `LazyLock<bool>` type is kept so existing
/// `*IS_SIMD_SUPPORTED` callers keep compiling.
pub static IS_SIMD_SUPPORTED: LazyLock<bool> = LazyLock::new(|| {
    cfg!(any(
        target_arch = "x86_64",
        target_arch = "aarch64",
        all(target_arch = "x86", target_feature = "sse2"),
        all(target_arch = "arm", target_feature = "neon")
    ))
});

#[derive(Clone)]
/// Parameters for building the `Hnsw`
pub struct Builder {
Expand Down Expand Up @@ -1159,6 +1207,19 @@ mod tests {
assert_eq!(closest_point_1.value, closest_point_2.value);
}

#[test]
fn test_simd_supported() {
    // Dereferencing forces the lazy initializer to run.
    let supported = *IS_SIMD_SUPPORTED;

    // The flag must mirror the compile-time target configuration: true on
    // x86_64 and aarch64, true on x86/arm only when built with sse2/neon,
    // and false everywhere else.
    let expected = cfg!(any(
        target_arch = "x86_64",
        target_arch = "aarch64",
        all(target_arch = "x86", target_feature = "sse2"),
        all(target_arch = "arm", target_feature = "neon")
    ));
    assert_eq!(
        supported, expected,
        "IS_SIMD_SUPPORTED should match the compile-time target configuration"
    );

    // On x86_64 and aarch64, SIMD should be supported
    #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
    assert!(supported, "SIMD should be supported on x86_64 and aarch64");
}

#[test]
#[allow(clippy::float_cmp, clippy::approx_constant)]
fn incremental_insert() {
Expand Down
1 change: 1 addition & 0 deletions src/hnsw2/core/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@ pub mod metrics;
pub mod neighbor;
pub mod node;
pub mod simd_metrics;
pub mod simd_metrics_old;
Loading
Loading