oramasearch · allevo · Oct 21, 2025 · Oct 21, 2025 · Oct 21, 2025 · Oct 21, 2025
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -43,13 +43,23 @@ tokio-util = "0.7"
 chrono = { version = "0.4" }
 
 tempfile = { version = "3", optional = true }
+pulp = "0.21.5"
 
 [dev-dependencies]
 tempfile = { version = "3" }
 criterion = { version = "0.5.1", features = ["async_tokio"] }
 rallo = { version = "0.5" }
+approx = "0.5.1"
 
 [features]
 no_thread = []
 generate_new_path = ["tempfile"]
 track_allocations = []
+
+[[bench]]
+name = "simd_metrics"
+harness = false
+
+[[bench]]
+name = "hnsw"
+harness = false
diff --git a/benches/simd_metrics.rs b/benches/simd_metrics.rs
@@ -0,0 +1,182 @@
+use criterion::{BenchmarkId, Criterion, black_box, criterion_group, criterion_main};
+use oramacore_lib::hnsw2::core::{simd_metrics::SIMDOptmized, simd_metrics_old};
+use rand::distr::{Distribution, Uniform};
+
+/// Builds two `f32` vectors of `size` elements each for use as benchmark inputs.
+///
+/// Every element is sampled from `Uniform::new(-1.0, 1.0)` using the thread-local RNG,
+/// so the contents differ from run to run.
+fn generate_vectors_f32(size: usize) -> (Vec<f32>, Vec<f32>) {
+    let dist = Uniform::new(-1.0f32, 1.0f32).unwrap();
+    let mut rng = rand::rng();
+    let mut draw = || -> Vec<f32> { (0..size).map(|_| dist.sample(&mut rng)).collect() };
+
+    // Fill `a` completely before `b`, matching the order in which samples are consumed.
+    let a = draw();
+    let b = draw();
+    (a, b)
+}
+
+/// Builds two `f64` vectors of `size` elements each for use as benchmark inputs.
+///
+/// Every element is sampled from `Uniform::new(-1.0, 1.0)` using the thread-local RNG,
+/// so the contents differ from run to run.
+fn generate_vectors_f64(size: usize) -> (Vec<f64>, Vec<f64>) {
+    let dist = Uniform::new(-1.0f64, 1.0f64).unwrap();
+    let mut rng = rand::rng();
+    let mut draw = || -> Vec<f64> { (0..size).map(|_| dist.sample(&mut rng)).collect() };
+
+    // Fill `a` completely before `b`, matching the order in which samples are consumed.
+    let a = draw();
+    let b = draw();
+    (a, b)
+}
+
+/// Benchmarks the "old" and "new" `f32` dot-product implementations side by side.
+///
+/// For each vector length a single pair of random vectors is generated and reused for
+/// both measurements. Results are `unwrap()`ed, so a failing computation panics.
+fn bench_dot_product_f32(c: &mut Criterion) {
+    const SIZES: [usize; 4] = [64, 256, 1024, 4096];
+    let mut group = c.benchmark_group("dot_product_f32");
+
+    for size in &SIZES {
+        let (lhs, rhs) = generate_vectors_f32(*size);
+
+        let old_id = BenchmarkId::new("old", size);
+        group.bench_with_input(old_id, size, |bencher, _| {
+            bencher.iter(|| {
+                let (x, y) = (black_box(&lhs), black_box(&rhs));
+                <f32 as simd_metrics_old::SIMDOptmizedOld>::dot_product(x, y).unwrap()
+            });
+        });
+
+        let new_id = BenchmarkId::new("new", size);
+        group.bench_with_input(new_id, size, |bencher, _| {
+            bencher.iter(|| {
+                let (x, y) = (black_box(&lhs), black_box(&rhs));
+                <f32 as SIMDOptmized>::dot_product(x, y).unwrap()
+            });
+        });
+    }
+
+    group.finish();
+}
+
+/// Benchmarks the "old" and "new" `f64` dot-product implementations side by side.
+///
+/// For each vector length a single pair of random vectors is generated and reused for
+/// both measurements. Results are `unwrap()`ed, so a failing computation panics.
+fn bench_dot_product_f64(c: &mut Criterion) {
+    const SIZES: [usize; 4] = [64, 256, 1024, 4096];
+    let mut group = c.benchmark_group("dot_product_f64");
+
+    for size in &SIZES {
+        let (lhs, rhs) = generate_vectors_f64(*size);
+
+        let old_id = BenchmarkId::new("old", size);
+        group.bench_with_input(old_id, size, |bencher, _| {
+            bencher.iter(|| {
+                let (x, y) = (black_box(&lhs), black_box(&rhs));
+                <f64 as simd_metrics_old::SIMDOptmizedOld>::dot_product(x, y).unwrap()
+            });
+        });
+
+        let new_id = BenchmarkId::new("new", size);
+        group.bench_with_input(new_id, size, |bencher, _| {
+            bencher.iter(|| {
+                let (x, y) = (black_box(&lhs), black_box(&rhs));
+                <f64 as SIMDOptmized>::dot_product(x, y).unwrap()
+            });
+        });
+    }
+
+    group.finish();
+}
+
+/// Benchmarks the "old" and "new" `f32` Euclidean-distance implementations side by side.
+///
+/// For each vector length a single pair of random vectors is generated and reused for
+/// both measurements. Results are `unwrap()`ed, so a failing computation panics.
+fn bench_euclidean_distance_f32(c: &mut Criterion) {
+    const SIZES: [usize; 4] = [64, 256, 1024, 4096];
+    let mut group = c.benchmark_group("euclidean_distance_f32");
+
+    for size in &SIZES {
+        let (lhs, rhs) = generate_vectors_f32(*size);
+
+        let old_id = BenchmarkId::new("old", size);
+        group.bench_with_input(old_id, size, |bencher, _| {
+            bencher.iter(|| {
+                let (x, y) = (black_box(&lhs), black_box(&rhs));
+                <f32 as simd_metrics_old::SIMDOptmizedOld>::euclidean_distance(x, y).unwrap()
+            });
+        });
+
+        let new_id = BenchmarkId::new("new", size);
+        group.bench_with_input(new_id, size, |bencher, _| {
+            bencher.iter(|| {
+                let (x, y) = (black_box(&lhs), black_box(&rhs));
+                <f32 as SIMDOptmized>::euclidean_distance(x, y).unwrap()
+            });
+        });
+    }
+
+    group.finish();
+}
+
+/// Benchmarks the "old" and "new" `f64` Euclidean-distance implementations side by side.
+///
+/// For each vector length a single pair of random vectors is generated and reused for
+/// both measurements. Results are `unwrap()`ed, so a failing computation panics.
+fn bench_euclidean_distance_f64(c: &mut Criterion) {
+    const SIZES: [usize; 4] = [64, 256, 1024, 4096];
+    let mut group = c.benchmark_group("euclidean_distance_f64");
+
+    for size in &SIZES {
+        let (lhs, rhs) = generate_vectors_f64(*size);
+
+        let old_id = BenchmarkId::new("old", size);
+        group.bench_with_input(old_id, size, |bencher, _| {
+            bencher.iter(|| {
+                let (x, y) = (black_box(&lhs), black_box(&rhs));
+                <f64 as simd_metrics_old::SIMDOptmizedOld>::euclidean_distance(x, y).unwrap()
+            });
+        });
+
+        let new_id = BenchmarkId::new("new", size);
+        group.bench_with_input(new_id, size, |bencher, _| {
+            bencher.iter(|| {
+                let (x, y) = (black_box(&lhs), black_box(&rhs));
+                <f64 as SIMDOptmized>::euclidean_distance(x, y).unwrap()
+            });
+        });
+    }
+
+    group.finish();
+}
+
+/// Benchmarks the "old" and "new" `f32` Manhattan-distance implementations side by side.
+///
+/// For each vector length a single pair of random vectors is generated and reused for
+/// both measurements. Results are `unwrap()`ed, so a failing computation panics.
+fn bench_manhattan_distance_f32(c: &mut Criterion) {
+    const SIZES: [usize; 4] = [64, 256, 1024, 4096];
+    let mut group = c.benchmark_group("manhattan_distance_f32");
+
+    for size in &SIZES {
+        let (lhs, rhs) = generate_vectors_f32(*size);
+
+        let old_id = BenchmarkId::new("old", size);
+        group.bench_with_input(old_id, size, |bencher, _| {
+            bencher.iter(|| {
+                let (x, y) = (black_box(&lhs), black_box(&rhs));
+                <f32 as simd_metrics_old::SIMDOptmizedOld>::manhattan_distance(x, y).unwrap()
+            });
+        });
+
+        let new_id = BenchmarkId::new("new", size);
+        group.bench_with_input(new_id, size, |bencher, _| {
+            bencher.iter(|| {
+                let (x, y) = (black_box(&lhs), black_box(&rhs));
+                <f32 as SIMDOptmized>::manhattan_distance(x, y).unwrap()
+            });
+        });
+    }
+
+    group.finish();
+}
+
+/// Benchmarks the "old" and "new" `f64` Manhattan-distance implementations side by side.
+///
+/// For each vector length a single pair of random vectors is generated and reused for
+/// both measurements. Results are `unwrap()`ed, so a failing computation panics.
+fn bench_manhattan_distance_f64(c: &mut Criterion) {
+    const SIZES: [usize; 4] = [64, 256, 1024, 4096];
+    let mut group = c.benchmark_group("manhattan_distance_f64");
+
+    for size in &SIZES {
+        let (lhs, rhs) = generate_vectors_f64(*size);
+
+        let old_id = BenchmarkId::new("old", size);
+        group.bench_with_input(old_id, size, |bencher, _| {
+            bencher.iter(|| {
+                let (x, y) = (black_box(&lhs), black_box(&rhs));
+                <f64 as simd_metrics_old::SIMDOptmizedOld>::manhattan_distance(x, y).unwrap()
+            });
+        });
+
+        let new_id = BenchmarkId::new("new", size);
+        group.bench_with_input(new_id, size, |bencher, _| {
+            bencher.iter(|| {
+                let (x, y) = (black_box(&lhs), black_box(&rhs));
+                <f64 as SIMDOptmized>::manhattan_distance(x, y).unwrap()
+            });
+        });
+    }
+
+    group.finish();
+}
+
+// Collect every metric benchmark (dot product, Euclidean and Manhattan distance, each
+// for f32 and f64) into a single criterion group named `benches`.
+criterion_group!(
+    benches,
+    bench_dot_product_f32,
+    bench_dot_product_f64,
+    bench_euclidean_distance_f32,
+    bench_euclidean_distance_f64,
+    bench_manhattan_distance_f32,
+    bench_manhattan_distance_f64
+);
+// Hand the `benches` group to criterion's entry-point macro; this bench target is
+// declared with `harness = false` in Cargo.toml.
+criterion_main!(benches);
diff --git a/src/bin/check_simd.rs b/src/bin/check_simd.rs
@@ -0,0 +1,6 @@
+use oramacore_lib::data_structures::hnsw::IS_SIMD_SUPPORTED;
+
+/// Prints the value of `IS_SIMD_SUPPORTED` for the current build to stdout.
+fn main() {
+    // Copy the cached flag out of the lazy static instead of holding a reference to it.
+    let is_supported: bool = *IS_SIMD_SUPPORTED;
+    println!("IS_SIMD_SUPPORTED: {is_supported}");
+}
diff --git a/src/data_structures/hnsw.rs b/src/data_structures/hnsw.rs
@@ -9,6 +9,7 @@
 use std::hash::Hash;
 use std::mem;
 use std::ops::{Deref, Index};
+use std::sync::LazyLock;
 
 use ordered_float::OrderedFloat;
 use parking_lot::{MappedRwLockReadGuard, RwLock, RwLockReadGuard};
@@ -25,6 +26,53 @@ use rayon::iter::{IndexedParallelIterator, IntoParallelIterator, ParallelIterato
 
 use serde_big_array::BigArray;
 
+/// Whether the compilation target is one that is treated as providing SIMD instructions.
+///
+/// The value is derived entirely from compile-time `cfg` information about the target;
+/// no CPU feature probing happens when the program runs. The `LazyLock` only caches the
+/// resulting constant on first access.
+///
+/// Per target architecture:
+/// - `x86_64`: `true` (SSE2 is treated as the baseline)
+/// - `aarch64`: `true` (NEON is treated as the baseline)
+/// - `x86`: `true` only when the build enables the `sse2` target feature
+/// - `arm`: `true` only when the build enables the `neon` target feature
+/// - anything else: `false` (conservative default)
+pub static IS_SIMD_SUPPORTED: LazyLock<bool> = LazyLock::new(|| {
+    cfg!(any(
+        // 64-bit x86 and ARM are accepted unconditionally.
+        target_arch = "x86_64",
+        target_arch = "aarch64",
+        // The 32-bit variants only count when the matching feature is enabled.
+        all(target_arch = "x86", target_feature = "sse2"),
+        all(target_arch = "arm", target_feature = "neon")
+    ))
+});
+
 #[derive(Clone)]
 /// Parameters for building the `Hnsw`
 pub struct Builder {
@@ -1159,6 +1207,19 @@ mod tests {
         assert_eq!(closest_point_1.value, closest_point_2.value);
     }
 
+    /// `IS_SIMD_SUPPORTED` must be readable, give the same answer on every read, and be
+    /// `true` on the architectures that are treated as always having SIMD.
+    #[test]
+    fn test_simd_supported() {
+        let supported = *IS_SIMD_SUPPORTED;
+
+        // On x86_64 and aarch64, SIMD should be supported.
+        #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
+        assert!(supported, "SIMD should be supported on x86_64 and aarch64");
+
+        // A second read must agree with the first. Unlike a `x == true || x == false`
+        // check this can actually fail, and it keeps `supported` used on every target.
+        assert_eq!(
+            supported, *IS_SIMD_SUPPORTED,
+            "IS_SIMD_SUPPORTED should be stable across reads"
+        );
+    }
+
     #[test]
     #[allow(clippy::float_cmp, clippy::approx_constant)]
     fn incremental_insert() {

diff --git a/src/hnsw2/core/mod.rs b/src/hnsw2/core/mod.rs
@@ -6,3 +6,4 @@ pub mod metrics;
 pub mod neighbor;
 pub mod node;
 pub mod simd_metrics;
+pub mod simd_metrics_old;