1- From 453e52a763043e95b23c88176792e065377189ad Mon Sep 17 00:00:00 2001
2- From: Charles Xu <chaxu01@e125126.arm.com>
3- Date: Tue, 9 Jul 2024 08:49:27 +0200
4- Subject: [PATCH] Updated to be able to build on Linux
1+ From 617486784d5394fbb54f4d99a4860a050318a4e8 Mon Sep 17 00:00:00 2001
2+ From: Gian Marco Iodice <gianmarco.iodice@arm.com>
3+ Date: Tue, 16 Jul 2024 17:28:50 +0100
4+ Subject: [PATCH] Use KleidiAI Int4 Matmul micro-kernels in llama.cpp
55
6+ - Update CMake file to fetch the Int4 micro-kernels from the KleidiAI
7+ repository
8+ - Implement a KleidiAI backend for llama.cpp
9+
10+ Signed-off-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
611---
712 CMakeLists.txt | 48 ++++
813 ggml-alloc.c | 13 ++
@@ -15,7 +20,7 @@ Subject: [PATCH] Updated to be able to build on Linux
1520 create mode 100644 ggml-kleidiai.h
1621
1722diff --git a/CMakeLists.txt b/CMakeLists.txt
18- index 08481334..99382573 100644
23+ index 08481334..22504ad2 100644
1924--- a/CMakeLists.txt
2025+++ b/CMakeLists.txt
2126@@ -548,6 +548,53 @@ if (LLAMA_VULKAN)
@@ -26,9 +31,9 @@ index 08481334..99382573 100644
2631+
2732+ # Fetch KleidiAI sources:
2833+ include(FetchContent)
29- + set(KLEIDIAI_COMMIT_SHA "d6c3b987e445e5e1daeda94e3c2888efaa07ca50")
34+ + set(KLEIDIAI_COMMIT_SHA "187d9aacddfb678c09f0831b18f87401b1b353c3")
3035+ set(KLEIDIAI_DOWNLOAD_URL "https://gitlab.arm.com/kleidi/kleidiai/-/archive/${KLEIDIAI_COMMIT_SHA}/kleidiai-${KLEIDIAI_COMMIT_SHA}.tar.gz")
31- + set(KLEIDIAI_ARCHIVE_MD5 "8e94e73bfa00ea038fd6e3d13f59080f")
36+ + set(KLEIDIAI_ARCHIVE_MD5 "4a1eee013cb20464b534cb01212d19c9")
3237+
3338+ if (POLICY CMP0135)
3439+ cmake_policy(SET CMP0135 NEW)
@@ -113,7 +118,7 @@ index bd367c42..ed4ce0ae 100644
113118 if (this_size > max_size) {
114119diff --git a/ggml-kleidiai.cpp b/ggml-kleidiai.cpp
115120new file mode 100644
116- index 00000000..aa53086d
121+ index 00000000..6800f63e
117122--- /dev/null
118123+++ b/ggml-kleidiai.cpp
119124@@ -0,0 +1,560 @@
@@ -171,8 +176,8 @@ index 00000000..aa53086d
171176+ static bool g_kai_loaded = false;
172177+
173178+ // Basic backend memory allocator
174- + static uint8_t* extra_mem [MAX_EXTRA_BUFFERS];
175- + static int32_t extra_mem_idx = 0;
179+ + static uint8_t* g_extra_mem [MAX_EXTRA_BUFFERS];
180+ + static int32_t g_extra_mem_idx = 0;
176181+
177182+ typedef void (*kai_matmul_func_t)(const struct ggml_compute_params * params, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
178183+
@@ -546,7 +551,7 @@ index 00000000..aa53086d
546551+ free(reshaped_data);
547552+ cur->extra = cur->data;
548553+ #else
549- + extra_mem[extra_mem_idx ++] = reshaped_data;
554+ + g_extra_mem[g_extra_mem_idx ++] = reshaped_data;
550555+ cur->extra = reshaped_data;
551556+ #endif
552557+ } else {
@@ -671,10 +676,10 @@ index 00000000..aa53086d
671676+ }
672677+
673678+ GGML_CALL void ggml_kai_free_extra_mem(void) {
674- + for(int32_t i = extra_mem_idx - 1; i >= 0; i--) {
675- + free(extra_mem [i]);
679+ + for(int32_t i = g_extra_mem_idx - 1; i >= 0; i--) {
680+ + free(g_extra_mem [i]);
676681+ }
677- + extra_mem_idx = 0;
682+ + g_extra_mem_idx = 0;
678683+ }
679684+ #endif // defined(__aarch64__) && (defined(__ANDROID__) || defined(__linux__))
680685diff --git a/ggml-kleidiai.h b/ggml-kleidiai.h
@@ -840,5 +845,5 @@ index 05591aa4..735dde04 100644
840845 }
841846
842847- -
843- 2.34.1
848+ 2.25.1
844849
0 commit comments