-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathknowledge-base-module.js
More file actions
357 lines (292 loc) · 10.9 KB
/
Copy pathknowledge-base-module.js
File metadata and controls
357 lines (292 loc) · 10.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
/**
* 知识库功能模块
* 功能:文档管理、向量存储、语义检索
*/
// ================= Knowledge base database management =================
// Shared IndexedDB connection. Starts as null and is assigned by
// initKnowledgeBaseDB() once the database has been opened successfully.
let kbDB = null;
// Name of the IndexedDB database passed to indexedDB.open().
const DB_NAME = 'SMAW_KnowledgeBase';
// Schema version passed to indexedDB.open().
const DB_VERSION = 1;
/**
 * Opens the knowledge-base IndexedDB database and caches the connection in
 * the module-level `kbDB` variable.
 *
 * During an upgrade, creates the `documents` store (indexed by `uploadTime`)
 * and the `chunks` store (indexed by `documentId`) when they are missing.
 * Both stores use `id` as their key path.
 *
 * @returns {Promise<IDBDatabase>} Resolves with the opened database; rejects
 *   with the request error when the open request fails.
 */
async function initKnowledgeBaseDB() {
  // [store name, index name] pairs; each index is keyed on the same-named field.
  const storeSchema = [
    ['documents', 'uploadTime'],
    ['chunks', 'documentId'],
  ];

  return new Promise((resolve, reject) => {
    const openRequest = indexedDB.open(DB_NAME, DB_VERSION);

    openRequest.onupgradeneeded = ({ target }) => {
      const database = target.result;
      for (const [storeName, indexName] of storeSchema) {
        if (database.objectStoreNames.contains(storeName)) {
          continue;
        }
        database
          .createObjectStore(storeName, { keyPath: 'id' })
          .createIndex(indexName, indexName, { unique: false });
      }
    };

    openRequest.onsuccess = ({ target }) => {
      kbDB = target.result;
      console.log('知识库数据库初始化成功');
      resolve(kbDB);
    };

    openRequest.onerror = ({ target }) => {
      console.error('知识库数据库打开失败:', target.error);
      reject(target.error);
    };
  });
}
// ================= 文本分块 =================
function chunkText(text, chunkSize = 512, overlap = 100) {
const chunks = [];
let start = 0;
while (start < text.length) {
let end = start + chunkSize;
// 尝试在句子边界处分割
if (end < text.length) {
const lastSentenceEnd = Math.max(
text.lastIndexOf('。', end),
text.lastIndexOf('!', end),
text.lastIndexOf('?', end),
text.lastIndexOf(';', end),
text.lastIndexOf('\n', end)
);
if (lastSentenceEnd > start + chunkSize / 2) {
end = lastSentenceEnd + 1;
}
}
chunks.push(text.slice(start, end));
start = end - overlap;
}
return chunks;
}
// ================= Document processing =================
/**
 * Reads a file, stores it as a document record, then embeds and stores each
 * of its text chunks.
 *
 * The document record is written and confirmed before any embedding request
 * is made, so a failed document write rejects instead of leaving orphaned
 * chunks behind. Failures on individual chunks (embedding or storage) are
 * logged and skipped so the remaining chunks are still processed.
 *
 * @param {File} file - Object exposing `name`, `size`, `type` and an async `text()` method.
 * @param {object} embeddingConfig - Passed through to `generateEmbedding`.
 * @returns {Promise<object>} The stored document record
 *   (`id`, `name`, `size`, `type`, `content`, `uploadTime`, `chunkCount`).
 *   `chunkCount` is the number of chunks produced by chunking, including any
 *   whose embedding or storage failed.
 * @throws {Error} If the document record itself cannot be stored.
 */
async function uploadDocument(file, embeddingConfig) {
  if (!kbDB) {
    await initKnowledgeBaseDB();
  }

  // Adds one record to a store and settles once its transaction has finished.
  const addRecord = (storeName, record) =>
    new Promise((resolve, reject) => {
      const transaction = kbDB.transaction([storeName], 'readwrite');
      transaction.objectStore(storeName).add(record);
      transaction.oncomplete = () => resolve();
      // The error event bubbles up from the failed request, which carries the error.
      transaction.onerror = (event) => reject(event.target.error);
      transaction.onabort = () => reject(transaction.error ?? new Error(`写入 ${storeName} 失败`));
    });

  const content = await file.text();
  const docId = `doc_${Date.now()}_${Math.random().toString(36).slice(2, 11)}`;
  const chunks = chunkText(content, 512, 100);

  const docRecord = {
    id: docId,
    name: file.name,
    size: file.size,
    type: file.type,
    content,
    uploadTime: Date.now(),
    chunkCount: chunks.length,
  };

  await addRecord('documents', docRecord);

  // Chunks are embedded sequentially, one request at a time.
  for (const [i, chunkContent] of chunks.entries()) {
    try {
      const embedding = await generateEmbedding(chunkContent, embeddingConfig);
      await addRecord('chunks', {
        id: `${docId}_chunk_${i}`,
        documentId: docId,
        index: i,
        content: chunkContent,
        embedding,
        createdAt: Date.now(),
      });
    } catch (err) {
      console.error(`分块 ${i} 向量化失败:`, err);
      // Best effort: keep going with the remaining chunks.
    }
  }

  return docRecord;
}
// ================= Embedding API call =================
/**
 * Requests an embedding vector for `text` by POSTing `{ model, input }` as
 * JSON to `<baseUrl>/embeddings` with a Bearer authorization header.
 *
 * Trailing slashes on `baseUrl` are stripped so the request URL never
 * contains `//embeddings`.
 *
 * @param {string} text - Text to embed.
 * @param {{baseUrl: string, apiKey: string, model?: string}} config - Endpoint settings.
 * @returns {Promise<number[]>} The `embedding` array of the first entry in the response's `data` list.
 * @throws {Error} If the config is missing `apiKey` or `baseUrl`, if the
 *   response status is not OK, or if the response body does not contain an
 *   embedding array.
 */
async function generateEmbedding(text, config) {
  if (!config || !config.apiKey || !config.baseUrl) {
    throw new Error('嵌入API未配置');
  }

  const endpoint = `${String(config.baseUrl).replace(/\/+$/, '')}/embeddings`;

  const response = await fetch(endpoint, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'Authorization': `Bearer ${config.apiKey}`,
    },
    body: JSON.stringify({
      model: config.model,
      input: text,
    }),
  });

  if (!response.ok) {
    // The error body is optional; fall back to the status text when it is not JSON.
    const errorData = await response.json().catch(() => null);
    const errorMessage = errorData?.error?.message || response.statusText;
    throw new Error(`嵌入API调用失败 (${response.status}): ${errorMessage}`);
  }

  const data = await response.json();
  const embedding = data?.data?.[0]?.embedding;
  // Fail with a clear message instead of a TypeError on an unexpected payload.
  if (!Array.isArray(embedding) || embedding.length === 0) {
    throw new Error('嵌入API返回数据格式无效');
  }
  return embedding;
}
// ================= Semantic search =================
/**
 * Embeds the query, scores every stored chunk against it with cosine
 * similarity, and returns the best matches.
 *
 * @param {string} query - Search text.
 * @param {object} config - Embedding settings forwarded to `generateEmbedding`.
 * @param {number} [topK=3] - Maximum number of results to return.
 * @param {number} [threshold=0.7] - Minimum similarity a chunk must reach.
 * @returns {Promise<object[]>} Stored chunk records, each extended with a
 *   `similarity` field, sorted from most to least similar.
 */
async function searchKnowledge(query, config, topK = 3, threshold = 0.7) {
  if (!kbDB) {
    await initKnowledgeBaseDB();
  }

  const queryVector = await generateEmbedding(query, config);

  // Load every stored chunk; scoring is done in memory.
  const storedChunks = await new Promise((resolve, reject) => {
    const getAllRequest = kbDB
      .transaction(['chunks'], 'readonly')
      .objectStore('chunks')
      .getAll();
    getAllRequest.onsuccess = () => resolve(getAllRequest.result);
    getAllRequest.onerror = () => reject(getAllRequest.error);
  });

  return storedChunks
    .map((chunk) => ({
      ...chunk,
      similarity: cosineSimilarity(queryVector, chunk.embedding),
    }))
    .filter((scored) => scored.similarity >= threshold)
    .sort((left, right) => right.similarity - left.similarity)
    .slice(0, topK);
}
/**
 * Computes the cosine similarity of two equal-length numeric vectors.
 *
 * @param {number[]} vec1 - First vector.
 * @param {number[]} vec2 - Second vector.
 * @returns {number} dot(vec1, vec2) / (|vec1| * |vec2|), or 0 when either
 *   vector has zero magnitude.
 * @throws {Error} If the vectors differ in length.
 */
function cosineSimilarity(vec1, vec2) {
  if (vec1.length !== vec2.length) {
    throw new Error('向量长度不匹配');
  }

  let dot = 0;
  let squaredNorm1 = 0;
  let squaredNorm2 = 0;

  for (const [position, a] of vec1.entries()) {
    const b = vec2[position];
    dot += a * b;
    squaredNorm1 += a * a;
    squaredNorm2 += b * b;
  }

  // A zero vector has no direction; report no similarity instead of dividing by zero.
  const hasZeroVector = squaredNorm1 === 0 || squaredNorm2 === 0;
  return hasZeroVector
    ? 0
    : dot / (Math.sqrt(squaredNorm1) * Math.sqrt(squaredNorm2));
}
// ================= Document management =================
/**
 * Fetches every record from the `documents` store, opening the database
 * first when no connection is cached yet.
 *
 * @returns {Promise<object[]>} All stored document records; rejects with the
 *   request error when the read fails.
 */
async function getAllDocuments() {
  if (!kbDB) {
    await initKnowledgeBaseDB();
  }

  const getAllRequest = kbDB
    .transaction(['documents'], 'readonly')
    .objectStore('documents')
    .getAll();

  return new Promise((resolve, reject) => {
    getAllRequest.onerror = () => reject(getAllRequest.error);
    getAllRequest.onsuccess = () => resolve(getAllRequest.result);
  });
}
/**
 * Deletes a document and all chunks that reference it.
 *
 * Both deletions run inside a single read-write transaction over the
 * `documents` and `chunks` stores, so they are applied atomically and the
 * function only resolves after both have been committed.
 *
 * @param {string} docId - `id` of the document to remove.
 * @returns {Promise<void>} Resolves when the transaction completes.
 * @throws {Error|DOMException} If a request fails or the transaction aborts.
 */
async function deleteDocument(docId) {
  if (!kbDB) {
    await initKnowledgeBaseDB();
  }

  const transaction = kbDB.transaction(['documents', 'chunks'], 'readwrite');

  const completion = new Promise((resolve, reject) => {
    transaction.oncomplete = () => resolve();
    // The error event bubbles up from the failed request, which carries the error.
    transaction.onerror = (event) => reject(event.target.error);
    transaction.onabort = () => reject(transaction.error ?? new Error('删除文档失败'));
  });

  transaction.objectStore('documents').delete(docId);

  // Walk the documentId index and delete every chunk belonging to this document.
  const cursorRequest = transaction
    .objectStore('chunks')
    .index('documentId')
    .openCursor(IDBKeyRange.only(docId));

  cursorRequest.onsuccess = (event) => {
    const cursor = event.target.result;
    if (cursor) {
      cursor.delete();
      cursor.continue();
    }
  };

  await completion;
}
/**
 * Removes every record from both the `documents` and `chunks` stores within
 * one read-write transaction.
 *
 * @returns {Promise<Event>} Settles with the transaction's `complete` event;
 *   rejects with the `error` event when the transaction reports an error.
 */
async function clearKnowledgeBase() {
  if (!kbDB) {
    await initKnowledgeBaseDB();
  }

  const tx = kbDB.transaction(['documents', 'chunks'], 'readwrite');
  for (const storeName of ['documents', 'chunks']) {
    tx.objectStore(storeName).clear();
  }

  return new Promise((resolve, reject) => {
    tx.onerror = reject;
    tx.oncomplete = resolve;
  });
}
// ================= Knowledge retrieval for agents =================
/**
 * Looks up knowledge relevant to `query` on behalf of an agent and formats
 * the hits as a text block.
 *
 * Settings are read from the `smaw_config` entry in localStorage, under its
 * `knowledgeBase` key. Retrieval is skipped (empty string) when the knowledge
 * base is disabled, lacks `baseUrl`/`apiKey`, or the agent's entry in
 * `agentKnowledge` has `enabled` set to a falsy value. Agent-level `topK` and
 * `threshold` take precedence over the global ones. A configured threshold of
 * 0 is honoured; only a missing or empty value falls back to 0.7.
 *
 * This function never throws: any failure is logged and yields ''.
 *
 * @param {string} query - Text to search for.
 * @param {string} agentId - Key into `knowledgeBase.agentKnowledge`.
 * @returns {Promise<string>} Formatted knowledge snippets, or '' when there is
 *   nothing to add.
 */
async function retrieveKnowledgeForAgent(query, agentId) {
  try {
    const config = JSON.parse(localStorage.getItem('smaw_config') || '{}');
    const kbConfig = config?.knowledgeBase || {};

    if (!kbConfig.enabled || !kbConfig.baseUrl || !kbConfig.apiKey) {
      console.log('知识库未启用或配置不完整,跳过检索');
      return '';
    }

    // An agent with no entry inherits the global settings; an entry can opt out.
    const agentKnowledgeConfig = kbConfig.agentKnowledge?.[agentId];
    if (agentKnowledgeConfig && !agentKnowledgeConfig.enabled) {
      console.log(`智能体 ${agentId} 未启用知识库检索`);
      return '';
    }

    const embeddingConfig = {
      baseUrl: kbConfig.baseUrl,
      apiKey: kbConfig.apiKey,
      model: kbConfig.model || 'text-embedding-ada-002',
    };

    // First value that was actually configured (0 counts, '' and null do not).
    const firstConfigured = (...candidates) =>
      candidates.find((value) => value != null && value !== '');

    // `||` is intentional for topK: a topK of 0 would return nothing.
    const topK = agentKnowledgeConfig?.topK || kbConfig.topK || 3;
    const threshold =
      firstConfigured(agentKnowledgeConfig?.threshold, kbConfig.threshold) ?? 0.7;

    const results = await searchKnowledge(query, embeddingConfig, topK, threshold);

    if (results.length === 0) {
      console.log('未找到相关知识');
      return '';
    }

    const knowledgeText = results
      .map(
        (result, index) =>
          `\n\n[知识片段 ${index + 1},相似度: ${(result.similarity * 100).toFixed(1)}%]\n${result.content}`
      )
      .join('');

    console.log(`检索到 ${results.length} 条相关知识`);
    return knowledgeText;
  } catch (error) {
    console.error('知识库检索失败:', error);
    return '';
  }
}
// ================= Module exports =================
// Public API of this module, published on `window` as a single namespace object.
// `init` is an alias for initKnowledgeBaseDB.
window.KnowledgeBaseModule = {
init: initKnowledgeBaseDB,
uploadDocument,
searchKnowledge,
getAllDocuments,
deleteDocument,
clearKnowledgeBase,
retrieveKnowledgeForAgent
};
// Also expose retrieveKnowledgeForAgent directly on the global scope so it can be called from index.html.
window.retrieveKnowledgeForAgent = retrieveKnowledgeForAgent;