-
Notifications
You must be signed in to change notification settings - Fork 311
Expand file tree
/
Copy pathsemantic_search.js
More file actions
150 lines (133 loc) · 4.4 KB
/
semantic_search.js
File metadata and controls
150 lines (133 loc) · 4.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
/**
* Semantic Search Example (Node.js)
*
* Demonstrates building a semantic search system with Ruvector
*/
const { VectorDB } = require('ruvector');
/**
 * Build a deterministic placeholder embedding for a piece of text.
 *
 * This is a stand-in for a real embedding model: the text is reduced to a
 * single 32-bit integer hash, and each component of the vector is the sine
 * of that hash offset by the component index. Identical text always yields
 * an identical vector.
 *
 * @param {string} text - Text to embed; hashed one UTF-16 code unit at a time.
 * @param {number} [dims=384] - Number of components in the returned vector.
 * @returns {Float32Array} Vector of length `dims` with values in [-1, 1].
 */
function mockEmbedding(text, dims = 384) {
  // Rolling hash with multiplier 31 (written as shift-and-subtract);
  // `| 0` truncates back to a signed 32-bit integer after every step.
  let seed = 0;
  for (let pos = 0; pos < text.length; pos += 1) {
    seed = ((seed << 5) - seed + text.charCodeAt(pos)) | 0;
  }

  // Component idx is sin((seed + idx) * 0.01), stored as float32.
  return new Float32Array(dims).map((_, idx) => Math.sin((seed + idx) * 0.01));
}
/**
 * Run the semantic search walkthrough end to end.
 *
 * Steps, in order:
 *   1. Construct a `VectorDB` for 384-dimension vectors using the cosine
 *      metric, stored at `./semantic_search.db`.
 *   2. Embed eight sample documents with `mockEmbedding` and insert them in
 *      one batch, keeping the text and category as metadata.
 *   3. Run three free-text queries (top 3 hits each) and print every hit
 *      with its category and a similarity computed as `1 - distance`.
 *   4. Run one more query restricted to the `technology` category.
 *
 * All progress and results are written to stdout via `console.log`.
 *
 * @returns {Promise<void>} Settles once every step has finished; a rejection
 *   from any awaited database call propagates to the caller.
 */
async function main() {
  const divider = '─'.repeat(60);

  console.log('🔍 Semantic Search Example\n');

  // 1. Setup database
  console.log('1. Setting up search index...');
  const dbOptions = {
    dimensions: 384,
    storagePath: './semantic_search.db',
    distanceMetric: 'cosine',
    hnsw: { m: 32, efConstruction: 200, efSearch: 100 },
  };
  const db = new VectorDB(dbOptions);
  console.log(' ✓ Database created\n');

  // 2. Index documents
  console.log('2. Indexing documents...');
  const documents = [
    { id: 'doc_001', text: 'The quick brown fox jumps over the lazy dog', category: 'animals' },
    { id: 'doc_002', text: 'Machine learning is a subset of artificial intelligence', category: 'technology' },
    { id: 'doc_003', text: 'Python is a popular programming language for data science', category: 'technology' },
    { id: 'doc_004', text: 'The cat sat on the mat while birds sang outside', category: 'animals' },
    { id: 'doc_005', text: 'Neural networks are inspired by biological neurons', category: 'technology' },
    { id: 'doc_006', text: 'Dogs are loyal companions and great pets', category: 'animals' },
    { id: 'doc_007', text: 'Deep learning requires large amounts of training data', category: 'technology' },
    { id: 'doc_008', text: 'Birds migrate south during winter months', category: 'animals' },
  ];

  // Each stored entry pairs the embedding with the fields echoed back in results.
  const entries = documents.map(({ id, text, category }) => ({
    id,
    vector: mockEmbedding(text),
    metadata: { text, category },
  }));
  await db.insertBatch(entries);
  console.log(` ✓ Indexed ${documents.length} documents\n`);

  // 3. Perform semantic searches (one at a time, so output stays in query order)
  const queries = [
    'artificial intelligence and neural networks',
    'pets and domestic animals',
    'programming and software development',
  ];

  for (const queryText of queries) {
    console.log(`Query: "${queryText}"`);
    console.log(divider);

    const hits = await db.search({
      vector: mockEmbedding(queryText),
      k: 3,
      includeMetadata: true,
    });

    hits.forEach(({ metadata, distance }, rank) => {
      const similarity = (1 - distance).toFixed(4);
      console.log(`${rank + 1}. ${metadata.text}`);
      console.log(` Category: ${metadata.category}, Similarity: ${similarity}`);
    });
    console.log();
  }

  // 4. Filtered semantic search
  console.log('Filtered search (category: technology)');
  console.log(divider);

  const techHits = await db.search({
    vector: mockEmbedding('computers and algorithms'),
    k: 3,
    filter: { category: 'technology' },
    includeMetadata: true,
  });

  techHits.forEach(({ metadata, distance }, rank) => {
    console.log(`${rank + 1}. ${metadata.text}`);
    console.log(` Similarity: ${(1 - distance).toFixed(4)}`);
  });
  console.log();

  console.log('✅ Semantic search example completed!');
  console.log('\n💡 In production:');
  const productionTips = [
    ' • Use a real embedding model (OpenAI, Sentence Transformers, etc.)',
    ' • Add more documents to your knowledge base',
    ' • Implement filters for category, date, author, etc.',
    ' • Add hybrid search (vector + keyword) for better results',
  ];
  for (const tip of productionTips) {
    console.log(tip);
  }
}
// Entry point: run the example. On failure, log the error and mark the
// process as failed so shells and CI see a non-zero exit status instead of a
// silent success. `process.exitCode` is used rather than `process.exit()` so
// pending output is flushed before the process ends.
main().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});