Intelligent LLM response caching using semantic similarity matching.
npm install @neural-tools/semantic-cache
import { SemanticCache } from '@neural-tools/semantic-cache';
const cache = new SemanticCache({
  similarityThreshold: 0.9,
  ttl: 3600 // 1 hour
});
// Cache an LLM response
await cache.set(
  'What is the capital of France?',
  'The capital of France is Paris.'
);
// Retrieve a semantically similar query
const query = 'Tell me the French capital';
const cached = await cache.get(query);

if (cached) {
  console.log('Cache hit:', cached);
} else {
  // Cache miss: call your LLM, then store the response for next time
  const response = await llm.generate(query);
  await cache.set(query, response);
}
import OpenAI from 'openai';
import { SemanticCache } from '@neural-tools/semantic-cache';
const openai = new OpenAI();
const cache = new SemanticCache();
async function getChatResponse(message) {
  // Check the cache first
  const cached = await cache.get(message);
  if (cached) return cached;

  // Call the API if not cached
  const response = await openai.chat.completions.create({
    model: 'gpt-4',
    messages: [{ role: 'user', content: message }]
  });

  const content = response.choices[0].message.content;
  await cache.set(message, content);
  return content;
}
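Paraphrased questions can then be served from the cache instead of triggering a second API call. Whether a given rewording hits depends on the configured similarity threshold, so treat this as illustrative:

// First call misses the cache, hits the API, and stores the answer
const first = await getChatResponse('How do I parse JSON in JavaScript?');

// A reworded version of the same question can be answered from the cache,
// assuming its similarity to the stored query clears the threshold
const second = await getChatResponse('What is the way to parse a JSON string in JS?');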
Semantic caching uses vector embeddings to match similar queries:
const cache = new SemanticCache({
  // Minimum similarity score (0-1) required for a cache hit
  similarityThreshold: 0.9,

  // Cache TTL in seconds
  ttl: 3600,

  // Vector database provider
  vectorDB: 'pinecone', // or 'qdrant', 'chroma', 'local'

  // Embedding model
  embeddingModel: 'text-embedding-3-small'
});
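Conceptually, each cached query is stored alongside its embedding, and a lookup embeds the incoming query and compares it against stored entries. The sketch below illustrates that idea with an in-memory store and cosine similarity; the embed() helper, the entry layout, and the choice of cosine similarity are assumptions for illustration, not the library's internals or API:

// Illustrative only: a toy in-memory version of threshold-based semantic lookup.
// embed() stands in for whatever embedding model is configured.
const entries = []; // each entry: { embedding: number[], value: string, expiresAt: number }

function cosineSimilarity(a, b) {
  let dot = 0, normA = 0, normB = 0;
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }
  return dot / (Math.sqrt(normA) * Math.sqrt(normB));
}

async function lookup(query, threshold = 0.9) {
  const queryEmbedding = await embed(query); // hypothetical embedding call
  let best = null;
  for (const entry of entries) {
    if (entry.expiresAt < Date.now()) continue; // skip expired entries (TTL)
    const score = cosineSimilarity(queryEmbedding, entry.embedding);
    if (score >= threshold && (best === null || score > best.score)) {
      best = { score, value: entry.value };
    }
  }
  return best ? best.value : null; // null means a cache miss: call the LLM instead
}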