searchSimilar method
Searches for chunks similar to queryEmbedding, optionally restricted to a single topic.
Returns at most limit chunks whose cosine similarity is at least threshold, sorted by similarity (most similar first).
Implementation
/// Searches for chunks whose embedding is similar to [queryEmbedding].
///
/// When [topicId] is non-null only rows with that `topic_id` are considered;
/// otherwise every row of `local_chunks` that has an embedding is scanned.
/// The [limit] nearest rows (by cosine distance) are fetched first, and rows
/// whose similarity (`1 - distance`) is below [threshold] are then dropped,
/// so fewer than [limit] chunks may be returned.
///
/// Returns the matching chunks ordered most-similar first, or an empty list
/// when `_db` is null.
List<ContentChunkModel> searchSimilar(
  List<double> queryEmbedding, {
  String? topicId,
  int limit = 8,
  double threshold = 0.3,
}) {
  // Copy the field to a local so it promotes to non-nullable without `!`.
  final db = _db;
  if (db == null) return [];

  // Brute-force cosine distance: searches are normally scoped to one topic
  // (typically < 200 chunks). For larger datasets, consider a vector index.
  // The topic predicate is the only difference between the scoped and the
  // unscoped query, so it is spliced into a single statement.
  final topicFilter = topicId != null ? 'topic_id = ? AND ' : '';
  final sql = '''
SELECT id, topic_id, book_id, chunk_index, content,
page_number, token_count, metadata,
vector_distance_cosine(
vector_as_f32(embedding),
vector_as_f32(?)
) as distance
FROM local_chunks
WHERE ${topicFilter}embedding IS NOT NULL
ORDER BY distance ASC
LIMIT ?
''';

  // Positional parameters, in the order the placeholders appear in the SQL.
  final params = <Object?>[
    jsonEncode(queryEmbedding),
    if (topicId != null) topicId,
    limit,
  ];

  return db
      .select(sql, params)
      .where((row) {
        // Cosine distance: 0 = identical, 2 = opposite.
        // Convert to similarity: 1 - distance.
        //
        // A NULL distance is skipped instead of being cast, which would throw.
        // NOTE(review): SQLite stores NaN results as NULL, so a degenerate
        // (e.g. zero-norm) embedding can plausibly yield NULL here - confirm
        // against the vector extension in use. NULLs also sort first under
        // `ORDER BY ... ASC`, so such rows still consume LIMIT slots.
        final distance = row['distance'];
        return distance is num && (1.0 - distance.toDouble()) >= threshold;
      })
      .map(
        (row) => ContentChunkModel(
          id: row['id'] as String,
          topicId: row['topic_id'] as String,
          bookId: row['book_id'] as String,
          chunkIndex: row['chunk_index'] as int,
          content: row['content'] as String,
          pageNumber: row['page_number'] as int?,
          tokenCount: row['token_count'] as int?,
          metadata: _parseMetadata(row['metadata']),
        ),
      )
      .toList();
}