searchSimilar method

List<ContentChunkModel> searchSimilar(
  List<double> queryEmbedding, {
  String? topicId,
  int limit = 8,
  double threshold = 0.3,
})

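Searches for chunks similar to queryEmbedding, restricted to the topic given by topicId when one is provided; otherwise all chunks with a stored embedding are searched.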

Returns at most limit chunks whose cosine similarity to the query is at least threshold, sorted by similarity (most similar first).

Implementation

/// Searches stored chunks for those most similar to [queryEmbedding].
///
/// When [topicId] is non-null only chunks of that topic are considered;
/// otherwise every chunk with a stored embedding is searched. At most [limit]
/// nearest rows are fetched, and of those only chunks whose cosine similarity
/// (`1 - distance`) is at least [threshold] are kept.
///
/// Returns the matches ordered from most to least similar. Returns an empty
/// list when the database is not open or [queryEmbedding] is empty.
List<ContentChunkModel> searchSimilar(
  List<double> queryEmbedding, {
  String? topicId,
  int limit = 8,
  double threshold = 0.3,
}) {
  // Copy the field to a local so it promotes to non-nullable without `!`.
  final db = _db;
  if (db == null) return [];

  // An empty vector has nothing to compare against; skip the query entirely.
  if (queryEmbedding.isEmpty) return [];

  final queryJson = jsonEncode(queryEmbedding);

  // Brute-force cosine distance over the candidate rows. This is cheap when
  // the search is restricted to one topic (typically < 200 chunks); for
  // larger datasets, consider a vector index.
  //
  // The topic predicate is the only part that varies, so the statement is
  // built once and the parameter list is kept in the same order as the `?`
  // placeholders: query vector, optional topic id, limit.
  final topicFilter = topicId == null ? '' : 'topic_id = ? AND ';
  final sql = '''
      SELECT id, topic_id, book_id, chunk_index, content,
             page_number, token_count, metadata,
             vector_distance_cosine(
               vector_as_f32(embedding),
               vector_as_f32(?)
             ) as distance
      FROM local_chunks
      WHERE ${topicFilter}embedding IS NOT NULL
      ORDER BY distance ASC
      LIMIT ?
    ''';
  final params = <Object?>[
    queryJson,
    if (topicId != null) topicId,
    limit,
  ];

  final results = db.select(sql, params);

  return results
      .where((row) {
        final distance = row['distance'];
        // A NULL (or otherwise non-numeric) distance cannot be ranked; skip
        // the row instead of letting a failed cast abort the whole search.
        if (distance is! num) return false;
        // Cosine distance: 0 = identical, 2 = opposite.
        // Convert to similarity: 1 - distance.
        return (1.0 - distance.toDouble()) >= threshold;
      })
      .map(
        (row) => ContentChunkModel(
          id: row['id'] as String,
          topicId: row['topic_id'] as String,
          bookId: row['book_id'] as String,
          chunkIndex: row['chunk_index'] as int,
          content: row['content'] as String,
          pageNumber: row['page_number'] as int?,
          tokenCount: row['token_count'] as int?,
          metadata: _parseMetadata(row['metadata']),
        ),
      )
      .toList();
}