Documentation
¶
Overview ¶
Package search provides code search and retrieval capabilities.
Index ¶
- func EuclideanDistance(a, b Embedding) float64
- type Chunker
- type CodeDocument
- type CodeIndex
- type CodeSymbol
- type Document
- type DocumentChunk
- type Embedding
- type EmbeddingProvider
- type FixedSizeChunker
- type IndexConfig
- type IndexOption
- type OpenAIEmbeddingProvider
- type OpenAIOption
- type SearchOptions
- type SearchResponse
- type SearchResult
- type SemanticIndex
- func (idx *SemanticIndex) Add(ctx context.Context, doc *Document) error
- func (idx *SemanticIndex) AddBatch(ctx context.Context, docs []*Document) error
- func (idx *SemanticIndex) Clear()
- func (idx *SemanticIndex) Count() int
- func (idx *SemanticIndex) Get(id string) *Document
- func (idx *SemanticIndex) HybridSearch(ctx context.Context, query string, topK int, keywordWeight float64) ([]*SearchResult, error)
- func (idx *SemanticIndex) KeywordSearch(query string, topK int) []*SearchResult
- func (idx *SemanticIndex) Remove(id string)
- func (idx *SemanticIndex) Search(ctx context.Context, query string, topK int) ([]*SearchResult, error)
- func (idx *SemanticIndex) SearchByEmbedding(query Embedding, topK int) []*SearchResult
- func (idx *SemanticIndex) SearchByEmbeddingWithOptions(query Embedding, opts *SearchOptions) *SearchResponse
- func (idx *SemanticIndex) SearchWithOptions(ctx context.Context, query string, opts *SearchOptions) (*SearchResponse, error)
- type SentenceChunker
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func EuclideanDistance ¶
func EuclideanDistance(a, b Embedding) float64
EuclideanDistance calculates the Euclidean distance between two embedding vectors.
Types ¶
type Chunker ¶
type Chunker interface {
Chunk(ctx context.Context, doc *Document) []*DocumentChunk
}
Chunker splits documents into chunks.
type CodeDocument ¶
type CodeDocument struct {
*Document
FilePath string `json:"file_path"`
Language string `json:"language"`
Symbols []CodeSymbol `json:"symbols,omitempty"`
}
CodeDocument represents a code file for indexing.
type CodeIndex ¶
type CodeIndex struct {
*SemanticIndex
// contains filtered or unexported fields
}
CodeIndex indexes code files for semantic search.
func NewCodeIndex ¶
func NewCodeIndex(embedder EmbeddingProvider, opts ...IndexOption) *CodeIndex
NewCodeIndex creates a new code index.
func (*CodeIndex) SearchCode ¶
func (idx *CodeIndex) SearchCode(ctx context.Context, query string, topK int, language string) ([]*SearchResult, error)
SearchCode searches code with optional language filter.
type CodeSymbol ¶
type CodeSymbol struct {
Name string `json:"name"`
Kind string `json:"kind"` // "function", "class", "method", etc.
StartLine int `json:"start_line"`
EndLine int `json:"end_line"`
Signature string `json:"signature,omitempty"`
}
CodeSymbol represents a code symbol (function, class, method, etc.).
type Document ¶
type Document struct {
ID string `json:"id"`
Content string `json:"content"`
Metadata map[string]any `json:"metadata,omitempty"`
Embedding Embedding `json:"embedding,omitempty"`
Chunks []*DocumentChunk `json:"chunks,omitempty"`
}
Document represents a searchable document.
type DocumentChunk ¶
type DocumentChunk struct {
ID string `json:"id"`
DocumentID string `json:"document_id"`
Content string `json:"content"`
StartPos int `json:"start_pos"`
EndPos int `json:"end_pos"`
Embedding Embedding `json:"embedding,omitempty"`
}
DocumentChunk represents a chunk of a document.
type EmbeddingProvider ¶
type EmbeddingProvider interface {
// Embed generates an embedding for the given text.
Embed(ctx context.Context, text string) (Embedding, error)
// EmbedBatch generates embeddings for multiple texts.
EmbedBatch(ctx context.Context, texts []string) ([]Embedding, error)
// Dimension returns the embedding dimension.
Dimension() int
}
EmbeddingProvider generates embeddings for text.
type FixedSizeChunker ¶
FixedSizeChunker splits documents into chunks by character count.
func NewFixedSizeChunker ¶
func NewFixedSizeChunker(size, overlap int) *FixedSizeChunker
NewFixedSizeChunker creates a fixed size chunker.
func (*FixedSizeChunker) Chunk ¶
func (c *FixedSizeChunker) Chunk(ctx context.Context, doc *Document) []*DocumentChunk
Chunk splits a document into fixed-size chunks.
type IndexConfig ¶
IndexConfig configures the semantic index.
func DefaultIndexConfig ¶
func DefaultIndexConfig() *IndexConfig
DefaultIndexConfig returns sensible defaults.
type IndexOption ¶
type IndexOption func(*SemanticIndex)
IndexOption configures a SemanticIndex.
func WithMaxEntries ¶
func WithMaxEntries(max int) IndexOption
WithMaxEntries sets the maximum number of documents before LRU eviction. Default is 0 (unlimited). Recommended: 100000 for memory-constrained environments.
type OpenAIEmbeddingProvider ¶
type OpenAIEmbeddingProvider struct {
// contains filtered or unexported fields
}
OpenAIEmbeddingProvider generates embeddings using OpenAI's API.
func NewOpenAIEmbeddingProvider ¶
func NewOpenAIEmbeddingProvider(apiKey string, opts ...OpenAIOption) (*OpenAIEmbeddingProvider, error)
NewOpenAIEmbeddingProvider creates a new OpenAI embedding provider. Reads OPENAI_API_KEY from environment if apiKey is empty.
func (*OpenAIEmbeddingProvider) Dimension ¶
func (p *OpenAIEmbeddingProvider) Dimension() int
Dimension returns the embedding dimension.
func (*OpenAIEmbeddingProvider) EmbedBatch ¶
func (p *OpenAIEmbeddingProvider) EmbedBatch(ctx context.Context, texts []string) ([]Embedding, error)
EmbedBatch generates embeddings for multiple texts. Automatically batches requests to respect OpenAI's limits.
type OpenAIOption ¶
type OpenAIOption func(*OpenAIEmbeddingProvider)
OpenAIOption configures the OpenAI embedding provider.
func WithOpenAIBatchSize ¶
func WithOpenAIBatchSize(size int) OpenAIOption
WithOpenAIBatchSize sets the batch size for embedding requests. Default is 2048 (OpenAI max).
func WithOpenAIDimension ¶
func WithOpenAIDimension(dim int) OpenAIOption
WithOpenAIDimension sets the embedding dimension. Only applicable to text-embedding-3-* models.
func WithOpenAIHTTPClient ¶
func WithOpenAIHTTPClient(client *http.Client) OpenAIOption
WithOpenAIHTTPClient sets a custom HTTP client.
func WithOpenAIModel ¶
func WithOpenAIModel(model string) OpenAIOption
WithOpenAIModel sets the embedding model. Default is "text-embedding-3-small".
type SearchOptions ¶
type SearchOptions struct {
// Offset skips the first N results (for pagination).
Offset int
// Limit caps the number of returned results.
// If zero, uses the topK parameter as limit.
Limit int
}
SearchOptions configures search behavior.
type SearchResponse ¶
type SearchResponse struct {
Results []*SearchResult `json:"results"`
TotalCount int `json:"total_count"`
Offset int `json:"offset"`
Limit int `json:"limit"`
}
SearchResponse wraps search results with pagination info.
type SearchResult ¶
type SearchResult struct {
Document *Document `json:"document"`
Chunk *DocumentChunk `json:"chunk,omitempty"`
Score float64 `json:"score"`
Highlights []string `json:"highlights,omitempty"`
}
SearchResult represents a search result.
type SemanticIndex ¶
type SemanticIndex struct {
// contains filtered or unexported fields
}
SemanticIndex stores documents with embeddings for search.
func NewSemanticIndex ¶
func NewSemanticIndex(embedder EmbeddingProvider, chunker Chunker, opts ...IndexOption) *SemanticIndex
NewSemanticIndex creates a new semantic index.
func (*SemanticIndex) Add ¶
func (idx *SemanticIndex) Add(ctx context.Context, doc *Document) error
Add adds a document to the index.
func (*SemanticIndex) AddBatch ¶
func (idx *SemanticIndex) AddBatch(ctx context.Context, docs []*Document) error
AddBatch adds multiple documents.
func (*SemanticIndex) Count ¶
func (idx *SemanticIndex) Count() int
Count returns the number of indexed documents.
func (*SemanticIndex) Get ¶
func (idx *SemanticIndex) Get(id string) *Document
Get retrieves a document by ID and updates its LRU position.
func (*SemanticIndex) HybridSearch ¶
func (idx *SemanticIndex) HybridSearch(ctx context.Context, query string, topK int, keywordWeight float64) ([]*SearchResult, error)
HybridSearch combines semantic and keyword search.
func (*SemanticIndex) KeywordSearch ¶
func (idx *SemanticIndex) KeywordSearch(query string, topK int) []*SearchResult
KeywordSearch performs basic keyword matching.
func (*SemanticIndex) Remove ¶
func (idx *SemanticIndex) Remove(id string)
Remove removes a document from the index.
func (*SemanticIndex) Search ¶
func (idx *SemanticIndex) Search(ctx context.Context, query string, topK int) ([]*SearchResult, error)
Search performs semantic search.
func (*SemanticIndex) SearchByEmbedding ¶
func (idx *SemanticIndex) SearchByEmbedding(query Embedding, topK int) []*SearchResult
SearchByEmbedding searches by embedding vector.
func (*SemanticIndex) SearchByEmbeddingWithOptions ¶
func (idx *SemanticIndex) SearchByEmbeddingWithOptions(query Embedding, opts *SearchOptions) *SearchResponse
SearchByEmbeddingWithOptions searches by embedding vector with pagination.
func (*SemanticIndex) SearchWithOptions ¶
func (idx *SemanticIndex) SearchWithOptions(ctx context.Context, query string, opts *SearchOptions) (*SearchResponse, error)
SearchWithOptions performs semantic search with pagination.
type SentenceChunker ¶
SentenceChunker splits documents into chunks by sentences.
func NewSentenceChunker ¶
func NewSentenceChunker(maxSentences, overlap int) *SentenceChunker
NewSentenceChunker creates a sentence-based chunker.
func (*SentenceChunker) Chunk ¶
func (c *SentenceChunker) Chunk(ctx context.Context, doc *Document) []*DocumentChunk
Chunk splits a document by sentences.