Skip to content

feat: add embeddings service for codebase analysis and similarity search #26

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 24 commits into from
Jun 16, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
c712672
feat: add embeddings service for codebase analysis and similarity search
ChiragAgg5k Jun 6, 2025
5bfe7cf
chore: export embeddings
ChiragAgg5k Jun 9, 2025
86beb9a
feat: lazy load transformers pipeline in embeddings service
ChiragAgg5k Jun 9, 2025
00287cf
feat: integrate OpenAI API for embeddings generation and update envir…
ChiragAgg5k Jun 9, 2025
28687bd
feat: enhance embeddings service with result status and message for e…
ChiragAgg5k Jun 9, 2025
60442fc
feat: add message to findRelevantDocuments for better error handling …
ChiragAgg5k Jun 9, 2025
e73ec0d
feat: switch to local embedding model using Hugging Face Transformers…
ChiragAgg5k Jun 9, 2025
9fea92a
refactor: remove environment configuration for remote models in embed…
ChiragAgg5k Jun 9, 2025
490c1a3
fix: update default model name in embeddings service to use sentence-…
ChiragAgg5k Jun 9, 2025
db8c54b
fix: correct model name in embeddings tests to match sentence-transfo…
ChiragAgg5k Jun 9, 2025
6e4565a
fix: update default model name in embeddings service and tests to use…
ChiragAgg5k Jun 9, 2025
91e2476
refactor: remove dtype configuration for embedding pipeline in embedd…
ChiragAgg5k Jun 9, 2025
40b19ba
feat: add performance logging to embedding generation process in embe…
ChiragAgg5k Jun 9, 2025
2bca058
refactor: remove .env and .env.example files and enhance directory ig…
ChiragAgg5k Jun 10, 2025
7d669f1
feat: implement embedding adapters for Hugging Face and OpenAI, enhan…
ChiragAgg5k Jun 10, 2025
51e7de5
feat: auto embeddings
ChiragAgg5k Jun 12, 2025
f8325ca
Merge pull request #27 from appwrite/auto-embed
ChiragAgg5k Jun 13, 2025
6c56863
chore: update tests
ChiragAgg5k Jun 13, 2025
a61e110
chore: update fn names
ChiragAgg5k Jun 13, 2025
3cdd467
chore: add updateworkdir
ChiragAgg5k Jun 13, 2025
cb57150
chore: update error messages and use cleanup
ChiragAgg5k Jun 13, 2025
a58b8c8
chore: prevent initializing again
ChiragAgg5k Jun 13, 2025
a3da873
chore: fix initialize model
ChiragAgg5k Jun 13, 2025
bfbf377
chore: update error returned
ChiragAgg5k Jun 13, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
939 changes: 933 additions & 6 deletions package-lock.json

Large diffs are not rendered by default.

5 changes: 3 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,16 @@
},
"license": "MIT",
"dependencies": {
"@huggingface/transformers": "^3.5.2",
"archiver": "^7.0.1",
"chokidar": "^4.0.3",
"eslint": "^9.27.0",
"ignore": "^7.0.4",
"mime-types": "^3.0.1",
"node-appwrite": "17.0.0",
"node-pty": "^1.0.0",
"prettier": "^3.5.3",
"ws": "^8.18.2",
"node-appwrite": "17.0.0"
"ws": "^8.18.2"
},
"devDependencies": {
"@types/archiver": "^6.0.3",
Expand Down
41 changes: 41 additions & 0 deletions src/adapters/embeddings.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
/**
 * Open-ended option bag accepted by every embedding adapter.
 *
 * Any string key is allowed; adapter-specific config interfaces extend this
 * one and add their own typed fields on top.
 */
export interface EmbeddingConfig {
// Index signature: arbitrary extra options are permitted and left untyped.
[key: string]: any;
}

/**
 * Common contract for embedding backends.
 *
 * A concrete adapter supplies the model lifecycle (`initialize`, `cleanup`),
 * the text-to-vector conversion (`generateEmbedding`) and a readable name.
 * The base class only stores the configuration and exposes the
 * `isInitializing` flag that subclasses toggle while they are starting up.
 */
export abstract class EmbeddingAdapter {
  /** Options handed to the constructor, available to subclasses. */
  protected config: EmbeddingConfig;

  /** Set by subclasses while an `initialize()` call is in progress. */
  public isInitializing = false;

  /**
   * @param config Adapter options; an empty object is used when omitted.
   */
  constructor(config: EmbeddingConfig = {}) {
    this.config = config;
  }

  /**
   * Prepare the underlying model or service for use.
   */
  abstract initialize(): Promise<void>;

  /**
   * Turn a piece of text into its embedding vector.
   * @param text The text to embed
   * @returns The embedding as a plain array of numbers
   */
  abstract generateEmbedding(text: string): Promise<number[]>;

  /**
   * Human-readable identifier for this adapter.
   */
  abstract getName(): string;

  /**
   * Whether the adapter has been initialized and can serve requests.
   */
  abstract isInitialized(): boolean;

  /**
   * Release any resources held by the adapter.
   *
   * The base implementation does nothing; adapters that hold resources
   * override it.
   */
  cleanup(): Promise<void> {
    return Promise.resolve();
  }
}
74 changes: 74 additions & 0 deletions src/adapters/embeddings/huggingface.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
import {
pipeline,
type FeatureExtractionPipeline,
} from "@huggingface/transformers";
import { EmbeddingAdapter, EmbeddingConfig } from "../embeddings";

/**
 * Options for the Hugging Face embedding adapter.
 *
 * Every field is optional; the adapter's constructor fills in defaults.
 */
export interface HuggingFaceConfig extends EmbeddingConfig {
// Model identifier passed to the feature-extraction pipeline.
// The adapter falls back to "jinaai/jina-embeddings-v2-base-code".
modelName?: string;
// Pooling option forwarded to the pipeline call; the adapter falls back to "mean".
pooling?: "mean" | "none" | "cls" | undefined;
// Normalization option forwarded to the pipeline call; treated as true unless explicitly false.
normalize?: boolean;
}

/**
 * Embedding adapter backed by a `@huggingface/transformers`
 * feature-extraction pipeline.
 *
 * The pipeline is created by `initialize()` and released by `cleanup()`;
 * `generateEmbedding()` throws until initialization has completed.
 */
export class HuggingFaceEmbeddingAdapter extends EmbeddingAdapter {
  private pipeline: FeatureExtractionPipeline | null = null;
  private modelName: string;
  private pooling: "mean" | "none" | "cls" | undefined;
  private normalize: boolean;

  /**
   * @param config Optional model name, pooling strategy and normalization flag.
   */
  constructor(config: HuggingFaceConfig = {}) {
    super(config);
    this.modelName = config.modelName || "jinaai/jina-embeddings-v2-base-code";
    this.pooling = config.pooling || "mean";
    this.normalize = config.normalize !== false; // default to true
  }

  /**
   * Create the feature-extraction pipeline for the configured model.
   *
   * Returns immediately when the pipeline already exists or another call is
   * currently creating it. `isInitializing` is only touched by the call that
   * actually performs the work and is always cleared afterwards, so a failed
   * attempt can be retried and a concurrent call cannot clear the flag early.
   *
   * @throws Rethrows whatever the pipeline factory rejects with.
   */
  async initialize(): Promise<void> {
    if (this.pipeline || this.isInitializing) {
      return;
    }

    this.isInitializing = true;
    console.log(`[HuggingFace] Initializing model: ${this.modelName}...`);
    try {
      this.pipeline = await pipeline("feature-extraction", this.modelName);
      console.log("[HuggingFace] Model initialized successfully");
    } catch (error) {
      console.error(`[HuggingFace] Error initializing model: ${error}`);
      throw error;
    } finally {
      // Reset on success and on failure alike; otherwise a failed load would
      // leave the guard above permanently closed.
      this.isInitializing = false;
    }
  }

  /**
   * Embed `text` with the configured pooling and normalization settings.
   *
   * @param text The text to embed
   * @returns The pipeline output data copied into a plain array
   * @throws Error when `initialize()` has not completed, or rethrows the
   *   pipeline's own failure.
   */
  async generateEmbedding(text: string): Promise<number[]> {
    if (!this.pipeline) {
      throw new Error(
        "HuggingFace adapter not initialized. Call initialize() first.",
      );
    }

    try {
      const output = await this.pipeline(text, {
        pooling: this.pooling,
        normalize: this.normalize,
      });

      // Convert tensor to array if needed
      return Array.from(output.data);
    } catch (error) {
      console.error(`[HuggingFace] Error generating embedding: ${error}`);
      throw error;
    }
  }

  /** Adapter label including the model name. */
  getName(): string {
    return `HuggingFace (${this.modelName})`;
  }

  /** True once the pipeline has been created. */
  isInitialized(): boolean {
    return this.pipeline !== null;
  }

  /**
   * Dispose the pipeline (if any) and mark the adapter as uninitialized.
   *
   * The reference is cleared before disposal so the adapter never hands out a
   * pipeline that is being torn down. Disposal errors are logged rather than
   * thrown, so this method still never rejects.
   */
  async cleanup(): Promise<void> {
    const active = this.pipeline;
    this.pipeline = null;
    if (active) {
      try {
        await active.dispose();
      } catch (error) {
        console.error(`[HuggingFace] Error disposing model: ${error}`);
      }
    }
    console.log("[HuggingFace] Adapter cleaned up");
  }
}
95 changes: 95 additions & 0 deletions src/adapters/embeddings/openai.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
import { EmbeddingAdapter, EmbeddingConfig } from "../embeddings";

/**
 * Options for the OpenAI embedding adapter.
 */
export interface OpenAIConfig extends EmbeddingConfig {
// Sent as the Bearer token on every request; the adapter's constructor throws when it is empty.
apiKey: string;
// Embedding model name; the adapter falls back to "text-embedding-3-small".
model?: string;
// Base URL that "/embeddings" is appended to; the adapter falls back to "https://api.openai.com/v1".
baseURL?: string;
}

/**
 * Embedding adapter that calls the `/embeddings` endpoint of an OpenAI-style
 * HTTP API using `fetch`.
 */
export class OpenAIEmbeddingAdapter extends EmbeddingAdapter {
  private apiKey: string;
  private model: string;
  private baseURL: string;
  private initialized: boolean = false;

  /**
   * @param config API key (required), plus optional model and base URL.
   * @throws Error when `config.apiKey` is empty.
   */
  constructor(config: OpenAIConfig) {
    super(config);
    if (!config.apiKey) {
      throw new Error("OpenAI API key is required");
    }
    this.apiKey = config.apiKey;
    this.model = config.model || "text-embedding-3-small";
    // Drop trailing slashes so the request URL never contains "//embeddings".
    this.baseURL = (config.baseURL || "https://api.openai.com/v1").replace(
      /\/+$/,
      "",
    );
  }

  /**
   * Mark the adapter as ready. No network call is made here; the only check
   * is a warning when the key does not look like an `sk-` key.
   */
  async initialize(): Promise<void> {
    console.log(`[OpenAI] Initializing with model: ${this.model}...`);
    this.isInitializing = true;
    if (!this.apiKey.startsWith("sk-")) {
      console.warn(
        "[OpenAI] API key doesn't start with 'sk-', this might cause issues",
      );
    }
    this.initialized = true;
    this.isInitializing = false;
    console.log("[OpenAI] Adapter initialized successfully");
  }

  /**
   * Request an embedding for `text` from the configured endpoint.
   *
   * @param text The text to embed
   * @returns The first embedding vector in the response
   * @throws Error when the adapter is not initialized, the HTTP status is not
   *   OK, or the response does not contain a numeric embedding array.
   */
  async generateEmbedding(text: string): Promise<number[]> {
    if (!this.initialized) {
      throw new Error(
        "OpenAI adapter not initialized. Call initialize() first.",
      );
    }

    if (this.isInitializing) {
      throw new Error(
        "OpenAI adapter is initializing. Please wait a moment and try again.",
      );
    }

    try {
      const response = await fetch(`${this.baseURL}/embeddings`, {
        method: "POST",
        headers: {
          Authorization: `Bearer ${this.apiKey}`,
          "Content-Type": "application/json",
        },
        body: JSON.stringify({
          input: text,
          model: this.model,
        }),
      });

      if (!response.ok) {
        throw new Error(
          `OpenAI API error: ${response.status} ${response.statusText}`,
        );
      }

      // The body is untrusted JSON: treat it as unknown and verify the shape
      // before handing it back as number[].
      const payload: unknown = await response.json();
      const embedding = (
        payload as { data?: Array<{ embedding?: unknown }> } | null
      )?.data?.[0]?.embedding;

      if (
        !Array.isArray(embedding) ||
        !embedding.every((value) => typeof value === "number")
      ) {
        throw new Error("Invalid response format from OpenAI API");
      }

      return embedding as number[];
    } catch (error) {
      console.error(`[OpenAI] Error generating embedding: ${error}`);
      throw error;
    }
  }

  /** Adapter label including the model name. */
  getName(): string {
    return `OpenAI (${this.model})`;
  }

  /** True after `initialize()` has run and before `cleanup()`. */
  isInitialized(): boolean {
    return this.initialized;
  }

  /** Mark the adapter as uninitialized; there is nothing else to release. */
  async cleanup(): Promise<void> {
    this.initialized = false;
    console.log("[OpenAI] Adapter cleaned up");
  }
}
6 changes: 6 additions & 0 deletions src/adapters/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
// Barrel file for the embedding adapters: each module's class is re-exported
// together with its config type (type-only, erased at compile time).
export { EmbeddingAdapter, type EmbeddingConfig } from "./embeddings";
export {
  HuggingFaceEmbeddingAdapter,
  type HuggingFaceConfig,
} from "./embeddings/huggingface";
export { OpenAIEmbeddingAdapter, type OpenAIConfig } from "./embeddings/openai";
3 changes: 2 additions & 1 deletion src/index.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
export { Appwrite } from "./services/appwrite";
export { Code } from "./services/code";
export { Embeddings } from "./services/embeddings";
export { Filesystem } from "./services/filesystem";
export { Git } from "./services/git";
export { System } from "./services/system";
export { Terminal } from "./services/terminal";
export { Synapse } from "./synapse";
export { Appwrite } from "./services/appwrite";
Loading