diff --git a/package-lock.json b/package-lock.json
index 664cf80..538618a 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -10,7 +10,9 @@
       "license": "MIT",
       "dependencies": {
         "@huggingface/transformers": "^3.8.1",
+        "hdbscan-ts": "^1.0.17",
         "js-tiktoken": "^1.0.21",
+        "umap-js": "^1.4.0",
         "vectra": "^0.12.3"
       },
       "devDependencies": {
@@ -3072,6 +3074,14 @@
         "node": ">= 0.4"
       }
     },
+    "node_modules/hdbscan-ts": {
+      "version": "1.0.17",
+      "resolved": "https://registry.npmjs.org/hdbscan-ts/-/hdbscan-ts-1.0.17.tgz",
+      "integrity": "sha512-I44z8twblt0FxbiCNVidpQ10lAXKyFwZxRz1CKd9/53qJoDJlJamXoOJF+N1ObhY4xGr1eBUXrfYyxiX9YuIJg==",
+      "engines": {
+        "node": ">=14.0.0"
+      }
+    },
     "node_modules/htmlparser2": {
       "version": "10.1.0",
       "resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-10.1.0.tgz",
@@ -3225,6 +3235,11 @@
         "node": ">= 0.10"
       }
     },
+    "node_modules/is-any-array": {
+      "version": "0.1.1",
+      "resolved": "https://registry.npmjs.org/is-any-array/-/is-any-array-0.1.1.tgz",
+      "integrity": "sha512-qTiELO+kpTKqPgxPYbshMERlzaFu29JDnpB8s3bjg+JkxBpw29/qqSaOdKv2pCdaG92rLGeG/zG2GauX58hfoA=="
+    },
     "node_modules/is-core-module": {
       "version": "2.16.2",
       "resolved": "https://registry.npmjs.org/is-core-module/-/is-core-module-2.16.2.tgz",
@@ -3632,6 +3647,70 @@
         "node": ">=10"
       }
     },
+    "node_modules/ml-array-max": {
+      "version": "2.0.0",
+      "resolved": "https://registry.npmjs.org/ml-array-max/-/ml-array-max-2.0.0.tgz",
+      "integrity": "sha512-QQZ4kENwpWmyNb98UXRDFXrmtIXuXtt1+bSbda/2KA85+F+rrJP8hZk6QOkCQXM2Th9mUDYdq/PNByPdT9ID4A==",
+      "dependencies": {
+        "is-any-array": "^3.0.0"
+      }
+    },
+    "node_modules/ml-array-max/node_modules/is-any-array": {
+      "version": "3.0.0",
+      "resolved": "https://registry.npmjs.org/is-any-array/-/is-any-array-3.0.0.tgz",
+      "integrity": "sha512-o4h+tylWykC4BD1vaejp6gDxoM13bwW8FGuNs4yIKpj8xbBJcRxJx8vZpq0dCr7ZDEfeKjmsi/euolKhX6f/ww=="
+    },
+    "node_modules/ml-array-min": {
+      "version": "2.0.0",
+      "resolved": "https://registry.npmjs.org/ml-array-min/-/ml-array-min-2.0.0.tgz",
+      "integrity": "sha512-GRj6Ky6sW9vGL6yIjgsHmXZ9YgrdmcQ8nCxPqEGeKc6dkfYg1XDYxGFxADUjNuZyoCd5PUscWAS4N+cFaX6hFg==",
+      "dependencies": {
+        "is-any-array": "^3.0.0"
+      }
+    },
+    "node_modules/ml-array-min/node_modules/is-any-array": {
+      "version": "3.0.0",
+      "resolved": "https://registry.npmjs.org/is-any-array/-/is-any-array-3.0.0.tgz",
+      "integrity": "sha512-o4h+tylWykC4BD1vaejp6gDxoM13bwW8FGuNs4yIKpj8xbBJcRxJx8vZpq0dCr7ZDEfeKjmsi/euolKhX6f/ww=="
+    },
+    "node_modules/ml-array-rescale": {
+      "version": "2.0.0",
+      "resolved": "https://registry.npmjs.org/ml-array-rescale/-/ml-array-rescale-2.0.0.tgz",
+      "integrity": "sha512-2GGtKfSno94/kIloWGvpp/U5Q5vLvLrza+SAaGsLeo6Xj4mEbA6Gqx+oTfZFkxnd1grT2X007HfJNs3T5BsiVg==",
+      "dependencies": {
+        "is-any-array": "^3.0.0",
+        "ml-array-max": "^2.0.0",
+        "ml-array-min": "^2.0.0"
+      }
+    },
+    "node_modules/ml-array-rescale/node_modules/is-any-array": {
+      "version": "3.0.0",
+      "resolved": "https://registry.npmjs.org/is-any-array/-/is-any-array-3.0.0.tgz",
+      "integrity": "sha512-o4h+tylWykC4BD1vaejp6gDxoM13bwW8FGuNs4yIKpj8xbBJcRxJx8vZpq0dCr7ZDEfeKjmsi/euolKhX6f/ww=="
+    },
+    "node_modules/ml-levenberg-marquardt": {
+      "version": "2.1.1",
+      "resolved": "https://registry.npmjs.org/ml-levenberg-marquardt/-/ml-levenberg-marquardt-2.1.1.tgz",
+      "integrity": "sha512-2+HwUqew4qFFFYujYlQtmFUrxCB4iJAPqnUYro3P831wj70eJZcANwcRaIMGUVaH9NDKzfYuA4N5u67KExmaRA==",
+      "dependencies": {
+        "is-any-array": "^0.1.0",
+        "ml-matrix": "^6.4.1"
+      }
+    },
+    "node_modules/ml-matrix": {
+      "version": "6.12.2",
+      "resolved": "https://registry.npmjs.org/ml-matrix/-/ml-matrix-6.12.2.tgz",
+      "integrity": "sha512-GC+BnW+pBh8Auap8goAxY0senAmF0IEoc3HNVSfnfbvGw0buuDIYb9kAKMS1l+GiwJ1rfK2bzJ8IHhwjzATSFA==",
+      "dependencies": {
+        "is-any-array": "^3.0.0",
+        "ml-array-rescale": "^2.0.0"
+      }
+    },
+    "node_modules/ml-matrix/node_modules/is-any-array": {
+      "version": "3.0.0",
+      "resolved": "https://registry.npmjs.org/is-any-array/-/is-any-array-3.0.0.tgz",
+      "integrity": "sha512-o4h+tylWykC4BD1vaejp6gDxoM13bwW8FGuNs4yIKpj8xbBJcRxJx8vZpq0dCr7ZDEfeKjmsi/euolKhX6f/ww=="
+    },
     "node_modules/ms": {
       "version": "2.1.3",
       "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz",
@@ -4871,6 +4950,14 @@
         "node": ">=4.2.0"
       }
     },
+    "node_modules/umap-js": {
+      "version": "1.4.0",
+      "resolved": "https://registry.npmjs.org/umap-js/-/umap-js-1.4.0.tgz",
+      "integrity": "sha512-xxpviF9wUO6Nxrx+C58SoDgea+h2PnVaRPKDelWv0HotmY6BeWeh0kAPJoumfqUkzUvowGsYfMbnsWI0b9do+A==",
+      "dependencies": {
+        "ml-levenberg-marquardt": "^2.0.0"
+      }
+    },
     "node_modules/undici": {
       "version": "7.26.0",
       "resolved": "https://registry.npmjs.org/undici/-/undici-7.26.0.tgz",
diff --git a/package.json b/package.json
index 0164100..76fc5a0 100644
--- a/package.json
+++ b/package.json
@@ -37,7 +37,9 @@
   },
   "dependencies": {
     "@huggingface/transformers": "^3.8.1",
+    "hdbscan-ts": "^1.0.17",
     "js-tiktoken": "^1.0.21",
+    "umap-js": "^1.4.0",
     "vectra": "^0.12.3"
   }
 }
diff --git a/src/commands/testEmbed.ts b/src/commands/testEmbed.ts
index 0853a5a..744d9a9 100644
--- a/src/commands/testEmbed.ts
+++ b/src/commands/testEmbed.ts
@@ -1,4 +1,6 @@
 import { fetchAllNotes } from '../pipeline/noteReader';
+import { benchmark } from '../pipeline/clustering/benchmark';
+import { CategorizationConfig } from '../types/cluster';
 import { averageVectors, blendVectors, computeTitleWeight, cosineSimilarity } from '../pipeline/vectorAggregator';
 import { NoteVector, WorkerMessage } from '../types/embed';
 import { isGenericTitle } from '../utils/titleFilter';
@@ -144,7 +146,50 @@ export const runTestEmbed = async (installDir: string) => {
 			await cache.endUpdate();
 
 			worker.terminate();
-			log('Worker terminated. Test complete.');
+			log('Worker terminated. Embedding complete.');
+
+			// ── Clustering Benchmark ─────────────────────────────
+			// Edit this config to compare different algorithms and dimensions.
+			// Results are printed as a comparison table in the console.
+			const clusterConfig: CategorizationConfig = {
+				seed: 42,
+				metric: 'cosine',
+				intermediateDim: 10,
+				intermediateNeighbors: 15,
+				strategies: [
+					{ name: 'kmeans-5', algorithm: 'kmeans', K: 5 },
+					{ name: 'kmedoids-5', algorithm: 'kmedoids', K: 5 },
+					{ name: 'hdbscan-3', algorithm: 'hdbscan', minClusterSize: 3 },
+					{ name: 'hdbscan-3-ms2', algorithm: 'hdbscan', minClusterSize: 3, minSamples: 2 },
+					{ name: 'hdbscan-5-ms2', algorithm: 'hdbscan', minClusterSize: 5, minSamples: 2 },
+				],
+			};
+
+			if (noteVectors.length >= 3) {
+				const vectors = noteVectors.map((nv) => nv.vector);
+				const results = benchmark(vectors, clusterConfig);
+
+				// Log note titles per cluster for all strategies, in order (best to worst)
+				for (const res of results) {
+					log(`\nCluster assignments (${res.strategyName}):`);
+					const clusterNotes = new Map<number, string[]>();
+					for (let i = 0; i < noteVectors.length; i++) {
+						const c = res.assignments[i];
+						if (!clusterNotes.has(c)) clusterNotes.set(c, []);
+						clusterNotes.get(c)!.push(noteVectors[i].title);
+					}
+					for (const [clusterId, titles] of clusterNotes) {
+						const label = clusterId < 0 ? 'Noise/Outliers' : `Cluster ${clusterId}`;
+						log(`  ${label} (${titles.length} notes):`);
+						for (const title of titles) {
+							log(`    - ${title}`);
+						}
+					}
+				}
+			} else {
+				log('Too few notes for clustering (need at least 3).');
+			}
+
 			return;
 		}
 
diff --git a/src/pipeline/UmapProjector.ts b/src/pipeline/UmapProjector.ts
new file mode 100644
index 0000000..72a4e18
--- /dev/null
+++ b/src/pipeline/UmapProjector.ts
@@ -0,0 +1,66 @@
+import { UMAP } from 'umap-js';
+import { log } from '../utils/logger';
+import { mulberry32 } from '../utils/prng';
+import { cosineDistance, euclideanDistance } from './clustering/metrics';
+import { UmapProjectorOptions } from '../types/projector';
+
+/** Seeded UMAP wrapper that reduces N x D vectors to N x nComponents coordinates. */
+export class UmapProjector {
+	private readonly nComponents: number;
+	private readonly nNeighbors: number;
+	private readonly minDist: number;
+	private readonly metric: 'cosine' | 'euclidean';
+	private readonly seed: number;
+
+	constructor(options: UmapProjectorOptions = {}) {
+		this.nComponents = options.nComponents ?? 2;
+		this.nNeighbors = options.nNeighbors ?? 15;
+		this.minDist = options.minDist ?? 0.1;
+		this.metric = options.metric ?? 'cosine';
+		this.seed = options.seed ?? 42;
+	}
+
+	/**
+	 * Projects high-dimensional vectors to a lower-dimensional space using UMAP.
+	 * @param vectors N vectors of dimension D (N x D)
+	 * @returns N vectors of dimension nComponents ([] for empty input)
+	 * @throws Error if the vectors do not all have the same dimension
+	 */
+	public project(vectors: number[][]): number[][] {
+		if (vectors.length === 0) return [];
+
+		const dim = vectors[0].length;
+		for (let i = 0; i < vectors.length; i++) {
+			if (vectors[i].length !== dim) {
+				throw new Error(`Vector at index ${i} has dimension ${vectors[i].length}, expected ${dim}`);
+			}
+		}
+
+		// UMAP needs more points than output dimensions, and at least 3 points so the
+		// clamped nNeighbors below (never less than 2) stays smaller than the point count.
+		if (vectors.length <= Math.max(2, this.nComponents)) {
+			log(`Too few vectors (${vectors.length}) for ${this.nComponents}D projection, padding with zeros.`);
+			return vectors.map((vec) => {
+				const out = vec.slice(0, this.nComponents);
+				while (out.length < this.nComponents) out.push(0);
+				return out;
+			});
+		}
+
+		// nNeighbors must be less than the number of data points
+		const nNeighbors = Math.max(2, Math.min(this.nNeighbors, vectors.length - 1));
+		const distanceFn = this.metric === 'euclidean' ? euclideanDistance : cosineDistance;
+
+		const umap = new UMAP({
+			nComponents: this.nComponents,
+			nNeighbors,
+			minDist: this.minDist,
+			distanceFn,
+			random: mulberry32(this.seed),
+		});
+
+		log(`UMAP: projecting ${vectors.length} vectors (${dim}D → ${this.nComponents}D), neighbors=${nNeighbors}, seed=${this.seed}`);
+
+		return umap.fit(vectors);
+	}
+}
diff --git a/src/pipeline/clustering/benchmark.ts b/src/pipeline/clustering/benchmark.ts
new file mode 100644
index 0000000..89aeabf
--- /dev/null
+++ b/src/pipeline/clustering/benchmark.ts
@@ -0,0 +1,150 @@
+import { CategorizationConfig, BenchmarkResult, ClusteringStrategy } from '../../types/cluster';
+import { DistanceFn, getDistanceFn, silhouetteScore } from './metrics';
+import { kmeans } from './kmeans';
+import { kmedoids } from './kmedoids';
+import { hdbscan } from './hdbscan';
+import { UmapProjector } from '../UmapProjector';
+import { log } from '../../utils/logger';
+
+const DEFAULT_K = 5;
+const DEFAULT_MIN_CLUSTER_SIZE = 3;
+
+/**
+ * Dispatches one strategy to its clustering routine and returns the cluster label
+ * of every vector. K falls back to DEFAULT_K and minClusterSize to
+ * DEFAULT_MIN_CLUSTER_SIZE when the strategy leaves them unset.
+ *
+ * @throws Error when the strategy names an algorithm that has no branch here
+ */
+function runStrategy(vectors: number[][], strategy: ClusteringStrategy, distFn: DistanceFn, seed: number): number[] {
+	const { algorithm, K, minClusterSize, minSamples } = strategy;
+	if (algorithm === 'kmeans') return kmeans(vectors, K ?? DEFAULT_K, distFn, seed);
+	if (algorithm === 'kmedoids') return kmedoids(vectors, K ?? DEFAULT_K, distFn, seed);
+	if (algorithm === 'hdbscan') return hdbscan(vectors, minClusterSize ?? DEFAULT_MIN_CLUSTER_SIZE, minSamples, distFn);
+	// Only reachable when a value outside ClusteringAlgorithm slips past the type checker
+	throw new Error(`Unknown clustering algorithm: ${algorithm}`);
+}
+
+/**
+ * Tallies members per cluster ID; index i of the result is the size of cluster i.
+ * Negative labels (noise) are ignored, and unused IDs below the maximum count as 0.
+ * @returns Sizes for IDs 0..max, or [] when no point has a non-negative label
+ */
+function computeClusterSizes(assignments: number[]): number[] {
+	const tally = new Map<number, number>();
+	for (let i = 0; i < assignments.length; i++) {
+		const label = assignments[i];
+		if (label < 0) continue; // noise
+		tally.set(label, (tally.get(label) ?? 0) + 1);
+	}
+	if (tally.size === 0) return [];
+	// Dense array covering IDs 0..highestId
+	const highestId = Math.max(...tally.keys());
+	return Array.from({ length: highestId + 1 }, (_, id) => tally.get(id) ?? 0);
+}
+
+/**
+ * Logs a fixed-width comparison table (one row per result), then names the first
+ * entry as the best strategy. Callers are expected to pass results sorted best-first.
+ */
+function logBenchmarkTable(results: BenchmarkResult[]): void {
+	log('=== Clustering Benchmark Results ===');
+	log('');
+
+	const titles = ['Strategy'.padEnd(20), 'Algo'.padEnd(10), 'Cls'.padStart(3), 'Silhouette'.padStart(10), 'Outliers'.padStart(8), 'Time'.padStart(8), 'Cluster Sizes'];
+	const header = titles.join(' | ');
+	log(header);
+	log('-'.repeat(header.length + 20));
+
+	results.forEach((row) => {
+		const cells = [row.strategyName.padEnd(20), row.algorithm.padEnd(10), String(row.clusterCount).padStart(3), row.silhouetteScore.toFixed(4).padStart(10), String(row.outlierCount).padStart(8), `${row.timeMs.toFixed(0)}ms`.padStart(8), `[${row.clusterSizes.join(', ')}]`];
+		log(cells.join(' | '));
+	});
+
+	log('');
+	if (results.length > 0) {
+		log(`Best: ${results[0].strategyName} (silhouette = ${results[0].silhouetteScore.toFixed(4)})`);
+	}
+	log('===================================');
+}
+
+/**
+ * Benchmarks every strategy in `config.strategies` against the same vectors.
+ *
+ *  1. If `config.intermediateDim` is not null, the vectors are first UMAP-reduced
+ *     to that dimensionality; otherwise clustering runs on the raw input.
+ *  2. Each strategy is run and timed (clustering call only).
+ *  3. Each run is scored with the mean silhouette over its non-noise points
+ *     (label >= 0); runs that produce fewer than two clusters score 0.
+ *  4. Results are sorted best-first, logged as a table, and returned.
+ *
+ * A strategy that throws is logged and omitted from the results.
+ *
+ * @param vectors  High-dimensional note vectors (N x D)
+ * @param config   Categorization config with strategies to benchmark
+ * @returns        Benchmark results sorted by silhouette score (descending)
+ */
+export function benchmark(vectors: number[][], config: CategorizationConfig): BenchmarkResult[] {
+	if (vectors.length === 0) {
+		log('No vectors to cluster.');
+		return [];
+	}
+
+	const { metric, seed, intermediateDim } = config;
+	const distFn = getDistanceFn(metric);
+
+	// Cluster in UMAP-reduced space when an intermediate dimensionality is configured
+	let points = vectors;
+	if (intermediateDim !== null) {
+		log(`Reducing ${vectors[0].length}D → ${intermediateDim}D for clustering...`);
+		points = new UmapProjector({
+			nComponents: intermediateDim,
+			nNeighbors: config.intermediateNeighbors,
+			metric,
+			seed,
+		}).project(vectors);
+	}
+
+	const results: BenchmarkResult[] = [];
+	for (const strategy of config.strategies) {
+		log(`Running strategy: ${strategy.name} (${strategy.algorithm})...`);
+		const startedAt = performance.now();
+		try {
+			const assignments = runStrategy(points, strategy, distFn, seed);
+			const timeMs = performance.now() - startedAt;
+
+			const clusterSizes = computeClusterSizes(assignments);
+			const clusterCount = clusterSizes.filter((size) => size > 0).length;
+			const outlierCount = assignments.filter((label) => label < 0).length;
+
+			// Silhouette needs at least two clusters; noise points (label < 0) are left out
+			let score = 0;
+			if (clusterCount >= 2) {
+				const kept: number[] = [];
+				assignments.forEach((label, i) => {
+					if (label >= 0) kept.push(i);
+				});
+				score = silhouetteScore(kept.map((i) => points[i]), kept.map((i) => assignments[i]), distFn);
+			}
+
+			results.push({
+				strategyName: strategy.name,
+				algorithm: strategy.algorithm,
+				clusterCount,
+				assignments,
+				clusterSizes,
+				silhouetteScore: score,
+				outlierCount,
+				timeMs,
+			});
+		} catch (err) {
+			log(`Strategy ${strategy.name} failed: ${err}`);
+		}
+	}
+
+	// Rank by silhouette score, highest first
+	results.sort((left, right) => right.silhouetteScore - left.silhouetteScore);
+
+	logBenchmarkTable(results);
+	return results;
+}
diff --git a/src/pipeline/clustering/hdbscan.ts b/src/pipeline/clustering/hdbscan.ts
new file mode 100644
index 0000000..681daff
--- /dev/null
+++ b/src/pipeline/clustering/hdbscan.ts
@@ -0,0 +1,61 @@
+import { HDBSCAN } from 'hdbscan-ts';
+import { DistanceFn, euclideanDistance } from './metrics';
+
+const DEFAULT_MIN_CLUSTER_SIZE = 3;
+
+/**
+ * Density-based clustering via the hdbscan-ts library.
+ *
+ * In contrast to K-Means / K-Medoids, HDBSCAN:
+ * - chooses the number of clusters on its own
+ * - marks outlier/noise points with the label -1
+ * - copes with clusters of differing densities
+ *
+ * The library is run with its built-in euclidean distance. For any other distFn the
+ * vectors are L2-normalized first; for unit vectors euclidean² = 2 * (1 - cosine_similarity),
+ * so distances in the normalized space rank pairs exactly as cosine distance does.
+ *
+ * @param vectors         Input data points (N x D)
+ * @param minClusterSize  Minimum points to form a cluster (default: 3; values below 2 throw)
+ * @param minSamples      How many neighbors define a "core" point (default: minClusterSize). Lower = fewer outliers
+ * @param distFn          Distance function; only checked against euclideanDistance to decide on normalization
+ * @returns               Cluster assignments (length N). -1 = noise/outlier; all -1 when N < minClusterSize
+ * @throws                Error if `vectors` is empty or minClusterSize < 2
+ */
+export function hdbscan(
+	vectors: number[][],
+	minClusterSize: number = DEFAULT_MIN_CLUSTER_SIZE,
+	minSamples: number | undefined,
+	distFn: DistanceFn,
+): number[] {
+	const count = vectors.length;
+	if (count === 0) throw new Error('Cannot cluster empty input');
+	if (minClusterSize < 2) throw new Error('minClusterSize must be at least 2');
+	if (count < minClusterSize) return new Array(count).fill(-1);
+
+	// Anything other than euclideanDistance is treated as cosine: project the
+	// vectors onto the unit sphere so the library's euclidean distance orders
+	// pairs the same way cosine distance would.
+	const needsUnitLength = distFn !== euclideanDistance;
+	const points = needsUnitLength ? vectors.map(normalize) : vectors;
+
+	const options = {
+		minClusterSize,
+		minSamples: minSamples ?? minClusterSize,
+	};
+
+	return new HDBSCAN(options).fit(points);
+}
+
+/**
+ * Scales a vector to unit Euclidean length without mutating the input.
+ * An all-zero vector has no direction, so it is returned unchanged (same array instance).
+ * @returns A new unit-length array, or `vec` itself when its norm is 0
+ */
+function normalize(vec: number[]): number[] {
+	let sumOfSquares = 0;
+	for (const x of vec) sumOfSquares += x * x;
+	const length = Math.sqrt(sumOfSquares);
+	if (length === 0) return vec;
+	return vec.map((x) => x / length);
+}
diff --git a/src/pipeline/clustering/kmeans.ts b/src/pipeline/clustering/kmeans.ts
new file mode 100644
index 0000000..27a0c32
--- /dev/null
+++ b/src/pipeline/clustering/kmeans.ts
@@ -0,0 +1,158 @@
+import { DistanceFn } from './metrics';
+import { mulberry32 } from '../../utils/prng';
+
+const MAX_ITERATIONS = 100;
+
+/**
+ * Selects initial centroids using the k-means++ strategy: the first centroid is a
+ * uniformly random point, and each later one is drawn with probability proportional
+ * to its squared distance from the nearest centroid chosen so far. Points at distance 0
+ * (duplicates of a centroid) are only ever picked when every remaining weight is 0.
+ */
+function initCentroids(vectors: number[][], K: number, distFn: DistanceFn, rng: () => number): number[][] {
+	const n = vectors.length;
+	const centroids: number[][] = [];
+
+	// First centroid: random point
+	centroids.push([...vectors[Math.floor(rng() * n)]]);
+
+	for (let c = 1; c < K; c++) {
+		// Compute squared distance from each point to its nearest centroid
+		const distances = new Float64Array(n);
+		let totalDist = 0;
+
+		for (let i = 0; i < n; i++) {
+			let minDist = Infinity;
+			for (const centroid of centroids) {
+				const d = distFn(vectors[i], centroid);
+				if (d < minDist) minDist = d;
+			}
+			distances[i] = minDist * minDist;
+			totalDist += distances[i];
+		}
+
+		// Weighted draw: walk the positive weights until their running sum exceeds the threshold.
+		// `selected` trails the walk, so a rounding shortfall ends on the last weighted point.
+		let threshold = rng() * totalDist;
+		let selected = 0;
+		for (let i = 0; i < n; i++) {
+			if (distances[i] === 0) continue; // zero weight: must never be drawn
+			selected = i;
+			threshold -= distances[i];
+			if (threshold < 0) break;
+		}
+
+		centroids.push([...vectors[selected]]);
+	}
+
+	return centroids;
+}
+
+/**
+ * Labels every vector with the index of its closest centroid.
+ * Ties go to the lowest centroid index; with no centroids every label is 0.
+ */
+function assignClusters(vectors: number[][], centroids: number[][], distFn: DistanceFn): number[] {
+	const labels: number[] = [];
+	for (const point of vectors) {
+		let nearest = 0;
+		let nearestDist = Infinity;
+		centroids.forEach((centroid, idx) => {
+			const dist = distFn(point, centroid);
+			if (dist < nearestDist) [nearestDist, nearest] = [dist, idx];
+		});
+		labels.push(nearest);
+	}
+	return labels;
+}
+
+/**
+ * Builds the next set of centroids from the current assignment: each centroid becomes
+ * the per-dimension average of its members, and a cluster without members is replaced
+ * by a copy of a randomly drawn input vector.
+ */
+function recomputeCentroids(
+	vectors: number[][],
+	assignments: number[],
+	K: number,
+	dim: number,
+	rng: () => number,
+): number[][] {
+	// Zero-initialised running sums and member counts, one slot per cluster
+	const sums: number[][] = [];
+	const sizes: number[] = [];
+	for (let c = 0; c < K; c++) {
+		sums.push(new Array(dim).fill(0));
+		sizes.push(0);
+	}
+
+	// Accumulate each vector into the sum of its assigned cluster
+	vectors.forEach((vec, i) => {
+		const label = assignments[i];
+		sizes[label] += 1;
+		for (let d = 0; d < dim; d++) {
+			sums[label][d] += vec[d];
+		}
+	});
+
+	return sums.map((sum, c) => {
+		if (sizes[c] > 0) {
+			return sum.map((total) => total / sizes[c]);
+		}
+		// Empty cluster: re-seed from a random input vector to avoid a dead centroid
+		return [...vectors[Math.floor(rng() * vectors.length)]];
+	});
+}
+
+/**
+ * Lloyd's K-Means with k-means++ seeding.
+ *
+ * Alternates between recomputing centroids (arithmetic mean of each cluster) and
+ * reassigning points to their nearest centroid until no label changes or
+ * `maxIter` rounds have run. All randomness comes from a PRNG seeded with `seed`.
+ *
+ * The mean minimizes squared Euclidean error, so with a non-Euclidean distFn
+ * (e.g. cosine) the procedure is a heuristic rather than an exact optimizer.
+ *
+ * @param vectors   Input data points (N x D)
+ * @param K         Number of clusters
+ * @param distFn    Distance function
+ * @param seed      Seed for reproducible initialization
+ * @param maxIter   Maximum iterations (default: 100)
+ * @returns         Cluster assignment per vector (length N, values 0..K-1; 0..N-1 when K >= N)
+ * @throws          Error if `vectors` is empty or K <= 0
+ */
+export function kmeans(
+	vectors: number[][],
+	K: number,
+	distFn: DistanceFn,
+	seed: number,
+	maxIter: number = MAX_ITERATIONS,
+): number[] {
+	const n = vectors.length;
+	if (n === 0) throw new Error('Cannot cluster empty input');
+	if (K <= 0) throw new Error('K must be positive');
+
+	// Not enough points to share clusters: every point is its own cluster
+	if (K >= n) return vectors.map((_, index) => index);
+
+	const dim = vectors[0].length;
+	const rng = mulberry32(seed);
+
+	// Seed the centroids, then label every point once before iterating
+	let centroids = initCentroids(vectors, K, distFn, rng);
+	let assignments = assignClusters(vectors, centroids, distFn);
+
+	// Lloyd iterations: update step followed by assignment step
+	for (let iter = 0; iter < maxIter; iter++) {
+		centroids = recomputeCentroids(vectors, assignments, K, dim, rng);
+		const previous = assignments;
+		assignments = assignClusters(vectors, centroids, distFn);
+
+		// Converged once a full pass leaves every label untouched
+		const stable = assignments.every((label, i) => label === previous[i]);
+		if (stable) break;
+	}
+
+	return assignments;
+}
diff --git a/src/pipeline/clustering/kmedoids.ts b/src/pipeline/clustering/kmedoids.ts
new file mode 100644
index 0000000..565b1a9
--- /dev/null
+++ b/src/pipeline/clustering/kmedoids.ts
@@ -0,0 +1,131 @@
+import { DistanceFn } from './metrics';
+import { mulberry32 } from '../../utils/prng';
+
+const MAX_ITERATIONS = 100;
+
+/**
+ * Returns the index of the non-medoid point whose distance to its closest medoid
+ * is largest. Ties resolve to the lowest index; 0 is returned if every point is a medoid.
+ * Used to add medoids greedily after the first one has been chosen.
+ */
+function findFarthestPoint(vectors: number[][], medoidIndices: number[], distFn: DistanceFn): number {
+	const taken = new Set(medoidIndices);
+	let farthest = 0;
+	let farthestGap = -1;
+
+	vectors.forEach((candidate, idx) => {
+		if (taken.has(idx)) return;
+		// Gap = distance from the candidate to the nearest existing medoid
+		let gap = Infinity;
+		for (const m of medoidIndices) {
+			const d = distFn(candidate, vectors[m]);
+			if (d < gap) gap = d;
+		}
+		if (gap > farthestGap) {
+			farthestGap = gap;
+			farthest = idx;
+		}
+	});
+
+	return farthest;
+}
+
+/**
+ * Labels every point with the position (within `medoidIndices`) of its closest medoid.
+ * The returned labels are slots 0..medoidIndices.length-1, not point indices.
+ * Ties go to the medoid listed first.
+ */
+function assignToMedoids(vectors: number[][], medoidIndices: number[], distFn: DistanceFn): number[] {
+	const medoids = medoidIndices.map((idx) => vectors[idx]);
+	return vectors.map((point) => {
+		let winner = 0;
+		let winnerDist = Infinity;
+		medoids.forEach((medoid, slot) => {
+			const dist = distFn(point, medoid);
+			if (dist < winnerDist) [winnerDist, winner] = [dist, slot];
+		});
+		return winner;
+	});
+}
+
+/**
+ * Sums, over all points, the distance between the point and the medoid it is
+ * assigned to (`assignments[i]` is a slot in `medoidIndices`).
+ */
+function totalCost(vectors: number[][], assignments: number[], medoidIndices: number[], distFn: DistanceFn): number {
+	return vectors.reduce(
+		(sum, point, i) => sum + distFn(point, vectors[medoidIndices[assignments[i]]]),
+		0,
+	);
+}
+
+/**
+ * K-Medoids clustering using a simplified PAM (Partitioning Around Medoids).
+ *
+ * Cluster centers (medoids) are always actual data points rather than computed
+ * means, so only pairwise distances are needed and any distFn can be used.
+ * Starting from a farthest-point initialization, single medoid/non-medoid swaps
+ * are accepted whenever they reduce the summed point-to-medoid distance.
+ *
+ * @param vectors   Input data points (N x D)
+ * @param K         Number of clusters
+ * @param distFn    Distance function
+ * @param seed      Seed for reproducible initialization
+ * @param maxIter   Maximum number of full swap sweeps (default: 100)
+ * @returns         Cluster assignments (length N, values 0..K-1; 0..N-1 when K >= N)
+ * @throws          Error if `vectors` is empty or K <= 0
+ */
+export function kmedoids(
+	vectors: number[][],
+	K: number,
+	distFn: DistanceFn,
+	seed: number,
+	maxIter: number = MAX_ITERATIONS,
+): number[] {
+	const n = vectors.length;
+	if (n === 0) throw new Error('Cannot cluster empty input');
+	if (K <= 0) throw new Error('K must be positive');
+	if (K >= n) return vectors.map((_, index) => index);
+
+	const rng = mulberry32(seed);
+
+	// Initialization: one random medoid, then repeatedly add the point that lies
+	// farthest from all medoids picked so far
+	const medoids: number[] = [Math.floor(rng() * n)];
+	while (medoids.length < K) {
+		medoids.push(findFarthestPoint(vectors, medoids, distFn));
+	}
+
+	let assignments = assignToMedoids(vectors, medoids, distFn);
+	let bestCost = totalCost(vectors, assignments, medoids, distFn);
+
+	// SWAP phase: greedily accept any (medoid slot, non-medoid point) exchange that
+	// lowers the total cost; stop after a full sweep without improvement
+	for (let iter = 0; iter < maxIter; iter++) {
+		let improved = false;
+
+		for (let slot = 0; slot < K; slot++) {
+			for (let candidate = 0; candidate < n; candidate++) {
+				// Points that currently serve as medoids are not swap candidates
+				if (medoids.includes(candidate)) continue;
+
+				// Evaluate the exchange on a copy so a rejected swap needs no undo
+				const trial = [...medoids];
+				trial[slot] = candidate;
+
+				const trialAssignments = assignToMedoids(vectors, trial, distFn);
+				const trialCost = totalCost(vectors, trialAssignments, trial, distFn);
+				if (trialCost < bestCost) {
+					medoids[slot] = candidate;
+					assignments = trialAssignments;
+					bestCost = trialCost;
+					improved = true;
+				}
+			}
+		}
+
+		if (!improved) break;
+	}
+
+	return assignments;
+}
diff --git a/src/pipeline/clustering/metrics.ts b/src/pipeline/clustering/metrics.ts
new file mode 100644
index 0000000..9d2588b
--- /dev/null
+++ b/src/pipeline/clustering/metrics.ts
@@ -0,0 +1,89 @@
+export type DistanceFn = (a: number[], b: number[]) => number;
+
+/** Cosine distance (1 - cosine similarity), clamped to [0, 2]; returns 1 if either vector has zero norm. */
+export function cosineDistance(a: number[], b: number[]): number {
+	let dot = 0, normA = 0, normB = 0;
+	for (let i = 0; i < a.length; i++) {
+		dot += a[i] * b[i];
+		normA += a[i] * a[i];
+		normB += b[i] * b[i];
+	}
+	const denom = Math.sqrt(normA) * Math.sqrt(normB);
+	if (denom === 0) return 1; // similarity is undefined for a zero vector; treat as orthogonal
+	// Rounding can push dot/denom marginally outside [-1, 1]; a distance must never be negative
+	return Math.min(2, Math.max(0, 1 - dot / denom));
+}
+
+/** Straight-line (L2) distance between `a` and `b`, iterating over the indices of `a`. */
+export function euclideanDistance(a: number[], b: number[]): number {
+	let squared = 0;
+	for (const [i, value] of a.entries()) {
+		squared += (value - b[i]) * (value - b[i]);
+	}
+	return Math.sqrt(squared);
+}
+
+export function getDistanceFn(metric: 'cosine' | 'euclidean'): DistanceFn {
+	return metric !== 'euclidean' ? cosineDistance : euclideanDistance; // anything but 'euclidean' maps to cosine
+}
+
+/**
+ * Computes the mean silhouette coefficient for a clustering.
+ *
+ * For each point i:
+ *   a(i) = mean distance to other points in the same cluster
+ *   b(i) = mean distance to points in the nearest other cluster
+ *   s(i) = (b(i) - a(i)) / max(a(i), b(i))
+ * A point that is alone in its cluster gets s(i) = 0 (the usual convention, since
+ * a(i) is undefined there); otherwise singleton clusters would each score a perfect 1.
+ *
+ * Returns the mean of s(i) across all points (0 for fewer than 2 points or clusters).
+ * Range: -1 (poor) to +1 (well-separated clusters).
+ */
+export function silhouetteScore(vectors: number[][], assignments: number[], distFn: DistanceFn): number {
+	const n = vectors.length;
+	if (n <= 1) return 0;
+
+	// Group point indices by cluster
+	const clusterIndices = new Map<number, number[]>();
+	for (let i = 0; i < n; i++) {
+		const c = assignments[i];
+		if (!clusterIndices.has(c)) clusterIndices.set(c, []);
+		clusterIndices.get(c)!.push(i);
+	}
+	if (clusterIndices.size <= 1) return 0;
+
+	let totalScore = 0;
+
+	for (let i = 0; i < n; i++) {
+		const myCluster = assignments[i];
+		const myClusterMembers = clusterIndices.get(myCluster)!;
+
+		// Singleton cluster: s(i) = 0 by convention, so it adds nothing to the sum
+		if (myClusterMembers.length === 1) continue;
+
+		// a(i): mean distance to same-cluster points
+		let a = 0;
+		for (const j of myClusterMembers) {
+			if (j !== i) a += distFn(vectors[i], vectors[j]);
+		}
+		a /= myClusterMembers.length - 1;
+
+		// b(i): mean distance to nearest other cluster
+		let b = Infinity;
+		for (const [clusterId, members] of clusterIndices) {
+			if (clusterId === myCluster) continue;
+			let meanDist = 0;
+			for (const j of members) {
+				meanDist += distFn(vectors[i], vectors[j]);
+			}
+			meanDist /= members.length;
+			if (meanDist < b) b = meanDist;
+		}
+
+		const maxAB = Math.max(a, b);
+		totalScore += maxAB === 0 ? 0 : (b - a) / maxAB;
+	}
+
+	return totalScore / n;
+}
diff --git a/src/types/cluster.ts b/src/types/cluster.ts
new file mode 100644
index 0000000..52028dd
--- /dev/null
+++ b/src/types/cluster.ts
@@ -0,0 +1,44 @@
+export type ClusteringAlgorithm = 'kmeans' | 'kmedoids' | 'hdbscan';
+
+export interface ClusteringStrategy {
+	/** Human-readable label for this run, e.g. 'kmeans-5'; shown in the benchmark logs and results */
+	name: string;
+	algorithm: ClusteringAlgorithm;
+	/** Number of clusters (kmeans / kmedoids only; the benchmark falls back to 5 when omitted) */
+	K?: number;
+	/** Minimum points to form a cluster (hdbscan only, default: 3) */
+	minClusterSize?: number;
+	/** How many neighbors define a "core" point (hdbscan only, default: minClusterSize). Lower = fewer outliers */
+	minSamples?: number;
+}
+
+export interface CategorizationConfig {
+	/** Seed for UMAP and for the kmeans / kmedoids initialization (reproducible runs) */
+	seed: number;
+	/** Distance metric for clustering, UMAP, and silhouette scoring */
+	metric: 'cosine' | 'euclidean';
+	/**
+	 * If set, UMAP-reduce to this dimensionality before clustering.
+	 * null = cluster directly on the raw embedding vectors (e.g. 384D).
+	 */
+	intermediateDim: number | null;
+	/** Number of nearest neighbors for the UMAP intermediate projection (unused when intermediateDim is null) */
+	intermediateNeighbors: number;
+	/** Clustering strategies to benchmark side-by-side on the same vectors */
+	strategies: ClusteringStrategy[];
+}
+
+export interface BenchmarkResult {
+	strategyName: string;
+	algorithm: ClusteringAlgorithm;
+	clusterCount: number; // number of non-empty clusters; noise is not counted
+	/** Cluster ID per note, in the same order as the input vectors (negative = noise/outlier) */
+	assignments: number[];
+	/** Number of notes in each cluster, indexed by cluster ID (noise excluded) */
+	clusterSizes: number[];
+	/** Mean silhouette coefficient: -1 (poor) to +1 (excellent); 0 when fewer than 2 clusters were found */
+	silhouetteScore: number;
+	/** Number of points labeled as noise/outliers (negative label; produced by HDBSCAN only) */
+	outlierCount: number;
+	timeMs: number; // wall-clock time of the clustering call, in milliseconds
+}
diff --git a/src/types/projector.ts b/src/types/projector.ts
new file mode 100644
index 0000000..aa9b972
--- /dev/null
+++ b/src/types/projector.ts
@@ -0,0 +1,12 @@
+export interface UmapProjectorOptions {
+	/** Number of dimensions in the output (default: 2) */
+	nComponents?: number;
+	/** Number of nearest neighbors for manifold approximation (default: 15); clamped to [2, N - 1] when projecting N vectors */
+	nNeighbors?: number;
+	/** Minimum distance between points in output space (default: 0.1) */
+	minDist?: number;
+	/** Distance metric between input vectors: 'cosine' or 'euclidean' (default: 'cosine') */
+	metric?: 'cosine' | 'euclidean';
+	/** Seed for the PRNG handed to UMAP, for reproducible results (default: 42) */
+	seed?: number;
+}
diff --git a/src/utils/prng.ts b/src/utils/prng.ts
new file mode 100644
index 0000000..5b36bfc
--- /dev/null
+++ b/src/utils/prng.ts
@@ -0,0 +1,14 @@
+/**
+ * Mulberry32: a small, fast, seedable 32-bit pseudo-random number generator.
+ * The seed is truncated to a 32-bit integer; each call of the returned function
+ * advances the state and yields a deterministic value in [0, 1).
+ */
+export function mulberry32(seed: number): () => number {
+	let state = seed | 0;
+	return () => {
+		state = (state + 0x6d2b79f5) | 0;
+		let mixed = Math.imul(state ^ (state >>> 15), 1 | state);
+		mixed = (mixed + Math.imul(mixed ^ (mixed >>> 7), 61 | mixed)) ^ mixed;
+		return ((mixed ^ (mixed >>> 14)) >>> 0) / 4294967296;
+	};
+}
diff --git a/tsconfig.json b/tsconfig.json
index 4474cab..1a5b5bd 100644
--- a/tsconfig.json
+++ b/tsconfig.json
@@ -5,6 +5,7 @@
 		"target": "es2015",
 		"jsx": "react",
 		"allowJs": true,
-		"baseUrl": "."
+		"baseUrl": ".",
+		"skipLibCheck": true
 	}
 }