diff --git a/inc/limits.h b/inc/limits.h
index c4f781d..15626ad 100644
--- a/inc/limits.h
+++ b/inc/limits.h
@@ -28,6 +28,7 @@
 
 /* Limits */
 #define MAX_ARGLN 512       // Max command line argument length
+#define MAX_COMPONENT_SCAN_HASHES 1000  // Max number of url hashes accepted by -C
 #define MAX_PATH 1024
 #define MAX_HASHES_READ 65535
 #define MAX_FILE_SIZE (1024 * 1024 * 4)
@@ -51,7 +52,9 @@
 #define SNIPPETS_DEFAULT_ADJUST_TOLERANCE true /** Adjust tolerance based on file size */
 #define SNIPPETS_DEFAULT_HONOR_FILE_EXTENSION true /** Honor file extension during snippet matching */
 #define DEFAULT_FETCH_MAX_FILES 12000 /** Maximum number of files to fetch during component matching */
+#define MAX_FILE_CONTENT_SIZE_DEFAULT (1024ULL * 1024 * 50) /** Default maximum file content size (50MB) printed by -k */
 /* Variables */
 extern int fetch_max_files; // Maximum number of files to fetch during component matching
+extern uint64_t max_file_content_size; // Maximum file content size printed by -k
 
 #endif
diff --git a/inc/match_list.h b/inc/match_list.h
index 6520724..de20c11 100644
--- a/inc/match_list.h
+++ b/inc/match_list.h
@@ -147,5 +147,6 @@ void component_list_destroy(component_list_t *list);
 bool component_list_add_binary(component_list_t *list, component_data_t *new_comp, bool (*val)(component_data_t *a, component_data_t *b), bool remove_a);
 bool match_list_eval(match_list_t *list, match_data_t * in,  bool (*eval)(match_data_t *fpa, match_data_t *fpb));
 void match_list_tolerance_set(float in);
+float match_list_tolerance_get(void);
 
 #endif
diff --git a/inc/purl_scan.h b/inc/purl_scan.h
new file mode 100644
index 0000000..6533d33
--- /dev/null
+++ b/inc/purl_scan.h
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * inc/purl_scan.h
+ *
+ * SCANOSS Inventory Scanner
+ *
+ * Copyright (C) 2018-2024 SCANOSS.COM
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#ifndef __PURL_SCAN_H
+#define __PURL_SCAN_H
+
+/**
+ * @brief Resolve the purls and url hashes related to a file MD5.
+ *
+ * Looks up the given file MD5 in the KB (url and file tables) and prints, in
+ * JSON, the unique purls associated with that file along with every url hash
+ * (url_id) where the file was seen and the best (lowest) KB rank found for
+ * each purl.
+ *
+ * @param file_md5_hex file MD5 in hex (32 chars)
+ * @return EXIT_SUCCESS on success, EXIT_FAILURE on invalid input
+ */
+int purl_scan(char *file_md5_hex);
+
+/**
+ * @brief Report the details of one or more components identified by url hash.
+ *
+ * Accepts a single url_hash (url_id) or a comma-separated list. Each hash is
+ * looked up in the KB and its component details are rendered in JSON, reusing
+ * the same rendering used in regular scan reports. Output is always an array
+ * under the "results" key, one entry per valid hash:
+ * {"results": [{"url_hash": "...", "component": {...}}, ...]}.
+ * Invalid hashes are skipped with a stderr warning.
+ *
+ * @param url_hash_list comma-separated url hashes in hex (32 chars each)
+ * @return EXIT_SUCCESS if at least one valid hash was processed,
+ *         EXIT_FAILURE if input is null/empty or no hash was valid
+ */
+int component_scan(char *url_hash_list);
+
+/**
+ * @brief Run a snippet-only scan whose WFP input comes from stdin.
+ *
+ * Reads a WFP block (same format used by `-w` scans) from stdin, runs the
+ * snippet selection pipeline (no full-file lookup, no component resolution)
+ * and prints a JSON report listing the file_md5 candidates grouped by snippet
+ * region, together with their input/oss line ranges. Candidate cohort size is
+ * controlled by the tolerance set via -T (match_list_tolerance_set).
+ *
+ * @return EXIT_SUCCESS on success, EXIT_FAILURE on invalid/empty input
+ */
+int snippet_scan_stdin(void);
+
+/**
+ * @brief Run a snippet-only scan whose WFP input is passed as a string.
+ *
+ * Same behavior as snippet_scan_stdin() but reads the WFP block from the
+ * provided in-memory buffer. Used by `-S "<wfp>"` so callers (e.g. FlexAPI)
+ * can pass the WFP directly as an argv value instead of piping it via stdin.
+ *
+ * @param wfp NUL-terminated buffer holding the WFP block
+ * @return EXIT_SUCCESS on success, EXIT_FAILURE on invalid/empty input
+ */
+int snippet_scan_string(const char *wfp);
+
+#endif
diff --git a/inc/report.h b/inc/report.h
index da70165..b55bc50 100644
--- a/inc/report.h
+++ b/inc/report.h
@@ -16,4 +16,5 @@ void json_open();
 void json_close(void);
 void kb_version_get(void);
 bool print_json_component(component_data_t * component);
+void print_purl_array(component_data_t * component);
 #endif
diff --git a/inc/scanoss.h b/inc/scanoss.h
index 8985597..1f4a2be 100644
--- a/inc/scanoss.h
+++ b/inc/scanoss.h
@@ -33,7 +33,7 @@
 #define WFP_LN 4
 #define WFP_REC_LN 18
 
-#define SCANOSS_VERSION "5.4.25"
+#define SCANOSS_VERSION "5.4.26"
 
 /* Log files */
 #define SCAN_LOG "/tmp/scanoss_scan.log"
diff --git a/src/copyright.c b/src/copyright.c
index 3e1edb1..c7d7a74 100644
--- a/src/copyright.c
+++ b/src/copyright.c
@@ -35,28 +35,27 @@
 #include "util.h"
 #include "decrypt.h"
 #include "debug.h"
-const char *copyright_sources[] = {"component_declared", "file_header", "license_file", "scancode"};
 
-/**
- * @brief get fisrt copyright LDB function pointer. Will be executed for the ldb_fetch_recordset function in each iteration. See LDB documentation for more details.
- * @param key //TODO
- * @param subkey //TODO
- * @param subkey_ln //TODO
- * @param[out] data //TODO
- * @param datalen //TODO
- * @param iteration //TODO
- * @param ptr output pointer, returns the fisrt copyright obtained from the database
- * @return //TODO
- */
-/* static bool get_first_copyright(uint8_t *key, uint8_t *subkey, int subkey_ln, uint8_t *data, uint32_t datalen, int iteration, void *ptr)
+static char * copyright_id_to_source_name(int id)
 {
-	char * result = decrypt_data(data, datalen, oss_copyright, key, subkey);
-	if (result)
-		strncpy(ptr, skip_first_comma((char *) result), MAX_COPYRIGHT);
-	
-	free(result);
-	return true;
-}*/
+	switch (id)
+	{
+		case 1:
+		case 5:
+			return "file_header";
+		case 0:
+		case 2:
+		case 6:
+		case 8:
+			return "license_file";
+		case 3:
+		case 4:
+		case 7:
+			return "scancode";
+		default:
+			return NULL;
+	}
+}
 
 /**
  * @brief //Remove undesired characteres from a copyright
@@ -105,13 +104,13 @@ static bool print_copyrights_item(uint8_t *key, uint8_t *subkey, int subkey_ln,
 
 	char result[MAX_FIELD_LN] = "\0";
 	int len = 0;
-
-	if (!dup && (*copyright) && (src <= (sizeof(copyright_sources) / sizeof(copyright_sources[0]))))
+	char * source_id = copyright_id_to_source_name(src);
+	if (!dup && (*copyright) && source_id)
 	{
 		if (comp->copyright_text) 
 			len += sprintf(result+len,",");
 		len += sprintf(result+len,"{\"name\": \"%s\",", copyright);
-		len += sprintf(result+len,"\"source\": \"%s\"}", copyright_sources[atoi(source)]);
+		len += sprintf(result+len,"\"source\": \"%s\"}", source_id);
 	}
 	if (*result)
 		str_cat_realloc(&comp->copyright_text, result);
diff --git a/src/help.c b/src/help.c
index 441656a..df67845 100644
--- a/src/help.c
+++ b/src/help.c
@@ -65,6 +65,12 @@ Configuration:\n\
 -a, --attribution FILE   Show attribution notices for the provided SBOM.json file.\n\
 -c, --component HINT     Add a component HINT to guide scan results.\n\
 -k, --key KEY            Show contents of the specified KEY file from MZ sources archive.\n\
+    --max-file-content-size MB Set maximum file content size in MB printed by -k (default: 50).\n\
+-P, --purl MD5           Return the purls and versions related to the given file MD5 (JSON).\n\
+-C, --url-hash MD5       Return the details of the component identified by the given url hash (JSON).\n\
+-S, --snippet-scan WFP   Snippet-only scan: take a single-file WFP block as argument and return JSON with candidate\n\
+                         file_md5s and their line ranges, filtered by the tolerance set via -T (precede -S with -T\n\
+                         to apply). Use -S - to read the WFP from stdin instead of passing it as argument.\n\
 -l, --license LICENSE    Display OSADL metadata for the given SPDX license ID.\n\
 -L, --full-license       Enable full license report.\n\
 -F, --flags FLAGS        Set engine scanning flags (see below).\n\
diff --git a/src/ignorelist.c b/src/ignorelist.c
index 2fdf920..5ad11bf 100644
--- a/src/ignorelist.c
+++ b/src/ignorelist.c
@@ -46,10 +46,13 @@ char *extension(char *path)
 {
 	char *dot   = strrchr(path, '.');
 	char *slash = strrchr(path, '/');
+	if (!slash) slash = path;
 
-	if (!dot && !slash) return NULL;
+	/* No dot, or the last dot belongs to a parent directory (e.g. "a.b/file"):
+	   the file has no extension. Returning the basename here (as the previous
+	   implementation did) wrongly treated a plain filename as an extension. */
+	if (!dot) return NULL;
 	if (dot > slash) return dot + 1;
-	if (slash != path) return slash + 1;
 	return NULL;
 }
 
diff --git a/src/limits.c b/src/limits.c
index 67b6cfd..ac3787f 100644
--- a/src/limits.c
+++ b/src/limits.c
@@ -10,3 +10,4 @@
   */
 
 int fetch_max_files = 12000; /** Maximum number of files to fetch during component matching */
+uint64_t max_file_content_size = MAX_FILE_CONTENT_SIZE_DEFAULT; /** Maximum file content size printed by -k */
diff --git a/src/main.c b/src/main.c
index 75d0ed0..fb91fe2 100644
--- a/src/main.c
+++ b/src/main.c
@@ -40,6 +40,7 @@
 #include "parse.h"
 #include "report.h"
 #include "scan.h"
+#include "purl_scan.h"
 #include "scanoss.h"
 #include "util.h"
 #include "component.h"
@@ -278,8 +279,11 @@ static struct option long_options[] = {
 	{"sbom",              required_argument, 0, 's'},
 	{"blacklist",         required_argument, 0, 'b'},
 	{"force-snippet",     required_argument, 0, 256}, /* Long option only, no short form */
+	{"snippet-scan",      required_argument, 0, 'S'},
 	{"component",         required_argument, 0, 'c'},
 	{"key",               required_argument, 0, 'k'},
+	{"purl",              required_argument, 0, 'P'},
+	{"url-hash",          required_argument, 0, 'C'},
 	{"attribution",       required_argument, 0, 'a'},
 	{"flags",             required_argument, 0, 'F'},
 	{"license",           required_argument, 0, 'l'},
@@ -292,6 +296,7 @@ static struct option long_options[] = {
 	{"min-snippet-lines",   required_argument, 0, 259}, /* Long option only */
 	{"ignore-file-ext",   no_argument,		 0, 260}, /* Long option only */
 	{"range-tolerance",   required_argument, 0, 261}, /* Long option only */
+	{"max-file-content-size", required_argument, 0, 262}, /* Long option only */
 	{"wfp",               no_argument,       0, 'w'},
 	{"test",              no_argument,       0, 't'},
 	{"version",           no_argument,       0, 'v'},
@@ -334,18 +339,8 @@ int main(int argc, char **argv)
 	bool invalid_argument = false;
 	char * ldb_db_name = NULL;
 
-	while ((option = getopt_long(argc, argv, ":r:T:s:b:c:k:a:F:l:n:M:N:wtLvhdqH", long_options, &option_index)) != -1)
+	while ((option = getopt_long(argc, argv, ":r:T:s:b:c:k:a:F:l:n:M:N:P:C:S:wtLvhdqH", long_options, &option_index)) != -1)
 	{
-		/* Check valid alpha is entered */
-		if (optarg)
-		{
-			if ((strlen(optarg) > MAX_ARGLN))
-			{
-				invalid_argument = true;
-				break;
-			}
-		}
-
 		switch (option)
 		{
 			case 's':
@@ -371,6 +366,16 @@ int main(int argc, char **argv)
 				exit(EXIT_SUCCESS);
 				break;
 
+			case 'P':
+				initialize_ldb_tables(ldb_db_name);
+				exit(purl_scan(optarg));
+				break;
+
+			case 'C':
+				initialize_ldb_tables(ldb_db_name);
+				exit(component_scan(optarg));
+				break;
+
 			case 'a':
 				if (declared_components) 
 				{
@@ -414,6 +419,12 @@ int main(int argc, char **argv)
 			case 256: /* --force-snippet (long option only) */
 				force_snippet_scan = true;
 				break;
+			case 'S':
+				initialize_ldb_tables(ldb_db_name);
+				if (optarg && optarg[0] == '-' && optarg[1] == '\0')
+					exit(snippet_scan_stdin());
+				exit(snippet_scan_string(optarg));
+				break;
 			case 't':
 				initialize_ldb_tables(ldb_db_name);
 				scan_benchmark();
@@ -480,6 +491,11 @@ int main(int argc, char **argv)
 				scanlog("Range tolerance set to %d\n", scan_range_tolerance);
 				break;
 
+			case 262: /* --max-file-content-size (value in MB) */
+				max_file_content_size = strtoull(optarg, NULL, 10) * 1024 * 1024;
+				scanlog("Max file content size set to %lu MB\n", (unsigned long) (max_file_content_size / (1024 * 1024)));
+				break;
+
 			case 'H':
 				if (hpsm_lib_load())
 					hpsm_enabled = true;
diff --git a/src/match.c b/src/match.c
index c20d927..6ae56c7 100644
--- a/src/match.c
+++ b/src/match.c
@@ -301,26 +301,19 @@ int compare_file_extension(component_data_t *a, component_data_t *b)
 	char *ext_a = extension(a->file);
 	char *ext_b = extension(b->file);
 
-	if (!ext_a && ext_b)
-		return 1;
-
-	if (ext_a && !ext_b)
-		return -1;
-	
-	if (!ext_a && !ext_b)
+	/* A candidate is preferred only when its extension actually matches the
+	   scanned file's extension. The mere presence/absence of an extension is
+	   not a valid criterion: doing so would prefer any extended path over a
+	   plain filename even when neither matches the scanned file. */
+	bool match_a = ext_a && !strcmp(ext_a, ext_file);
+	bool match_b = ext_b && !strcmp(ext_b, ext_file);
+
+	if (match_a == match_b)
 		return 0;
-
-	int result_a = strcmp(ext_a, ext_file);
-	int result_b = strcmp(ext_b, ext_file);
-	
-	if (result_a == result_b)
-		return 0;
-	else if (!result_a)
+	else if (match_a)
 		return -1;
-	else  if (!result_b)
+	else
 		return 1;
-
-	return 0;
 }
 
 /**
diff --git a/src/match_list.c b/src/match_list.c
index 134e429..596a031 100644
--- a/src/match_list.c
+++ b/src/match_list.c
@@ -215,11 +215,16 @@ void match_list_tolerance_set(float in)
 {
     if (in > 99)
         in = 99;
-    
+
     match_list_tolerance = 100.0-in;
     scanlog("setting match list tolerance to %.1f\n", match_list_tolerance);
 }
 
+float match_list_tolerance_get(void)
+{
+    return 100.0 - match_list_tolerance;
+}
+
 bool tolerance_eval(int a, int b)
 {
     int relative_error = (abs(a - b) * 100) / ((a + b) / 2);
diff --git a/src/mz.c b/src/mz.c
index c3ab15a..3d0404c 100644
--- a/src/mz.c
+++ b/src/mz.c
@@ -36,6 +36,7 @@
 #include "decrypt.h"
 #include <ldb.h>
 #include "debug.h"
+#include "limits.h"
 #include <unistd.h>
 
 /**
@@ -100,6 +101,16 @@ void mz_get_key(struct ldb_table kb, char *key)
 			/* Decompress */
 			MZ_DEFLATE(&job);
 
+			/* Reject files whose content exceeds the configured maximum size */
+			if (job.data_ln > max_file_content_size)
+			{
+				fprintf(stderr, "File content size (%.2f MB) exceeds the maximum allowed (%lu MB). Use --max-file-content-size to change the limit.\n", (double) job.data_ln / (1024 * 1024), (unsigned long) (max_file_content_size / (1024 * 1024)));
+				free(job.data);
+				free(job.key);
+				free(job.mz);
+				exit(EXIT_FAILURE);
+			}
+
 			job.data[job.data_ln] = 0;
 			printf("%s", job.data);
 			return;
diff --git a/src/purl_scan.c b/src/purl_scan.c
new file mode 100644
index 0000000..2c12442
--- /dev/null
+++ b/src/purl_scan.c
@@ -0,0 +1,706 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * src/purl_scan.c
+ *
+ * SCANOSS Inventory Scanner
+ *
+ * Copyright (C) 2018-2024 SCANOSS.COM
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+
+/**
+  * @file purl_scan.c
+  * @brief Resolve the purls and url hashes related to a given file MD5.
+  *
+  * This implements the "-P <file_md5>" command: given a file MD5 it walks the
+  * KB (url and file tables) and reports, in JSON, the unique purls associated
+  * with that file, the url hashes (url_id) where the file was seen for each
+  * purl and the best (lowest) KB rank found. It does not use the best-match
+  * selection logic.
+  */
+
+#include <libgen.h>
+#include "scanoss.h"
+#include "debug.h"
+#include "decrypt.h"
+#include "parse.h"
+#include "util.h"
+#include "limits.h"
+#include "component.h"
+#include "report.h"
+#include "url.h"
+#include "license.h"
+#include "health.h"
+#include "dependency.h"
+#include "copyright.h"
+#include "vulnerability.h"
+#include "scan.h"
+#include "match.h"
+#include "match_list.h"
+#include "snippets.h"
+#include "purl_scan.h"
+
+/* Snippet-scan configuration globals (defined in main.c) */
+extern int scan_max_snippets;
+extern int scan_max_components;
+extern bool scan_adjust_tolerance;
+extern int scan_ranking_threshold;
+extern int scan_min_match_hits;
+extern int scan_min_match_lines;
+extern int scan_range_tolerance;
+extern bool scan_honor_file_extension;
+
+/* Single purl entry: the purl, the set of url hashes seen and the best rank */
+typedef struct purl_entry_t
+{
+	char *purl;
+	char **url_hashes;
+	int n_url_hashes;
+	int url_hashes_cap;
+	int rank;
+	struct purl_entry_t *next;
+} purl_entry_t;
+
+/* Context passed through the ldb recordset handlers */
+typedef struct purl_scan_ctx_t
+{
+	purl_entry_t *head;
+	int count;
+	uint32_t files_processed;
+} purl_scan_ctx_t;
+
+/* MD5 of the empty string, used as a sentinel in the file table */
+static const uint8_t empty_string_md5[MD5_LEN] =
+	{0xd4,0x1d,0x8c,0xd9,0x8f,0x00,0xb2,0x04,0xe9,0x80,0x09,0x98,0xec,0xf8,0x42,0x7e};
+
+/**
+ * @brief Find an existing purl entry or create a new one.
+ */
+static purl_entry_t * purl_entry_get(purl_scan_ctx_t *ctx, const char *purl)
+{
+	for (purl_entry_t *e = ctx->head; e; e = e->next)
+		if (!strcmp(e->purl, purl))
+			return e;
+
+	purl_entry_t *e = calloc(1, sizeof(*e));
+	e->purl = strdup(purl);
+	e->rank = COMPONENT_DEFAULT_RANK;
+	e->next = ctx->head;
+	ctx->head = e;
+	ctx->count++;
+	return e;
+}
+
+/**
+ * @brief Add a url hash to a purl entry, ignoring duplicates and empty values.
+ */
+static void purl_entry_add_url_hash(purl_entry_t *e, const char *url_hash)
+{
+	if (!url_hash || !*url_hash)
+		return;
+
+	for (int i = 0; i < e->n_url_hashes; i++)
+		if (!strcmp(e->url_hashes[i], url_hash))
+			return;
+
+	if (e->n_url_hashes >= e->url_hashes_cap)
+	{
+		e->url_hashes_cap = e->url_hashes_cap ? e->url_hashes_cap * 2 : 8;
+		e->url_hashes = realloc(e->url_hashes, e->url_hashes_cap * sizeof(char *));
+	}
+	e->url_hashes[e->n_url_hashes++] = strdup(url_hash);
+}
+
+/* qsort comparators for deterministic output */
+static int url_hash_cmp(const void *a, const void *b)
+{
+	return strcmp(*(const char **) a, *(const char **) b);
+}
+
+static int purl_entry_ptr_cmp(const void *a, const void *b)
+{
+	const purl_entry_t *ea = *(const purl_entry_t **) a;
+	const purl_entry_t *eb = *(const purl_entry_t **) b;
+	if (ea->rank != eb->rank)
+		return ea->rank - eb->rank;
+	return strcmp(ea->purl, eb->purl);
+}
+
+/**
+ * @brief url table recordset handler. Extracts the purl and rank from the url
+ * record and stores the url_id (= url_hash) under the matching purl entry.
+ */
+static bool handle_url_for_purls(uint8_t *key, uint8_t *subkey, int subkey_ln, uint8_t *raw_data, uint32_t datalen, int iteration, void *ptr)
+{
+	if (!datalen)
+		return false;
+
+	char *data = decrypt_data(raw_data, datalen, oss_url, key, subkey);
+	if (!data)
+		return false;
+
+	char purl[MAX_FILE_PATH];
+	char rank[MAX_FIELD_LN];
+	extract_csv(purl, data, 6, sizeof(purl));
+	extract_csv(rank, data, -1, sizeof(rank));
+	free(data);
+
+	if (!*purl)
+		return false;
+
+	purl_scan_ctx_t *ctx = (purl_scan_ctx_t *) ptr;
+	purl_entry_t *e = purl_entry_get(ctx, purl);
+
+	char url_hash_hex[MD5_LEN * 2 + 1];
+	ldb_bin_to_hex(key, MD5_LEN, url_hash_hex);
+	purl_entry_add_url_hash(e, url_hash_hex);
+
+	if (*rank)
+	{
+		int r = atoi(rank);
+		if (r > 0 && r < e->rank)
+			e->rank = r;
+	}
+
+	return false;
+}
+
+/**
+ * @brief file table recordset handler. Each record holds a 16 byte url id
+ * followed by the (encrypted) path; for every url id we query the url table.
+ */
+static bool handle_file_for_purls(uint8_t *key, uint8_t *subkey, int subkey_ln, uint8_t *raw_data, uint32_t datalen, int iteration, void *ptr)
+{
+	/* Bound the amount of files processed (same limit used during matching) */
+	if (iteration >= fetch_max_files)
+	{
+		scanlog("purl_scan: max file iterations reached: %d\n", fetch_max_files);
+		return true;
+	}
+
+	if (datalen < MD5_LEN)
+		return false;
+
+	/* Skip records pointing to the empty string md5 */
+	if (!memcmp(raw_data, empty_string_md5, MD5_LEN))
+		return false;
+
+	uint8_t url_id[MD5_LEN];
+	memcpy(url_id, raw_data, MD5_LEN);
+
+	ldb_fetch_recordset(NULL, oss_url, url_id, false, handle_url_for_purls, ptr);
+
+	((purl_scan_ctx_t *) ptr)->files_processed++;
+	return false;
+}
+
+int purl_scan(char *file_md5_hex)
+{
+	if (!file_md5_hex || !valid_md5(file_md5_hex))
+	{
+		fprintf(stdout, "Invalid file MD5: %s\n", file_md5_hex ? file_md5_hex : "(null)");
+		return EXIT_FAILURE;
+	}
+
+	uint8_t file_md5[MD5_LEN];
+	ldb_hex_to_bin(file_md5_hex, MD5_LEN * 2, file_md5);
+
+	purl_scan_ctx_t ctx;
+	memset(&ctx, 0, sizeof(ctx));
+
+	ldb_fetch_recordset(NULL, oss_file, file_md5, false, handle_file_for_purls, &ctx);
+
+	scanlog("purl_scan: %d unique purls found across %u files for %s\n", ctx.count, ctx.files_processed, file_md5_hex);
+
+	/* Sort entries (rank asc, then purl) and url hashes for a deterministic output */
+	purl_entry_t **sorted = NULL;
+	if (ctx.count)
+	{
+		sorted = malloc(ctx.count * sizeof(purl_entry_t *));
+		int i = 0;
+		for (purl_entry_t *e = ctx.head; e; e = e->next)
+		{
+			if (e->n_url_hashes > 1)
+				qsort(e->url_hashes, e->n_url_hashes, sizeof(char *), url_hash_cmp);
+			sorted[i++] = e;
+		}
+		qsort(sorted, ctx.count, sizeof(purl_entry_t *), purl_entry_ptr_cmp);
+	}
+
+	if (!quiet)
+	{
+		printf("{\"file_md5\": \"%s\", \"matches\": [", file_md5_hex);
+		for (int i = 0; i < ctx.count; i++)
+		{
+			purl_entry_t *e = sorted[i];
+			if (i)
+				printf(", ");
+			printf("{\"purl\": \"%s\", \"url_hashes\": [", e->purl);
+			for (int v = 0; v < e->n_url_hashes; v++)
+			{
+				if (v)
+					printf(", ");
+				printf("\"%s\"", e->url_hashes[v]);
+			}
+			printf("], \"rank\": %d}", e->rank);
+		}
+		printf("]}\n");
+		fflush(stdout);
+	}
+
+	/* Cleanup */
+	free(sorted);
+	purl_entry_t *e = ctx.head;
+	while (e)
+	{
+		purl_entry_t *next = e->next;
+		for (int v = 0; v < e->n_url_hashes; v++)
+			free(e->url_hashes[v]);
+		free(e->url_hashes);
+		free(e->purl);
+		free(e);
+		e = next;
+	}
+
+	return EXIT_SUCCESS;
+}
+
+/**
+ * @brief Resolve and report a single component identified by its url hash.
+ * Emits one JSON object of the form {"url_hash": "...", "component": {...}}.
+ */
+static void component_scan_one(const char *url_hash_hex)
+{
+	uint8_t url_hash[MD5_LEN];
+	ldb_hex_to_bin((char *) url_hash_hex, MD5_LEN * 2, url_hash);
+
+	scanlog("component_scan_one: looking up url_hash=%s (declared_components=%p)\n",
+		url_hash_hex, (void *) declared_components);
+
+	component_data_t *component = NULL;
+	int records = ldb_fetch_recordset(NULL, oss_url, url_hash, false, get_oldest_url, &component);
+	scanlog("component_scan_one: url_hash=%s -> %d records, selected purl=%s identified=%d\n",
+		url_hash_hex, records,
+		(component && component->purls[0]) ? component->purls[0] : "(none)",
+		component ? component->identified : -99);
+
+	printf("{\"url_hash\": \"%s\", \"component\": ", url_hash_hex);
+	if (!component)
+	{
+		printf("null");
+	}
+	else
+	{
+		/* Fill missing purl md5s (lazy step also done by print_json_component) */
+		for (int i = 0; i < MAX_PURLS; i++)
+		{
+			if (component->purls[i] && !component->purls_md5[i])
+			{
+				component->purls_md5[i] = malloc(MD5_LEN);
+				MD5((uint8_t *)component->purls[i], strlen(component->purls[i]), component->purls_md5[i]);
+			}
+		}
+
+		/* print_licenses (and other enrichers) look up comp->file_md5_ref
+		   unconditionally. We don't have a matched file in this mode, so
+		   point it at the url_md5 to avoid a NULL deref; the lookup will
+		   simply return no extra records. */
+		if (!component->file_md5_ref)
+			component->file_md5_ref = component->url_md5;
+
+		fetch_related_purls(component);
+		fill_main_url(component);
+
+		printf("{");
+		print_purl_array(component);
+
+		printf("\"vendor\": \"%s\",", component->vendor ? component->vendor : "");
+		printf("\"component\": \"%s\",", component->component ? component->component : "");
+
+		char *version_clean = string_clean(component->version);
+		printf("\"version\": \"%s\",", version_clean ? version_clean : "");
+
+		char *latest_clean = string_clean(component->latest_version);
+		printf("\"latest\": \"%s\",", latest_clean ? latest_clean : "");
+
+		printf("\"url\": \"%s\",", component->main_url ? component->main_url : (component->url ? component->url : ""));
+		printf("\"release_date\": \"%s\",", component->release_date ? component->release_date : "");
+
+		/* The lookup is by url hash, so report the url basename as the file */
+		char *file_field = NULL;
+		if (component->url)
+		{
+			char *url_copy = strdup(component->url);
+			file_field = strdup(basename(url_copy));
+			free(url_copy);
+		}
+		printf("\"file\": \"%s\",", file_field ? file_field : "");
+		free(file_field);
+
+		printf("\"rank\": %d", component->rank);
+
+		if (!(engine_flags & DISABLE_LICENSES))
+		{
+			print_licenses(component);
+			if (component->license_text)
+				printf(",%s", json_remove_invalid_char(component->license_text));
+		}
+
+		if (!(engine_flags & DISABLE_HEALTH))
+		{
+			if (!component->health_text)
+				print_health(component);
+			if (component->health_text)
+				printf(",%s", json_remove_invalid_char(component->health_text));
+
+			printf(",\"url_stats\":{");
+			if (component->url_stats[0] > 0)
+			{
+				printf("\"total_files\":%d,"
+					   "\"indexed_files\":%d,"
+					   "\"source_files\":%d,"
+					   "\"ignored_files\":%d,"
+					   "\"package_size\":%d",
+					   component->url_stats[0], component->url_stats[1], component->url_stats[2],
+					   component->url_stats[3], component->url_stats[4]);
+			}
+			printf("}");
+		}
+
+		if (!(engine_flags & DISABLE_DEPENDENCIES))
+		{
+			if (!component->dependency_text)
+				print_dependencies(component);
+			if (component->dependency_text)
+				printf(",%s", json_remove_invalid_char(component->dependency_text));
+		}
+
+		if (!(engine_flags & DISABLE_COPYRIGHTS))
+		{
+			print_copyrights(component);
+			if (component->copyright_text)
+				printf(",%s", component->copyright_text);
+		}
+
+		if (!(engine_flags & DISABLE_VULNERABILITIES))
+		{
+			print_vulnerabilities(component);
+			if (component->vulnerabilities_text)
+				printf(",%s", json_remove_invalid_char(component->vulnerabilities_text));
+		}
+
+		printf("}");
+	}
+	printf("}");
+
+	if (component)
+		component_data_free(component);
+}
+
+/**
+ * @brief Resolve and report the details of one or more components identified
+ * by url hash (url_id). Accepts a single hash or a comma-separated list.
+ *
+ * Output is always an array under the "results" key, one entry per input
+ * hash: {"results": [{"url_hash": "...", "component": {...}}, ...]}.
+ * Invalid hashes are skipped with a stderr warning.
+ *
+ * @param url_hash_list comma-separated url hashes in hex (32 chars each)
+ * @return EXIT_SUCCESS if at least one valid hash was processed,
+ *         EXIT_FAILURE if input is null/empty or no hash was valid
+ */
+int component_scan(char *url_hash_list)
+{
+	if (!url_hash_list || !*url_hash_list)
+	{
+		fprintf(stdout, "Invalid url hash list: %s\n", url_hash_list ? url_hash_list : "(null)");
+		return EXIT_FAILURE;
+	}
+
+	char *input = strdup(url_hash_list);
+	int emitted = 0;
+
+	if (!quiet)
+		printf("{\"results\": [");
+
+	char *saveptr = NULL;
+	for (char *tok = strtok_r(input, ",", &saveptr); tok; tok = strtok_r(NULL, ",", &saveptr))
+	{
+		while (*tok == ' ' || *tok == '\t') tok++;
+		char *end = tok + strlen(tok);
+		while (end > tok && (end[-1] == ' ' || end[-1] == '\t' || end[-1] == '\n' || end[-1] == '\r'))
+			*--end = '\0';
+
+		if (!*tok)
+			continue;
+
+		if (!valid_md5(tok))
+		{
+			fprintf(stderr, "Invalid url hash MD5, skipping: %s\n", tok);
+			continue;
+		}
+
+		if (!quiet)
+		{
+			if (emitted)
+				printf(", ");
+			component_scan_one(tok);
+		}
+		emitted++;
+	}
+
+	if (!quiet)
+	{
+		printf("]}\n");
+		fflush(stdout);
+	}
+
+	free(input);
+	return emitted > 0 ? EXIT_SUCCESS : EXIT_FAILURE;
+}
+
+/**
+ * @brief Snippet-only scan with WFP coming from an arbitrary FILE*. Emits
+ * JSON listing the candidate file_md5s grouped by snippet region, with
+ * input/oss line ranges, filtered by the cohort tolerance set via -T.
+ */
+static int snippet_scan_stream(FILE *in)
+{
+	char *line = NULL;
+	size_t len = 0;
+	ssize_t lineln;
+
+	scan_data_t *scan = NULL;
+	char file_md5_hex[MD5_LEN * 2 + 1] = "";
+	char file_path[MAX_FILE_PATH] = "";
+	uint64_t file_size = 0;
+	bool got_file = false;
+
+	scanlog("--- SNIPPET SCAN ---\n");
+
+	while ((lineln = getline(&line, &len, in)) != -1)
+	{
+		trim(line);
+
+		if (!*line)
+			continue;
+
+		bool is_file = (memcmp(line, "file=", 5) == 0);
+		bool is_fh2  = (memcmp(line, "fh2=",  4) == 0);
+		bool is_hpsm = (memcmp(line, "hpsm=", 5) == 0);
+		bool is_bin  = (memcmp(line, "bin=",  4) == 0);
+		bool is_wfp  = (!is_file && !is_fh2 && !is_hpsm && !is_bin);
+
+		/* Snippet-only mode: ignore hpsm/bin payloads */
+		if (is_hpsm || is_bin)
+			continue;
+
+		if (is_file)
+		{
+			if (got_file)
+			{
+				fprintf(stderr, "snippet-scan: multiple file= entries received, ignoring extras\n");
+				continue;
+			}
+
+			const int tagln = 5;
+			if (strlen(line) < (size_t)(tagln + MD5_LEN * 2 + 1))
+			{
+				fprintf(stderr, "snippet-scan: malformed file= line\n");
+				free(line);
+				return EXIT_FAILURE;
+			}
+
+			char *hexmd5 = strndup(line + tagln, MD5_LEN * 2);
+			if (!hexmd5 || strlen(hexmd5) < MD5_LEN * 2 || !valid_md5(hexmd5))
+			{
+				fprintf(stderr, "snippet-scan: invalid md5 in file= line\n");
+				free(hexmd5);
+				free(line);
+				return EXIT_FAILURE;
+			}
+			strcpy(file_md5_hex, hexmd5);
+			free(hexmd5);
+
+			uint8_t *rec = (uint8_t *) strdup(line + tagln + MD5_LEN * 2 + 1);
+			char *target_path = field_n(2, (char *) rec);
+			if (!target_path)
+			{
+				fprintf(stderr, "snippet-scan: malformed file= line (missing path)\n");
+				free(rec);
+				free(line);
+				return EXIT_FAILURE;
+			}
+
+			strncpy(file_path, target_path, sizeof(file_path) - 1);
+			file_path[sizeof(file_path) - 1] = '\0';
+
+			char size_field[MAX_FIELD_LN] = "0";
+			extract_csv(size_field, (char *) rec, 1, sizeof(size_field));
+			file_size = strtoull(size_field, NULL, 10);
+
+			scan = scan_data_init(file_path,
+			                      scan_max_snippets,
+			                      scan_max_components,
+			                      scan_adjust_tolerance,
+			                      scan_ranking_threshold,
+			                      scan_min_match_hits,
+			                      scan_min_match_lines,
+			                      scan_range_tolerance,
+			                      scan_honor_file_extension);
+			scan->preload = true;
+			/* The input WFP has no reliable extension, and oss_file lookup
+			   inside snippet_extension_discard would discard valid hits. */
+			scan->snippet_honor_file_extension = false;
+			/* scan->file_size is a fixed 32-byte buffer; write the parsed
+			   numeric value, which always fits, instead of the raw field */
+			snprintf(scan->file_size, 32, "%llu", (unsigned long long) file_size);
+			ldb_hex_to_bin(file_md5_hex, MD5_LEN * 2, scan->md5);
+			strcpy(scan->source_md5, file_md5_hex);
+			free(rec);
+			got_file = true;
+			continue;
+		}
+
+		if (is_fh2 && scan && strlen(line) == MD5_LEN_HEX + 4)
+		{
+			ldb_hex_to_bin(&line[4], MD5_LEN_HEX, scan->md5_fh2);
+			scan->windows_line_endings = true;
+			continue;
+		}
+
+		if (is_wfp && scan && (scan->hash_count < MAX_HASHES_READ))
+		{
+			int line_ln = strlen(line);
+			for (int e = 0; e < line_ln; e++)
+				if (line[e] == '=' || line[e] == ',') line[e] = 0;
+
+			int line_nr = atoi(line);
+			char *hexhash = line + strlen(line) + 1;
+
+			while (*hexhash)
+			{
+				ldb_hex_to_bin(hexhash, 8, (uint8_t *) &scan->hashes[scan->hash_count]);
+				uint32_reverse((uint8_t *) &scan->hashes[scan->hash_count]);
+				scan->lines[scan->hash_count] = line_nr;
+				hexhash += strlen(hexhash) + 1;
+				scan->hash_count++;
+				if (scan->hash_count >= MAX_HASHES_READ)
+					break;
+			}
+		}
+	}
+	free(line);
+
+	if (!scan)
+	{
+		fprintf(stderr, "snippet-scan: no file= entry in WFP input\n");
+		return EXIT_FAILURE;
+	}
+
+	if (scan->hash_count == 0)
+	{
+		fprintf(stderr, "snippet-scan: no WFP hashes in WFP input\n");
+		scan_data_free(scan);
+		return EXIT_FAILURE;
+	}
+
+	scan->total_lines = scan->lines[scan->hash_count - 1];
+	scan->timer = microseconds_now();
+	scan->match_type = ldb_scan_snippets(scan);
+
+	if (scan->match_type != MATCH_NONE)
+		biggest_snippet(scan);
+
+	if (!quiet)
+	{
+		char *escaped_path = scape_slashes(file_path);
+		printf("{");
+		printf("\"file_md5\":\"%s\",", file_md5_hex);
+		printf("\"file_path\":\"%s\",", escaped_path ? escaped_path : "");
+		printf("\"file_size\":%llu,", (unsigned long long) file_size);
+		printf("\"total_lines\":%d,", scan->total_lines);
+		printf("\"tolerance_pct\":%.1f,", match_list_tolerance_get());
+		printf("\"snippet_groups\":[");
+
+		bool first_group = true;
+		for (int g = 0; g < scan->matches_list_array_index; g++)
+		{
+			match_list_t *list = scan->matches_list_array[g];
+			if (!list || !list->items)
+				continue;
+
+			if (!first_group)
+				printf(",");
+			first_group = false;
+
+			printf("{\"group_index\":%d,\"candidates\":[", g);
+			bool first_cand = true;
+			for (struct entry *np = list->headp.lh_first; np != NULL; np = np->entries.le_next)
+			{
+				match_data_t *m = np->match;
+				if (!m)
+					continue;
+
+				char md5_hex[MD5_LEN * 2 + 1];
+				ldb_bin_to_hex(m->file_md5, MD5_LEN, md5_hex);
+
+				if (!first_cand)
+					printf(",");
+				first_cand = false;
+
+				printf("{\"file_md5\":\"%s\",",    md5_hex);
+				printf("\"hits\":%d,",              m->hits);
+				printf("\"lines_matched\":%d,",     m->lines_matched);
+				printf("\"matched_percent\":%d,",   m->matched_percent);
+				printf("\"input_line_ranges\":\"%s\",", m->line_ranges ? m->line_ranges : "");
+				printf("\"oss_line_ranges\":\"%s\"",    m->oss_ranges ? m->oss_ranges : "");
+				printf("}");
+			}
+			printf("]}");
+		}
+		printf("]}\n");
+		fflush(stdout);
+		free(escaped_path);
+	}
+
+	scan_data_free(scan);
+	return EXIT_SUCCESS;
+}
+
+int snippet_scan_stdin(void)
+{
+	return snippet_scan_stream(stdin);
+}
+
+int snippet_scan_string(const char *wfp)
+{
+	if (!wfp || !*wfp)
+	{
+		fprintf(stderr, "snippet-scan: empty WFP argument\n");
+		return EXIT_FAILURE;
+	}
+
+	FILE *in = fmemopen((void *) wfp, strlen(wfp), "r");
+	if (!in)
+	{
+		fprintf(stderr, "snippet-scan: failed to open WFP argument as stream\n");
+		return EXIT_FAILURE;
+	}
+
+	int rc = snippet_scan_stream(in);
+	fclose(in);
+	return rc;
+}
diff --git a/src/url.c b/src/url.c
index cbb21c1..eee229f 100644
--- a/src/url.c
+++ b/src/url.c
@@ -335,10 +335,10 @@ void purl_release_date(char *purl, char *date)
 bool get_oldest_url(uint8_t *key, uint8_t *subkey, int subkey_ln, uint8_t *data, uint32_t datalen, int iteration, void *ptr)
 {
 	char * url = decrypt_data(data, datalen, oss_url, key, subkey);
-	if (!url) 
+	if (!url)
 		return false;
 
-	//scanlog("url: %s\n", url);
+	scanlog("get_oldest_url iter=%d url_record='%s'\n", iteration, url);
 
 	/* Get oldest */
 	component_data_t **comp_address = ptr;
@@ -357,6 +357,9 @@ bool get_oldest_url(uint8_t *key, uint8_t *subkey, int subkey_ln, uint8_t *data,
 		}
 		comp->identified = IDENTIFIED_NONE;
 		asset_declared(comp);
+		scanlog("get_oldest_url iter=%d purl[0]=%s identified=%d rank=%d release=%s\n",
+			iteration, comp->purls[0] ? comp->purls[0] : "(null)", comp->identified, comp->rank,
+			comp->release_date ? comp->release_date : "(null)");
 		purl_latest_version_add(comp);
 
 		if (component_rank_max > 0 && comp->rank > component_rank_max)