diff --git a/.github/workflows/test.yml b/.github/workflows/dependabot.yml similarity index 95% rename from .github/workflows/test.yml rename to .github/workflows/dependabot.yml index a5df2b5..94650a0 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/dependabot.yml @@ -1,5 +1,5 @@ --- -name: Test +name: Dependabot Auto-Merge on: pull_request: @@ -8,8 +8,8 @@ on: jobs: test: + if: github.event.pull_request.user.login == 'dependabot[bot]' && github.repository == 'HTTPArchive/tech-report-apis' runs-on: ubuntu-latest - if: github.head_ref != 'development' steps: - uses: actions/checkout@v6 - run: | @@ -19,7 +19,6 @@ jobs: dependabot: name: Dependabot auto-merge - if: github.event.pull_request.user.login == 'dependabot[bot]' && github.repository == 'HTTPArchive/tech-report-apis' runs-on: ubuntu-latest needs: test diff --git a/README.md b/README.md index 3947ac2..53f7f42 100644 --- a/README.md +++ b/README.md @@ -279,52 +279,6 @@ curl --request GET \ ] ``` -### `GET /cwv-distribution` - -Provides per-bucket CWV metric distribution histograms for technologies, optionally filtered by geo and rank. - -#### CWV Distribution Parameters - -- `technology` (required): Technology name(s) - comma-separated list, e.g. `Wix,WordPress` -- `date` (required): Crawl date in `YYYY-MM-DD` format, e.g. `2026-02-01` -- `geo` (optional): Geographic filter (defaults to `ALL`). Use a country name such as `United States of America` for country-level data. -- `rank` (optional): Numeric rank ceiling, e.g. `10000`. Omit or set to `ALL` to include all ranks. - -#### CWV Distribution Response - -```bash -curl --request GET \ - --url 'https://{{HOST}}/v1/cwv-distribution?technology=WordPress&date=2026-02-01&geo=ALL' -``` - -Returns a JSON array where each element represents one histogram bucket for a technology/client/geo combination: - -```json -[ - { - "geo": "ALL", - "client": "mobile", - "technology": "WordPress", - "loading_bucket": 0, - "inp_bucket": 0, - "cls_bucket": 0, - "lcp_origins": 12345, - "inp_origins": 23456, - "cls_origins": 34567, - "fcp_origins": 11111, - "ttfb_origins": 22222 - }, - ... -] -``` - -Bucket semantics: - -- `loading_bucket` / `lcp_bucket` / `fcp_bucket` / `ttfb_bucket`: millisecond value (0–10000 in steps of 100) -- `inp_bucket`: `loading_bucket / 4` (INP scale) -- `cls_bucket`: `loading_bucket / 2000` (CLS scale) -- `*_origins`: count of distinct origins whose p75 value equals that bucket - ### `GET /lighthouse` Provides Lighthouse scores for technologies. @@ -432,6 +386,7 @@ Returns a JSON object with the following schema: ] ``` + ### `GET /audits` Provides Lighthouse audits for technologies. @@ -495,50 +450,6 @@ Returns a JSON object with the following schema: ] ``` -### `GET /geo-breakdown` - -Provides Core Web Vitals breakdown by geography for a given technology and rank. Returns a single month snapshot of CWV data (LCP, CLS, INP, TTFB) across all geographies. - -#### Geo Breakdown Parameters - -- `technology` (optional): Technology name(s) - comma-separated list (defaults to `ALL`) -- `rank` (optional): Traffic rank segment, e.g. `top 1000`, `top 10000`. Defaults to `ALL`. -- `end` (optional): Snapshot date in `YYYY-MM-DD` format. Defaults to the latest available date. - -#### Geo Breakdown Response - -```bash -curl --request GET \ - --url 'https://{{HOST}}/v1/geo-breakdown?technology=WordPress&rank=top%2010000' -``` - -Returns a JSON array where each element represents CWV data for a technology on a given date and geographic region: - -```json -[ - { - "date": "2026-02-01", - "geo": "United States of America", - "technology": "WordPress", - "vitals": [ - { - "mobile": { - "good_number": 12345, - "tested": 56789 - }, - "desktop": { - "good_number": 6789, - "tested": 10000 - }, - "name": "lcp" - }, - ... - ] - }, - ... -] -``` - ### `GET /ranks` Lists all available ranks. diff --git a/src/benchmarks/gcs_metadata.benchmark.js b/src/benchmarks/gcs_metadata.benchmark.js new file mode 100644 index 0000000..992d75b --- /dev/null +++ b/src/benchmarks/gcs_metadata.benchmark.js @@ -0,0 +1,97 @@ +import { performance } from 'perf_hooks'; + +// Simulate GCS file mock +class MockFile { + constructor(exists) { + this._exists = exists; + } + + async exists() { + // Simulate network delay + await new Promise(resolve => setTimeout(resolve, 50)); + return [this._exists]; + } + + async getMetadata() { + // Simulate network delay + await new Promise(resolve => setTimeout(resolve, 50)); + if (!this._exists) { + const error = new Error('Not Found'); + error.code = 404; + throw error; + } + return [{ contentType: 'application/json', size: 1234 }]; + } +} + +async function runBenchmark() { + console.log('--- GCS Metadata Optimization Benchmark ---'); + console.log('Simulating network delay of 50ms per GCS request\\n'); + + const iterations = 100; + console.log(`Running ${iterations} iterations for each scenario...\\n`); + + // --- Scenario 1: File Exists --- + console.log('SCENARIO 1: File Exists'); + const existingFile = new MockFile(true); + + // Old approach (exists() + getMetadata()) + let startOldExists = performance.now(); + for (let i = 0; i < iterations; i++) { + const [exists] = await existingFile.exists(); + if (exists) { + await existingFile.getMetadata(); + } + } + let endOldExists = performance.now(); + let oldExistsTime = endOldExists - startOldExists; + + // New approach (try-catch getMetadata()) + let startNewExists = performance.now(); + for (let i = 0; i < iterations; i++) { + try { + await existingFile.getMetadata(); + } catch (error) { + if (error.code !== 404) throw error; + } + } + let endNewExists = performance.now(); + let newExistsTime = endNewExists - startNewExists; + + console.log(`Old Approach (exists + getMetadata): ${oldExistsTime.toFixed(2)}ms`); + console.log(`New Approach (getMetadata only): ${newExistsTime.toFixed(2)}ms`); + console.log(`Improvement: ${((oldExistsTime - newExistsTime) / oldExistsTime * 100).toFixed(2)}% faster\\n`); + + // --- Scenario 2: File Does Not Exist --- + console.log('SCENARIO 2: File Does Not Exist'); + const missingFile = new MockFile(false); + + // Old approach (exists() + getMetadata()) + let startOldMissing = performance.now(); + for (let i = 0; i < iterations; i++) { + const [exists] = await missingFile.exists(); + if (exists) { + await missingFile.getMetadata(); + } + } + let endOldMissing = performance.now(); + let oldMissingTime = endOldMissing - startOldMissing; + + // New approach (try-catch getMetadata()) + let startNewMissing = performance.now(); + for (let i = 0; i < iterations; i++) { + try { + await missingFile.getMetadata(); + } catch (error) { + if (error.code !== 404) throw error; + } + } + let endNewMissing = performance.now(); + let newMissingTime = endNewMissing - startNewMissing; + + console.log(`Old Approach (exists only): ${oldMissingTime.toFixed(2)}ms`); + console.log(`New Approach (getMetadata only): ${newMissingTime.toFixed(2)}ms`); + console.log(`Difference: ${((oldMissingTime - newMissingTime) / oldMissingTime * 100).toFixed(2)}% (Expected to be ~0% as both make 1 network call)\\n`); +} + +runBenchmark().catch(console.error); diff --git a/src/controllers/categoriesController.js b/src/controllers/categoriesController.js index d2dfb22..5b2ed8a 100644 --- a/src/controllers/categoriesController.js +++ b/src/controllers/categoriesController.js @@ -1,13 +1,60 @@ -import { handleControllerError, sendJSONResponse } from '../utils/controllerHelpers.js'; -import { queryCategories } from '../utils/reportService.js'; +import { firestore } from '../utils/db.js'; +import { executeQuery, validateArrayParameter } from '../utils/controllerHelpers.js'; +/** + * List categories with optional filtering and field selection + */ const listCategories = async (req, res) => { - try { - const data = await queryCategories(req.query); - sendJSONResponse(req, res, data); - } catch (error) { - handleControllerError(res, error, 'fetching categories'); - } + const queryBuilder = async (params) => { + /* + // Validate parameters + const supportedParams = ['category', 'onlyname', 'fields']; + const providedParams = Object.keys(params); + const unsupportedParams = providedParams.filter(param => !supportedParams.includes(param)); + + if (unsupportedParams.length > 0) { + const error = new Error(`Unsupported parameters: ${unsupportedParams.join(', ')}.`); + error.statusCode = 400; + throw error; + } + */ + + const isOnlyNames = params.onlyname || typeof params.onlyname === 'string'; + const hasCustomFields = params.fields && !isOnlyNames; + + let query = firestore.collection('categories').orderBy('category', 'asc'); + + // Apply category filter with validation + const categoryParam = params.category || 'ALL'; + if (categoryParam !== 'ALL') { + const categories = validateArrayParameter(categoryParam, 'category'); + if (categories.length > 0) { + query = query.where('category', 'in', categories); + } + } + + // Apply field selection + if (isOnlyNames) { + query = query.select('category'); + } else if (hasCustomFields) { + const requestedFields = params.fields.split(',').map(f => f.trim()); + query = query.select(...requestedFields); + } + + return query; + }; + + const dataProcessor = (data, params) => { + const isOnlyNames = params.onlyname || typeof params.onlyname === 'string'; + + if (isOnlyNames) { + return data.map(item => item.category); + } + + return data; + }; + + await executeQuery(req, res, 'categories', queryBuilder, dataProcessor); }; export { listCategories }; diff --git a/src/controllers/cdnController.js b/src/controllers/cdnController.js index b44576b..28e5619 100644 --- a/src/controllers/cdnController.js +++ b/src/controllers/cdnController.js @@ -53,16 +53,19 @@ export const proxyReportsFile = async (req, res, filePath) => { const bucket = storage.bucket(BUCKET_NAME); const file = bucket.file(objectPath); - // Check if file exists - const [exists] = await file.exists(); - if (!exists) { - res.statusCode = 404; - res.end(JSON.stringify({ error: 'File not found' })); - return; - } - // Get file metadata for content type and caching - const [metadata] = await file.getMetadata(); + // This implicitly checks if the file exists, saving a network request + let metadata; + try { + [metadata] = await file.getMetadata(); + } catch (error) { + if (error.code === 404) { + res.statusCode = 404; + res.end(JSON.stringify({ error: 'File not found' })); + return; + } + throw error; + } // Determine content type const contentType = metadata.contentType || getMimeType(objectPath); diff --git a/src/controllers/cwvDistributionController.js b/src/controllers/cwvDistributionController.js deleted file mode 100644 index 1995c4d..0000000 --- a/src/controllers/cwvDistributionController.js +++ /dev/null @@ -1,52 +0,0 @@ -import { queryCWVDistribution } from '../utils/reportService.js'; -import { - handleControllerError, - generateETag, - isModified, - sendValidationError -} from '../utils/controllerHelpers.js'; - -/** - * GET /v1/cwv-distribution - * - * Query parameters: - * technology (required) - comma-separated list of technologies, e.g. "Wix,WordPress" - * date (required) - crawl date in YYYY-MM-DD format, e.g. "2026-02-01" - * rank (optional) - numeric rank ceiling, e.g. "10000". Omit or set to "ALL" to include all ranks. - * geo (optional) - geographic filter, e.g. "United States of America". Defaults to "ALL". - */ -export const listCWVDistributionData = async (req, res) => { - try { - const params = req.query; - - const errors = []; - if (!params.technology) errors.push(['technology', 'missing technology parameter']); - if (!params.date) errors.push(['date', 'missing date parameter']); - if (errors.length > 0) { - sendValidationError(res, errors); - return; - } - - const rows = await queryCWVDistribution({ - technology: params.technology, - date: params.date, - geo: params.geo || 'ALL', - rank: params.rank && params.rank !== 'ALL' ? params.rank : null, - }); - - const jsonData = JSON.stringify(rows); - const etag = generateETag(jsonData); - res.setHeader('ETag', `"${etag}"`); - if (!isModified(req, etag)) { - res.statusCode = 304; - res.end(); - return; - } - - res.statusCode = 200; - res.end(jsonData); - - } catch (error) { - handleControllerError(res, error, 'fetching CWV distribution data'); - } -}; diff --git a/src/controllers/geosController.js b/src/controllers/geosController.js index 839aaaa..c670aea 100644 --- a/src/controllers/geosController.js +++ b/src/controllers/geosController.js @@ -1,13 +1,17 @@ -import { handleControllerError, sendJSONResponse } from '../utils/controllerHelpers.js'; -import { queryGeos } from '../utils/reportService.js'; +import { firestore } from '../utils/db.js'; +import { executeQuery } from '../utils/controllerHelpers.js'; +/** + * List all geographic locations from database + */ const listGeos = async (req, res) => { - try { - const data = await queryGeos(); - sendJSONResponse(req, res, data); - } catch (error) { - handleControllerError(res, error, 'fetching geos'); - } + const queryBuilder = async () => { + return firestore.collection('geos').orderBy('mobile_origins', 'desc').select('geo'); + }; + + await executeQuery(req, res, 'geos', queryBuilder); }; -export { listGeos }; +export { + listGeos +}; diff --git a/src/controllers/ranksController.js b/src/controllers/ranksController.js index 98d9f4f..5fd4aae 100644 --- a/src/controllers/ranksController.js +++ b/src/controllers/ranksController.js @@ -1,13 +1,17 @@ -import { handleControllerError, sendJSONResponse } from '../utils/controllerHelpers.js'; -import { queryRanks } from '../utils/reportService.js'; +import { firestore } from '../utils/db.js'; +import { executeQuery } from '../utils/controllerHelpers.js'; +/** + * List all rank options from database + */ const listRanks = async (req, res) => { - try { - const data = await queryRanks(); - sendJSONResponse(req, res, data); - } catch (error) { - handleControllerError(res, error, 'fetching ranks'); - } + const queryBuilder = async () => { + return firestore.collection('ranks').orderBy('mobile_origins', 'desc').select('rank'); + }; + + await executeQuery(req, res, 'ranks', queryBuilder); }; -export { listRanks }; +export { + listRanks +}; diff --git a/src/controllers/reportController.js b/src/controllers/reportController.js index 25e0969..71ea30a 100644 --- a/src/controllers/reportController.js +++ b/src/controllers/reportController.js @@ -1,17 +1,149 @@ -import { handleControllerError, sendJSONResponse } from '../utils/controllerHelpers.js'; -import { queryReport } from '../utils/reportService.js'; +import { firestoreOld } from '../utils/db.js'; +const firestore = firestoreOld; + +import { + REQUIRED_PARAMS, + validateRequiredParams, + sendValidationError, + getLatestDate, + handleControllerError, + validateArrayParameter, + generateETag, + isModified +} from '../utils/controllerHelpers.js'; + +/** + * Configuration for different report types + */ +const REPORT_CONFIGS = { + adoption: { + table: 'adoption', + dataField: 'adoption' + }, + pageWeight: { + table: 'page_weight', + dataField: 'pageWeight' // TODO: change to page_weight once migrated to new Firestore DB + }, + lighthouse: { + table: 'lighthouse', + dataField: 'lighthouse' + }, + cwv: { + table: 'core_web_vitals', + dataField: 'vitals' + }, + audits: { + table: 'audits', + dataField: 'audits' + } +}; + +/** + * Generic report data controller factory + * Creates controllers for adoption, pageWeight, lighthouse, and cwv data. + * Pass { crossGeo: true } to get a cross-geography snapshot (omits geo filter, + * includes geo in projection, returns a single month of data). + */ +const createReportController = (reportType, { crossGeo = false } = {}) => { + const config = REPORT_CONFIGS[reportType]; + if (!config) { + throw new Error(`Unknown report type: ${reportType}`); + } -const createReportController = (reportType, defaults = {}) => { return async (req, res) => { try { - const data = await queryReport(reportType, { ...defaults, ...req.query }); - sendJSONResponse(req, res, data); + const params = req.query; + + /* + // Validate supported parameters + const supportedParams = ['technology', 'geo', 'rank', 'start', 'end']; + const providedParams = Object.keys(params); + const unsupportedParams = providedParams.filter(param => !supportedParams.includes(param)); + + if (unsupportedParams.length > 0) { + const error = new Error(`Unsupported parameters: ${unsupportedParams.join(', ')}.`); + error.statusCode = 400; + throw error; + } + */ + + // Validate required parameters using shared utility + const errors = validateRequiredParams(params, []); + + if (errors) { + sendValidationError(res, errors); + return; + } + + // Default technology, geo, and rank to 'ALL' if missing or empty + const technologyParam = params.technology || 'ALL'; + const geoParam = params.geo || 'ALL'; + const rankParam = params.rank || 'ALL'; + + // Validate and process technology array + const techArray = validateArrayParameter(technologyParam, 'technology'); + + // Build Firestore query + let query = firestore.collection(config.table); + + query = query.where('rank', '==', rankParam); + query = query.where('technology', 'in', techArray); + + // Apply version filter with special handling for 'ALL' case + if (params.version && techArray.length === 1) { + //query = query.where('version', '==', params.version); // TODO: Uncomment when migrating to a new data schema + } else { + //query = query.where('version', '==', 'ALL'); + } + + if (crossGeo) { + // Cross-geo: single-month snapshot, all geographies included. + // Use 'end' param if provided, otherwise default to latest available date. + const snapshotDate = params.end || await getLatestDate(firestore, config.table); + query = query.where('date', '==', snapshotDate); + query = query.select('date', 'technology', 'geo', config.dataField); + } else { + // Normal time-series: filter by geo, apply date range, no geo in projection. + query = query.where('geo', '==', geoParam); + + // Handle 'latest' date substitution + let startDate = params.start; + if (startDate === 'latest') { + startDate = await getLatestDate(firestore, config.table); + } + + if (startDate) query = query.where('date', '>=', startDate); + if (params.end) query = query.where('date', '<=', params.end); + + query = query.select('date', 'technology', config.dataField); + } + + // Execute query + const snapshot = await query.get(); + const data = []; + snapshot.forEach(doc => { + data.push(doc.data()); + }); + + // Send response with ETag support + const jsonData = JSON.stringify(data); + const etag = generateETag(jsonData); + res.setHeader('ETag', `"${etag}"`); + if (!isModified(req, etag)) { + res.statusCode = 304; + res.end(); + return; + } + res.statusCode = 200; + res.end(jsonData); + } catch (error) { handleControllerError(res, error, `fetching ${reportType} data`); } }; }; +// Export individual controller functions export const listAuditsData = createReportController('audits'); export const listAdoptionData = createReportController('adoption'); export const listCWVTechData = createReportController('cwv'); diff --git a/src/controllers/technologiesController.js b/src/controllers/technologiesController.js index b991160..85c443e 100644 --- a/src/controllers/technologiesController.js +++ b/src/controllers/technologiesController.js @@ -1,13 +1,75 @@ -import { handleControllerError, sendJSONResponse } from '../utils/controllerHelpers.js'; -import { queryTechnologies } from '../utils/reportService.js'; +import { firestore } from '../utils/db.js'; +import { executeQuery, validateTechnologyArray, validateArrayParameter, FIRESTORE_IN_LIMIT } from '../utils/controllerHelpers.js'; +/** + * List technologies with optional filtering and field selection + */ const listTechnologies = async (req, res) => { - try { - const data = await queryTechnologies(req.query); - sendJSONResponse(req, res, data); - } catch (error) { - handleControllerError(res, error, 'fetching technologies'); - } + const queryBuilder = async (params) => { + /* + // Validate parameters + const supportedParams = ['technology', 'category', 'onlyname', 'fields']; + const providedParams = Object.keys(params); + const unsupportedParams = providedParams.filter(param => !supportedParams.includes(param)); + + if (unsupportedParams.length > 0) { + const error = new Error(`Unsupported parameters: ${unsupportedParams.join(', ')}.`); + error.statusCode = 400; + throw error; + } + */ + + const isOnlyNames = params.onlyname || typeof params.onlyname === 'string'; + const hasCustomFields = params.fields && !isOnlyNames; + + let query = firestore.collection('technologies').orderBy('technology', 'asc'); + + // Apply technology filter with validation + const technologyParam = params.technology || 'ALL'; + if (technologyParam !== 'ALL') { + const technologies = validateTechnologyArray(technologyParam); + if (technologies === null) { + throw new Error(`Too many technologies specified. Maximum ${FIRESTORE_IN_LIMIT} allowed.`); + } + if (technologies.length > 0) { + query = query.where('technology', 'in', technologies); + } + } + + // Apply category filter with validation + if (params.category) { + const categories = validateArrayParameter(params.category, 'category'); + if (categories.length > 0) { + query = query.where('category_obj', 'array-contains-any', categories); + } + } + + // Apply field selection + if (isOnlyNames) { + query = query.select('technology'); + } else if (hasCustomFields) { + const requestedFields = params.fields.split(',').map(f => f.trim()); + query = query.select(...requestedFields); + } else { + query = query.select('technology', 'category', 'description', 'icon', 'origins'); + } + + return query; + }; + + const dataProcessor = (data, params) => { + const isOnlyNames = params.onlyname || typeof params.onlyname === 'string'; + + if (isOnlyNames) { + return data.map(item => item.technology); + } + + return data; + }; + + await executeQuery(req, res, 'technologies', queryBuilder, dataProcessor); }; -export { listTechnologies }; +export { + listTechnologies +}; diff --git a/src/controllers/versionsController.js b/src/controllers/versionsController.js index b69fd8e..d79dfcb 100644 --- a/src/controllers/versionsController.js +++ b/src/controllers/versionsController.js @@ -1,13 +1,55 @@ -import { handleControllerError, sendJSONResponse } from '../utils/controllerHelpers.js'; -import { queryVersions } from '../utils/reportService.js'; +import { firestore } from '../utils/db.js'; +import { executeQuery, validateTechnologyArray, FIRESTORE_IN_LIMIT } from '../utils/controllerHelpers.js'; +/** + * List versions with optional technology filtering + */ const listVersions = async (req, res) => { - try { - const data = await queryVersions(req.query); - sendJSONResponse(req, res, data); - } catch (error) { - handleControllerError(res, error, 'fetching versions'); - } + const queryBuilder = async (params) => { + /* + // Validate parameters + const supportedParams = ['version', 'technology', 'category', 'onlyname', 'fields']; + const providedParams = Object.keys(params); + const unsupportedParams = providedParams.filter(param => !supportedParams.includes(param)); + + if (unsupportedParams.length > 0) { + const error = new Error(`Unsupported parameters: ${unsupportedParams.join(', ')}.`); + error.statusCode = 400; + throw error; + } + */ + + let query = firestore.collection('versions'); + + // Apply technology filter with validation + const technologyParam = params.technology || 'ALL'; + if (technologyParam !== 'ALL') { + const technologies = validateTechnologyArray(technologyParam); + if (technologies === null) { + throw new Error(`Too many technologies specified. Maximum ${FIRESTORE_IN_LIMIT} allowed.`); + } + if (technologies.length > 0) { + query = query.where('technology', 'in', technologies); + } + } + + // Apply version filter + if (params.version) { + query = query.where('version', '==', params.version); + } + + // Apply field selection + if (params.fields) { + const requestedFields = params.fields.split(',').map(f => f.trim()); + query = query.select(...requestedFields); + } + + return query; + }; + + await executeQuery(req, res, 'versions', queryBuilder); }; -export { listVersions }; +export { + listVersions +}; diff --git a/src/index.js b/src/index.js index 81f8f13..ec23e22 100644 --- a/src/index.js +++ b/src/index.js @@ -1,46 +1,59 @@ import functions from '@google-cloud/functions-framework'; -import { sendJSONResponse } from './utils/controllerHelpers.js'; +import { sendJSONResponse, isModified } from './utils/controllerHelpers.js'; -const CONTROLLER_MODULES = { - technologies: './controllers/technologiesController.js', - categories: './controllers/categoriesController.js', - adoption: './controllers/reportController.js', - cwvtech: './controllers/reportController.js', - lighthouse: './controllers/reportController.js', - pageWeight: './controllers/reportController.js', - audits: './controllers/reportController.js', - geoBreakdown: './controllers/reportController.js', - ranks: './controllers/ranksController.js', - geos: './controllers/geosController.js', - versions: './controllers/versionsController.js', - static: './controllers/cdnController.js', - cwvDistribution: './controllers/cwvDistributionController.js', +// Dynamic imports for better performance - only load when needed +const controllers = { + technologies: null, + categories: null, + adoption: null, + cwvtech: null, + lighthouse: null, + pageWeight: null, + audits: null, + ranks: null, + geos: null, + versions: null, + geoBreakdown: null, + static: null }; -const controllers = {}; - +// Helper function to dynamically import controllers const getController = async (name) => { if (!controllers[name]) { - controllers[name] = await import(CONTROLLER_MODULES[name]); + switch (name) { + case 'technologies': + controllers[name] = await import('./controllers/technologiesController.js'); + break; + case 'categories': + controllers[name] = await import('./controllers/categoriesController.js'); + break; + case 'adoption': + case 'cwvtech': + case 'lighthouse': + case 'pageWeight': + case 'audits': + controllers[name] = await import('./controllers/reportController.js'); + break; + case 'ranks': + controllers[name] = await import('./controllers/ranksController.js'); + break; + case 'geos': + controllers[name] = await import('./controllers/geosController.js'); + break; + case 'versions': + controllers[name] = await import('./controllers/versionsController.js'); + break; + case 'geoBreakdown': + controllers[name] = await import('./controllers/reportController.js'); + break; + case 'static': + controllers[name] = await import('./controllers/cdnController.js'); + break; + } } return controllers[name]; }; -const V1_ROUTES = { - '/v1/technologies': ['technologies', 'listTechnologies'], - '/v1/categories': ['categories', 'listCategories'], - '/v1/adoption': ['adoption', 'listAdoptionData'], - '/v1/cwv': ['cwvtech', 'listCWVTechData'], - '/v1/lighthouse': ['lighthouse', 'listLighthouseData'], - '/v1/page-weight': ['pageWeight', 'listPageWeightData'], - '/v1/audits': ['audits', 'listAuditsData'], - '/v1/ranks': ['ranks', 'listRanks'], - '/v1/geos': ['geos', 'listGeos'], - '/v1/versions': ['versions', 'listVersions'], - '/v1/geo-breakdown': ['geoBreakdown', 'listGeoBreakdownData'], - '/v1/cwv-distribution': ['cwvDistribution', 'listCWVDistributionData'] -}; - // Helper function to set CORS headers const setCORSHeaders = (res) => { res.setHeader('Access-Control-Allow-Origin', '*'); @@ -63,36 +76,6 @@ const setCommonHeaders = (res) => { // Route handler function const handleRequest = async (req, res) => { try { - // Parse URL path first so we can route /mcp before setting common headers - const pathname = req.path || req.url.split('?')[0]; - - // MCP endpoint — handled before common headers; transport owns the response - if (pathname === '/mcp') { - setCORSHeaders(res); - res.setHeader('Access-Control-Allow-Methods', 'GET, POST, DELETE, OPTIONS'); - if (req.method === 'OPTIONS') { - res.statusCode = 204; - res.end(); - return; - } - // HEAD is used by health checks / probes — respond quickly without delegating to MCP transport - if (req.method === 'HEAD') { - res.statusCode = 200; - res.end(); - return; - } - // Only GET, POST, DELETE are valid MCP methods; reject everything else cleanly - if (!['GET', 'POST', 'DELETE'].includes(req.method)) { - res.statusCode = 405; - res.setHeader('Allow', 'GET, POST, DELETE, OPTIONS, HEAD'); - res.end(); - return; - } - const { handleMcp } = await import('./mcpHandler.js'); - await handleMcp(req, res); - return; - } - setCommonHeaders(res); // Handle OPTIONS requests for CORS preflight @@ -102,13 +85,49 @@ const handleRequest = async (req, res) => { return; } + // Parse URL path - robustly handle Express (req.path) or native Node (req.url) + const pathname = req.path || req.url.split('?')[0]; + + // Route handling if (pathname === '/' && req.method === 'GET') { - sendJSONResponse(req, res, { status: 'ok' }); - } else if (req.method === 'GET' && V1_ROUTES[pathname]) { - const [controllerKey, handlerName] = V1_ROUTES[pathname]; - const controller = await getController(controllerKey); - await controller[handlerName](req, res); + // Health check endpoint + const data = { status: 'ok' }; + sendJSONResponse(res, data); + } else if (pathname === '/v1/technologies' && req.method === 'GET') { + const { listTechnologies } = await getController('technologies'); + await listTechnologies(req, res); + } else if (pathname === '/v1/categories' && req.method === 'GET') { + const { listCategories } = await getController('categories'); + await listCategories(req, res); + } else if (pathname === '/v1/adoption' && req.method === 'GET') { + const { listAdoptionData } = await getController('adoption'); + await listAdoptionData(req, res); + } else if (pathname === '/v1/cwv' && req.method === 'GET') { + const { listCWVTechData } = await getController('cwvtech'); + await listCWVTechData(req, res); + } else if (pathname === '/v1/lighthouse' && req.method === 'GET') { + const { listLighthouseData } = await getController('lighthouse'); + await listLighthouseData(req, res); + } else if (pathname === '/v1/page-weight' && req.method === 'GET') { + const { listPageWeightData } = await getController('pageWeight'); + await listPageWeightData(req, res); + } else if (pathname === '/v1/audits' && req.method === 'GET') { + const { listAuditsData } = await getController('audits'); + await listAuditsData(req, res); + } else if (pathname === '/v1/ranks' && req.method === 'GET') { + const { listRanks } = await getController('ranks'); + await listRanks(req, res); + } else if (pathname === '/v1/geos' && req.method === 'GET') { + const { listGeos } = await getController('geos'); + await listGeos(req, res); + } else if (pathname === '/v1/versions' && req.method === 'GET') { + const { listVersions } = await getController('versions'); + await listVersions(req, res); + } else if (pathname === '/v1/geo-breakdown' && req.method === 'GET') { + const { listGeoBreakdownData } = await getController('geoBreakdown'); + await listGeoBreakdownData(req, res); } else if (pathname.startsWith('/v1/static/') && req.method === 'GET') { + // GCS proxy endpoint for reports files const filePath = decodeURIComponent(pathname.replace('/v1/static/', '')); if (!filePath) { res.statusCode = 400; @@ -118,6 +137,7 @@ const handleRequest = async (req, res) => { const { proxyReportsFile } = await getController('static'); await proxyReportsFile(req, res, filePath); } else { + // 404 Not Found res.statusCode = 404; res.end(JSON.stringify({ error: 'Not Found' })); } diff --git a/src/mcpHandler.js b/src/mcpHandler.js deleted file mode 100644 index 4785d53..0000000 --- a/src/mcpHandler.js +++ /dev/null @@ -1,207 +0,0 @@ -import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'; -import { StreamableHTTPServerTransport } from '@modelcontextprotocol/sdk/server/streamableHttp.js'; -import { z } from 'zod'; - -import { - queryTechnologies, - queryCategories, - queryReport, - queryRanks, - queryGeos, - queryVersions, - queryCWVDistribution, -} from './utils/reportService.js'; - -const createMcpServer = () => { - const server = new McpServer({ - name: 'tech-report', - version: '1.0.0', - }); - - server.tool( - 'search_technologies', - 'Search and filter web technologies tracked by HTTP Archive Tech Report. Returns technology metadata including categories, descriptions, and origin counts.', - { - technology: z.string().optional().describe('Comma-separated technology names to filter by (e.g. "WordPress,Drupal")'), - category: z.string().optional().describe('Comma-separated category names to filter by (e.g. "CMS,CDN")'), - sort: z.enum(['name']).optional().describe('Sort results by "name" (defaults to popularity)'), - limit: z.number().optional().describe('Limit the number of results returned'), - }, - async ({ technology, category, sort, limit }) => { - const data = await queryTechnologies({ technology, category, sort, limit }); - return { content: [{ type: 'text', text: JSON.stringify(data) }] }; - } - ); - - server.tool( - 'list_categories', - 'List all technology categories tracked by HTTP Archive Tech Report (e.g. CMS, CDN, JavaScript, Analytics).', - { - category: z.string().optional().describe('Comma-separated category names to filter results'), - sort: z.enum(['name']).optional().describe('Sort results by "name" (defaults to popularity)'), - limit: z.number().optional().describe('Limit the number of results returned'), - }, - async ({ category, sort, limit }) => { - const data = await queryCategories({ category, sort, limit }); - return { content: [{ type: 'text', text: JSON.stringify(data) }] }; - } - ); - - server.tool( - 'get_adoption_metrics', - 'Get web technology adoption metrics over time from HTTP Archive. Returns the percentage of websites using a technology for a given geography, rank segment, and date range.', - { - technology: z.string().describe('Comma-separated technology names (e.g. "WordPress" or "WordPress,Drupal")'), - geo: z.string().optional().describe('Geographic region (e.g. "ALL", "US", "GB"). Defaults to "ALL"'), - rank: z.string().optional().describe('Traffic rank segment (e.g. "ALL", "top 1000", "top 10000"). Defaults to "ALL"'), - start: z.string().optional().describe('Start date in YYYY-MM-DD format, or "latest" for most recent data'), - end: z.string().optional().describe('End date in YYYY-MM-DD format'), - }, - async ({ technology, geo, rank, start, end }) => { - const data = await queryReport('adoption', { technology, geo, rank, start, end }); - return { content: [{ type: 'text', text: JSON.stringify(data) }] }; - } - ); - - server.tool( - 'get_cwv_metrics', - 'Get Core Web Vitals (CWV) metrics for websites using specific web technologies. Returns good/needs improvement/poor rates for LCP, CLS, INP, and TTFB.', - { - technology: z.string().describe('Comma-separated technology names (e.g. "WordPress" or "WordPress,Drupal")'), - geo: z.string().optional().describe('Geographic region (e.g. "ALL", "US", "GB"). Defaults to "ALL"'), - rank: z.string().optional().describe('Traffic rank segment (e.g. "ALL", "top 1000", "top 10000"). Defaults to "ALL"'), - start: z.string().optional().describe('Start date in YYYY-MM-DD format, or "latest" for most recent data'), - end: z.string().optional().describe('End date in YYYY-MM-DD format'), - }, - async ({ technology, geo, rank, start, end }) => { - const data = await queryReport('cwv', { technology, geo, rank, start, end }); - return { content: [{ type: 'text', text: JSON.stringify(data) }] }; - } - ); - - server.tool( - 'get_lighthouse_metrics', - 'Get Google Lighthouse audit scores for websites using specific web technologies. Returns median scores for Performance, Accessibility, Best Practices, and SEO.', - { - technology: z.string().describe('Comma-separated technology names (e.g. "WordPress" or "WordPress,Drupal")'), - geo: z.string().optional().describe('Geographic region (e.g. "ALL", "US", "GB"). Defaults to "ALL"'), - rank: z.string().optional().describe('Traffic rank segment (e.g. "ALL", "top 1000", "top 10000"). Defaults to "ALL"'), - start: z.string().optional().describe('Start date in YYYY-MM-DD format, or "latest" for most recent data'), - end: z.string().optional().describe('End date in YYYY-MM-DD format'), - }, - async ({ technology, geo, rank, start, end }) => { - const data = await queryReport('lighthouse', { technology, geo, rank, start, end }); - return { content: [{ type: 'text', text: JSON.stringify(data) }] }; - } - ); - - server.tool( - 'get_page_weight_metrics', - 'Get page weight and size metrics for websites using specific web technologies. Returns total page weight, JavaScript size, CSS size, and other resource sizes in bytes.', - { - technology: z.string().describe('Comma-separated technology names (e.g. "WordPress" or "WordPress,Drupal")'), - geo: z.string().optional().describe('Geographic region (e.g. "ALL", "US", "GB"). Defaults to "ALL"'), - rank: z.string().optional().describe('Traffic rank segment (e.g. "ALL", "top 1000", "top 10000"). Defaults to "ALL"'), - start: z.string().optional().describe('Start date in YYYY-MM-DD format, or "latest" for most recent data'), - end: z.string().optional().describe('End date in YYYY-MM-DD format'), - }, - async ({ technology, geo, rank, start, end }) => { - const data = await queryReport('pageWeight', { technology, geo, rank, start, end }); - return { content: [{ type: 'text', text: JSON.stringify(data) }] }; - } - ); - - server.tool( - 'get_audits_metrics', - 'Get web performance and quality audit metrics for websites using specific technologies, sourced from HTTP Archive crawl data.', - { - technology: z.string().describe('Comma-separated technology names (e.g. "WordPress" or "WordPress,Drupal")'), - geo: z.string().optional().describe('Geographic region (e.g. "ALL", "US", "GB"). Defaults to "ALL"'), - rank: z.string().optional().describe('Traffic rank segment (e.g. "ALL", "top 1000", "top 10000"). Defaults to "ALL"'), - start: z.string().optional().describe('Start date in YYYY-MM-DD format, or "latest" for most recent data'), - end: z.string().optional().describe('End date in YYYY-MM-DD format'), - }, - async ({ technology, geo, rank, start, end }) => { - const data = await queryReport('audits', { technology, geo, rank, start, end }); - return { content: [{ type: 'text', text: JSON.stringify(data) }] }; - } - ); - - server.tool( - 'get_geo_breakdown', - 'Get Core Web Vitals breakdown by geography for a given technology, rank, and snapshot date. Returns a single month of CWV data (LCP, CLS, INP, TTFB) across all geographies.', - { - technology: z.string().optional().describe('Comma-separated technology names (e.g. "WordPress" or "WordPress,Drupal"). Defaults to "ALL"'), - rank: z.string().optional().describe('Traffic rank segment (e.g. "ALL", "top 1000", "top 10000"). Defaults to "ALL"'), - end: z.string().optional().describe('Snapshot date in YYYY-MM-DD format. Defaults to the latest available date'), - }, - async ({ technology, rank, end }) => { - const data = await queryReport('cwv', { crossGeo: true, technology, rank, end }); - return { content: [{ type: 'text', text: JSON.stringify(data) }] }; - } - ); - - server.tool( - 'get_cwv_distribution', - 'Get Core Web Vitals metric distribution histograms for websites using specific web technologies. Returns per-bucket origin counts for LCP, INP, CLS, FCP, and TTFB, optionally filtered by geography and rank.', - { - technology: z.string().describe('Comma-separated technology names (e.g. "WordPress" or "Wix,WordPress")'), - date: z.string().describe('Crawl date in YYYY-MM-DD format (e.g. "2026-02-01")'), - geo: z.string().optional().describe('Geographic filter — a country name (e.g. "United States of America") or "ALL" for global data. Defaults to "ALL"'), - rank: z.string().optional().describe('Numeric rank ceiling (e.g. "10000"). Omit or set to "ALL" for all ranks'), - }, - async ({ technology, date, geo, rank }) => { - const data = await queryCWVDistribution({ technology, date, geo: geo || 'ALL', rank: rank && rank !== 'ALL' ? rank : null }); - return { content: [{ type: 'text', text: JSON.stringify(data) }] }; - } - ); - - server.tool( - 'list_ranks', - 'List available traffic rank segments for filtering Tech Report data (e.g. "top 1000", "top 10000", "top 100000", "ALL").', - async () => { - const data = await queryRanks(); - return { content: [{ type: 'text', text: JSON.stringify(data) }] }; - } - ); - - server.tool( - 'list_geos', - 'List available geographic regions for filtering Tech Report data (e.g. "ALL", "US", "GB", "IN").', - async () => { - const data = await queryGeos(); - return { content: [{ type: 'text', text: JSON.stringify(data) }] }; - } - ); - - server.tool( - 'list_versions', - 'List technology versions tracked in HTTP Archive Tech Report.', - { - technology: z.string().optional().describe('Comma-separated technology names to filter versions'), - version: z.string().optional().describe('Exact version string to look up'), - }, - async ({ technology, version }) => { - const data = await queryVersions({ technology, version }); - return { content: [{ type: 'text', text: JSON.stringify(data) }] }; - } - ); - - return server; -}; - -export const handleMcp = async (req, res) => { - const transport = new StreamableHTTPServerTransport({ - sessionIdGenerator: undefined, // stateless — safe for Cloud Run - }); - - const server = createMcpServer(); - await server.connect(transport); - - res.on('close', () => { - transport.close(); - server.close(); - }); - - await transport.handleRequest(req, res, req.body); -}; diff --git a/src/package-lock.json b/src/package-lock.json index 997b9e3..6b7c05e 100644 --- a/src/package-lock.json +++ b/src/package-lock.json @@ -8,12 +8,9 @@ "name": "report-api", "version": "1.0.0", "dependencies": { - "@google-cloud/bigquery": "^7.9.1", "@google-cloud/firestore": "8.3.0", "@google-cloud/functions-framework": "^5.0.2", - "@google-cloud/storage": "7.19.0", - "@modelcontextprotocol/sdk": "^1.0.0", - "zod": "^3.0.0" + "@google-cloud/storage": "7.19.0" }, "devDependencies": { "@jest/transform": "^30.3.0", @@ -54,7 +51,6 @@ "integrity": "sha512-e7jT4DxYvIDLk1ZHmU/m/mB19rex9sv0c2ftBtjSBv+kVM/902eh0fINUzD7UwLLNR+jU585GxUJ8/EBfAM5fw==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@babel/code-frame": "^7.27.1", "@babel/generator": "^7.28.5", @@ -573,61 +569,6 @@ "tslib": "^2.4.0" } }, - "node_modules/@google-cloud/bigquery": { - "version": "7.9.4", - "resolved": "https://registry.npmjs.org/@google-cloud/bigquery/-/bigquery-7.9.4.tgz", - "integrity": "sha512-C7jeI+9lnCDYK3cRDujcBsPgiwshWKn/f0BiaJmClplfyosCLfWE83iGQ0eKH113UZzjR9c9q7aZQg0nU388sw==", - "license": "Apache-2.0", - "dependencies": { - "@google-cloud/common": "^5.0.0", - "@google-cloud/paginator": "^5.0.2", - "@google-cloud/precise-date": "^4.0.0", - "@google-cloud/promisify": "4.0.0", - "arrify": "^2.0.1", - "big.js": "^6.0.0", - "duplexify": "^4.0.0", - "extend": "^3.0.2", - "is": "^3.3.0", - "stream-events": "^1.0.5", - "uuid": "^9.0.0" - }, - "engines": { - "node": ">=14.0.0" - } - }, - "node_modules/@google-cloud/bigquery/node_modules/uuid": { - "version": "9.0.1", - "resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.1.tgz", - "integrity": "sha512-b+1eJOlsR9K8HJpow9Ok3fiWOWSIcIzXodvv0rQjVoOVNpWMpxf1wZNpt4y9h10odCNrqnYp1OBzRktckBe3sA==", - "funding": [ - "https://github.com/sponsors/broofa", - "https://github.com/sponsors/ctavan" - ], - "license": "MIT", - "bin": { - "uuid": "dist/bin/uuid" - } - }, - "node_modules/@google-cloud/common": { - "version": "5.0.2", - "resolved": "https://registry.npmjs.org/@google-cloud/common/-/common-5.0.2.tgz", - "integrity": "sha512-V7bmBKYQyu0eVG2BFejuUjlBt+zrya6vtsKdY+JxMM/dNntPF41vZ9+LhOshEUH01zOHEqBSvI7Dad7ZS6aUeA==", - "license": "Apache-2.0", - "dependencies": { - "@google-cloud/projectify": "^4.0.0", - "@google-cloud/promisify": "^4.0.0", - "arrify": "^2.0.1", - "duplexify": "^4.1.1", - "extend": "^3.0.2", - "google-auth-library": "^9.0.0", - "html-entities": "^2.5.2", - "retry-request": "^7.0.0", - "teeny-request": "^9.0.0" - }, - "engines": { - "node": ">=14.0.0" - } - }, "node_modules/@google-cloud/firestore": { "version": "8.3.0", "resolved": "https://registry.npmjs.org/@google-cloud/firestore/-/firestore-8.3.0.tgz", @@ -679,15 +620,6 @@ "node": ">=14.0.0" } }, - "node_modules/@google-cloud/precise-date": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/@google-cloud/precise-date/-/precise-date-4.0.0.tgz", - "integrity": "sha512-1TUx3KdaU3cN7nfCdNf+UVqA/PSX29Cjcox3fZZBtINlRrXVTmUkQnCKv2MbBUbCopbK4olAT1IHl76uZyCiVA==", - "license": "Apache-2.0", - "engines": { - "node": ">=14.0.0" - } - }, "node_modules/@google-cloud/projectify": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/@google-cloud/projectify/-/projectify-4.0.0.tgz", @@ -763,18 +695,6 @@ "node": ">=6" } }, - "node_modules/@hono/node-server": { - "version": "1.19.11", - "resolved": "https://registry.npmjs.org/@hono/node-server/-/node-server-1.19.11.tgz", - "integrity": "sha512-dr8/3zEaB+p0D2n/IUrlPF1HZm586qgJNXK1a9fhg/PzdtkK7Ksd5l312tJX2yBuALqDYBlG20QEbayqPyxn+g==", - "license": "MIT", - "engines": { - "node": ">=18.14.1" - }, - "peerDependencies": { - "hono": "^4" - } - }, "node_modules/@isaacs/cliui": { "version": "8.0.2", "resolved": "https://registry.npmjs.org/@isaacs/cliui/-/cliui-8.0.2.tgz", @@ -1219,63 +1139,6 @@ "url": "https://opencollective.com/js-sdsl" } }, - "node_modules/@modelcontextprotocol/sdk": { - "version": "1.27.1", - "resolved": "https://registry.npmjs.org/@modelcontextprotocol/sdk/-/sdk-1.27.1.tgz", - "integrity": "sha512-sr6GbP+4edBwFndLbM60gf07z0FQ79gaExpnsjMGePXqFcSSb7t6iscpjk9DhFhwd+mTEQrzNafGP8/iGGFYaA==", - "license": "MIT", - "dependencies": { - "@hono/node-server": "^1.19.9", - "ajv": "^8.17.1", - "ajv-formats": "^3.0.1", - "content-type": "^1.0.5", - "cors": "^2.8.5", - "cross-spawn": "^7.0.5", - "eventsource": "^3.0.2", - "eventsource-parser": "^3.0.0", - "express": "^5.2.1", - "express-rate-limit": "^8.2.1", - "hono": "^4.11.4", - "jose": "^6.1.3", - "json-schema-typed": "^8.0.2", - "pkce-challenge": "^5.0.0", - "raw-body": "^3.0.0", - "zod": "^3.25 || ^4.0", - "zod-to-json-schema": "^3.25.1" - }, - "engines": { - "node": ">=18" - }, - "peerDependencies": { - "@cfworker/json-schema": "^4.1.1", - "zod": "^3.25 || ^4.0" - }, - "peerDependenciesMeta": { - "@cfworker/json-schema": { - "optional": true - }, - "zod": { - "optional": false - } - } - }, - "node_modules/@modelcontextprotocol/sdk/node_modules/ajv-formats": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/ajv-formats/-/ajv-formats-3.0.1.tgz", - "integrity": "sha512-8iUql50EUR+uUcdRQ3HDqa6EVyo3docL8g5WJ3FNcWmu62IbkGUue/pEyLBW8VGKKucTPgqeks4fIU1DA4yowQ==", - "license": "MIT", - "dependencies": { - "ajv": "^8.0.0" - }, - "peerDependencies": { - "ajv": "^8.0.0" - }, - "peerDependenciesMeta": { - "ajv": { - "optional": true - } - } - }, "node_modules/@napi-rs/wasm-runtime": { "version": "0.2.12", "resolved": "https://registry.npmjs.org/@napi-rs/wasm-runtime/-/wasm-runtime-0.2.12.tgz", @@ -2285,19 +2148,6 @@ "baseline-browser-mapping": "dist/cli.js" } }, - "node_modules/big.js": { - "version": "6.2.2", - "resolved": "https://registry.npmjs.org/big.js/-/big.js-6.2.2.tgz", - "integrity": "sha512-y/ie+Faknx7sZA5MfGA2xKlu0GDv8RWrXGsmlteyJQ2lvoKv9GBK/fpRMc2qlSoBAgNxrixICFCBefIq8WCQpQ==", - "license": "MIT", - "engines": { - "node": "*" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/bigjs" - } - }, "node_modules/bignumber.js": { "version": "9.3.1", "resolved": "https://registry.npmjs.org/bignumber.js/-/bignumber.js-9.3.1.tgz", @@ -2360,7 +2210,6 @@ } ], "license": "MIT", - "peer": true, "dependencies": { "baseline-browser-mapping": "^2.9.0", "caniuse-lite": "^1.0.30001759", @@ -2753,23 +2602,6 @@ "dev": true, "license": "MIT" }, - "node_modules/cors": { - "version": "2.8.6", - "resolved": "https://registry.npmjs.org/cors/-/cors-2.8.6.tgz", - "integrity": "sha512-tJtZBBHA6vjIAaF6EnIaq6laBBP9aq/Y3ouVJjEfoHbRBcHBAHYcMh/w8LDrk2PvIMMq8gmopa5D4V8RmbrxGw==", - "license": "MIT", - "dependencies": { - "object-assign": "^4", - "vary": "^1" - }, - "engines": { - "node": ">= 0.10" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/express" - } - }, "node_modules/cross-spawn": { "version": "7.0.6", "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.6.tgz", @@ -3094,27 +2926,6 @@ "node": ">=6" } }, - "node_modules/eventsource": { - "version": "3.0.7", - "resolved": "https://registry.npmjs.org/eventsource/-/eventsource-3.0.7.tgz", - "integrity": "sha512-CRT1WTyuQoD771GW56XEZFQ/ZoSfWid1alKGDYMmkt2yl8UXrVR4pspqWNEcqKvVIzg6PAltWjxcSSPrboA4iA==", - "license": "MIT", - "dependencies": { - "eventsource-parser": "^3.0.1" - }, - "engines": { - "node": ">=18.0.0" - } - }, - "node_modules/eventsource-parser": { - "version": "3.0.6", - "resolved": "https://registry.npmjs.org/eventsource-parser/-/eventsource-parser-3.0.6.tgz", - "integrity": "sha512-Vo1ab+QXPzZ4tCa8SwIHJFaSzy4R6SHf7BY79rFBDf0idraZWAkYrDjDj8uWaSm3S2TK+hJ7/t1CEmZ7jXw+pg==", - "license": "MIT", - "engines": { - "node": ">=18.0.0" - } - }, "node_modules/execa": { "version": "5.1.1", "resolved": "https://registry.npmjs.org/execa/-/execa-5.1.1.tgz", @@ -3217,24 +3028,6 @@ "url": "https://opencollective.com/express" } }, - "node_modules/express-rate-limit": { - "version": "8.3.1", - "resolved": "https://registry.npmjs.org/express-rate-limit/-/express-rate-limit-8.3.1.tgz", - "integrity": "sha512-D1dKN+cmyPWuvB+G2SREQDzPY1agpBIcTa9sJxOPMCNeH3gwzhqJRDWCXW3gg0y//+LQ/8j52JbMROWyrKdMdw==", - "license": "MIT", - "dependencies": { - "ip-address": "10.1.0" - }, - "engines": { - "node": ">= 16" - }, - "funding": { - "url": "https://github.com/sponsors/express-rate-limit" - }, - "peerDependencies": { - "express": ">= 4.11" - } - }, "node_modules/express/node_modules/mime-db": { "version": "1.54.0", "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.54.0.tgz", @@ -3965,16 +3758,6 @@ "node": ">= 0.4" } }, - "node_modules/hono": { - "version": "4.12.7", - "resolved": "https://registry.npmjs.org/hono/-/hono-4.12.7.tgz", - "integrity": "sha512-jq9l1DM0zVIvsm3lv9Nw9nlJnMNPOcAtsbsgiUhWcFzPE99Gvo6yRTlszSLLYacMeQ6quHD6hMfId8crVHvexw==", - "license": "MIT", - "peer": true, - "engines": { - "node": ">=16.9.0" - } - }, "node_modules/hosted-git-info": { "version": "7.0.2", "resolved": "https://registry.npmjs.org/hosted-git-info/-/hosted-git-info-7.0.2.tgz", @@ -4161,15 +3944,6 @@ "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==", "license": "ISC" }, - "node_modules/ip-address": { - "version": "10.1.0", - "resolved": "https://registry.npmjs.org/ip-address/-/ip-address-10.1.0.tgz", - "integrity": "sha512-XXADHxXmvT9+CRxhXg56LJovE+bmWnEWB78LB83VZTprKTmaC5QfruXocxzTZ2Kl0DNwKuBdlIhjL8LeY8Sf8Q==", - "license": "MIT", - "engines": { - "node": ">= 12" - } - }, "node_modules/ipaddr.js": { "version": "1.9.1", "resolved": "https://registry.npmjs.org/ipaddr.js/-/ipaddr.js-1.9.1.tgz", @@ -4179,15 +3953,6 @@ "node": ">= 0.10" } }, - "node_modules/is": { - "version": "3.3.2", - "resolved": "https://registry.npmjs.org/is/-/is-3.3.2.tgz", - "integrity": "sha512-a2xr4E3s1PjDS8ORcGgXpWx6V+liNs+O3JRD2mb9aeugD7rtkkZ0zgLdYgw0tWsKhsdiezGYptSiMlVazCBTuQ==", - "license": "MIT", - "engines": { - "node": ">= 0.4" - } - }, "node_modules/is-arguments": { "version": "1.2.0", "resolved": "https://registry.npmjs.org/is-arguments/-/is-arguments-1.2.0.tgz", @@ -5022,15 +4787,6 @@ "url": "https://github.com/chalk/supports-color?sponsor=1" } }, - "node_modules/jose": { - "version": "6.2.1", - "resolved": "https://registry.npmjs.org/jose/-/jose-6.2.1.tgz", - "integrity": "sha512-jUaKr1yrbfaImV7R2TN/b3IcZzsw38/chqMpo2XJ7i2F8AfM/lA4G1goC3JVEwg0H7UldTmSt3P68nt31W7/mw==", - "license": "MIT", - "funding": { - "url": "https://github.com/sponsors/panva" - } - }, "node_modules/js-tokens": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz", @@ -5086,12 +4842,6 @@ "integrity": "sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug==", "license": "MIT" }, - "node_modules/json-schema-typed": { - "version": "8.0.2", - "resolved": "https://registry.npmjs.org/json-schema-typed/-/json-schema-typed-8.0.2.tgz", - "integrity": "sha512-fQhoXdcvc3V28x7C7BMs4P5+kNlgUURe2jmUT1T//oBRMDrqy1QPelJimwZGo7Hg9VPV3EQV5Bnq4hbFy2vetA==", - "license": "BSD-2-Clause" - }, "node_modules/json5": { "version": "2.2.3", "resolved": "https://registry.npmjs.org/json5/-/json5-2.2.3.tgz", @@ -5456,15 +5206,6 @@ "node": ">=8" } }, - "node_modules/object-assign": { - "version": "4.1.1", - "resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz", - "integrity": "sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg==", - "license": "MIT", - "engines": { - "node": ">=0.10.0" - } - }, "node_modules/object-hash": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/object-hash/-/object-hash-3.0.0.tgz", @@ -5701,15 +5442,6 @@ "node": ">= 6" } }, - "node_modules/pkce-challenge": { - "version": "5.0.1", - "resolved": "https://registry.npmjs.org/pkce-challenge/-/pkce-challenge-5.0.1.tgz", - "integrity": "sha512-wQ0b/W4Fr01qtpHlqSqspcj3EhBvimsdh0KlHhH8HRZnMsEa0ea2fTULOXOS9ccQr3om+GcGRk4e+isrZWV8qQ==", - "license": "MIT", - "engines": { - "node": ">=16.20.0" - } - }, "node_modules/pkg-dir": { "version": "4.2.0", "resolved": "https://registry.npmjs.org/pkg-dir/-/pkg-dir-4.2.0.tgz", @@ -7351,25 +7083,6 @@ "funding": { "url": "https://github.com/sponsors/sindresorhus" } - }, - "node_modules/zod": { - "version": "3.25.76", - "resolved": "https://registry.npmjs.org/zod/-/zod-3.25.76.tgz", - "integrity": "sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ==", - "license": "MIT", - "peer": true, - "funding": { - "url": "https://github.com/sponsors/colinhacks" - } - }, - "node_modules/zod-to-json-schema": { - "version": "3.25.1", - "resolved": "https://registry.npmjs.org/zod-to-json-schema/-/zod-to-json-schema-3.25.1.tgz", - "integrity": "sha512-pM/SU9d3YAggzi6MtR4h7ruuQlqKtad8e9S0fmxcMi+ueAK5Korys/aWcV9LIIHTVbj01NdzxcnXSN+O74ZIVA==", - "license": "ISC", - "peerDependencies": { - "zod": "^3.25 || ^4" - } } } } diff --git a/src/package.json b/src/package.json index 82a2529..ad4fc68 100644 --- a/src/package.json +++ b/src/package.json @@ -8,19 +8,16 @@ "node": ">=22.0.0" }, "scripts": { - "function": "DATABASE=tech-report-api-prod functions-framework --target=app", + "start": "DATABASE=tech-report-api-prod functions-framework --target=app", "test": "node --experimental-vm-modules node_modules/jest/bin/jest.js", "test:live": "bash ../test-api.sh", "build": "docker build -t report-api .", - "docker": "docker run -p 8080:8080 report-api" + "run": "docker run -p 8080:8080 report-api" }, "dependencies": { - "@google-cloud/bigquery": "^7.9.1", "@google-cloud/firestore": "8.3.0", "@google-cloud/functions-framework": "^5.0.2", - "@google-cloud/storage": "7.19.0", - "@modelcontextprotocol/sdk": "^1.0.0", - "zod": "^3.0.0" + "@google-cloud/storage": "7.19.0" }, "devDependencies": { "@jest/transform": "^30.3.0", diff --git a/src/tests/routes.test.js b/src/tests/routes.test.js index dde4d5e..9414b9f 100644 --- a/src/tests/routes.test.js +++ b/src/tests/routes.test.js @@ -73,14 +73,9 @@ jest.unstable_mockModule('../utils/db.js', () => { collection: jest.fn().mockImplementation((collectionName) => mockQuery) }; - const mockBigQueryInstance = { - query: jest.fn().mockResolvedValue([[]]) - }; - return { firestore: mockFirestoreInstance, - firestoreOld: mockFirestoreInstance, - bigquery: mockBigQueryInstance + firestoreOld: mockFirestoreInstance }; }); @@ -633,7 +628,9 @@ describe('API Routes', () => { it('should reject paths with encoded double dots', async () => { // URL-encoded '..' = %2e%2e - mockFileExists.mockResolvedValue([false]); // Will be checked after validation + const notFoundError = new Error('Not Found'); + notFoundError.code = 404; + mockGetMetadata.mockRejectedValue(notFoundError); // Will be checked after validation const res = await request(app) .get('/v1/static/reports/%2e%2e/secret/passwd'); @@ -645,7 +642,9 @@ describe('API Routes', () => { describe('Non-existent files (404 handling)', () => { it('should return 404 for non-existent files', async () => { - mockFileExists.mockResolvedValue([false]); + const notFoundError = new Error('Not Found'); + notFoundError.code = 404; + mockGetMetadata.mockRejectedValue(notFoundError); const res = await request(app) .get('/v1/static/reports/nonexistent.json') @@ -725,7 +724,8 @@ describe('API Routes', () => { describe('Error scenarios (GCS failures)', () => { it('should handle GCS exists() failure', async () => { - mockFileExists.mockRejectedValue(new Error('GCS connection failed')); + // This test represents GCS failures (like network error), mockGetMetadata will throw an error without a 404 code + mockGetMetadata.mockRejectedValue(new Error('GCS connection failed')); const res = await request(app) .get('/v1/static/reports/data.json') @@ -736,7 +736,6 @@ describe('API Routes', () => { }); it('should handle GCS getMetadata() failure', async () => { - mockFileExists.mockResolvedValue([true]); mockGetMetadata.mockRejectedValue(new Error('Metadata retrieval failed')); const res = await request(app) @@ -840,63 +839,4 @@ describe('API Routes', () => { }); }); }); - - describe('GET /v1/cwv-distribution', () => { - it('should return 400 when technology is missing', async () => { - const res = await request(app).get('/v1/cwv-distribution?date=2026-02-01'); - expect(res.statusCode).toEqual(400); - expect(res.body).toHaveProperty('errors'); - }); - - it('should return 400 when date is missing', async () => { - const res = await request(app).get('/v1/cwv-distribution?technology=Wix'); - expect(res.statusCode).toEqual(400); - expect(res.body).toHaveProperty('errors'); - }); - - it('should return 400 when both technology and date are missing', async () => { - const res = await request(app).get('/v1/cwv-distribution'); - expect(res.statusCode).toEqual(400); - expect(res.body).toHaveProperty('errors'); - }); - - it('should return 200 with valid technology and date', async () => { - const res = await request(app).get('/v1/cwv-distribution?technology=Wix&date=2026-02-01'); - expect(res.statusCode).toEqual(200); - expect(Array.isArray(res.body)).toBe(true); - }); - - it('should return 200 with multiple technologies', async () => { - const res = await request(app).get('/v1/cwv-distribution?technology=Wix,WordPress&date=2026-02-01'); - expect(res.statusCode).toEqual(200); - expect(Array.isArray(res.body)).toBe(true); - }); - - it('should return 200 with rank filter applied', async () => { - const res = await request(app).get('/v1/cwv-distribution?technology=Wix&date=2026-02-01&rank=10000'); - expect(res.statusCode).toEqual(200); - expect(Array.isArray(res.body)).toBe(true); - }); - - it('should return 200 with geo filter applied', async () => { - const res = await request(app).get('/v1/cwv-distribution?technology=Wix&date=2026-02-01&geo=United%20States%20of%20America'); - expect(res.statusCode).toEqual(200); - expect(Array.isArray(res.body)).toBe(true); - }); - - it('should return 200 with geo=ALL (default behavior)', async () => { - const res = await request(app).get('/v1/cwv-distribution?technology=Wix&date=2026-02-01&geo=ALL'); - expect(res.statusCode).toEqual(200); - expect(Array.isArray(res.body)).toBe(true); - }); - - it('should handle CORS preflight requests', async () => { - const res = await request(app) - .options('/v1/cwv-distribution') - .set('Origin', 'http://example.com') - .set('Access-Control-Request-Method', 'GET'); - expect(res.statusCode).toEqual(204); - expect(res.headers['access-control-allow-origin']).toEqual('*'); - }); - }); }); diff --git a/src/utils/controllerHelpers.js b/src/utils/controllerHelpers.js index 3ff005e..58583d6 100644 --- a/src/utils/controllerHelpers.js +++ b/src/utils/controllerHelpers.js @@ -104,15 +104,10 @@ const generateETag = (jsonData) => { return crypto.createHash('md5').update(jsonData).digest('hex'); }; -const sendJSONResponse = (req, res, data, statusCode = 200) => { +const sendJSONResponse = (res, data, statusCode = 200) => { const jsonData = JSON.stringify(data); const etag = generateETag(jsonData); res.setHeader('ETag', `"${etag}"`); - if (!isModified(req, etag)) { - res.statusCode = 304; - res.end(); - return; - } res.statusCode = statusCode; res.end(jsonData); }; @@ -150,7 +145,16 @@ const executeQuery = async (req, res, collection, queryBuilder, dataProcessor = } // Send response with ETag support - sendJSONResponse(req, res, data); + const jsonData = JSON.stringify(data); + const etag = generateETag(jsonData); + res.setHeader('ETag', `"${etag}"`); + if (!isModified(req, etag)) { + res.statusCode = 304; + res.end(); + return; + } + res.statusCode = 200; + res.end(jsonData); } catch (error) { // Handle validation errors specifically diff --git a/src/utils/db.js b/src/utils/db.js index 0e6e5ae..41eed29 100644 --- a/src/utils/db.js +++ b/src/utils/db.js @@ -1,5 +1,4 @@ import { Firestore } from '@google-cloud/firestore'; -import { BigQuery } from '@google-cloud/bigquery'; // Initialize Firestore with basic optimizations (default connection using env variables) const firestore = new Firestore({ @@ -31,10 +30,3 @@ const firestoreOld = new Firestore({ // Export both connections - maintain backward compatibility export { firestore, firestoreOld }; - -// Initialize BigQuery client -const bigquery = new BigQuery({ - projectId: process.env.PROJECT -}); - -export { bigquery }; diff --git a/src/utils/reportService.js b/src/utils/reportService.js deleted file mode 100644 index d7960a8..0000000 --- a/src/utils/reportService.js +++ /dev/null @@ -1,323 +0,0 @@ -import { firestore, firestoreOld, bigquery } from './db.js'; -import { convertToArray } from './helpers.js'; -import { - getLatestDate, - validateArrayParameter, - validateTechnologyArray, - FIRESTORE_IN_LIMIT, -} from './controllerHelpers.js'; - -const REPORT_CONFIGS = { - adoption: { table: 'adoption', dataField: 'adoption' }, - pageWeight: { table: 'page_weight', dataField: 'pageWeight' }, - lighthouse: { table: 'lighthouse', dataField: 'lighthouse' }, - cwv: { table: 'core_web_vitals', dataField: 'vitals' }, - audits: { table: 'audits', dataField: 'audits' }, -}; - -export const queryTechnologies = async (params = {}) => { - const isOnlyNames = 'onlyname' in params; - const hasCustomFields = params.fields && !isOnlyNames; - - let query = firestore.collection('technologies'); - - const technologyParam = params.technology || 'ALL'; - const technologies = technologyParam !== 'ALL' ? validateTechnologyArray(technologyParam) : []; - - if (technologies.length > 0) { - if (technologyParam !== 'ALL' && validateTechnologyArray(technologyParam) === null) { - const err = new Error(`Too many technologies specified. Maximum ${FIRESTORE_IN_LIMIT} allowed.`); - err.statusCode = 400; - throw err; - } - query = query.where('technology', 'in', technologies); - } - - if (params.category) { - const categories = validateArrayParameter(params.category, 'category'); - if (categories.length > 0) { - query = query.where('category_obj', 'array-contains-any', categories); - } - } - - if (params.sort === 'name') { - query = query.orderBy('technology', 'asc'); - } else { - query = query.orderBy('origins.mobile', 'desc'); - } - - if (isOnlyNames) { - query = query.select('technology'); - } else if (hasCustomFields) { - const requestedFields = params.fields.split(',').map(f => f.trim()); - query = query.select(...requestedFields); - } else { - query = query.select('technology', 'category', 'description', 'icon', 'origins'); - } - - if (params.limit) { - query = query.limit(parseInt(params.limit, 10)); - } - - const snapshot = await query.get(); - const data = []; - snapshot.forEach(doc => data.push(doc.data())); - - if (isOnlyNames) { - return data.map(item => item.technology); - } - return data; -}; - -export const queryCategories = async (params = {}) => { - const isOnlyNames = 'onlyname' in params; - const hasCustomFields = params.fields && !isOnlyNames; - - let query = firestore.collection('categories'); - - const categoryParam = params.category || 'ALL'; - - if (categoryParam !== 'ALL') { - const categories = validateArrayParameter(categoryParam, 'category'); - if (categories.length > 0) { - query = query.where('category', 'in', categories); - } - } - - if (params.sort === 'name') { - query = query.orderBy('category', 'asc'); - } else { - query = query.orderBy('origins.mobile', 'desc'); - } - - if (isOnlyNames) { - query = query.select('category'); - } else if (hasCustomFields) { - const requestedFields = params.fields.split(',').map(f => f.trim()); - query = query.select(...requestedFields); - } - - if (params.limit) { - query = query.limit(parseInt(params.limit, 10)); - } - - const snapshot = await query.get(); - const data = []; - snapshot.forEach(doc => data.push(doc.data())); - - if (isOnlyNames) { - return data.map(item => item.category); - } - return data; -}; - -export const queryReport = async (reportType, params = {}) => { - const config = REPORT_CONFIGS[reportType]; - if (!config) throw new Error(`Unknown report type: ${reportType}`); - - const db = firestoreOld; - const crossGeo = params.crossGeo || false; - const technologyParam = params.technology || 'ALL'; - const geoParam = params.geo || 'ALL'; - const rankParam = params.rank || 'ALL'; - - const techArray = validateArrayParameter(technologyParam, 'technology'); - - let query = db.collection(config.table); - query = query.where('rank', '==', rankParam); - query = query.where('technology', 'in', techArray); - - if (crossGeo) { - // Cross-geo: single-month snapshot, all geographies included. - // Use 'end' param if provided, otherwise default to latest available date. - const snapshotDate = params.end || await getLatestDate(db, config.table); - query = query.where('date', '==', snapshotDate); - query = query.select('date', 'technology', 'geo', config.dataField); - } else { - // Normal time-series: filter by geo, apply date range, no geo in projection. - query = query.where('geo', '==', geoParam); - - let startDate = params.start; - if (startDate === 'latest') { - startDate = await getLatestDate(db, config.table); - } - - if (startDate) query = query.where('date', '>=', startDate); - if (params.end) query = query.where('date', '<=', params.end); - - query = query.select('date', 'technology', config.dataField); - } - - const snapshot = await query.get(); - const data = []; - snapshot.forEach(doc => data.push(doc.data())); - - return data; -}; - -export const queryCWVDistribution = async ({ technology, date, geo = 'ALL', rank = null }) => { - const allTechnologies = !technology || technology === 'ALL'; - const technologies = allTechnologies ? [] : convertToArray(technology); - const techClause = allTechnologies ? '' : 'AND t.technology IN UNNEST(@technologies)'; - const rankParam = (rank !== null && rank !== 'ALL') ? parseInt(rank, 10) : null; - const rankClause = rankParam !== null ? 'AND rank <= @rank' : ''; - - const query = `WITH pages AS ( - SELECT - client, - t.technology AS technology, - root_page - FROM - httparchive.crawl.pages, - UNNEST(technologies) AS t - WHERE - date = @date - ${techClause} - ${rankClause} - ), metrics AS ( - SELECT - 'ALL' AS geo, - client, - technology, - root_page, - ANY_VALUE(p75_lcp) AS lcp, - ANY_VALUE(p75_inp) AS inp, - ANY_VALUE(p75_cls) AS cls, - ANY_VALUE(p75_fcp) AS fcp, - ANY_VALUE(p75_ttfb) AS ttfb - FROM pages AS p, - \`chrome-ux-report.materialized.device_summary\` d - WHERE - d.date = @date AND - root_page = origin || '/' AND - IF(device = 'desktop', 'desktop', 'mobile') = client AND - @geo = 'ALL' - GROUP BY - client, - technology, - root_page - - UNION ALL - - SELECT - \`chrome-ux-report\`.experimental.GET_COUNTRY(country_code) AS geo, - client, - technology, - root_page, - ANY_VALUE(p75_lcp) AS lcp, - ANY_VALUE(p75_inp) AS inp, - ANY_VALUE(p75_cls) AS cls, - ANY_VALUE(p75_fcp) AS fcp, - ANY_VALUE(p75_ttfb) AS ttfb - FROM pages AS p, - \`chrome-ux-report.materialized.country_summary\` c - WHERE - yyyymm = CAST(FORMAT_DATE('%Y%m', @date) AS INT64) AND - root_page = origin || '/' AND - IF(device = 'desktop', 'desktop', 'mobile') = client AND - \`chrome-ux-report\`.experimental.GET_COUNTRY(country_code) = @geo - GROUP BY - geo, - client, - technology, - root_page -) - -SELECT - geo, - client, - technology, - bucket AS loading_bucket, - bucket / 4 AS inp_bucket, - bucket / 2000 AS cls_bucket, - COUNT(DISTINCT root_page WHERE lcp = bucket) AS lcp_origins, - COUNT(DISTINCT root_page WHERE inp = bucket / 4) AS inp_origins, - COUNT(DISTINCT root_page WHERE cls = bucket / 2000) AS cls_origins, - COUNT(DISTINCT root_page WHERE fcp = bucket) AS fcp_origins, - COUNT(DISTINCT root_page WHERE ttfb = bucket) AS ttfb_origins -FROM - metrics, - UNNEST(GENERATE_ARRAY(0.0, 10000.0, 100.0)) AS bucket -GROUP BY - geo, - client, - technology, - bucket -ORDER BY - geo, - client, - technology, - bucket`; - - const [rows] = await bigquery.query({ - query, - params: { - ...(!allTechnologies && { technologies }), - date, - geo, - ...(rankParam !== null && { rank: rankParam }), - }, - types: { - ...(!allTechnologies && { technologies: ['STRING'] }), - date: 'STRING', - geo: 'STRING', - ...(rankParam !== null && { rank: 'INT64' }), - }, - labels: { source: 'cwv-distribution' }, - }); - - return rows; -}; - -export const queryRanks = async () => { - const snapshot = await firestore - .collection('ranks') - .orderBy('mobile_origins', 'desc') - .select('rank') - .get(); - const data = []; - snapshot.forEach(doc => data.push(doc.data())); - return data; -}; - -export const queryGeos = async () => { - const snapshot = await firestore - .collection('geos') - .orderBy('mobile_origins', 'desc') - .select('geo') - .get(); - const data = []; - snapshot.forEach(doc => data.push(doc.data())); - return data; -}; - -export const queryVersions = async (params = {}) => { - let query = firestore.collection('versions'); - - const technologyParam = params.technology || 'ALL'; - if (technologyParam !== 'ALL') { - const technologies = validateTechnologyArray(technologyParam); - if (technologies === null) { - const err = new Error(`Too many technologies specified. Maximum ${FIRESTORE_IN_LIMIT} allowed.`); - err.statusCode = 400; - throw err; - } - if (technologies.length > 0) { - query = query.where('technology', 'in', technologies); - } - } - - if (params.version) { - query = query.where('version', '==', params.version); - } - - if (params.fields) { - const requestedFields = params.fields.split(',').map(f => f.trim()); - query = query.select(...requestedFields); - } - - const snapshot = await query.get(); - const data = []; - snapshot.forEach(doc => data.push(doc.data())); - return data; -}; diff --git a/terraform/main.tf b/terraform/main.tf index f894709..81afba9 100644 --- a/terraform/main.tf +++ b/terraform/main.tf @@ -28,8 +28,6 @@ module "endpoints" { service_name = "report-api" region = var.region min_instances = var.environment == "prod" ? 1 : 0 - ingress_settings = var.environment == "prod" ? "INGRESS_TRAFFIC_INTERNAL_LOAD_BALANCER" : "INGRESS_TRAFFIC_ALL" - environment_variables = { "PROJECT" = var.project "DATABASE" = "${var.project_database}prod" // TODO: Update this to use ${var.environment} diff --git a/terraform/run-service/variables.tf b/terraform/run-service/variables.tf index 997b132..d5f77a0 100644 --- a/terraform/run-service/variables.tf +++ b/terraform/run-service/variables.tf @@ -29,17 +29,17 @@ variable "available_memory" { description = "The amount of memory for the Cloud Function" } variable "available_cpu" { - default = "2" + default = "1" type = string description = "The amount of CPU for the Cloud Function" } variable "ingress_settings" { type = string - default = "INGRESS_TRAFFIC_ALL" + default = "INGRESS_TRAFFIC_INTERNAL_LOAD_BALANCER" description = "String value that controls what traffic can reach the function. Check ingress documentation to see the impact of each settings value. Changes to this field will recreate the cloud function." } variable "timeout" { - default = "120s" + default = "60s" type = string description = "Timeout for the service. Default value is 60 seconds. Cannot be more than 540 seconds." } @@ -56,7 +56,7 @@ variable "min_instances" { variable "max_instance_request_concurrency" { description = "(Optional) The limit on the maximum number of requests that an instance can handle simultaneously. This can be used to control costs when scaling. Defaults to 1." type = number - default = 100 + default = 80 } variable "environment_variables" { description = "environment_variables" diff --git a/test-api.sh b/test-api.sh index a1bebd4..7809094 100755 --- a/test-api.sh +++ b/test-api.sh @@ -195,41 +195,4 @@ test_filter "/v1/geo-breakdown" "?technology=WordPress" \ "all(.[]; has(\"geo\")) and length > 0" \ "Geo breakdown response includes geo field" -# Test cwv-distribution endpoint -test_cors_preflight "/v1/cwv-distribution" -test_endpoint "/v1/cwv-distribution" "?technology=Wix&date=2026-02-01" -test_endpoint "/v1/cwv-distribution" "?technology=Wix,WordPress&date=2026-02-01" -test_endpoint "/v1/cwv-distribution" "?technology=Wix&date=2026-02-01&rank=10000" -test_endpoint "/v1/cwv-distribution" "?technology=Wix&date=2026-02-01&geo=ALL" -test_endpoint "/v1/cwv-distribution" "?technology=Wix&date=2026-02-01&geo=United%20States%20of%20America" - -# Test cwv-distribution filter correspondences -test_filter "/v1/cwv-distribution" "?technology=Wix&date=2026-02-01" \ - "all(.[]; .technology == \"Wix\") and length > 0" \ - "CWV distribution single technology (Wix)" - -test_filter "/v1/cwv-distribution" "?technology=Wix,WordPress&date=2026-02-01" \ - "all(.[]; .technology == \"Wix\" or .technology == \"WordPress\") and length > 0" \ - "CWV distribution multiple technologies (Wix, WordPress)" - -test_filter "/v1/cwv-distribution" "?technology=Wix&date=2026-02-01" \ - "all(.[]; has(\"loading_bucket\") and has(\"lcp_origins\") and has(\"inp_origins\") and has(\"cls_origins\")) and length > 0" \ - "CWV distribution response includes histogram bucket fields" - -test_filter "/v1/cwv-distribution" "?technology=Wix&date=2026-02-01" \ - "[.[].client] | unique | sort == [\"desktop\", \"mobile\"]" \ - "CWV distribution returns both desktop and mobile clients" - -test_filter "/v1/cwv-distribution" "?technology=Wix&date=2026-02-01" \ - "all(.[]; has(\"geo\")) and length > 0" \ - "CWV distribution response includes geo field" - -test_filter "/v1/cwv-distribution" "?technology=Wix&date=2026-02-01" \ - "all(.[]; .geo == \"ALL\") and length > 0" \ - "CWV distribution defaults to geo=ALL" - -test_filter "/v1/cwv-distribution" "?technology=Wix&date=2026-02-01&geo=United%20States%20of%20America" \ - "all(.[]; .geo == \"United States of America\") and length > 0" \ - "CWV distribution filters by specific geo (United States of America)" - echo "API tests complete! All endpoints returned 200 and data corresponds to filters."