|
| 1 | +import { createLogger } from '@sim/logger' |
| 2 | +import { type NextRequest, NextResponse } from 'next/server' |
| 3 | +import { z } from 'zod' |
| 4 | +import { checkInternalAuth } from '@/lib/auth/hybrid' |
| 5 | +import { |
| 6 | + secureFetchWithPinnedIP, |
| 7 | + validateUrlWithDNS, |
| 8 | +} from '@/lib/core/security/input-validation.server' |
| 9 | +import { generateRequestId } from '@/lib/core/utils/request' |
| 10 | +import { RawFileInputSchema } from '@/lib/uploads/utils/file-schemas' |
| 11 | +import { isInternalFileUrl } from '@/lib/uploads/utils/file-utils' |
| 12 | +import { resolveFileInputToUrl } from '@/lib/uploads/utils/file-utils.server' |
| 13 | + |
/** Next.js route segment config: never statically cache this handler. */
export const dynamic = 'force-dynamic'

/** Module-scoped logger; every entry from this route is tagged `ExtendParseAPI`. */
const logger = createLogger('ExtendParseAPI')

/** Accepted values for the optional `outputFormat` field. */
const OUTPUT_FORMATS = ['markdown', 'spatial'] as const

/** Accepted values for the optional `chunking` field. */
const CHUNKING_STRATEGIES = ['page', 'document', 'section'] as const

/** Accepted values for the optional `engine` field. */
const PARSE_ENGINES = ['parse_performance', 'parse_light'] as const

/**
 * Shape of the JSON body accepted by the Extend parse route.
 *
 * Only `apiKey` is mandatory at the schema level. `filePath` and `file` are
 * both optional here; the handler itself rejects requests for which no file
 * URL can be resolved.
 */
const ExtendParseSchema = z.object({
  apiKey: z.string().min(1, 'API key is required'),
  filePath: z.string().optional(),
  file: RawFileInputSchema.optional(),
  outputFormat: z.enum(OUTPUT_FORMATS).optional(),
  chunking: z.enum(CHUNKING_STRATEGIES).optional(),
  engine: z.enum(PARSE_ENGINES).optional(),
})
| 26 | + |
/**
 * Picks a client-facing message out of an Extend API error response body.
 *
 * The body is parsed as JSON and the first of `message` / `error` that is a
 * non-empty string is returned. Anything else (non-JSON text, a JSON value
 * that is not an object, missing/empty/non-string fields) yields `fallback`,
 * so the caller never forwards an empty or non-string error to the client.
 *
 * @param errorText - Raw response body returned by the Extend API.
 * @param fallback - Generic message to use when nothing usable is found.
 * @returns The extracted message, or `fallback`.
 */
function extractExtendErrorMessage(errorText: string, fallback: string): string {
  let parsed: unknown
  try {
    parsed = JSON.parse(errorText)
  } catch {
    // errorText is not JSON; keep generic message
    return fallback
  }

  if (typeof parsed !== 'object' || parsed === null) {
    return fallback
  }

  const { message, error } = parsed as { message?: unknown; error?: unknown }
  for (const candidate of [message, error]) {
    if (typeof candidate === 'string' && candidate.length > 0) {
      return candidate
    }
  }
  return fallback
}

/**
 * Handles `POST` requests that forward a document to the Extend parse API.
 *
 * Flow:
 * 1. Authenticate with `checkInternalAuth`; a failed check or missing `userId`
 *    returns 401.
 * 2. Validate the JSON body against `ExtendParseSchema`; a `ZodError` returns
 *    400 with the validation details.
 * 3. Resolve `file` / `filePath` to a URL with `resolveFileInputToUrl`; a
 *    resolution error is returned with the status it carries, and a missing
 *    URL returns 400.
 * 4. Validate the Extend endpoint with `validateUrlWithDNS` and send the
 *    request through `secureFetchWithPinnedIP` using the resolved IP; a failed
 *    validation returns 502.
 * 5. A non-OK upstream response is relayed with the upstream status code and a
 *    message extracted from its body; a successful one is mapped to
 *    `{ success: true, output: { ... } }`.
 *
 * Any other thrown error is logged and returned as a 500.
 *
 * @param request - Incoming Next.js request carrying the JSON body.
 * @returns A JSON `NextResponse` whose body always contains a `success` flag.
 */
export async function POST(request: NextRequest) {
  const requestId = generateRequestId()

  try {
    const authResult = await checkInternalAuth(request, { requireWorkflowId: false })

    if (!authResult.success || !authResult.userId) {
      logger.warn(`[${requestId}] Unauthorized Extend parse attempt`, {
        error: authResult.error || 'Missing userId',
      })
      return NextResponse.json(
        {
          success: false,
          error: authResult.error || 'Unauthorized',
        },
        { status: 401 }
      )
    }

    const userId = authResult.userId
    const body = await request.json()
    const validatedData = ExtendParseSchema.parse(body)

    logger.info(`[${requestId}] Extend parse request`, {
      fileName: validatedData.file?.name,
      filePath: validatedData.filePath,
      isWorkspaceFile: validatedData.filePath ? isInternalFileUrl(validatedData.filePath) : false,
      userId,
    })

    const resolution = await resolveFileInputToUrl({
      file: validatedData.file,
      filePath: validatedData.filePath,
      userId,
      requestId,
      logger,
    })

    if (resolution.error) {
      return NextResponse.json(
        { success: false, error: resolution.error.message },
        { status: resolution.error.status }
      )
    }

    const fileUrl = resolution.fileUrl
    if (!fileUrl) {
      return NextResponse.json({ success: false, error: 'File input is required' }, { status: 400 })
    }

    const extendBody: Record<string, unknown> = {
      file: { fileUrl },
    }

    // Optional parse settings; `config` is only attached when at least one is set.
    const config: Record<string, unknown> = {}

    if (validatedData.outputFormat) {
      config.target = validatedData.outputFormat
    }

    if (validatedData.chunking) {
      config.chunkingStrategy = { type: validatedData.chunking }
    }

    if (validatedData.engine) {
      config.engine = validatedData.engine
    }

    if (Object.keys(config).length > 0) {
      extendBody.config = config
    }

    const extendEndpoint = 'https://api.extend.ai/parse'
    const extendValidation = await validateUrlWithDNS(extendEndpoint, 'Extend API URL')
    const resolvedIP = extendValidation.resolvedIP
    // Treat a missing resolved IP like a failed validation instead of asserting
    // it is present: the pinned fetch below cannot run without it.
    if (!extendValidation.isValid || !resolvedIP) {
      logger.error(`[${requestId}] Extend API URL validation failed`, {
        error: extendValidation.error,
      })
      return NextResponse.json(
        {
          success: false,
          error: 'Failed to reach Extend API',
        },
        { status: 502 }
      )
    }

    const extendResponse = await secureFetchWithPinnedIP(extendEndpoint, resolvedIP, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        Accept: 'application/json',
        Authorization: `Bearer ${validatedData.apiKey}`,
        'x-extend-api-version': '2025-04-21',
      },
      body: JSON.stringify(extendBody),
    })

    if (!extendResponse.ok) {
      const errorText = await extendResponse.text()
      logger.error(`[${requestId}] Extend API error:`, errorText)
      const clientError = extractExtendErrorMessage(
        errorText,
        `Extend API error: ${extendResponse.statusText || extendResponse.status}`
      )
      return NextResponse.json(
        {
          success: false,
          error: clientError,
        },
        { status: extendResponse.status }
      )
    }

    const extendData = (await extendResponse.json()) as Record<string, unknown>

    logger.info(`[${requestId}] Extend parse successful`)

    return NextResponse.json({
      success: true,
      output: {
        id: extendData.id ?? null,
        status: extendData.status ?? 'PROCESSED',
        chunks: extendData.chunks ?? [],
        blocks: extendData.blocks ?? [],
        // Both camelCase and snake_case spellings are accepted from upstream.
        pageCount: extendData.pageCount ?? extendData.page_count ?? null,
        creditsUsed: extendData.creditsUsed ?? extendData.credits_used ?? null,
      },
    })
  } catch (error) {
    if (error instanceof z.ZodError) {
      logger.warn(`[${requestId}] Invalid request data`, { errors: error.errors })
      return NextResponse.json(
        {
          success: false,
          error: 'Invalid request data',
          details: error.errors,
        },
        { status: 400 }
      )
    }

    logger.error(`[${requestId}] Error in Extend parse:`, error)

    return NextResponse.json(
      {
        success: false,
        error: error instanceof Error ? error.message : 'Internal server error',
      },
      { status: 500 }
    )
  }
}
0 commit comments