Skip to content

Commit b172b28

Browse files
authored
fix(x2a): Handeling deletions of jobs (#2348)
* Deletion fixes: k8s job delete -> error on UI, delete k8s jobs before delete on DB * revert change on Pending state
1 parent 0742b22 commit b172b28

5 files changed

Lines changed: 103 additions & 5 deletions

File tree

workspaces/x2a/plugins/x2a-backend/src/router/common.ts

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -124,10 +124,20 @@ async function reconcileJobStatus(
124124
return job;
125125
}
126126

127+
deps.logger.info(
128+
`Reconciling job ${job.id} (k8s: ${job.k8sJobName}), DB status: '${job.status}'`,
129+
);
127130
const k8sStatus = await deps.kubeService.getJobStatus(job.k8sJobName);
128131

129132
if (k8sStatus.status === 'success' || k8sStatus.status === 'error') {
130-
const log = (await deps.kubeService.getJobLogs(job.k8sJobName)) as string;
133+
let log: string | null = null;
134+
try {
135+
log = (await deps.kubeService.getJobLogs(job.k8sJobName)) as string;
136+
} catch {
137+
deps.logger.warn(
138+
`Could not fetch logs for job ${job.id} (k8s job: ${job.k8sJobName})`,
139+
);
140+
}
131141
const updated = await deps.x2aDatabase.updateJob({
132142
id: job.id,
133143
status: k8sStatus.status,

workspaces/x2a/plugins/x2a-backend/src/router/modules.ts

Lines changed: 55 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,29 @@ import express from 'express';
1919
import { randomUUID } from 'node:crypto';
2020
import { InputError, NotFoundError } from '@backstage/errors';
2121

22+
import type { Module } from '@red-hat-developer-hub/backstage-plugin-x2a-common';
23+
2224
import type { RouterDeps } from './types';
2325
import { getUserRef, reconcileJobStatus } from './common';
26+
import { calculateModuleStatus } from '../services/X2ADatabaseService/status';
27+
28+
/**
29+
* Reconcile any pending/running phase jobs on a module against K8s state.
30+
* Mutates the module in-place and returns it.
31+
*/
32+
async function reconcileModuleJobs(
33+
module: Module,
34+
deps: Pick<RouterDeps, 'kubeService' | 'x2aDatabase' | 'logger'>,
35+
): Promise<Module> {
36+
const phases = ['analyze', 'migrate', 'publish'] as const;
37+
for (const phase of phases) {
38+
const job = module[phase];
39+
if (job && ['pending', 'running'].includes(job.status)) {
40+
module[phase] = await reconcileJobStatus(job, deps);
41+
}
42+
}
43+
return module;
44+
}
2445

2546
export function registerModuleRoutes(
2647
router: express.Router,
@@ -49,6 +70,24 @@ export function registerModuleRoutes(
4970
// List modules
5071
const modules = await x2aDatabase.listModules({ projectId });
5172

73+
// Reconcile any pending/running jobs against K8s
74+
await Promise.all(
75+
modules.map(m =>
76+
reconcileModuleJobs(m, { kubeService, x2aDatabase, logger }),
77+
),
78+
);
79+
80+
// Recalculate status for each module after reconciliation
81+
for (const m of modules) {
82+
const { status, errorDetails } = calculateModuleStatus({
83+
analyze: m.analyze,
84+
migrate: m.migrate,
85+
publish: m.publish,
86+
});
87+
m.status = status;
88+
m.errorDetails = errorDetails;
89+
}
90+
5291
res.json(modules);
5392
});
5493

@@ -85,6 +124,18 @@ export function registerModuleRoutes(
85124
);
86125
}
87126

127+
// Reconcile any pending/running jobs against K8s
128+
await reconcileModuleJobs(module, { kubeService, x2aDatabase, logger });
129+
130+
// Recalculate status after reconciliation may have updated phase jobs
131+
const { status, errorDetails } = calculateModuleStatus({
132+
analyze: module.analyze,
133+
migrate: module.migrate,
134+
publish: module.publish,
135+
});
136+
module.status = status;
137+
module.errorDetails = errorDetails;
138+
88139
res.json(module);
89140
});
90141

@@ -293,7 +344,10 @@ export function registerModuleRoutes(
293344
});
294345

295346
// Update job with k8s job name
296-
await x2aDatabase.updateJob({ id: job.id, k8sJobName });
347+
await x2aDatabase.updateJob({
348+
id: job.id,
349+
k8sJobName,
350+
});
297351

298352
logger.info(
299353
`${phase} job created: jobId=${job.id}, moduleId=${moduleId}, k8sJobName=${k8sJobName}`,

workspaces/x2a/plugins/x2a-backend/src/router/projects.ts

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,25 @@ export function registerProjectRoutes(
167167
const endpoint = 'DELETE /projects/:projectId';
168168
const projectId = req.params.projectId;
169169
logger.info(`${endpoint} request received: projectId=${projectId}`);
170+
171+
// Cancel any active k8s jobs before deleting DB records
172+
const jobs = await x2aDatabase.listJobsForProject({ projectId });
173+
const activeJobs = jobs.filter(
174+
job => ['pending', 'running'].includes(job.status) && job.k8sJobName,
175+
);
176+
await Promise.all(
177+
activeJobs.map(job => {
178+
logger.info(
179+
`Cancelling k8s job ${job.k8sJobName} for project ${projectId}`,
180+
);
181+
return kubeService.deleteJob(job.k8sJobName!).catch(err => {
182+
logger.warn(
183+
`Failed to cancel k8s job ${job.k8sJobName}: ${err.message}`,
184+
);
185+
});
186+
}),
187+
);
188+
170189
const deletedCount = await x2aDatabase.deleteProject(
171190
{ projectId },
172191
{
@@ -317,7 +336,10 @@ export function registerProjectRoutes(
317336
});
318337

319338
// Update job with k8s job name
320-
await x2aDatabase.updateJob({ id: job.id, k8sJobName });
339+
await x2aDatabase.updateJob({
340+
id: job.id,
341+
k8sJobName,
342+
});
321343

322344
logger.info(
323345
`Init job created: jobId=${job.id}, k8sJobName=${k8sJobName}`,

workspaces/x2a/plugins/x2a-backend/src/services/KubeService.ts

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -305,6 +305,12 @@ export class KubeService {
305305
namespace: this.#namespace,
306306
});
307307

308+
// If the job is being deleted, treat it as an error regardless of status
309+
if (job.metadata?.deletionTimestamp) {
310+
this.#logger.warn(`Job ${k8sJobName} is being deleted`);
311+
return { status: 'error', message: 'Job was deleted' };
312+
}
313+
308314
const jobStatus = job.status;
309315

310316
// Check if job succeeded
@@ -325,7 +331,9 @@ export class KubeService {
325331
// Job exists but hasn't started yet
326332
return { status: 'pending', message: 'Job is pending' };
327333
} catch (error: any) {
328-
if (error.statusCode === 404 || error.code === 404) {
334+
const statusCode =
335+
error.statusCode ?? error.response?.statusCode ?? error.code;
336+
if (statusCode === 404) {
329337
this.#logger.warn(`Job ${k8sJobName} not found`);
330338
return { status: 'error', message: 'Job not found' };
331339
}

workspaces/x2a/plugins/x2a-backend/templates/x2a-job-script.sh

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ export GIT_TERMINAL_PROMPT=0
88
ERROR_MESSAGE=""
99
ARTIFACTS=()
1010
PUSH_FAILED=""
11+
TERMINATED=false
1112

1213
# Report job result back to the backend.
1314
# TODO: Incorporate CALLBACK_TOKEN for request signing (HMAC-SHA256).
@@ -92,7 +93,9 @@ Co-Authored-By: ${GIT_AUTHOR_NAME} <${GIT_AUTHOR_EMAIL}>
9293
fi
9394
fi
9495

95-
if [ ${exit_code} -ne 0 ]; then
96+
if [ "$TERMINATED" = true ]; then
97+
report_result "error" "Job was terminated"
98+
elif [ ${exit_code} -ne 0 ]; then
9699
report_result "error" "${ERROR_MESSAGE:-Script failed with exit code ${exit_code}}"
97100
elif [ -n "${PUSH_FAILED}" ]; then
98101
report_result "error" "${PUSH_FAILED}"
@@ -136,6 +139,7 @@ git_clone_repos() {
136139
}
137140

138141
trap cleanup EXIT
142+
trap 'TERMINATED=true' SIGTERM SIGINT
139143

140144
#
141145
# X2A Job Script

0 commit comments

Comments
 (0)