|
@@ -41,6 +41,29 @@ if (cluster.isMaster) {
|
|
|
|
|
|
|
|
fs.mkdirSync(UPLOAD_DIR, { recursive: true });
|
|
fs.mkdirSync(UPLOAD_DIR, { recursive: true });
|
|
|
|
|
|
|
|
|
|
+  // Stale-job recovery is done here in the master instead of in each worker,
+  // so one process resets stale PROCESSING jobs rather than every worker
+  // independently finding and recovering the same rows.
+  //
+  // The reset is a single filtered updateMany call (not a findMany followed by
+  // an updateMany by id), so there is no gap in this process between reading
+  // the stale set and writing the new status.
+  //
+  // NOTE(review): this IIFE is not awaited. The reset only precedes worker
+  // polling if workers are forked (or begin polling) after it settles —
+  // confirm the fork order; otherwise a job a worker has just claimed could
+  // be flipped back to PENDING.
+  const masterPrisma = new PrismaClient({ datasources: { db: { url: process.env.DATABASE_URL } } });
+  (async () => {
+    try {
+      const { count } = await masterPrisma.asset.updateMany({
+        where: { transcodeStatus: 'PROCESSING', transcodePaused: false },
+        data: { transcodeStatus: 'PENDING', transcodeProgress: 0 },
+      });
+      if (count > 0) {
+        console.log(`[master] Reset ${count} stale job(s) to PENDING`);
+      }
+    } finally {
+      // Always release the master's dedicated connection, even on failure.
+      await masterPrisma.$disconnect();
+    }
+    // Log query/disconnect failures instead of leaving a floating promise that
+    // would surface as an unhandled rejection in the master process.
+  })().catch(err => console.error('[master] Stale job recovery failed:', err.message));
|
|
|
|
|
+
|
|
|
cluster.on('exit', (worker, code, signal) => {
|
|
cluster.on('exit', (worker, code, signal) => {
|
|
|
console.log(`[master] Worker ${worker.id} exited (${code || signal}), restarting...`);
|
|
console.log(`[master] Worker ${worker.id} exited (${code || signal}), restarting...`);
|
|
|
setTimeout(() => cluster.fork(), 2000);
|
|
setTimeout(() => cluster.fork(), 2000);
|
|
@@ -421,21 +444,6 @@ async function claimOneJob() {
|
|
|
});
|
|
});
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
-/** ── Recover stale jobs on startup ────────────────────────────────────── */
|
|
|
|
|
-async function recoverStaleJobs() {
|
|
|
|
|
- const stale = await prisma.asset.findMany({
|
|
|
|
|
- where: { transcodeStatus: 'PROCESSING', transcodePaused: false },
|
|
|
|
|
- select: { id: true },
|
|
|
|
|
- });
|
|
|
|
|
- if (stale.length > 0) {
|
|
|
|
|
- console.log(`[worker:${WORKER_ID}] Recovering ${stale.length} stale job(s)...`);
|
|
|
|
|
- await prisma.asset.updateMany({
|
|
|
|
|
- where: { id: { in: stale.map(s => s.id) } },
|
|
|
|
|
- data: { transcodeStatus: 'PENDING', transcodeProgress: 0 },
|
|
|
|
|
- });
|
|
|
|
|
- }
|
|
|
|
|
-}
|
|
|
|
|
-
|
|
|
|
|
/** ── Poll loop ────────────────────────────────────────────────────────── */
|
|
/** ── Poll loop ────────────────────────────────────────────────────────── */
|
|
|
async function poll() {
|
|
async function poll() {
|
|
|
try {
|
|
try {
|
|
@@ -460,7 +468,6 @@ async function main() {
|
|
|
// Don't run the worker entry point in master — it only forks children
|
|
// Don't run the worker entry point in master — it only forks children
|
|
|
if (cluster.isMaster) return;
|
|
if (cluster.isMaster) return;
|
|
|
console.log(`[worker:${WORKER_ID}] Started, ENCODER=${ENCODER}`);
|
|
console.log(`[worker:${WORKER_ID}] Started, ENCODER=${ENCODER}`);
|
|
|
- await recoverStaleJobs();
|
|
|
|
|
// Start polling immediately — tight recursive loop with backoff when idle
|
|
// Start polling immediately — tight recursive loop with backoff when idle
|
|
|
poll().catch(err => console.error(`[worker:${WORKER_ID}] Fatal poll error:`, err.message));
|
|
poll().catch(err => console.error(`[worker:${WORKER_ID}] Fatal poll error:`, err.message));
|
|
|
log('ready', {});
|
|
log('ready', {});
|