Przeglądaj źródła

feat: add watchdog timeout, admin reprocess-all button, and fix Dockerfile DNS

- Worker: add STUCK_TIMEOUT_MS watchdog (2 min default) that runs in the poll loop
  when the queue is empty — resets PROCESSING jobs not updated within the threshold to PENDING
- API: add POST /api/assets/admin/reprocess-all (admin-only) to manually
  reset all unpaused PROCESSING jobs to PENDING from the UI
- Frontend: add "Force Reprocess All" button in TranscodeTasksPanel
  visible only to ADMIN members, shows stuck count badge
- Dockerfile.api: write /etc/resolv.conf with 1.1.1.1/8.8.8.8/8.26.56.26 before apt-get
  to resolve mirror timeouts; copy worker JS to src/worker (not dist/workers)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
kingkong 1 miesiąc temu
rodzic
commit
8477e27892

+ 8 - 2
Dockerfile.api

@@ -1,6 +1,12 @@
 FROM node:22-slim
 
 # Install FFmpeg + wget (for thumbnail generation and healthcheck)
+# DNS: pin public resolvers so apt-get can resolve mirrors. NOTE(review): Docker manages /etc/resolv.conf per RUN — confirm this survives into the next RUN
+RUN cat > /etc/resolv.conf <<EOF
+nameserver 1.1.1.1
+nameserver 8.8.8.8
+nameserver 8.26.56.26
+EOF
 RUN apt-get update && apt-get install -y --no-install-recommends ffmpeg wget ca-certificates && rm -rf /var/lib/apt/lists/*
 
 WORKDIR /app
@@ -17,8 +23,8 @@ COPY packages/api/src ./src
 # Build TypeScript
 RUN npx tsc -p tsconfig.json
 
-# Copy the workers JS into dist (NOT compiled — pure JS, runs in forked child)
-COPY packages/api/src/workers ./dist/workers
+# Copy the worker JS files (pure JS, not compiled — run in forked child)
+COPY packages/api/src/worker ./src/worker
 
 EXPOSE 3001
 

+ 26 - 0
packages/api/src/routes/assets.ts

@@ -511,4 +511,30 @@ router.delete('/:id', async (req: Request, res: Response) => {
   }
 });
 
+// POST /api/assets/admin/reprocess-all — admin-only: reset every unpaused PROCESSING job to PENDING.
+// Scope is global (no project or staleness filter), so jobs that are actively transcoding are
+// reset too.
+router.post('/admin/reprocess-all', async (req: Request, res: Response) => {
+  try {
+    // Optional chaining instead of `!`: a request without a user gets the 403 below rather than
+    // a TypeError that would surface as a 500.
+    if (req.user?.globalRole !== 'ADMIN') {
+      res.status(403).json({ error: 'Admin access required' });
+      return;
+    }
+    // One conditional UPDATE instead of find-then-update-by-id: a job that leaves PROCESSING
+    // (e.g. completes) between the two queries can no longer be pulled back to PENDING, and
+    // the reported count is the number of rows actually changed.
+    const { count } = await prisma.asset.updateMany({
+      where: { transcodeStatus: 'PROCESSING', transcodePaused: false },
+      data: { transcodeStatus: 'PENDING', transcodeProgress: 0 },
+    });
+    const message = count === 0 ? 'No stuck jobs found' : `Reset ${count} stuck job(s) to PENDING`;
+    res.json({ message, count });
+  } catch (err) {
+    console.error('Reprocess-all error:', err);
+    res.status(500).json({ error: 'Internal server error' });
+  }
+});
+
 export default router;

+ 25 - 1
packages/api/src/worker/index.js

@@ -31,6 +31,7 @@ const POLL_INTERVAL_MS = parseInt(process.env.POLL_INTERVAL_MS || '2000', 10);
 const BACKOFF_MS = parseInt(process.env.BACKOFF_MS || '1000', 10); // idle sleep between polls
 const WORKER_CONCURRENCY = parseInt(process.env.WORKER_CONCURRENCY || '4', 10);
 const ENCODER = process.env.ENCODER || 'libx264'; // libx264 | h264_nvenc | h264_qsv | h264_videotoolbox
+const STUCK_TIMEOUT_MS = parseInt(process.env.STUCK_TIMEOUT_MS || '120000', 10); // 2 min default
 
 // ─── Master: fork N workers and manage them ────────────────────────────────
 if (cluster.isMaster) {
@@ -418,6 +419,28 @@ async function processJob(asset) {
   }
 }
 
+/**
+ * ── Watchdog: reset jobs that have sat in PROCESSING longer than STUCK_TIMEOUT_MS ──
+ * Unpaused PROCESSING assets whose updatedAt is older than the cutoff go back to PENDING with
+ * progress 0. Assumes updatedAt is refreshed by progress writes — TODO confirm.
+ */
+async function resetStuckJobs() {
+  const cutoff = new Date(Date.now() - STUCK_TIMEOUT_MS);
+  try {
+    // Single conditional UPDATE: rows that finished or were touched since the cutoff are skipped
+    const { count } = await prisma.asset.updateMany({
+      where: { transcodeStatus: 'PROCESSING', transcodePaused: false, updatedAt: { lt: cutoff } },
+      data: { transcodeStatus: 'PENDING', transcodeProgress: 0 },
+    });
+    if (count > 0) {
+      log('watchdog_reset', { count, cutoffSeconds: STUCK_TIMEOUT_MS / 1000 });
+    }
+  } catch (err) {
+    // Best-effort: never break the poll loop, but do not hide the failure either
+    log('watchdog_error', { error: err instanceof Error ? err.message : String(err) });
+  }
+}
+
 /** ── Claim one job (atomic, skip locked) ─────────────────────────────── */
 async function claimOneJob() {
   const result = await prisma.$executeRaw`
@@ -449,7 +472,8 @@ async function poll() {
   try {
     const claimed = await claimOneJob();
     if (!claimed) {
-      // No job — sleep then poll again with backoff (max 5s)
+      // No job — check for stuck jobs and sleep with backoff
+      await resetStuckJobs();
       await sleep(BACKOFF_MS);
       return poll();
     }

+ 18 - 0
src/app/(dashboard)/projects/[projectId]/page.tsx

@@ -187,6 +187,7 @@ export default function ProjectDetailPage() {
   // Copy link
   const [copiedInviteId, setCopiedInviteId] = useState<string | null>(null);
   const [inviteUrlMap, setInviteUrlMap] = useState<Record<string, string>>({});
+  const [reprocessingAll, setReprocessingAll] = useState(false);
 
   const canManage = members.some(m =>
     m.user.id === user?.id && ['ADMIN', 'EDITOR'].includes(m.role)
@@ -955,6 +956,7 @@ export default function ProjectDetailPage() {
               assets={assets}
               token={token}
               canManage={canManage}
+              isAdmin={isAdmin}
               onDelete={handleDeleteAsset}
               onCancel={async (id) => {
                 if (!token) return;
@@ -980,6 +982,22 @@ export default function ProjectDetailPage() {
                   setAssets(prev => prev.map(a => a.id === id ? { ...a, transcodeStatus: 'PENDING', transcodeProgress: 0, transcodeError: null, hlsPath: null, transcodePaused: false } : a));
                 } catch (err) { alert(err instanceof Error ? err.message : 'Failed to reprocess transcode'); }
               }}
+              onReprocessAll={async () => {
+                if (!token) return;
+                setReprocessingAll(true);
+                try {
+                  const result = await assetsApi.reprocessAll(token);
+                  // Mirror the server-side filter: only unpaused PROCESSING assets are reset
+                  setAssets(prev => prev.map(a =>
+                    a.transcodeStatus === 'PROCESSING' && !a.transcodePaused
+                      ? { ...a, transcodeStatus: 'PENDING', transcodeProgress: 0 }
+                      : a
+                  ));
+                  alert(result.message);
+                } catch (err) { alert(err instanceof Error ? err.message : 'Failed to reset stuck jobs'); }
+                finally { setReprocessingAll(false); }
+              }}
+              isReprocessingAll={reprocessingAll}
             />
           </div>
         )}

+ 36 - 1
src/components/transcode/TranscodeTasksPanel.tsx

@@ -7,11 +7,14 @@ interface Props {
   assets: Asset[];
   token: string | null;
   canManage: boolean;
+  isAdmin: boolean;
   onDelete: (id: string, title: string) => void;
   onCancel: (id: string) => void;
   onPause: (id: string) => void;
   onResume: (id: string) => void;
   onReprocess: (id: string) => void;
+  onReprocessAll: () => void;
+  isReprocessingAll: boolean;
 }
 
 const STATUS_CONFIG: Record<TranscodeStatus, {
@@ -246,7 +249,7 @@ function TranscodeTaskRow({
   );
 }
 
-export function TranscodeTasksPanel({ assets, canManage, onDelete, onCancel, onPause, onResume, onReprocess }: Props) {
+export function TranscodeTasksPanel({ assets, canManage, isAdmin, onDelete, onCancel, onPause, onResume, onReprocess, onReprocessAll, isReprocessingAll }: Props) {
   const [filter, setFilter] = useState<'all' | 'processing' | 'completed' | 'failed'>('all');
 
   const filtered = assets.filter(a => {
@@ -260,6 +263,7 @@ export function TranscodeTasksPanel({ assets, canManage, onDelete, onCancel, onP
   const processingCount = assets.filter(a => ['PENDING', 'UPLOADING', 'PROCESSING'].includes(a.transcodeStatus)).length;
   const completedCount = assets.filter(a => a.transcodeStatus === 'COMPLETED').length;
   const failedCount = assets.filter(a => ['FAILED', 'UNSUPPORTED_CODEC'].includes(a.transcodeStatus)).length;
+  const stuckCount = assets.filter(a => a.transcodeStatus === 'PROCESSING' && !a.transcodePaused).length; // paused jobs are skipped by reprocess-all
 
   return (
     <div>
@@ -290,6 +294,37 @@ export function TranscodeTasksPanel({ assets, canManage, onDelete, onCancel, onP
         ))}
       </div>
 
+      {/* Admin: Force Reprocess All */}
+      {isAdmin && (
+        <div className="flex justify-end mb-4">
+          <button
+            onClick={onReprocessAll}
+            disabled={isReprocessingAll || stuckCount === 0}
+            className="flex items-center gap-2 px-3 py-1.5 rounded-lg text-xs font-medium transition-all disabled:opacity-40"
+            style={{
+              background: 'rgba(251,146,60,0.10)',
+              color: '#FB923C',
+              border: '1px solid rgba(251,146,60,0.20)',
+            }}
+            title="Reset all stuck PROCESSING jobs to PENDING"
+          >
+            {isReprocessingAll ? (
+              <div className="w-3.5 h-3.5 border-2 border-current border-t-transparent rounded-full animate-spin" />
+            ) : (
+              <svg className="w-3.5 h-3.5" fill="none" viewBox="0 0 24 24" stroke="currentColor" strokeWidth={2}>
+                <path strokeLinecap="round" strokeLinejoin="round" d="M19.5 12c0-1.232-.046-2.453-.138-3.662a4.006 4.006 0 00-3.7-3.7 48.678 48.678 0 00-7.324 0 4.006 4.006 0 00-3.7 3.7c-.017.22-.032.441-.046.662M19.5 12l3-3m-3 3l-3-3m-12 3c0 1.232.046 2.453.138 3.662a4.006 4.006 0 003.7 3.7 48.656 48.656 0 007.324 0 4.006 4.006 0 003.7-3.7c.017-.22.032-.441.046-.662M4.5 12l3 3m-3-3l-3 3" />
+              </svg>
+            )}
+            <span>{isReprocessingAll ? 'Resetting…' : 'Force Reprocess All'}</span>
+            {stuckCount > 0 && !isReprocessingAll && (
+              <span className="text-[10px] px-1.5 py-0.5 rounded-full" style={{ background: 'rgba(251,146,60,0.20)' }}>
+                {stuckCount} stuck
+              </span>
+            )}
+          </button>
+        </div>
+      )}
+
       {/* Task list */}
       {filtered.length === 0 ? (
         <div className="card p-16 text-center">

+ 3 - 0
src/lib/api.ts

@@ -154,6 +154,9 @@ export const assetsApi = {
 
   resumeTranscode: (token: string, id: string) =>
     apiFetch<{ asset: Asset }>(`/api/assets/${id}/transcode/resume`, { method: 'POST', token }),
+
+  reprocessAll: (token: string) =>
+    apiFetch<{ message: string; count: number }>(`/api/assets/admin/reprocess-all`, { method: 'POST', token }),
 };
 
 // ── Comments ─────────────────────────────────────────────────────────────────