feat: implement OCR fallback for PDF text extraction and enhance background UI with animated elements.

This commit is contained in:
2025-12-10 01:10:10 -05:00
parent 00656c40cc
commit f4aa45e527
6 changed files with 7633 additions and 1 deletions

7541
package-lock.json generated Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -21,6 +21,7 @@
"@radix-ui/react-slot": "^1.2.4",
"@radix-ui/react-tabs": "^1.1.13",
"@t3-oss/env-nextjs": "^0.12.0",
"canvas": "^3.2.0",
"class-variance-authority": "^0.7.1",
"clsx": "^2.1.1",
"lucide-react": "^0.556.0",
@@ -30,6 +31,7 @@
"react-dom": "19.2.1",
"tailwind-merge": "^3.4.0",
"tailwindcss-animate": "^1.0.7",
"tesseract.js": "^6.0.1",
"zod": "^3.24.2"
},
"devDependencies": {

View File

@@ -5,7 +5,15 @@ export default function HomePage() {
return (
<main className="relative min-h-screen w-full bg-background selection:bg-primary/10">
{/* Background Pattern */}
<div className="absolute inset-0 -z-10 h-full w-full bg-white dark:bg-black bg-[radial-gradient(#e5e7eb_1px,transparent_1px)] [background-size:16px_16px] dark:bg-[radial-gradient(#ffffff33_1px,transparent_1px)]" />
{/* Background Pattern */}
<div className="fixed inset-0 -z-10 h-full w-full bg-white dark:bg-neutral-950 overflow-hidden">
<div className="absolute inset-0 bg-[linear-gradient(to_right,#80808012_1px,transparent_1px),linear-gradient(to_bottom,#80808012_1px,transparent_1px)] bg-[size:24px_24px]"></div>
{/* Animated Blobs */}
<div className="absolute top-0 left-1/4 w-72 h-72 bg-purple-300 dark:bg-purple-600 rounded-full mix-blend-multiply dark:mix-blend-screen filter blur-xl opacity-70 dark:opacity-40 animate-blob"></div>
<div className="absolute top-0 right-1/4 w-72 h-72 bg-yellow-300 dark:bg-yellow-600 rounded-full mix-blend-multiply dark:mix-blend-screen filter blur-xl opacity-70 dark:opacity-40 animate-blob animation-delay-2000"></div>
<div className="absolute -bottom-8 left-1/3 w-72 h-72 bg-pink-300 dark:bg-pink-600 rounded-full mix-blend-multiply dark:mix-blend-screen filter blur-xl opacity-70 dark:opacity-40 animate-blob animation-delay-4000"></div>
</div>
<Navbar />

31
src/lib/ocr.ts Normal file
View File

@@ -0,0 +1,31 @@
import { type PDFPageProxy } from 'pdfjs-dist';
import { createWorker } from 'tesseract.js';
import { createCanvas } from 'canvas';
/**
 * Rasterizes a single PDF page and runs Tesseract OCR over the resulting image.
 *
 * The page is rendered at 2x scale into a node-canvas, encoded as a PNG buffer,
 * and handed to a Tesseract worker created for English (`'eng'`).
 *
 * @param page - The pdf.js page proxy to render and recognize.
 * @returns The recognized text, or an empty string if rendering or OCR fails.
 *   Failures are logged with `console.error` and never thrown to the caller.
 */
export async function performOcrOnPage(page: PDFPageProxy): Promise<string> {
  try {
    // Scale up for better OCR accuracy.
    const viewport = page.getViewport({ scale: 2.0 });
    const canvas = createCanvas(viewport.width, viewport.height);
    const context = canvas.getContext('2d');

    // Render the PDF page onto the canvas.
    await page.render({
      // node-canvas' 2D context is not typed as the DOM context that pdf.js expects.
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      canvasContext: context as any,
      viewport,
    }).promise;

    // Encode the rendered page as a PNG for Tesseract.
    const buffer = canvas.toBuffer('image/png');

    const worker = await createWorker('eng');
    try {
      const { data } = await worker.recognize(buffer);
      return data.text;
    } finally {
      // Always release the worker, including when recognize() rejects;
      // otherwise every failed page would leave a worker running.
      await worker.terminate();
    }
  } catch (error) {
    console.error('OCR failed for page:', error);
    return '';
  }
}

View File

@@ -18,6 +18,10 @@ interface TextItem {
hasEOL?: boolean;
}
import { performOcrOnPage } from './ocr';
// performOcrOnPage is the OCR fallback used by extractTextFromPdf for pages with little or no extractable text.
export async function extractTextFromPdf(buffer: Buffer): Promise<string> {
const data = new Uint8Array(buffer);
@@ -43,6 +47,20 @@ export async function extractTextFromPdf(buffer: Buffer): Promise<string> {
// Filter empty items
const contentItems = items.filter(item => item.str.trim().length > 0);
// OCR Fallback: If page has very little text, try OCR
if (contentItems.length < 5) { // Threshold: fewer than 5 text items
// Check total character count too, just in case
const totalChars = contentItems.reduce((acc, item) => acc + item.str.length, 0);
if (totalChars < 50) {
console.log(`Page ${i} seems to be an image/scanned. Attempting OCR...`);
const ocrText = await performOcrOnPage(page);
if (ocrText.trim().length > 0) {
fullText += `\n\n${ocrText}\n\n`;
continue;
}
}
}
if (contentItems.length === 0) continue;
for (const item of contentItems) {

View File

@@ -124,4 +124,36 @@
body {
@apply bg-background text-foreground;
}
}
@layer utilities {
/* Custom animation helpers declared in the `utilities` layer. */
/* Runs the `blob` keyframes on a 7-second cycle, repeating indefinitely. */
.animate-blob {
animation: blob 7s infinite;
}
/* Delays the start of the element's animation by 2 seconds. */
.animation-delay-2000 {
animation-delay: 2s;
}
/* Delays the start of the element's animation by 4 seconds. */
.animation-delay-4000 {
animation-delay: 4s;
}
}
@keyframes blob {
/* Keyframes referenced by `.animate-blob`: the element moves and rescales, then returns to its start. */
/* Start: no offset, natural size. */
0% {
transform: translate(0px, 0px) scale(1);
}
/* Move 30px right and 50px up while growing to 110%. */
33% {
transform: translate(30px, -50px) scale(1.1);
}
/* Move to 20px left and 20px below the origin while shrinking to 90%. */
66% {
transform: translate(-20px, 20px) scale(0.9);
}
/* End: identical to the 0% frame, so each repetition begins where the previous one ended. */
100% {
transform: translate(0px, 0px) scale(1);
}
}