import { FileProcessingError } from '../errors/FileProcessingError';
import * as pdfjsLib from 'pdfjs-dist';

// Point pdf.js at the CDN-hosted worker script whose version matches the
// imported library (`pdfjsLib.version`). The scheme is spelled out as https://
// instead of a protocol-relative `//` URL so the worker still resolves when the
// host page is not itself served over http(s) (for example file://).
pdfjsLib.GlobalWorkerOptions.workerSrc = `https://cdnjs.cloudflare.com/ajax/libs/pdf.js/${pdfjsLib.version}/pdf.worker.min.js`;

/**
 * Reads a PDF `File` into memory, opens it with pdf.js and returns the text of
 * every page as produced by `extractTextFromPDF`.
 *
 * The pdf.js loading task is destroyed once extraction finishes (whether it
 * succeeded or failed) so the document and its worker resources are released.
 *
 * @param file - The PDF file to read.
 * @returns The extracted text of all pages.
 * @throws FileProcessingError If reading, opening or extracting fails. The
 *   underlying error is written to `console.error` and is not attached to the
 *   thrown error.
 */
export async function parsePDF(file: File): Promise<string> {
  // Kept outside the try so the finally block can release it on every path.
  let loadingTask: ReturnType<typeof pdfjsLib.getDocument> | undefined;

  try {
    const arrayBuffer = await file.arrayBuffer();
    loadingTask = pdfjsLib.getDocument({
      data: arrayBuffer,
      useWorkerFetch: false,
      isEvalSupported: false
    });

    const pdf = await loadingTask.promise;
    // `return await` (not a bare return) so a rejection is handled by the catch below.
    return await extractTextFromPDF(pdf);
  } catch (error) {
    console.error('PDF parsing error:', error);
    throw new FileProcessingError('Failed to process PDF file');
  } finally {
    if (loadingTask !== undefined) {
      try {
        await loadingTask.destroy();
      } catch (cleanupError) {
        // A cleanup failure must not replace the parse result or the parse error.
        console.warn('PDF cleanup error:', cleanupError);
      }
    }
  }
}

/**
 * Type guard: true when a text-content entry is an object carrying a string
 * `str` field.
 */
function isTextItem(item: unknown): item is { str: string } {
  return (
    typeof item === 'object' &&
    item !== null &&
    typeof (item as { str?: unknown }).str === 'string'
  );
}

/**
 * Collects the text of every page of an opened PDF document.
 *
 * Pages are read one after another in page order. For each page the `str`
 * values of its text-content entries are joined with single spaces and
 * prefixed with a `Page N:` header line; pages are separated by a blank line.
 * Entries without a string `str` are skipped.
 *
 * @param pdf - The opened pdf.js document.
 * @returns The concatenated text of all pages.
 */
async function extractTextFromPDF(pdf: pdfjsLib.PDFDocumentProxy): Promise<string> {
  const numPages = pdf.numPages;
  const pageTexts: string[] = [];

  // pdf.js page numbers are 1-based.
  for (let i = 1; i <= numPages; i++) {
    const page = await pdf.getPage(i);
    try {
      const content = await page.getTextContent();
      // Widen to unknown so the guard, not an `any` cast, decides what has text.
      const items: readonly unknown[] = content.items;
      const text = items
        .filter(isTextItem)
        .map((item) => item.str)
        .join(' ');
      pageTexts.push(`Page ${i}:\n${text}`);
    } finally {
      // Release per-page resources before moving on to the next page.
      page.cleanup();
    }
  }

  return pageTexts.join('\n\n');
}