import { createWorker } from 'tesseract.js';
import { ExtractorError } from '../errors';

// Shared OCR worker. It is only assigned once initialization has fully
// succeeded, so a non-null value is always ready for `recognize`.
// Typed as `any` for compatibility across tesseract.js versions.
let worker: any | null = null;

// Initialization currently in progress, if any. Concurrent callers share this
// promise so that only one worker is ever created at a time. It is reset to
// null as soon as initialization settles (successfully or not).
let workerInit: Promise<any> | null = null;

/**
 * Creates a new Tesseract worker and prepares it for English OCR.
 *
 * @returns The worker after `load`, `loadLanguage('eng')`, `initialize('eng')`
 *   and `setParameters` have all completed.
 * @throws Whatever the underlying worker call rejected with. In that case the
 *   partially initialized worker is terminated (best effort) before rethrowing.
 */
async function createInitializedWorker(): Promise<any> {
  // The progress logger is a worker-creation option, not an engine parameter,
  // so it is handed to `createWorker` instead of `setParameters`.
  const options =
    process.env.NODE_ENV === 'development'
      ? { logger: (message: unknown) => console.log(message) } // Development logger
      : {};

  // `await` is a no-op when `createWorker` returns the worker synchronously and
  // is required when it returns a promise, so this works in either case.
  const instance = await createWorker(options);

  try {
    await instance.load();
    await instance.loadLanguage('eng');
    await instance.initialize('eng');

    await instance.setParameters({
      tessedit_pageseg_mode: 1, // Automatic page segmentation with OSD
      tessedit_char_whitelist: 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789.,!?@#$%&*()+-=:;"\'', // Limit recognized characters
    });
  } catch (error) {
    // Do not leak a half-initialized worker; the original failure is what matters.
    try {
      await instance.terminate();
    } catch {
      // Ignore secondary failures while tearing down.
    }
    throw error;
  }

  return instance;
}

/**
 * Returns the shared Tesseract worker, creating and initializing it on first use.
 *
 * Calls made while initialization is still running all receive the same fully
 * initialized worker. If initialization fails, nothing is cached and the next
 * call starts over with a fresh worker.
 *
 * NOTE: `worker` stays null until initialization finishes, so code that checks
 * `worker` during that window (such as `cleanup`) will not see the pending worker.
 *
 * @returns The initialized worker.
 * @throws The error that caused worker creation or initialization to fail.
 */
async function getWorker(): Promise<any> {
  if (worker) {
    return worker;
  }

  if (!workerInit) {
    workerInit = (async () => {
      try {
        const ready = await createInitializedWorker();
        worker = ready;
        return ready;
      } finally {
        // Only track the promise while it is pending; afterwards `worker`
        // (or its absence) is the single source of truth.
        workerInit = null;
      }
    })();
  }

  return workerInit;
}

/**
 * Reads a file with `FileReader` and resolves with its contents as a data URL.
 * Rejects with whatever the reader passes to its `onerror` handler.
 */
function readAsDataUrl(file: File): Promise<string> {
  return new Promise<string>((resolve, reject) => {
    const fileReader = new FileReader();
    fileReader.onerror = reject;
    fileReader.onload = () => resolve(fileReader.result as string);
    fileReader.readAsDataURL(file);
  });
}

/**
 * Runs OCR on an image file and returns the recognized text, trimmed.
 *
 * @param file - The image to read text from.
 * @returns The recognized text with surrounding whitespace removed.
 * @throws {ExtractorError} When the recognized text is empty, or (with a generic
 *   message) when obtaining the worker, reading the file, or recognition fails.
 *   Every failure is also logged via `console.error`.
 */
export async function extractTextFromImage(file: File): Promise<string> {
  try {
    // Obtain the worker first; the file is only read once a worker is available.
    const ocrWorker = await getWorker();
    const imageDataUrl = await readAsDataUrl(file);

    const { data } = await ocrWorker.recognize(imageDataUrl);
    const recognized = data.text.trim();

    if (recognized) {
      return recognized;
    }

    throw new ExtractorError('No readable text found in the image');
  } catch (error) {
    console.error('OCR error:', error);

    // Our own errors already carry a user-facing message; anything else is
    // replaced by a generic one.
    throw error instanceof ExtractorError
      ? error
      : new ExtractorError(
          'Failed to extract text from image. Please try a clearer image or a different format.'
        );
  }
}

/**
 * Terminates the shared OCR worker, if one exists, and forgets it.
 *
 * The shared reference is cleared before termination is awaited, so a worker
 * that is shutting down (or whose shutdown failed) is never handed out again.
 * Calling this when no worker exists is a no-op.
 *
 * @throws Whatever `terminate()` rejects with; the reference is cleared regardless.
 */
export async function cleanup(): Promise<void> {
  const active = worker;
  if (!active) {
    return;
  }

  // Detach first: if `terminate()` rejects or another caller runs while it is
  // pending, they must not see the worker that is being torn down.
  worker = null;
  await active.terminate();
}
