import Tesseract from "tesseract.js";
import { FabricImage, filters } from "fabric"
import * as pdfjsLib from "pdfjs-dist"
import { GlobalWorkerOptions } from 'pdfjs-dist/build/pdf';

// Point pdf.js at the worker script that matches the installed pdfjs-dist version.
// The URL must address the raw file: unpkg's "/browse/" paths serve an HTML
// file-browser page, which cannot be loaded as a worker module.
GlobalWorkerOptions.workerSrc = `https://unpkg.com/pdfjs-dist@${pdfjsLib.version}/build/pdf.worker.min.mjs`;

/**
 * Renders every page of a PDF to a canvas and returns the pages worth running
 * OCR on as data URLs.
 *
 * @param {{ arrayBuffer: () => Promise<ArrayBuffer> }} file - PDF file (or Blob-like object).
 * @param {number} [scale=1.0] - Viewport scale used when rendering each page.
 * @returns {Promise<string[]>} Data URLs (from `canvas.toDataURL()`) of the pages
 *     for which `hasSignificantImage` returned true, in page order.
 */
const extractImagesFromPDF = async (file, scale = 1.0) => {
    const arrayBuffer = await file.arrayBuffer();
    const pdf = await pdfjsLib.getDocument({ data: arrayBuffer }).promise;
    const images = [];

    try {
        for (let pageNum = 1; pageNum <= pdf.numPages; pageNum++) {
            const page = await pdf.getPage(pageNum);
            const viewport = page.getViewport({ scale });

            const canvas = document.createElement('canvas');
            canvas.height = viewport.height;
            canvas.width = viewport.width;
            const context = canvas.getContext('2d');

            await page.render({ canvasContext: context, viewport }).promise;

            // Skip pages whose rendering is (almost) entirely white.
            if (hasSignificantImage(canvas)) {
                images.push(canvas.toDataURL());
            }
        }
    } finally {
        // Release the document (and the resources pdf.js holds for it) even
        // when rendering fails part-way through.
        await pdf.destroy();
    }

    return images;
};

/**
 * Decides whether a rendered canvas carries enough non-white content to be
 * worth processing.
 *
 * A pixel counts as non-white when any of its red, green or blue channels is
 * below 250; the alpha channel is not inspected.
 *
 * @param {HTMLCanvasElement} canvas - Canvas whose pixels are inspected.
 * @returns {boolean} True when more than 5% of the pixels are non-white.
 */
const hasSignificantImage = (canvas) => {
    const ctx = canvas.getContext('2d');
    const rgba = ctx.getImageData(0, 0, canvas.width, canvas.height).data;
    const totalPixels = rgba.length / 4;

    let inkedPixels = 0;
    // Pixels are stored as consecutive R, G, B, A values, hence the stride of 4.
    for (let offset = 0; offset < rgba.length; offset += 4) {
        const red = rgba[offset];
        const green = rgba[offset + 1];
        const blue = rgba[offset + 2];
        const isInked = red < 250 || green < 250 || blue < 250;
        if (isInked) {
            inkedPixels += 1;
        }
    }

    return inkedPixels / totalPixels > 0.05;
};

/**
 * Loads an image into a Fabric image object.
 *
 * `FabricImage.fromURL` is promise-based (it takes an options object, not a
 * completion callback), so its promise is awaited directly.
 *
 * @param {Blob|string} imageData - A Blob/File, or a URL / data URL string.
 * @returns {Promise<FabricImage>} The loaded Fabric image.
 * @throws Rethrows whatever error the image load fails with.
 */
const loadImage = async (imageData) => {
    const isBlob = imageData instanceof Blob;
    const imageUrl = isBlob ? URL.createObjectURL(imageData) : imageData;

    try {
        const img = await FabricImage.fromURL(imageUrl, { crossOrigin: 'anonymous' });
        if (!img) {
            throw new Error('Failed to load image for unknown reason');
        }
        return img;
    } catch (err) {
        console.error('Error loading image:', err);
        throw err;
    } finally {
        // Only revoke URLs created here; caller-supplied URLs stay the caller's responsibility.
        if (isBlob) {
            URL.revokeObjectURL(imageUrl);
        }
    }
};

/**
 * Appends a grayscale filter to a Fabric image and re-applies its filters.
 *
 * Filter classes are taken from Fabric's `filters` export rather than from a
 * static property on `FabricImage`.
 *
 * @param {FabricImage} image - Image to modify in place.
 * @returns {FabricImage} The same image instance.
 */
const convertToGrayscale = (image) => {
    image.filters.push(new filters.Grayscale());
    image.applyFilters();
    return image;
};

/**
 * Appends a contrast filter to a Fabric image and re-applies its filters.
 *
 * Fabric documents the contrast value as ranging from -1 to 1, so the value
 * is clamped to that range before being handed to the filter.
 * NOTE(review): the default of 50 lies outside that range and therefore acts
 * as the maximum (1) — confirm the intended strength.
 *
 * @param {FabricImage} image - Image to modify in place.
 * @param {number} [contrast=50] - Requested contrast; clamped to [-1, 1].
 * @returns {FabricImage} The same image instance.
 */
const adjustContrast = (image, contrast = 50) => {
    const clampedContrast = Math.min(1, Math.max(-1, contrast));
    image.filters.push(new filters.Contrast({ contrast: clampedContrast }));
    image.applyFilters();
    return image;
};

/**
 * Appends a brightness filter to a Fabric image and re-applies its filters.
 *
 * Filter classes are taken from Fabric's `filters` export rather than from a
 * static property on `FabricImage`.
 *
 * @param {FabricImage} image - Image to modify in place.
 * @param {number} [brightness=0.1] - Value passed to the Brightness filter.
 * @returns {FabricImage} The same image instance.
 */
const adjustBrightness = (image, brightness = 0.1) => {
    image.filters.push(new filters.Brightness({ brightness }));
    image.applyFilters();
    return image;
};

/**
 * Appends a blur filter to a Fabric image (used here as a simple noise
 * reduction step) and re-applies its filters.
 *
 * Filter classes are taken from Fabric's `filters` export rather than from a
 * static property on `FabricImage`.
 *
 * @param {FabricImage} image - Image to modify in place.
 * @param {number} [blur=0.1] - Value passed to the Blur filter.
 * @returns {FabricImage} The same image instance.
 */
const reduceNoise = (image, blur = 0.1) => {
    image.filters.push(new filters.Blur({ blur }));
    image.applyFilters();
    return image;
};

/**
 * Loads an image and prepares it for OCR: grayscale, contrast, brightness and
 * noise reduction, in that order.
 *
 * The processed image is awaited and returned; failures are logged and then
 * rethrown so callers do not continue with an undefined image.
 *
 * @param {Blob|string} file - Image source accepted by `loadImage`.
 * @returns {Promise<string>} Data URL of the processed image.
 * @throws Rethrows any error raised while loading or processing the image.
 */
const preprocessImage = async (file) => {
    try {
        const image = await loadImage(file);

        convertToGrayscale(image);
        adjustContrast(image);
        adjustBrightness(image);
        reduceNoise(image);

        // Let Fabric export the filtered image itself instead of drawing it
        // onto a hand-made canvas.
        return image.toDataURL();
    } catch (error) {
        console.error('Erreur lors du chargement de l\'image:', error);
        throw error;
    }
};

/**
 * Preprocesses an image and runs Tesseract OCR on the result.
 *
 * @param {Blob|string} imageFile - Image source handed to `preprocessImage`.
 * @param {string} lang - Language argument forwarded to `Tesseract.recognize`.
 * @returns {Promise<string>} The recognized text (`data.text` of the OCR result).
 */
const extractTextFromImage = async (imageFile, lang) => {
    console.log('extract text from image');

    const ocrReadyImage = await preprocessImage(imageFile);
    const recognition = await Tesseract.recognize(ocrReadyImage, lang);
    console.log('text recognized in image');

    const { text } = recognition.data;
    return text;
};

/**
 * Pulls the last name, first name and birth date out of OCR text laid out as
 * "Nom : …", "Prénom : …" and "Né(e) le : dd/mm/yyyy".
 *
 * @param {string} text - OCR output to search.
 * @returns {{ nom: (string|undefined), prenom: (string|undefined), dateNaissance: (string|undefined) }}
 *     The first match for each field, trimmed; `undefined` for fields not found.
 */
const extractInfo = (text) => {
    // "Nom" must not be preceded by a letter (or combining mark); otherwise the
    // case-insensitive pattern also matches the "nom" inside "Prénom".
    const nomRegex = /(?<![\p{L}\p{M}])Nom\s*:\s*([^\n]+)/iu;
    // Accept the unaccented spelling as well, since OCR may drop the accent.
    const prenomRegex = /Pr[ée]nom\s*:\s*([^\n]+)/iu;
    const dateNaissanceRegex = /Né\(e\) le\s*:\s*(\d{2}\/\d{2}\/\d{4})/i;

    // Trim so trailing spaces or a "\r" from CRLF line endings do not leak into values.
    const firstCapture = (regex) => text.match(regex)?.[1]?.trim();

    return {
        nom: firstCapture(nomRegex),
        prenom: firstCapture(prenomRegex),
        dateNaissance: firstCapture(dateNaissanceRegex),
    };
};

/**
 * Runs OCR on a single image and extracts the identity fields from its text.
 *
 * @param {Blob|string} imageFile - Image source handed to `extractTextFromImage`.
 * @param {string} lang - OCR language forwarded to `extractTextFromImage`.
 * @returns {Promise<object>} Whatever `extractInfo` returns for the recognized text.
 */
const processImage = async (imageFile, lang) => {
    console.log('process image');

    const ocrText = await extractTextFromImage(imageFile, lang);
    const info = extractInfo(ocrText);
    console.log('info extracted');

    return info;
};

/**
 * Extracts identity fields from an uploaded document.
 *
 * PDFs are rendered page by page, each kept page is OCR'd, and the fields are
 * extracted from the concatenated text. Any other file is treated as a single
 * image.
 *
 * @param {File|Blob} file - Uploaded document; `file.type` selects the PDF path.
 * @param {string} lang - OCR language forwarded to the OCR helpers.
 * @returns {Promise<object>} The fields returned by `extractInfo`.
 */
export default async function processDocument(file, lang) {
    if (file.type === 'application/pdf') {
        const images = await extractImagesFromPDF(file);

        let text = '';
        for (const imageData of images) {
            const pageText = await extractTextFromImage(imageData, lang);
            text += `\n${pageText}`;
        }

        return extractInfo(text);
    }

    const imageUrl = URL.createObjectURL(file);
    try {
        return await processImage(imageUrl, lang);
    } finally {
        // Release the object URL once processing has finished or failed.
        URL.revokeObjectURL(imageUrl);
    }
}