package org.jpedal.examples.text;

import com.lowagie.text.pdf.PdfObject;
import com.lowagie.text.xml.xmp.XmpWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.OutputStreamWriter;
import java.util.Iterator;
import java.util.Vector;
import org.bouncycastle.i18n.TextBundle;
import org.jpedal.PdfDecoder;
import org.jpedal.exception.PdfException;
import org.jpedal.grouping.PdfGroupingAlgorithms;
import org.jpedal.objects.PdfPageData;
import org.jpedal.utils.LogWriter;
import org.jpedal.utils.Strip;

/* loaded from: input_file:lib/jpedalSTD.jar:org/jpedal/examples/text/TextAsWordlistInBackground.class */
/**
 * Example that extracts the words of a PDF (or of every PDF in a directory,
 * or of a PDF held in a byte array) using the background decoding calls of
 * {@link PdfDecoder}, and writes them to one text file per page.
 *
 * <p>Output goes to {@code <user.dir>/<TextBundle.TEXT_ENTRY>/<name>/words-<page>.txt}.
 * Each line of an output file holds one word followed by four integer values,
 * separated by commas.
 *
 * <p>All work is performed from the constructors; use
 * {@link #getWordsExtractedCount()} afterwards to read the total word count.
 */
public class TextAsWordlistInBackground {
    /** Running total of words written across all pages and files. */
    private int wordsExtracted;
    /** Base directory under which the output tree is created. */
    private String user_dir;
    /** Platform file separator. */
    String separator;
    /** Decoder for the file currently being processed; null between files. */
    PdfDecoder decodePdf;
    /** Directory receiving the word files of the current PDF (ends with the separator). */
    private String outputDir;
    /** True when the source is a file path, false when it is {@link #byteArray}. */
    private boolean isFile;
    /** PDF content when decoding from memory. */
    private byte[] byteArray;
    /** When true, progress messages are printed to System.out. */
    static boolean outputMessages = false;
    /** File processed by {@link #main(String[])} when no argument is given. */
    private static String testFile = "/mnt/shared/sample.pdf";

    /**
     * Creates an idle instance: fields are set to their defaults and nothing
     * is decoded.
     */
    public TextAsWordlistInBackground() {
        this.wordsExtracted = 0;
        this.user_dir = System.getProperty("user.dir");
        this.separator = System.getProperty("file.separator");
        this.decodePdf = null;
        this.outputDir = PdfObject.NOTHING;
        this.isFile = true;
        this.byteArray = null;
    }

    /**
     * Processes a single PDF, or every PDF directly inside a directory.
     *
     * @param str path of a file ending in ".pdf" (case-insensitive), otherwise
     *            treated as a directory whose ".pdf" entries are all processed
     */
    public TextAsWordlistInBackground(String str) {
        this();
        if (outputMessages) {
            System.out.println("processing " + str);
        }
        ensureUserDirEndsWithSeparator();
        if (str.toLowerCase().endsWith(".pdf")) {
            decodeFile(str);
            return;
        }

        String directory = str.endsWith(this.separator) ? str : str + this.separator;
        String[] entries = null;
        try {
            File dir = new File(directory);
            if (!dir.isDirectory()) {
                System.err.println(directory + " is not a directory. Exiting program");
            }
            entries = dir.list();
        } catch (Exception e) {
            LogWriter.writeLog("Exception trying to access file " + e.getMessage());
        }
        // File.list() yields null for a non-directory or on an I/O error, and
        // entries also stays null if the lookup threw: nothing to process then.
        if (entries == null) {
            return;
        }
        for (int i = 0; i < entries.length; i++) {
            if (entries[i].toLowerCase().endsWith(".pdf")) {
                String path = directory + entries[i];
                if (outputMessages) {
                    System.out.println(path);
                }
                decodeFile(path);
            }
        }
    }

    /**
     * Processes a PDF held in memory. Output is written to the "demo" folder
     * because no file name is available.
     *
     * @param bArr raw bytes of the PDF
     */
    public TextAsWordlistInBackground(byte[] bArr) {
        this();
        if (outputMessages) {
            System.out.println("processing byte array");
        }
        ensureUserDirEndsWithSeparator();
        this.byteArray = bArr;
        this.isFile = false;
        decodeFile("byteArray");
    }

    /** Appends the file separator to {@link #user_dir} when it is missing. */
    private void ensureUserDirEndsWithSeparator() {
        if (!this.user_dir.endsWith(this.separator)) {
            this.user_dir = this.user_dir + this.separator;
        }
    }

    /**
     * Opens one PDF, extracts the word list of every page and writes it out.
     *
     * @param str path of the PDF, or a placeholder label when decoding from
     *            the byte array
     */
    private void decodeFile(String str) {
        // Output folder is named after the file (extension stripped); paths
        // without a separator, including the byte-array label, use "demo".
        int lastSeparator = str.lastIndexOf(this.separator);
        String folderName = lastSeparator != -1 ? str.substring(lastSeparator + 1, str.length() - 4) : "demo";
        this.outputDir = this.user_dir + TextBundle.TEXT_ENTRY + this.separator + folderName + this.separator;

        try {
            this.decodePdf = new PdfDecoder(false);
            // NOTE(review): 1 is presumably the text-extraction mode constant - confirm against PdfDecoder.
            this.decodePdf.setExtractionMode(1);
            this.decodePdf.init(true);
            if (outputMessages) {
                System.out.println("Opening file :" + str);
            }
            if (this.isFile) {
                this.decodePdf.openPdfFile(str);
            } else {
                this.decodePdf.openPdfArray(this.byteArray);
            }
        } catch (Exception e) {
            System.err.println("Exception " + e + " in pdf code " + str);
            // The decoder is missing or not opened; carrying on would fail on
            // the first decoder call, so release it and give up on this file.
            if (this.decodePdf != null) {
                this.decodePdf.closePdfFile();
                this.decodePdf = null;
            }
            return;
        }

        if (!this.decodePdf.isEncrypted() || this.decodePdf.isExtractionAllowed()) {
            int pageCount = this.decodePdf.getPageCount();
            for (int page = 1; page <= pageCount; page++) {
                try {
                    extractPage(page, str);
                } catch (Exception e3) {
                    // NOTE(review): the file is closed here yet the loop keeps
                    // going over the remaining pages - kept as in the original.
                    this.decodePdf.closePdfFile();
                    System.err.println("Exception " + e3 + " in " + str);
                }
            }
            this.decodePdf.flushObjectValues(true);
            if (outputMessages) {
                System.out.println("Text read");
            }
        } else if (outputMessages) {
            System.out.println("Encrypted settings");
            System.out.println("Please look at SimpleViewer for code sample to handle such files");
        }
        this.decodePdf.closePdfFile();
        this.decodePdf = null;
    }

    /**
     * Decodes one page in the background, extracts its word list over the
     * whole media box and writes it to disk.
     *
     * @param page 1-based page number
     * @param str  name of the source, used in error messages only
     * @throws Exception on any decoding or I/O failure other than a
     *                   PdfException raised by the word-list extraction
     */
    private void extractPage(int page, String str) throws Exception {
        this.decodePdf.decodePageInBackground(page);
        PdfGroupingAlgorithms grouping = new PdfGroupingAlgorithms(this.decodePdf.getPdfBackgroundData());
        PdfPageData pageData = this.decodePdf.getPdfBackgroundPageData();

        // Rectangle covering the full media box of the page.
        int x1 = pageData.getMediaBoxX(page);
        int x2 = pageData.getMediaBoxWidth(page) + x1;
        int y2 = pageData.getMediaBoxY(page);
        int y1 = pageData.getMediaBoxHeight(page) + y2;
        if (outputMessages) {
            System.out.println("Page " + page + " Extracting text from rectangle (" + x1 + "," + y1 + " " + x2 + "," + y2 + ")");
        }

        Vector words = null;
        try {
            words = grouping.extractTextAsWordlist(x1, y1, x2, y2, page, false, true, "!.,\"\"''");
        } catch (PdfException e2) {
            this.decodePdf.closePdfFile();
            System.err.println("Exception= " + e2 + " in " + str);
        }

        if (words != null) {
            writeWords(words, page);
        } else if (outputMessages) {
            System.out.println("No text found");
        }
        this.decodePdf.flushObjectValues(false);
    }

    /**
     * Writes the word list of one page to {@code words-<page>.txt} inside the
     * current output directory and adds the word count to the running total.
     *
     * @param words flat list of strings in groups of five: the word followed
     *              by four numeric values (presumably its bounding box - TODO confirm)
     * @param page  1-based page number, used in the file name
     * @throws java.io.IOException if the file cannot be created or written
     */
    private void writeWords(Vector words, int page) throws java.io.IOException {
        File dir = new File(this.outputDir);
        if (!dir.exists()) {
            dir.mkdirs();
        }

        int wordCount = words.size() / 5;
        this.wordsExtracted += wordCount;
        String target = this.outputDir + "words-" + page + ".txt";
        if (outputMessages) {
            System.out.println("Page contains " + wordCount + " words.");
            System.out.println("Writing to " + target);
        }

        OutputStreamWriter writer = new OutputStreamWriter(new FileOutputStream(target), XmpWriter.UTF8);
        try {
            Iterator it = words.iterator();
            while (it.hasNext()) {
                String text = Strip.convertToText((String) it.next());
                int first = (int) Float.parseFloat((String) it.next());
                int second = (int) Float.parseFloat((String) it.next());
                int third = (int) Float.parseFloat((String) it.next());
                int fourth = (int) Float.parseFloat((String) it.next());
                writer.write(text + "," + first + "," + second + "," + third + "," + fourth + "\n");
            }
        } finally {
            // Always release the file handle, even when an entry is malformed.
            writer.close();
        }
    }

    /**
     * Command-line entry point.
     *
     * @param strArr optional single argument: a PDF file or a directory of
     *               PDFs; the built-in test file is used when absent
     */
    public static void main(String[] strArr) {
        if (outputMessages) {
            System.out.println("Simple demo to extract text objects using background calls");
        }
        String str = testFile;
        if (strArr.length != 0) {
            str = strArr[0];
            if (outputMessages) {
                System.out.println("File :" + str);
            }
        } else if (outputMessages) {
            System.out.println("Default test file used");
        }
        if (!new File(str).exists()) {
            System.out.println("File " + str + " not found");
            // Nothing to process; do not hand a missing path to the decoder.
            return;
        }
        new TextAsWordlistInBackground(str);
    }

    /**
     * Returns the total number of words extracted by this instance.
     *
     * @return word count accumulated over all processed pages
     */
    public int getWordsExtractedCount() {
        return this.wordsExtracted;
    }
}
