diff --git a/src/main/java/com/mindee/input/LocalInputSource.java b/src/main/java/com/mindee/input/LocalInputSource.java index b5f0169ad..46340ea72 100644 --- a/src/main/java/com/mindee/input/LocalInputSource.java +++ b/src/main/java/com/mindee/input/LocalInputSource.java @@ -1,10 +1,10 @@ package com.mindee.input; import com.mindee.image.ImageCompressor; +import com.mindee.pdf.PDFBoxApi; +import com.mindee.pdf.PDFCompressor; +import com.mindee.pdf.PDFOperation; import com.mindee.pdf.PDFUtils; -import com.mindee.pdf.PdfBoxApi; -import com.mindee.pdf.PdfCompressor; -import com.mindee.pdf.PdfOperation; import com.mindee.pdf.SplitQuery; import java.io.File; import java.io.IOException; @@ -13,16 +13,20 @@ import java.nio.file.Path; import java.util.Base64; import lombok.Getter; +import lombok.Setter; import org.apache.pdfbox.io.IOUtils; /** * A source document for Mindee API operations. */ -@Getter public final class LocalInputSource { + @Getter private byte[] file; + @Getter private final String filename; + @Setter + private PDFOperation pdfOperation; public LocalInputSource(InputStream file, String filename) throws IOException { this.file = IOUtils.toByteArray(file); @@ -55,6 +59,13 @@ public LocalInputSource(String fileAsBase64, String filename) { this.filename = filename; } + public PDFOperation getPdfOperation() { + if (this.pdfOperation == null) { + this.pdfOperation = new PDFBoxApi(); + } + return this.pdfOperation; + } + /** * Get the number of pages in the document. * @@ -76,8 +87,7 @@ public int getPageCount() throws IOException { */ public void applyPageOptions(PageOptions pageOptions) throws IOException { if (pageOptions != null && this.isPdf()) { - PdfOperation pdfOperation = new PdfBoxApi(); - this.file = pdfOperation.split(new SplitQuery(this.file, pageOptions)).getFile(); + this.file = getPdfOperation().split(new SplitQuery(this.file, pageOptions)).getFile(); } } @@ -97,7 +107,7 @@ public void compress( Boolean disableSourceText ) throws IOException { if (isPdf()) { - this.file = PdfCompressor.compressPdf(this.file, quality, forceSourceText, disableSourceText); + this.file = PDFCompressor.compressPdf(this.file, quality, forceSourceText, disableSourceText); } else { this.file = ImageCompressor.compressImage(this.file, quality, maxWidth, maxHeight); } diff --git a/src/main/java/com/mindee/pdf/PDFExtractor.java b/src/main/java/com/mindee/pdf/BasePDFExtractor.java similarity index 53% rename from src/main/java/com/mindee/pdf/PDFExtractor.java rename to src/main/java/com/mindee/pdf/BasePDFExtractor.java index 2b2323499..fb2f62c5c 100644 --- a/src/main/java/com/mindee/pdf/PDFExtractor.java +++ b/src/main/java/com/mindee/pdf/BasePDFExtractor.java @@ -5,14 +5,11 @@ import com.mindee.MindeeException; import com.mindee.input.InputSourceUtils; import com.mindee.input.LocalInputSource; -import com.mindee.v1.product.invoicesplitter.InvoiceSplitterV1InvoicePageGroup; import java.awt.image.BufferedImage; import java.io.ByteArrayInputStream; import java.io.IOException; import java.util.ArrayList; -import java.util.Iterator; import java.util.List; -import java.util.stream.Collectors; import javax.imageio.ImageIO; import org.apache.pdfbox.Loader; import org.apache.pdfbox.pdmodel.PDDocument; @@ -24,19 +21,9 @@ /** * PDF extraction class. */ -public class PDFExtractor { - private final PDDocument sourcePdf; - private final String filename; - - /** - * Init from a path. - * - * @param filePath Path to the file. - * @throws IOException Throws if the file can't be accessed. - */ - public PDFExtractor(String filePath) throws IOException { - this(new LocalInputSource(filePath)); - } +public class BasePDFExtractor { + protected final PDDocument sourcePdf; + protected final String filename; /** * Init from a {@link LocalInputSource}. @@ -44,13 +31,13 @@ public PDFExtractor(String filePath) throws IOException { * @param source The local source. * @throws IOException Throws if the file can't be accessed. */ - public PDFExtractor(LocalInputSource source) throws IOException { + protected BasePDFExtractor(LocalInputSource source) throws IOException { this.filename = source.getFilename(); if (source.isPdf()) { this.sourcePdf = Loader.loadPDF(source.getFile()); } else { - PDDocument document = new PDDocument(); - PDPage page = new PDPage(); + var document = new PDDocument(); + var page = new PDPage(); document.addPage(page); BufferedImage bufferedImage = byteArrayToBufferedImage(source.getFile()); PDImageXObject pdImage = LosslessFactory.createFromImage(document, bufferedImage); @@ -65,7 +52,6 @@ public PDFExtractor(LocalInputSource source) throws IOException { ); } this.sourcePdf = document; - } } @@ -101,7 +87,7 @@ public static BufferedImage byteArrayToBufferedImage(byte[] byteArray) throws IO public List extractSubDocuments( List> pageIndexes ) throws IOException { - List extractedPDFs = new ArrayList<>(); + var extractedPDFs = new ArrayList(); for (List pageIndexElement : pageIndexes) { if (pageIndexElement.isEmpty()) { @@ -126,65 +112,4 @@ public List extractSubDocuments( } return extractedPDFs; } - - /** - * Extract invoices from the given page indexes (from an invoice-splitter prediction). - * - * @param pageIndexes List of page indexes. - * @return a list of extracted files. - * @throws IOException Throws if the file can't be accessed. - */ - public List extractInvoices( - List pageIndexes - ) throws IOException { - - List> indexes = pageIndexes - .stream() - .map(InvoiceSplitterV1InvoicePageGroup::getPageIndexes) - .collect(Collectors.toList()); - - return extractSubDocuments(indexes); - } - - /** - * Extract invoices from the given page indexes (from an invoice-splitter prediction). - * - * @param pageIndexes List of page indexes. - * @param strict Whether the extraction should strictly follow the confidence scores or not. - * @return a list of extracted files. - * @throws IOException Throws if the file can't be accessed. - */ - public List extractInvoices( - List pageIndexes, - boolean strict - ) throws IOException { - List> correctPageIndexes = new ArrayList<>(); - if (!strict) { - return extractInvoices(pageIndexes); - } - Iterator iterator = pageIndexes.iterator(); - List currentList = new ArrayList<>(); - Double previousConfidence = null; - while (iterator.hasNext()) { - InvoiceSplitterV1InvoicePageGroup pageIndex = iterator.next(); - Double confidence = pageIndex.getConfidence(); - List pageList = pageIndex.getPageIndexes(); - - if (confidence == 1.0 && previousConfidence == null) { - currentList = new ArrayList<>(pageList); - } else if (confidence == 1.0) { - correctPageIndexes.add(currentList); - currentList = new ArrayList<>(pageList); - } else if (confidence == 0.0 && !iterator.hasNext()) { - currentList.addAll(pageList); - correctPageIndexes.add(currentList); - } else { - correctPageIndexes.add(currentList); - correctPageIndexes.add(pageList); - } - previousConfidence = confidence; - } - return extractSubDocuments(correctPageIndexes); - } - } diff --git a/src/main/java/com/mindee/pdf/PdfBoxApi.java b/src/main/java/com/mindee/pdf/PDFBoxApi.java similarity index 71% rename from src/main/java/com/mindee/pdf/PdfBoxApi.java rename to src/main/java/com/mindee/pdf/PDFBoxApi.java index aa6fb3798..3d7f28425 100644 --- a/src/main/java/com/mindee/pdf/PdfBoxApi.java +++ b/src/main/java/com/mindee/pdf/PDFBoxApi.java @@ -11,32 +11,30 @@ import java.util.Set; import java.util.stream.Collectors; import java.util.stream.IntStream; -import java.util.stream.Stream; import org.apache.pdfbox.Loader; import org.apache.pdfbox.pdmodel.PDDocument; /** * Allows performing various operations on PDFs. */ -public final class PdfBoxApi implements PdfOperation { +public final class PDFBoxApi implements PDFOperation { @Override - public SplitPdf split(SplitQuery splitQuery) throws IOException { + public SplitPDF split(SplitQuery splitQuery) throws IOException { if (!checkPdfOpen(splitQuery.getFile())) { throw new MindeeException("This document cannot be open and cannot be split."); } - try (PDDocument originalDocument = Loader.loadPDF(splitQuery.getFile())) { - try (PDDocument splitDocument = new PDDocument()) { + try (var originalDocument = Loader.loadPDF(splitQuery.getFile())) { + try (var splitDocument = new PDDocument()) { int totalOriginalPages = countPages(splitQuery.getFile()); if (totalOriginalPages < splitQuery.getPageOptions().getOnMinPages()) { - return new SplitPdf(splitQuery.getFile(), totalOriginalPages); + return new SplitPDF(splitQuery.getFile(), totalOriginalPages); } - List pageRange = getPageRanges(splitQuery.getPageOptions(), totalOriginalPages); - + var pageRange = getPageRanges(splitQuery.getPageOptions(), totalOriginalPages); pageRange .stream() .filter(i -> i < totalOriginalPages) @@ -45,7 +43,7 @@ public SplitPdf split(SplitQuery splitQuery) throws IOException { try (ByteArrayOutputStream outputStream = new ByteArrayOutputStream()) { splitDocument.save(outputStream); byte[] splitPdf = outputStream.toByteArray(); - return new SplitPdf(splitPdf, countPages(splitPdf)); + return new SplitPDF(splitPdf, countPages(splitPdf)); } } } @@ -55,12 +53,12 @@ private List getPageRanges(PageOptions pageOptions, Integer numberOfPag Set pages = Optional .ofNullable(pageOptions.getPageIndexes()) - .map(Collection::stream) - .orElseGet(Stream::empty) + .stream() + .flatMap(Collection::stream) .filter(x -> x > (numberOfPages) * (-1) && x <= (numberOfPages - 1)) .map(x -> (numberOfPages + x) % numberOfPages) .collect(Collectors.toSet()); - List allPages = IntStream.range(0, numberOfPages).boxed().collect(Collectors.toList()); + var allPages = IntStream.range(0, numberOfPages).boxed().collect(Collectors.toList()); switch (pageOptions.getOperation()) { case KEEP_ONLY: @@ -85,9 +83,6 @@ private boolean checkPdfOpen(byte[] documentFile) { } private int countPages(byte[] documentFile) throws IOException { - PDDocument document = Loader.loadPDF(documentFile); - int pageCount = document.getNumberOfPages(); - document.close(); - return pageCount; + return PDFUtils.getNumberOfPages(documentFile); } } diff --git a/src/main/java/com/mindee/pdf/PdfCompressor.java b/src/main/java/com/mindee/pdf/PDFCompressor.java similarity index 86% rename from src/main/java/com/mindee/pdf/PdfCompressor.java rename to src/main/java/com/mindee/pdf/PDFCompressor.java index dee8ea642..b705ea929 100644 --- a/src/main/java/com/mindee/pdf/PdfCompressor.java +++ b/src/main/java/com/mindee/pdf/PDFCompressor.java @@ -11,14 +11,13 @@ import org.apache.pdfbox.pdmodel.PDPageContentStream; import org.apache.pdfbox.pdmodel.common.PDRectangle; import org.apache.pdfbox.pdmodel.graphics.image.JPEGFactory; -import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject; import org.apache.pdfbox.rendering.ImageType; import org.apache.pdfbox.rendering.PDFRenderer; /** * PDF compression class. */ -public class PdfCompressor { +public class PDFCompressor { public static byte[] compressPdf( byte[] pdfData, Integer imageQuality, @@ -44,10 +43,10 @@ public static byte[] compressPdf( } try (PDDocument inputDoc = Loader.loadPDF(pdfData); PDDocument outputDoc = new PDDocument()) { - PDFRenderer pdfRenderer = new PDFRenderer(inputDoc); + var pdfRenderer = new PDFRenderer(inputDoc); for (int pageIndex = 0; pageIndex < inputDoc.getNumberOfPages(); pageIndex++) { - PDPage originalPage = inputDoc.getPage(pageIndex); + var originalPage = inputDoc.getPage(pageIndex); PDRectangle originalPageSize = originalPage.getMediaBox(); processPage( @@ -92,12 +91,12 @@ private static void processPage( PDRectangle originalPageSize, Boolean disableSourceText ) throws IOException { - PDPage newPage = new PDPage(originalPageSize); + var newPage = new PDPage(originalPageSize); outputDoc.addPage(newPage); - PDImageXObject pdImage = JPEGFactory.createFromImage(outputDoc, image, imageQuality); + var pdImage = JPEGFactory.createFromImage(outputDoc, image, imageQuality); - try (PDPageContentStream contentStream = new PDPageContentStream(outputDoc, newPage)) { + try (var contentStream = new PDPageContentStream(outputDoc, newPage)) { PDFUtils.addImageToPage(contentStream, pdImage, originalPageSize); PDFUtils.extractAndAddText(originalDocument, contentStream, pageIndex, disableSourceText); } diff --git a/src/main/java/com/mindee/pdf/PdfOperation.java b/src/main/java/com/mindee/pdf/PDFOperation.java similarity index 69% rename from src/main/java/com/mindee/pdf/PdfOperation.java rename to src/main/java/com/mindee/pdf/PDFOperation.java index 7d1466c28..514307021 100644 --- a/src/main/java/com/mindee/pdf/PdfOperation.java +++ b/src/main/java/com/mindee/pdf/PDFOperation.java @@ -5,7 +5,7 @@ /** * Minimum PDF operations. */ -public interface PdfOperation { +public interface PDFOperation { /** * Split a PDF file. @@ -13,5 +13,5 @@ public interface PdfOperation { * @param splitQuery Options to perform the query. * @return The split PDF. */ - SplitPdf split(SplitQuery splitQuery) throws IOException; + SplitPDF split(SplitQuery splitQuery) throws IOException; } diff --git a/src/main/java/com/mindee/pdf/PDFUtils.java b/src/main/java/com/mindee/pdf/PDFUtils.java index a0a4cb1fe..760a0246f 100644 --- a/src/main/java/com/mindee/pdf/PDFUtils.java +++ b/src/main/java/com/mindee/pdf/PDFUtils.java @@ -69,8 +69,8 @@ private static byte[] createPdfFromExistingPdf( List pageNumbers, boolean closeOriginal ) throws IOException { - ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); - PDDocument newDocument = new PDDocument(); + var outputStream = new ByteArrayOutputStream(); + var newDocument = new PDDocument(); int pageCount = document.getNumberOfPages(); pageNumbers .stream() @@ -161,7 +161,7 @@ public static List pdfToImages(String filePath) throws IOException */ public static List pdfToImages(LocalInputSource source) throws IOException { PDDocument document = Loader.loadPDF(source.getFile()); - PDFRenderer pdfRenderer = new PDFRenderer(document); + var pdfRenderer = new PDFRenderer(document); List pdfPageImages = new ArrayList<>(); for (int i = 0; i < document.getNumberOfPages(); i++) { BufferedImage imageBuffer = pdfPageToImageBuffer(i, document, pdfRenderer); @@ -201,7 +201,7 @@ public static PdfPageImage pdfPageToImage( ) throws IOException { int index = pageNumber - 1; PDDocument document = Loader.loadPDF(source.getFile()); - PDFRenderer pdfRenderer = new PDFRenderer(document); + var pdfRenderer = new PDFRenderer(document); BufferedImage imageBuffer = pdfPageToImageBuffer(index, document, pdfRenderer); document.close(); return new PdfPageImage(imageBuffer, index, source.getFilename(), "jpg"); @@ -226,7 +226,7 @@ private static BufferedImage pdfPageToImageBuffer( } public static byte[] documentToBytes(PDDocument document) throws IOException { - ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); + var outputStream = new ByteArrayOutputStream(); document.save(outputStream); return outputStream.toByteArray(); } diff --git a/src/main/java/com/mindee/pdf/PdfPageImage.java b/src/main/java/com/mindee/pdf/PdfPageImage.java index 5ef51de5b..9ef2e54d7 100644 --- a/src/main/java/com/mindee/pdf/PdfPageImage.java +++ b/src/main/java/com/mindee/pdf/PdfPageImage.java @@ -36,11 +36,11 @@ public PdfPageImage( /** * Return the image in a format suitable for sending to MindeeClient for parsing. - * + * * @return an instance of {@link LocalInputSource} */ public LocalInputSource asInputSource() throws IOException { - ByteArrayOutputStream output = new ByteArrayOutputStream(); + var output = new ByteArrayOutputStream(); ImageIO.write(this.image, this.saveFormat, output); return new LocalInputSource(output.toByteArray(), this.getFilename()); } @@ -48,7 +48,7 @@ public LocalInputSource asInputSource() throws IOException { /** * Write the image to a file. * Uses the default image format and filename. - * + * * @param outputPath the output directory (must exist). */ public void writeToFile(String outputPath) throws IOException, MindeeException { @@ -59,7 +59,7 @@ public void writeToFile(String outputPath) throws IOException, MindeeException { /** * Generate a filename for the image. - * + * * @return An auto-generated filename String. */ public String getFilename() { diff --git a/src/main/java/com/mindee/pdf/SplitPdf.java b/src/main/java/com/mindee/pdf/SplitPDF.java similarity index 88% rename from src/main/java/com/mindee/pdf/SplitPdf.java rename to src/main/java/com/mindee/pdf/SplitPDF.java index 042b4fd6e..02fa97d81 100644 --- a/src/main/java/com/mindee/pdf/SplitPdf.java +++ b/src/main/java/com/mindee/pdf/SplitPDF.java @@ -6,7 +6,7 @@ * The split PDF. */ @Value -public class SplitPdf { +public class SplitPDF { /** * The file. diff --git a/src/main/java/com/mindee/v1/MindeeClient.java b/src/main/java/com/mindee/v1/MindeeClient.java index ce949cf69..7dc73d1a9 100644 --- a/src/main/java/com/mindee/v1/MindeeClient.java +++ b/src/main/java/com/mindee/v1/MindeeClient.java @@ -4,8 +4,8 @@ import com.mindee.input.InputSourceUtils; import com.mindee.input.LocalInputSource; import com.mindee.input.PageOptions; -import com.mindee.pdf.PdfBoxApi; -import com.mindee.pdf.PdfOperation; +import com.mindee.pdf.PDFBoxApi; +import com.mindee.pdf.PDFOperation; import com.mindee.pdf.SplitQuery; import com.mindee.v1.clientOptions.PollingOptions; import com.mindee.v1.clientOptions.PredictOptions; @@ -27,7 +27,7 @@ */ public class MindeeClient { - protected PdfOperation pdfOperation; + protected PDFOperation pdfOperation; private final MindeeApiV1 mindeeApi; /** @@ -35,7 +35,7 @@ public class MindeeClient { * You'll need to set the API key in the environment for this approach to work properly. */ public MindeeClient() { - this.pdfOperation = new PdfBoxApi(); + this.pdfOperation = new PDFBoxApi(); this.mindeeApi = createDefaultApi(""); } @@ -45,7 +45,7 @@ public MindeeClient() { * @param apiKey The api key to use. */ public MindeeClient(String apiKey) { - this.pdfOperation = new PdfBoxApi(); + this.pdfOperation = new PDFBoxApi(); this.mindeeApi = createDefaultApi(apiKey); } @@ -55,7 +55,7 @@ public MindeeClient(String apiKey) { * @param mindeeApi The MindeeApi implementation to be used by the created MindeeClient. */ public MindeeClient(MindeeApiV1 mindeeApi) { - this.pdfOperation = new PdfBoxApi(); + this.pdfOperation = new PDFBoxApi(); this.mindeeApi = mindeeApi; } @@ -65,7 +65,7 @@ public MindeeClient(MindeeApiV1 mindeeApi) { * @param pdfOperation The PdfOperation implementation to be used by the created MindeeClient. * @param mindeeApi The MindeeApi implementation to be used by the created MindeeClient. */ - public MindeeClient(PdfOperation pdfOperation, MindeeApiV1 mindeeApi) { + public MindeeClient(PDFOperation pdfOperation, MindeeApiV1 mindeeApi) { this.pdfOperation = pdfOperation; this.mindeeApi = mindeeApi; } diff --git a/src/main/java/com/mindee/v1/pdf/PDFExtractor.java b/src/main/java/com/mindee/v1/pdf/PDFExtractor.java new file mode 100644 index 000000000..e22460650 --- /dev/null +++ b/src/main/java/com/mindee/v1/pdf/PDFExtractor.java @@ -0,0 +1,87 @@ +package com.mindee.v1.pdf; + +import com.mindee.input.LocalInputSource; +import com.mindee.pdf.BasePDFExtractor; +import com.mindee.pdf.ExtractedPDF; +import com.mindee.v1.product.invoicesplitter.InvoiceSplitterV1InvoicePageGroup; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.stream.Collectors; + +/** + * PDF extraction class. + */ +public class PDFExtractor extends BasePDFExtractor { + + /** + * Init from a {@link LocalInputSource}. + * + * @param source The local source. + * @throws IOException Throws if the file can't be accessed. + */ + public PDFExtractor(LocalInputSource source) throws IOException { + super(source); + } + + /** + * Extract invoices from the given page indexes (from an invoice-splitter prediction). + * + * @param pageIndexes List of page indexes. + * @return a list of extracted files. + * @throws IOException Throws if the file can't be accessed. + */ + public List extractInvoices( + List pageIndexes + ) throws IOException { + + List> indexes = pageIndexes + .stream() + .map(InvoiceSplitterV1InvoicePageGroup::getPageIndexes) + .collect(Collectors.toList()); + + return extractSubDocuments(indexes); + } + + /** + * Extract invoices from the given page indexes (from an invoice-splitter prediction). + * + * @param pageIndexes List of page indexes. + * @param strict Whether the extraction should strictly follow the confidence scores or not. + * @return a list of extracted files. + * @throws IOException Throws if the file can't be accessed. + */ + public List extractInvoices( + List pageIndexes, + boolean strict + ) throws IOException { + var correctPageIndexes = new ArrayList>(); + if (!strict) { + return extractInvoices(pageIndexes); + } + var iterator = pageIndexes.iterator(); + var currentList = new ArrayList(); + Double previousConfidence = null; + while (iterator.hasNext()) { + InvoiceSplitterV1InvoicePageGroup pageIndex = iterator.next(); + Double confidence = pageIndex.getConfidence(); + List pageList = pageIndex.getPageIndexes(); + + if (confidence == 1.0 && previousConfidence == null) { + currentList = new ArrayList<>(pageList); + } else if (confidence == 1.0) { + correctPageIndexes.add(currentList); + currentList = new ArrayList<>(pageList); + } else if (confidence == 0.0 && !iterator.hasNext()) { + currentList.addAll(pageList); + correctPageIndexes.add(currentList); + } else { + correctPageIndexes.add(currentList); + correctPageIndexes.add(pageList); + } + previousConfidence = confidence; + } + return extractSubDocuments(correctPageIndexes); + } + +} diff --git a/src/main/java/com/mindee/v2/MindeeClient.java b/src/main/java/com/mindee/v2/MindeeClient.java index f9e5ea184..edbfe10e7 100644 --- a/src/main/java/com/mindee/v2/MindeeClient.java +++ b/src/main/java/com/mindee/v2/MindeeClient.java @@ -29,7 +29,7 @@ public MindeeClient(String apiKey) { this(createDefaultApiV2(apiKey)); } - /** Inject both a PDF implementation and an HTTP implementation. */ + /** Inject a custom HTTP API implementation. */ public MindeeClient(MindeeApiV2 mindeeApi) { this.mindeeApi = mindeeApi; } diff --git a/src/test/java/com/mindee/input/FileCompressionTest.java b/src/test/java/com/mindee/input/FileCompressionTest.java index a8e6d3a35..50b2677cf 100644 --- a/src/test/java/com/mindee/input/FileCompressionTest.java +++ b/src/test/java/com/mindee/input/FileCompressionTest.java @@ -4,7 +4,7 @@ import static com.mindee.TestingUtilities.getV1ResourcePath; import com.mindee.image.ImageCompressor; -import com.mindee.pdf.PdfCompressor; +import com.mindee.pdf.PDFCompressor; import java.awt.image.BufferedImage; import java.io.IOException; import java.nio.file.Files; @@ -254,10 +254,10 @@ public void testPdfResizeFromCompressor() throws IOException { List resizes = Arrays .asList( - PdfCompressor.compressPdf(pdfResizeInput.getFile()), - PdfCompressor.compressPdf(pdfResizeInput.getFile(), 75), - PdfCompressor.compressPdf(pdfResizeInput.getFile(), 50), - PdfCompressor.compressPdf(pdfResizeInput.getFile(), 10) + PDFCompressor.compressPdf(pdfResizeInput.getFile()), + PDFCompressor.compressPdf(pdfResizeInput.getFile(), 75), + PDFCompressor.compressPdf(pdfResizeInput.getFile(), 50), + PDFCompressor.compressPdf(pdfResizeInput.getFile(), 10) ); List outputPaths = Arrays @@ -323,7 +323,7 @@ public void testPdfResizeFromCompressor() throws IOException { public void testPdfResizeWithTextKeepsText() throws IOException { Path inputPath = getResourcePath("file_types/pdf/multipage.pdf"); LocalInputSource initialWithText = new LocalInputSource(inputPath.toString()); - byte[] compressedWithText = PdfCompressor + byte[] compressedWithText = PDFCompressor .compressPdf(initialWithText.getFile(), 100, true, false); PDDocument originalDoc = Loader.loadPDF(initialWithText.getFile()); diff --git a/src/test/java/com/mindee/pdf/PdfOperationTest.java b/src/test/java/com/mindee/pdf/PDFOperationTest.java similarity index 91% rename from src/test/java/com/mindee/pdf/PdfOperationTest.java rename to src/test/java/com/mindee/pdf/PDFOperationTest.java index 5ed8d32dc..af6340c21 100644 --- a/src/test/java/com/mindee/pdf/PdfOperationTest.java +++ b/src/test/java/com/mindee/pdf/PDFOperationTest.java @@ -12,9 +12,9 @@ import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; -public class PdfOperationTest { +public class PDFOperationTest { - private final PdfOperation pdfOperation = new PdfBoxApi(); + private final PDFOperation pdfOperation = new PDFBoxApi(); @Test public void givenADocumentAndPageToKeep_whenSplit_thenReturnsOnlyKeptPage() throws IOException { @@ -26,7 +26,7 @@ public void givenADocumentAndPageToKeep_whenSplit_thenReturnsOnlyKeptPage() thro byte[] fileBytes = Files.readAllBytes(getResourcePath("file_types/pdf/multipage.pdf")); SplitQuery splitQuery = new SplitQuery(fileBytes, pageOptions); - SplitPdf splitPdf = pdfOperation.split(splitQuery); + SplitPDF splitPdf = pdfOperation.split(splitQuery); Assertions.assertNotNull(splitPdf); Assertions.assertNotNull(splitPdf.getFile()); @@ -49,7 +49,7 @@ public void givenADocumentAndListOfPagesToKeep_whenSplit_thenReturnsOnlyKeptPage Files.readAllBytes(getResourcePath("file_types/pdf/multipage.pdf")), pageOptions ); - SplitPdf splitPdf = pdfOperation.split(splitQuery); + SplitPDF splitPdf = pdfOperation.split(splitQuery); Assertions.assertNotNull(splitPdf); Assertions.assertNotNull(splitPdf.getFile()); @@ -68,7 +68,7 @@ public void givenADocumentAndListOfPagesToRemove_whenSplit_thenReturnsOnlyNotRem Files.readAllBytes(getResourcePath("file_types/pdf/multipage.pdf")), pageOptions ); - SplitPdf splitPdf = pdfOperation.split(splitQuery); + SplitPDF splitPdf = pdfOperation.split(splitQuery); Assertions.assertNotNull(splitPdf); Assertions.assertNotNull(splitPdf.getFile()); @@ -104,7 +104,7 @@ public void givenADocumentAndListPagesToRemoveAndMinPagesCondition_whenSplit_mus Files.readAllBytes(getResourcePath("file_types/pdf/multipage_cut-2.pdf")), pageOptions ); - SplitPdf splitPdf = pdfOperation.split(splitQuery); + SplitPDF splitPdf = pdfOperation.split(splitQuery); Assertions.assertNotNull(splitPdf); Assertions.assertNotNull(splitPdf.getFile()); @@ -123,7 +123,7 @@ public void givenADocumentAndNegativeListPagesToKeep_whenSplit_thenReturnsOnlyKe Files.readAllBytes(getResourcePath("file_types/pdf/multipage.pdf")), pageOptions ); - SplitPdf splitPdf = pdfOperation.split(splitQuery); + SplitPDF splitPdf = pdfOperation.split(splitQuery); Assertions.assertNotNull(splitPdf); Assertions.assertNotNull(splitPdf.getFile()); diff --git a/src/test/java/com/mindee/v1/fileOperation/InvoiceSplitterAutoExtractionIT.java b/src/test/java/com/mindee/v1/fileOperation/InvoiceSplitterAutoExtractionIT.java index 982efc911..794cf7874 100644 --- a/src/test/java/com/mindee/v1/fileOperation/InvoiceSplitterAutoExtractionIT.java +++ b/src/test/java/com/mindee/v1/fileOperation/InvoiceSplitterAutoExtractionIT.java @@ -7,11 +7,11 @@ import com.mindee.TestingUtilities; import com.mindee.input.LocalInputSource; import com.mindee.pdf.ExtractedPDF; -import com.mindee.pdf.PDFExtractor; import com.mindee.v1.MindeeClient; import com.mindee.v1.parsing.common.AsyncPredictResponse; import com.mindee.v1.parsing.common.Document; import com.mindee.v1.parsing.common.PredictResponse; +import com.mindee.v1.pdf.PDFExtractor; import com.mindee.v1.product.invoice.InvoiceV4; import com.mindee.v1.product.invoicesplitter.InvoiceSplitterV1; import java.io.IOException; diff --git a/src/test/java/com/mindee/pdf/PDFExtractorTest.java b/src/test/java/com/mindee/v1/pdf/PDFExtractorTest.java similarity index 77% rename from src/test/java/com/mindee/pdf/PDFExtractorTest.java rename to src/test/java/com/mindee/v1/pdf/PDFExtractorTest.java index aa4810349..e30aada8e 100644 --- a/src/test/java/com/mindee/pdf/PDFExtractorTest.java +++ b/src/test/java/com/mindee/v1/pdf/PDFExtractorTest.java @@ -1,31 +1,22 @@ -package com.mindee.pdf; +package com.mindee.v1.pdf; import static com.mindee.TestingUtilities.getV1ResourcePath; -import com.fasterxml.jackson.databind.JavaType; -import com.fasterxml.jackson.databind.ObjectMapper; import com.mindee.input.LocalInputSource; +import com.mindee.v1.parsing.LocalResponse; import com.mindee.v1.parsing.common.PredictResponse; import com.mindee.v1.product.invoicesplitter.InvoiceSplitterV1; import java.io.IOException; -import java.util.List; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; public class PDFExtractorTest { protected PredictResponse getInvoiceSplitterPrediction() throws IOException { - ObjectMapper objectMapper = new ObjectMapper(); - objectMapper.findAndRegisterModules(); - - JavaType type = objectMapper - .getTypeFactory() - .constructParametricType(PredictResponse.class, InvoiceSplitterV1.class); - return objectMapper - .readValue( - getV1ResourcePath("products/invoice_splitter/response_v1/complete.json").toFile(), - type - ); + var localResponse = new LocalResponse( + getV1ResourcePath("products/invoice_splitter/response_v1/complete.json") + ); + return localResponse.deserializeSyncResponse(InvoiceSplitterV1.class); } @Test @@ -38,7 +29,7 @@ public void givenAPDF_shouldExtractInvoicesNoStrict() throws IOException { PDFExtractor extractor = new PDFExtractor(pdf); Assertions.assertEquals(5, extractor.getPageCount()); - List extractedPDFSNoStrict = extractor + var extractedPDFSNoStrict = extractor .extractInvoices(inference.getPrediction().getInvoicePageGroups(), false); Assertions.assertEquals(3, extractedPDFSNoStrict.size()); Assertions.assertEquals("invoice_5p_001-001.pdf", extractedPDFSNoStrict.get(0).getFilename()); @@ -56,7 +47,7 @@ public void givenAPDF_shouldExtractInvoicesStrict() throws IOException { PDFExtractor extractor = new PDFExtractor(pdf); Assertions.assertEquals(5, extractor.getPageCount()); - List extractedPDFStrict = extractor + var extractedPDFStrict = extractor .extractInvoices(inference.getPrediction().getInvoicePageGroups(), true); Assertions.assertEquals(2, extractedPDFStrict.size()); Assertions.assertEquals("invoice_5p_001-001.pdf", extractedPDFStrict.get(0).getFilename());