Skip to content

Commit fb7988a

Browse files
committed
Add more tests for conversion API options
- Add tests for different output formats (HTML, TEXT, multiple formats)
- Add tests for PDF backend options
- Add tests for OCR options (engine, force OCR)
- Add tests for table structure options
- Add tests for image options
- Add tests for page range and error handling options
- Add tests for enrichment features (code, formula, picture classification)

Signed-off-by: Abhiraj Marne <abhirajmarne11@gmail.com>
1 parent 1f4e01c commit fb7988a

File tree

1 file changed

+294
-0
lines changed

1 file changed

+294
-0
lines changed

docling-serve/docling-serve-client/src/test/java/ai/docling/serve/client/AbstractDoclingServeClientTests.java

Lines changed: 294 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,9 @@
6363
import ai.docling.serve.api.clear.response.ClearResponse;
6464
import ai.docling.serve.api.convert.request.ConvertDocumentRequest;
6565
import ai.docling.serve.api.convert.request.options.ConvertDocumentOptions;
66+
import ai.docling.serve.api.convert.request.options.OcrEngine;
6667
import ai.docling.serve.api.convert.request.options.OutputFormat;
68+
import ai.docling.serve.api.convert.request.options.PdfBackend;
6769
import ai.docling.serve.api.convert.request.options.TableFormerMode;
6870
import ai.docling.serve.api.convert.request.source.HttpSource;
6971
import ai.docling.serve.api.convert.request.source.S3Source;
@@ -707,6 +709,298 @@ void convertAsyncFilesNotRegularFile() {
707709
.isThrownBy(() -> getDoclingClient().convertFilesAsync(Path.of("src", "test", "resources")))
708710
.withMessage("File (src/test/resources) is not a regular file");
709711
}
712+
713+
@Test
714+
void shouldConvertToHtmlFormat() {
715+
var options = ConvertDocumentOptions.builder()
716+
.toFormat(OutputFormat.HTML)
717+
.build();
718+
719+
var request = ConvertDocumentRequest.builder()
720+
.source(HttpSource.builder().url(URI.create("https://docs.arconia.io/arconia-cli/latest/development/dev/")).build())
721+
.options(options)
722+
.build();
723+
724+
var response = getDoclingClient().convertSource(request);
725+
726+
assertThat(response).isNotNull();
727+
assertThat(response.getStatus()).isNotEmpty();
728+
assertThat(response.getDocument()).isNotNull();
729+
assertThat(response.getDocument().getHtmlContent()).isNotEmpty();
730+
}
731+
732+
@Test
733+
void shouldConvertToTextFormat() {
734+
var options = ConvertDocumentOptions.builder()
735+
.toFormat(OutputFormat.TEXT)
736+
.build();
737+
738+
var request = ConvertDocumentRequest.builder()
739+
.source(HttpSource.builder().url(URI.create("https://docs.arconia.io/arconia-cli/latest/development/dev/")).build())
740+
.options(options)
741+
.build();
742+
743+
var response = getDoclingClient().convertSource(request);
744+
745+
assertThat(response).isNotNull();
746+
assertThat(response.getStatus()).isNotEmpty();
747+
assertThat(response.getDocument()).isNotNull();
748+
assertThat(response.getDocument().getTextContent()).isNotEmpty();
749+
}
750+
751+
@Test
752+
void shouldConvertWithPdfBackend() {
753+
var options = ConvertDocumentOptions.builder()
754+
.pdfBackend(PdfBackend.PYPDFIUM2)
755+
.build();
756+
757+
var request = ConvertDocumentRequest.builder()
758+
.source(HttpSource.builder().url(URI.create("https://docs.arconia.io/arconia-cli/latest/development/dev/")).build())
759+
.options(options)
760+
.build();
761+
762+
var response = getDoclingClient().convertSource(request);
763+
764+
assertThat(response).isNotNull();
765+
assertThat(response.getStatus()).isNotEmpty();
766+
assertThat(response.getDocument()).isNotNull();
767+
}
768+
769+
@Test
770+
void shouldConvertWithOcrEngine() {
771+
var options = ConvertDocumentOptions.builder()
772+
.ocrEngine(OcrEngine.TESSERACT)
773+
.build();
774+
775+
var request = ConvertDocumentRequest.builder()
776+
.source(HttpSource.builder().url(URI.create("https://docs.arconia.io/arconia-cli/latest/development/dev/")).build())
777+
.options(options)
778+
.build();
779+
780+
var response = getDoclingClient().convertSource(request);
781+
782+
assertThat(response).isNotNull();
783+
assertThat(response.getStatus()).isNotEmpty();
784+
assertThat(response.getDocument()).isNotNull();
785+
}
786+
787+
@Test
788+
void shouldConvertWithForceOcr() {
789+
var options = ConvertDocumentOptions.builder()
790+
.forceOcr(true)
791+
.build();
792+
793+
var request = ConvertDocumentRequest.builder()
794+
.source(HttpSource.builder().url(URI.create("https://docs.arconia.io/arconia-cli/latest/development/dev/")).build())
795+
.options(options)
796+
.build();
797+
798+
var response = getDoclingClient().convertSource(request);
799+
800+
assertThat(response).isNotNull();
801+
assertThat(response.getStatus()).isNotEmpty();
802+
assertThat(response.getDocument()).isNotNull();
803+
}
804+
805+
@Test
806+
void shouldConvertWithTableDisabled() {
807+
var options = ConvertDocumentOptions.builder()
808+
.doTableStructure(false)
809+
.build();
810+
811+
var request = ConvertDocumentRequest.builder()
812+
.source(HttpSource.builder().url(URI.create("https://docs.arconia.io/arconia-cli/latest/development/dev/")).build())
813+
.options(options)
814+
.build();
815+
816+
var response = getDoclingClient().convertSource(request);
817+
818+
assertThat(response).isNotNull();
819+
assertThat(response.getStatus()).isNotEmpty();
820+
assertThat(response.getDocument()).isNotNull();
821+
}
822+
823+
@Test
824+
void shouldConvertWithImagesExcluded() {
825+
var options = ConvertDocumentOptions.builder()
826+
.includeImages(false)
827+
.build();
828+
829+
var request = ConvertDocumentRequest.builder()
830+
.source(HttpSource.builder().url(URI.create("https://docs.arconia.io/arconia-cli/latest/development/dev/")).build())
831+
.options(options)
832+
.build();
833+
834+
var response = getDoclingClient().convertSource(request);
835+
836+
assertThat(response).isNotNull();
837+
assertThat(response.getStatus()).isNotEmpty();
838+
assertThat(response.getDocument()).isNotNull();
839+
}
840+
841+
@Test
842+
void shouldConvertWithPageRange() {
843+
var options = ConvertDocumentOptions.builder()
844+
.pageRange(1, 2)
845+
.build();
846+
847+
var request = ConvertDocumentRequest.builder()
848+
.source(HttpSource.builder().url(URI.create("https://docs.arconia.io/arconia-cli/latest/development/dev/")).build())
849+
.options(options)
850+
.build();
851+
852+
var response = getDoclingClient().convertSource(request);
853+
854+
assertThat(response).isNotNull();
855+
assertThat(response.getStatus()).isNotEmpty();
856+
assertThat(response.getDocument()).isNotNull();
857+
}
858+
859+
@Test
860+
void shouldConvertWithAbortOnError() {
861+
var options = ConvertDocumentOptions.builder()
862+
.abortOnError(true)
863+
.build();
864+
865+
var request = ConvertDocumentRequest.builder()
866+
.source(HttpSource.builder().url(URI.create("https://docs.arconia.io/arconia-cli/latest/development/dev/")).build())
867+
.options(options)
868+
.build();
869+
870+
var response = getDoclingClient().convertSource(request);
871+
872+
assertThat(response).isNotNull();
873+
assertThat(response.getStatus()).isNotEmpty();
874+
assertThat(response.getDocument()).isNotNull();
875+
}
876+
877+
@Test
878+
void shouldConvertWithCodeEnrichment() {
879+
var options = ConvertDocumentOptions.builder()
880+
.doCodeEnrichment(true)
881+
.build();
882+
883+
var request = ConvertDocumentRequest.builder()
884+
.source(HttpSource.builder().url(URI.create("https://docs.arconia.io/arconia-cli/latest/development/dev/")).build())
885+
.options(options)
886+
.build();
887+
888+
var response = getDoclingClient().convertSource(request);
889+
890+
assertThat(response).isNotNull();
891+
assertThat(response.getStatus()).isNotEmpty();
892+
assertThat(response.getDocument()).isNotNull();
893+
}
894+
895+
@Test
896+
void shouldConvertWithFormulaEnrichment() {
897+
var options = ConvertDocumentOptions.builder()
898+
.doFormulaEnrichment(true)
899+
.build();
900+
901+
var request = ConvertDocumentRequest.builder()
902+
.source(HttpSource.builder().url(URI.create("https://docs.arconia.io/arconia-cli/latest/development/dev/")).build())
903+
.options(options)
904+
.build();
905+
906+
var response = getDoclingClient().convertSource(request);
907+
908+
assertThat(response).isNotNull();
909+
assertThat(response.getStatus()).isNotEmpty();
910+
assertThat(response.getDocument()).isNotNull();
911+
}
912+
913+
@Test
914+
void shouldConvertWithPictureClassification() {
915+
var options = ConvertDocumentOptions.builder()
916+
.doPictureClassification(true)
917+
.build();
918+
919+
var request = ConvertDocumentRequest.builder()
920+
.source(HttpSource.builder().url(URI.create("https://docs.arconia.io/arconia-cli/latest/development/dev/")).build())
921+
.options(options)
922+
.build();
923+
924+
var response = getDoclingClient().convertSource(request);
925+
926+
assertThat(response).isNotNull();
927+
assertThat(response.getStatus()).isNotEmpty();
928+
assertThat(response.getDocument()).isNotNull();
929+
}
930+
931+
@Test
932+
void shouldConvertWithMultipleOutputFormats() {
933+
var options = ConvertDocumentOptions.builder()
934+
.toFormats(List.of(OutputFormat.MARKDOWN, OutputFormat.JSON))
935+
.build();
936+
937+
var request = ConvertDocumentRequest.builder()
938+
.source(HttpSource.builder().url(URI.create("https://docs.arconia.io/arconia-cli/latest/development/dev/")).build())
939+
.options(options)
940+
.build();
941+
942+
var response = getDoclingClient().convertSource(request);
943+
944+
assertThat(response).isNotNull();
945+
assertThat(response.getStatus()).isNotEmpty();
946+
assertThat(response.getDocument()).isNotNull();
947+
assertThat(response.getDocument().getMarkdownContent()).isNotEmpty();
948+
assertThat(response.getDocument().getJsonContent()).isNotNull();
949+
}
950+
951+
@Test
952+
void shouldConvertWithImageScale() {
953+
var options = ConvertDocumentOptions.builder()
954+
.imagesScale(1.5)
955+
.build();
956+
957+
var request = ConvertDocumentRequest.builder()
958+
.source(HttpSource.builder().url(URI.create("https://docs.arconia.io/arconia-cli/latest/development/dev/")).build())
959+
.options(options)
960+
.build();
961+
962+
var response = getDoclingClient().convertSource(request);
963+
964+
assertThat(response).isNotNull();
965+
assertThat(response.getStatus()).isNotEmpty();
966+
assertThat(response.getDocument()).isNotNull();
967+
}
968+
969+
@Test
970+
void shouldConvertWithMdPageBreakPlaceholder() {
971+
var options = ConvertDocumentOptions.builder()
972+
.mdPageBreakPlaceholder("---BREAK---")
973+
.build();
974+
975+
var request = ConvertDocumentRequest.builder()
976+
.source(HttpSource.builder().url(URI.create("https://docs.arconia.io/arconia-cli/latest/development/dev/")).build())
977+
.options(options)
978+
.build();
979+
980+
var response = getDoclingClient().convertSource(request);
981+
982+
assertThat(response).isNotNull();
983+
assertThat(response.getStatus()).isNotEmpty();
984+
assertThat(response.getDocument()).isNotNull();
985+
}
986+
987+
@Test
988+
void shouldConvertWithTableCellMatchingDisabled() {
989+
var options = ConvertDocumentOptions.builder()
990+
.tableCellMatching(false)
991+
.build();
992+
993+
var request = ConvertDocumentRequest.builder()
994+
.source(HttpSource.builder().url(URI.create("https://docs.arconia.io/arconia-cli/latest/development/dev/")).build())
995+
.options(options)
996+
.build();
997+
998+
var response = getDoclingClient().convertSource(request);
999+
1000+
assertThat(response).isNotNull();
1001+
assertThat(response.getStatus()).isNotEmpty();
1002+
assertThat(response.getDocument()).isNotNull();
1003+
}
7101004
}
7111005

7121006
@Nested

0 commit comments

Comments
 (0)