|
63 | 63 | import ai.docling.serve.api.clear.response.ClearResponse; |
64 | 64 | import ai.docling.serve.api.convert.request.ConvertDocumentRequest; |
65 | 65 | import ai.docling.serve.api.convert.request.options.ConvertDocumentOptions; |
| 66 | +import ai.docling.serve.api.convert.request.options.OcrEngine; |
66 | 67 | import ai.docling.serve.api.convert.request.options.OutputFormat; |
| 68 | +import ai.docling.serve.api.convert.request.options.PdfBackend; |
67 | 69 | import ai.docling.serve.api.convert.request.options.TableFormerMode; |
68 | 70 | import ai.docling.serve.api.convert.request.source.HttpSource; |
69 | 71 | import ai.docling.serve.api.convert.request.source.S3Source; |
@@ -707,6 +709,298 @@ void convertAsyncFilesNotRegularFile() { |
707 | 709 | .isThrownBy(() -> getDoclingClient().convertFilesAsync(Path.of("src", "test", "resources"))) |
708 | 710 | .withMessage("File (src/test/resources) is not a regular file"); |
709 | 711 | } |
| 712 | + |
| 713 | + @Test |
| 714 | + void shouldConvertToHtmlFormat() { |
| 715 | + var options = ConvertDocumentOptions.builder() |
| 716 | + .toFormat(OutputFormat.HTML) |
| 717 | + .build(); |
| 718 | + |
| 719 | + var request = ConvertDocumentRequest.builder() |
| 720 | + .source(HttpSource.builder().url(URI.create("https://docs.arconia.io/arconia-cli/latest/development/dev/")).build()) |
| 721 | + .options(options) |
| 722 | + .build(); |
| 723 | + |
| 724 | + var response = getDoclingClient().convertSource(request); |
| 725 | + |
| 726 | + assertThat(response).isNotNull(); |
| 727 | + assertThat(response.getStatus()).isNotEmpty(); |
| 728 | + assertThat(response.getDocument()).isNotNull(); |
| 729 | + assertThat(response.getDocument().getHtmlContent()).isNotEmpty(); |
| 730 | + } |
| 731 | + |
| 732 | + @Test |
| 733 | + void shouldConvertToTextFormat() { |
| 734 | + var options = ConvertDocumentOptions.builder() |
| 735 | + .toFormat(OutputFormat.TEXT) |
| 736 | + .build(); |
| 737 | + |
| 738 | + var request = ConvertDocumentRequest.builder() |
| 739 | + .source(HttpSource.builder().url(URI.create("https://docs.arconia.io/arconia-cli/latest/development/dev/")).build()) |
| 740 | + .options(options) |
| 741 | + .build(); |
| 742 | + |
| 743 | + var response = getDoclingClient().convertSource(request); |
| 744 | + |
| 745 | + assertThat(response).isNotNull(); |
| 746 | + assertThat(response.getStatus()).isNotEmpty(); |
| 747 | + assertThat(response.getDocument()).isNotNull(); |
| 748 | + assertThat(response.getDocument().getTextContent()).isNotEmpty(); |
| 749 | + } |
| 750 | + |
| 751 | + @Test |
| 752 | + void shouldConvertWithPdfBackend() { |
| 753 | + var options = ConvertDocumentOptions.builder() |
| 754 | + .pdfBackend(PdfBackend.PYPDFIUM2) |
| 755 | + .build(); |
| 756 | + |
| 757 | + var request = ConvertDocumentRequest.builder() |
| 758 | + .source(HttpSource.builder().url(URI.create("https://docs.arconia.io/arconia-cli/latest/development/dev/")).build()) |
| 759 | + .options(options) |
| 760 | + .build(); |
| 761 | + |
| 762 | + var response = getDoclingClient().convertSource(request); |
| 763 | + |
| 764 | + assertThat(response).isNotNull(); |
| 765 | + assertThat(response.getStatus()).isNotEmpty(); |
| 766 | + assertThat(response.getDocument()).isNotNull(); |
| 767 | + } |
| 768 | + |
| 769 | + @Test |
| 770 | + void shouldConvertWithOcrEngine() { |
| 771 | + var options = ConvertDocumentOptions.builder() |
| 772 | + .ocrEngine(OcrEngine.TESSERACT) |
| 773 | + .build(); |
| 774 | + |
| 775 | + var request = ConvertDocumentRequest.builder() |
| 776 | + .source(HttpSource.builder().url(URI.create("https://docs.arconia.io/arconia-cli/latest/development/dev/")).build()) |
| 777 | + .options(options) |
| 778 | + .build(); |
| 779 | + |
| 780 | + var response = getDoclingClient().convertSource(request); |
| 781 | + |
| 782 | + assertThat(response).isNotNull(); |
| 783 | + assertThat(response.getStatus()).isNotEmpty(); |
| 784 | + assertThat(response.getDocument()).isNotNull(); |
| 785 | + } |
| 786 | + |
| 787 | + @Test |
| 788 | + void shouldConvertWithForceOcr() { |
| 789 | + var options = ConvertDocumentOptions.builder() |
| 790 | + .forceOcr(true) |
| 791 | + .build(); |
| 792 | + |
| 793 | + var request = ConvertDocumentRequest.builder() |
| 794 | + .source(HttpSource.builder().url(URI.create("https://docs.arconia.io/arconia-cli/latest/development/dev/")).build()) |
| 795 | + .options(options) |
| 796 | + .build(); |
| 797 | + |
| 798 | + var response = getDoclingClient().convertSource(request); |
| 799 | + |
| 800 | + assertThat(response).isNotNull(); |
| 801 | + assertThat(response.getStatus()).isNotEmpty(); |
| 802 | + assertThat(response.getDocument()).isNotNull(); |
| 803 | + } |
| 804 | + |
| 805 | + @Test |
| 806 | + void shouldConvertWithTableDisabled() { |
| 807 | + var options = ConvertDocumentOptions.builder() |
| 808 | + .doTableStructure(false) |
| 809 | + .build(); |
| 810 | + |
| 811 | + var request = ConvertDocumentRequest.builder() |
| 812 | + .source(HttpSource.builder().url(URI.create("https://docs.arconia.io/arconia-cli/latest/development/dev/")).build()) |
| 813 | + .options(options) |
| 814 | + .build(); |
| 815 | + |
| 816 | + var response = getDoclingClient().convertSource(request); |
| 817 | + |
| 818 | + assertThat(response).isNotNull(); |
| 819 | + assertThat(response.getStatus()).isNotEmpty(); |
| 820 | + assertThat(response.getDocument()).isNotNull(); |
| 821 | + } |
| 822 | + |
| 823 | + @Test |
| 824 | + void shouldConvertWithImagesExcluded() { |
| 825 | + var options = ConvertDocumentOptions.builder() |
| 826 | + .includeImages(false) |
| 827 | + .build(); |
| 828 | + |
| 829 | + var request = ConvertDocumentRequest.builder() |
| 830 | + .source(HttpSource.builder().url(URI.create("https://docs.arconia.io/arconia-cli/latest/development/dev/")).build()) |
| 831 | + .options(options) |
| 832 | + .build(); |
| 833 | + |
| 834 | + var response = getDoclingClient().convertSource(request); |
| 835 | + |
| 836 | + assertThat(response).isNotNull(); |
| 837 | + assertThat(response.getStatus()).isNotEmpty(); |
| 838 | + assertThat(response.getDocument()).isNotNull(); |
| 839 | + } |
| 840 | + |
| 841 | + @Test |
| 842 | + void shouldConvertWithPageRange() { |
| 843 | + var options = ConvertDocumentOptions.builder() |
| 844 | + .pageRange(1, 2) |
| 845 | + .build(); |
| 846 | + |
| 847 | + var request = ConvertDocumentRequest.builder() |
| 848 | + .source(HttpSource.builder().url(URI.create("https://docs.arconia.io/arconia-cli/latest/development/dev/")).build()) |
| 849 | + .options(options) |
| 850 | + .build(); |
| 851 | + |
| 852 | + var response = getDoclingClient().convertSource(request); |
| 853 | + |
| 854 | + assertThat(response).isNotNull(); |
| 855 | + assertThat(response.getStatus()).isNotEmpty(); |
| 856 | + assertThat(response.getDocument()).isNotNull(); |
| 857 | + } |
| 858 | + |
| 859 | + @Test |
| 860 | + void shouldConvertWithAbortOnError() { |
| 861 | + var options = ConvertDocumentOptions.builder() |
| 862 | + .abortOnError(true) |
| 863 | + .build(); |
| 864 | + |
| 865 | + var request = ConvertDocumentRequest.builder() |
| 866 | + .source(HttpSource.builder().url(URI.create("https://docs.arconia.io/arconia-cli/latest/development/dev/")).build()) |
| 867 | + .options(options) |
| 868 | + .build(); |
| 869 | + |
| 870 | + var response = getDoclingClient().convertSource(request); |
| 871 | + |
| 872 | + assertThat(response).isNotNull(); |
| 873 | + assertThat(response.getStatus()).isNotEmpty(); |
| 874 | + assertThat(response.getDocument()).isNotNull(); |
| 875 | + } |
| 876 | + |
| 877 | + @Test |
| 878 | + void shouldConvertWithCodeEnrichment() { |
| 879 | + var options = ConvertDocumentOptions.builder() |
| 880 | + .doCodeEnrichment(true) |
| 881 | + .build(); |
| 882 | + |
| 883 | + var request = ConvertDocumentRequest.builder() |
| 884 | + .source(HttpSource.builder().url(URI.create("https://docs.arconia.io/arconia-cli/latest/development/dev/")).build()) |
| 885 | + .options(options) |
| 886 | + .build(); |
| 887 | + |
| 888 | + var response = getDoclingClient().convertSource(request); |
| 889 | + |
| 890 | + assertThat(response).isNotNull(); |
| 891 | + assertThat(response.getStatus()).isNotEmpty(); |
| 892 | + assertThat(response.getDocument()).isNotNull(); |
| 893 | + } |
| 894 | + |
| 895 | + @Test |
| 896 | + void shouldConvertWithFormulaEnrichment() { |
| 897 | + var options = ConvertDocumentOptions.builder() |
| 898 | + .doFormulaEnrichment(true) |
| 899 | + .build(); |
| 900 | + |
| 901 | + var request = ConvertDocumentRequest.builder() |
| 902 | + .source(HttpSource.builder().url(URI.create("https://docs.arconia.io/arconia-cli/latest/development/dev/")).build()) |
| 903 | + .options(options) |
| 904 | + .build(); |
| 905 | + |
| 906 | + var response = getDoclingClient().convertSource(request); |
| 907 | + |
| 908 | + assertThat(response).isNotNull(); |
| 909 | + assertThat(response.getStatus()).isNotEmpty(); |
| 910 | + assertThat(response.getDocument()).isNotNull(); |
| 911 | + } |
| 912 | + |
| 913 | + @Test |
| 914 | + void shouldConvertWithPictureClassification() { |
| 915 | + var options = ConvertDocumentOptions.builder() |
| 916 | + .doPictureClassification(true) |
| 917 | + .build(); |
| 918 | + |
| 919 | + var request = ConvertDocumentRequest.builder() |
| 920 | + .source(HttpSource.builder().url(URI.create("https://docs.arconia.io/arconia-cli/latest/development/dev/")).build()) |
| 921 | + .options(options) |
| 922 | + .build(); |
| 923 | + |
| 924 | + var response = getDoclingClient().convertSource(request); |
| 925 | + |
| 926 | + assertThat(response).isNotNull(); |
| 927 | + assertThat(response.getStatus()).isNotEmpty(); |
| 928 | + assertThat(response.getDocument()).isNotNull(); |
| 929 | + } |
| 930 | + |
| 931 | + @Test |
| 932 | + void shouldConvertWithMultipleOutputFormats() { |
| 933 | + var options = ConvertDocumentOptions.builder() |
| 934 | + .toFormats(List.of(OutputFormat.MARKDOWN, OutputFormat.JSON)) |
| 935 | + .build(); |
| 936 | + |
| 937 | + var request = ConvertDocumentRequest.builder() |
| 938 | + .source(HttpSource.builder().url(URI.create("https://docs.arconia.io/arconia-cli/latest/development/dev/")).build()) |
| 939 | + .options(options) |
| 940 | + .build(); |
| 941 | + |
| 942 | + var response = getDoclingClient().convertSource(request); |
| 943 | + |
| 944 | + assertThat(response).isNotNull(); |
| 945 | + assertThat(response.getStatus()).isNotEmpty(); |
| 946 | + assertThat(response.getDocument()).isNotNull(); |
| 947 | + assertThat(response.getDocument().getMarkdownContent()).isNotEmpty(); |
| 948 | + assertThat(response.getDocument().getJsonContent()).isNotNull(); |
| 949 | + } |
| 950 | + |
| 951 | + @Test |
| 952 | + void shouldConvertWithImageScale() { |
| 953 | + var options = ConvertDocumentOptions.builder() |
| 954 | + .imagesScale(1.5) |
| 955 | + .build(); |
| 956 | + |
| 957 | + var request = ConvertDocumentRequest.builder() |
| 958 | + .source(HttpSource.builder().url(URI.create("https://docs.arconia.io/arconia-cli/latest/development/dev/")).build()) |
| 959 | + .options(options) |
| 960 | + .build(); |
| 961 | + |
| 962 | + var response = getDoclingClient().convertSource(request); |
| 963 | + |
| 964 | + assertThat(response).isNotNull(); |
| 965 | + assertThat(response.getStatus()).isNotEmpty(); |
| 966 | + assertThat(response.getDocument()).isNotNull(); |
| 967 | + } |
| 968 | + |
| 969 | + @Test |
| 970 | + void shouldConvertWithMdPageBreakPlaceholder() { |
| 971 | + var options = ConvertDocumentOptions.builder() |
| 972 | + .mdPageBreakPlaceholder("---BREAK---") |
| 973 | + .build(); |
| 974 | + |
| 975 | + var request = ConvertDocumentRequest.builder() |
| 976 | + .source(HttpSource.builder().url(URI.create("https://docs.arconia.io/arconia-cli/latest/development/dev/")).build()) |
| 977 | + .options(options) |
| 978 | + .build(); |
| 979 | + |
| 980 | + var response = getDoclingClient().convertSource(request); |
| 981 | + |
| 982 | + assertThat(response).isNotNull(); |
| 983 | + assertThat(response.getStatus()).isNotEmpty(); |
| 984 | + assertThat(response.getDocument()).isNotNull(); |
| 985 | + } |
| 986 | + |
| 987 | + @Test |
| 988 | + void shouldConvertWithTableCellMatchingDisabled() { |
| 989 | + var options = ConvertDocumentOptions.builder() |
| 990 | + .tableCellMatching(false) |
| 991 | + .build(); |
| 992 | + |
| 993 | + var request = ConvertDocumentRequest.builder() |
| 994 | + .source(HttpSource.builder().url(URI.create("https://docs.arconia.io/arconia-cli/latest/development/dev/")).build()) |
| 995 | + .options(options) |
| 996 | + .build(); |
| 997 | + |
| 998 | + var response = getDoclingClient().convertSource(request); |
| 999 | + |
| 1000 | + assertThat(response).isNotNull(); |
| 1001 | + assertThat(response.getStatus()).isNotEmpty(); |
| 1002 | + assertThat(response.getDocument()).isNotNull(); |
| 1003 | + } |
710 | 1004 | } |
711 | 1005 |
|
712 | 1006 | @Nested |
|
0 commit comments