Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@
"AssetsRepo": "Azure/azure-sdk-assets",
"AssetsRepoPrefixPath": "java",
"TagPrefix": "java/contentunderstanding/azure-ai-contentunderstanding",
"Tag": "java/contentunderstanding/azure-ai-contentunderstanding_6d6c9cbfc1"
"Tag": "java/contentunderstanding/azure-ai-contentunderstanding_a546deb443"
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import com.azure.ai.contentunderstanding.ContentUnderstandingClientBuilder;
import com.azure.ai.contentunderstanding.models.AnalysisResult;
import com.azure.ai.contentunderstanding.models.ContentAnalyzerAnalyzeOperationStatus;
import com.azure.ai.contentunderstanding.models.ContentRange;
import com.azure.ai.contentunderstanding.models.DocumentContent;
import com.azure.ai.contentunderstanding.models.DocumentPage;
import com.azure.ai.contentunderstanding.models.DocumentTable;
Expand All @@ -29,6 +30,7 @@
* 2. Analyzing the document
* 3. Extracting markdown content
* 4. Accessing document properties (pages, tables, etc.)
* 5. Using ContentRange to analyze specific pages
*/
public class Sample01_AnalyzeBinary {

Expand Down Expand Up @@ -126,5 +128,79 @@ public static void main(String[] args) throws IOException {
// END:ContentUnderstandingAccessDocumentProperties

System.out.println("\nBinary document analysis completed successfully");

// Demonstrate ContentRange usage with a multi-page document
System.out.println("\n--- ContentRange Examples ---");
analyzeBinaryWithContentRange(client);
}

/**
 * Shows how {@link ContentRange} restricts analysis of a binary document to selected pages.
 * <p>
 * The same multi-page PDF is submitted five times, each time with a different range built from
 * {@code pagesFrom}, {@code page}, {@code pages} or {@code combine}. After every request the number
 * of returned pages is printed to standard output, along with the start/end page numbers where
 * the sample prints them. If the sample file cannot be read, the error message is written to
 * standard error and the method returns normally.
 *
 * @param client the synchronous client used to submit each analysis request
 */
public static void analyzeBinaryWithContentRange(ContentUnderstandingClient client) {
    try {
        // Read the multi-page sample document (4 pages) fully into memory
        byte[] pdfBytes = Files.readAllBytes(Paths.get("src/samples/resources/mixed_financial_docs.pdf"));
        BinaryData pdf = BinaryData.fromBytes(pdfBytes);

        // BEGIN:ContentUnderstandingAnalyzeBinaryWithContentRange
        // ContentRange.pagesFrom(): page 3 through the end of the document
        AnalysisResult fromThreeResult = client
            .beginAnalyzeBinary("prebuilt-documentSearch", pdf,
                ContentRange.pagesFrom(3), "application/octet-stream", null)
            .getFinalResult();
        DocumentContent fromThree = (DocumentContent) fromThreeResult.getContents().get(0);
        int fromThreeCount = fromThree.getPages().size();
        System.out.println("PagesFrom(3): returned " + fromThreeCount + " pages"
            + " (pages " + fromThree.getStartPageNumber() + "-" + fromThree.getEndPageNumber() + ")");

        // ContentRange.page(): exactly one page
        AnalysisResult secondPageResult = client
            .beginAnalyzeBinary("prebuilt-documentSearch", pdf,
                ContentRange.page(2), "application/octet-stream", null)
            .getFinalResult();
        DocumentContent secondPage = (DocumentContent) secondPageResult.getContents().get(0);
        int secondPageCount = secondPage.getPages().size();
        System.out.println("Page(2): returned " + secondPageCount + " page"
            + " (page " + secondPage.getStartPageNumber() + ")");

        // ContentRange.pages(): a contiguous span of pages
        AnalysisResult oneToThreeResult = client
            .beginAnalyzeBinary("prebuilt-documentSearch", pdf,
                ContentRange.pages(1, 3), "application/octet-stream", null)
            .getFinalResult();
        DocumentContent oneToThree = (DocumentContent) oneToThreeResult.getContents().get(0);
        int oneToThreeCount = oneToThree.getPages().size();
        System.out.println("Pages(1,3): returned " + oneToThreeCount + " pages"
            + " (pages " + oneToThree.getStartPageNumber() + "-" + oneToThree.getEndPageNumber() + ")");

        // ContentRange.combine(): several ranges merged into one request
        ContentRange firstPlusTail = ContentRange.combine(
            ContentRange.page(1),
            ContentRange.pages(3, 4));
        AnalysisResult mergedResult = client
            .beginAnalyzeBinary("prebuilt-documentSearch", pdf,
                firstPlusTail, "application/octet-stream", null)
            .getFinalResult();
        DocumentContent merged = (DocumentContent) mergedResult.getContents().get(0);
        int mergedCount = merged.getPages().size();
        System.out.println("Combine(Page(1), Pages(3,4)): returned " + mergedCount + " pages"
            + " (pages " + merged.getStartPageNumber() + "-" + merged.getEndPageNumber() + ")");

        // ContentRange.combine() with pages beyond the document's end (clamped by the service)
        ContentRange beyondEnd = ContentRange.combine(
            ContentRange.pages(1, 3),
            ContentRange.page(5),
            ContentRange.pagesFrom(9));
        AnalysisResult clampedResult = client
            .beginAnalyzeBinary("prebuilt-documentSearch", pdf,
                beyondEnd, "application/octet-stream", null)
            .getFinalResult();
        DocumentContent clamped = (DocumentContent) clampedResult.getContents().get(0);
        int clampedCount = clamped.getPages().size();
        System.out.println(
            "Combine(Pages(1,3), Page(5), PagesFrom(9)): returned " + clampedCount + " pages");
        // END:ContentUnderstandingAnalyzeBinaryWithContentRange

        System.out.println("ContentRange binary analysis completed successfully");
    } catch (IOException e) {
        // Only the file read above declares IOException; report it and return without rethrowing
        System.err.println("Error reading multi-page file: " + e.getMessage());
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import com.azure.ai.contentunderstanding.ContentUnderstandingClientBuilder;
import com.azure.ai.contentunderstanding.models.AnalysisResult;
import com.azure.ai.contentunderstanding.models.ContentAnalyzerAnalyzeOperationStatus;
import com.azure.ai.contentunderstanding.models.ContentRange;
import com.azure.ai.contentunderstanding.models.DocumentContent;
import com.azure.ai.contentunderstanding.models.DocumentPage;
import com.azure.ai.contentunderstanding.models.DocumentTable;
Expand All @@ -32,6 +33,7 @@
* 2. Analyzing the document
* 3. Extracting markdown content
* 4. Accessing document properties (pages, tables, etc.)
* 5. Using ContentRange to analyze specific pages
*/
public class Sample01_AnalyzeBinaryAsync {

Expand Down Expand Up @@ -162,5 +164,147 @@ public static void main(String[] args) throws IOException, InterruptedException
if (!latch.await(2, TimeUnit.MINUTES)) {
System.err.println("Timed out waiting for async operations to complete.");
}

// Demonstrate ContentRange usage with a multi-page document
System.out.println("\n--- ContentRange Examples ---");
analyzeBinaryWithContentRange(client);
}

/**
 * Asynchronous walkthrough of {@link ContentRange}: analyzes selected pages of a binary document.
 * <p>
 * Five analysis pipelines are assembled, one per range built from {@code pagesFrom}, {@code page},
 * {@code pages} or {@code combine}. They are chained with {@code then()} so each starts after the
 * previous one completes. Each pipeline prints the number of returned pages once its final result
 * is available. The calling thread then blocks on a latch for up to five minutes while the chain
 * runs.
 *
 * @param client the asynchronous client used to submit each analysis request
 * @throws IOException if the sample PDF cannot be read
 * @throws InterruptedException if the calling thread is interrupted while waiting on the latch
 */
public static void analyzeBinaryWithContentRange(ContentUnderstandingAsyncClient client)
    throws IOException, InterruptedException {
    // Read the multi-page sample document (4 pages) fully into memory
    byte[] pdfBytes = Files.readAllBytes(Paths.get("src/samples/resources/mixed_financial_docs.pdf"));
    BinaryData pdf = BinaryData.fromBytes(pdfBytes);

    // Released once the chained pipeline either succeeds or fails
    CountDownLatch done = new CountDownLatch(1);

    // BEGIN:ContentUnderstandingAnalyzeBinaryWithContentRangeAsync
    // ContentRange.pagesFrom(): page 3 through the end of the document
    Mono<AnalysisResult> fromThree = client
        .beginAnalyzeBinary("prebuilt-documentSearch", pdf,
            ContentRange.pagesFrom(3), "application/octet-stream", null)
        .last()
        .flatMap(response -> {
            if (!response.getStatus().isComplete()) {
                return Mono.error(new RuntimeException(
                    "Polling completed unsuccessfully with status: " + response.getStatus()));
            }
            return response.getFinalResult();
        })
        .doOnNext(analysis -> {
            DocumentContent doc = (DocumentContent) analysis.getContents().get(0);
            int pageCount = doc.getPages().size();
            System.out.println("PagesFrom(3): returned " + pageCount + " pages"
                + " (pages " + doc.getStartPageNumber() + "-" + doc.getEndPageNumber() + ")");
        });

    // ContentRange.page(): exactly one page
    Mono<AnalysisResult> secondPage = client
        .beginAnalyzeBinary("prebuilt-documentSearch", pdf,
            ContentRange.page(2), "application/octet-stream", null)
        .last()
        .flatMap(response -> {
            if (!response.getStatus().isComplete()) {
                return Mono.error(new RuntimeException(
                    "Polling completed unsuccessfully with status: " + response.getStatus()));
            }
            return response.getFinalResult();
        })
        .doOnNext(analysis -> {
            DocumentContent doc = (DocumentContent) analysis.getContents().get(0);
            int pageCount = doc.getPages().size();
            System.out.println("Page(2): returned " + pageCount + " page"
                + " (page " + doc.getStartPageNumber() + ")");
        });

    // ContentRange.pages(): a contiguous span of pages
    Mono<AnalysisResult> oneToThree = client
        .beginAnalyzeBinary("prebuilt-documentSearch", pdf,
            ContentRange.pages(1, 3), "application/octet-stream", null)
        .last()
        .flatMap(response -> {
            if (!response.getStatus().isComplete()) {
                return Mono.error(new RuntimeException(
                    "Polling completed unsuccessfully with status: " + response.getStatus()));
            }
            return response.getFinalResult();
        })
        .doOnNext(analysis -> {
            DocumentContent doc = (DocumentContent) analysis.getContents().get(0);
            int pageCount = doc.getPages().size();
            System.out.println("Pages(1,3): returned " + pageCount + " pages"
                + " (pages " + doc.getStartPageNumber() + "-" + doc.getEndPageNumber() + ")");
        });

    // ContentRange.combine(): several ranges merged into one request
    Mono<AnalysisResult> merged = client
        .beginAnalyzeBinary("prebuilt-documentSearch", pdf,
            ContentRange.combine(
                ContentRange.page(1),
                ContentRange.pages(3, 4)),
            "application/octet-stream", null)
        .last()
        .flatMap(response -> {
            if (!response.getStatus().isComplete()) {
                return Mono.error(new RuntimeException(
                    "Polling completed unsuccessfully with status: " + response.getStatus()));
            }
            return response.getFinalResult();
        })
        .doOnNext(analysis -> {
            DocumentContent doc = (DocumentContent) analysis.getContents().get(0);
            int pageCount = doc.getPages().size();
            System.out.println("Combine(Page(1), Pages(3,4)): returned " + pageCount + " pages"
                + " (pages " + doc.getStartPageNumber() + "-" + doc.getEndPageNumber() + ")");
        });

    // ContentRange.combine() with pages beyond the document's end (clamped by the service)
    Mono<AnalysisResult> clamped = client
        .beginAnalyzeBinary("prebuilt-documentSearch", pdf,
            ContentRange.combine(
                ContentRange.pages(1, 3),
                ContentRange.page(5),
                ContentRange.pagesFrom(9)),
            "application/octet-stream", null)
        .last()
        .flatMap(response -> {
            if (!response.getStatus().isComplete()) {
                return Mono.error(new RuntimeException(
                    "Polling completed unsuccessfully with status: " + response.getStatus()));
            }
            return response.getFinalResult();
        })
        .doOnNext(analysis -> {
            DocumentContent doc = (DocumentContent) analysis.getContents().get(0);
            int pageCount = doc.getPages().size();
            System.out.println("Combine(Pages(1,3), Page(5), PagesFrom(9)): returned "
                + pageCount + " pages");
        });

    // Run the five analyses one after another; then() subscribes to the next only on completion
    Mono<AnalysisResult> pipeline = fromThree
        .then(secondPage)
        .then(oneToThree)
        .then(merged)
        .then(clamped)
        .doOnError(failure -> System.err.println("Error: " + failure.getMessage()));

    pipeline.subscribe(
        lastResult -> {
            System.out.println("ContentRange async analysis completed successfully");
            done.countDown();
        },
        // The failure was already printed by doOnError; just release the waiting thread
        failure -> done.countDown()
    );
    // END:ContentUnderstandingAnalyzeBinaryWithContentRangeAsync

    // Block the calling thread until the chain signals, or give up after five minutes
    boolean finishedInTime = done.await(5, TimeUnit.MINUTES);
    if (!finishedInTime) {
        System.err.println("Timed out waiting for ContentRange async operations.");
    }
}
}
Loading