diff --git a/dbptk-modules/dbptk-module-siard/src/main/java/com/databasepreservation/modules/siard/out/content/SIARD22ContentExportStrategy.java b/dbptk-modules/dbptk-module-siard/src/main/java/com/databasepreservation/modules/siard/out/content/SIARD22ContentExportStrategy.java
index 089586779..ddb607f2c 100644
--- a/dbptk-modules/dbptk-module-siard/src/main/java/com/databasepreservation/modules/siard/out/content/SIARD22ContentExportStrategy.java
+++ b/dbptk-modules/dbptk-module-siard/src/main/java/com/databasepreservation/modules/siard/out/content/SIARD22ContentExportStrategy.java
@@ -12,6 +12,7 @@
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
+import java.math.BigInteger;
import java.nio.file.Paths;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
@@ -263,7 +264,7 @@ private void writeArrayCell(String cellPrefix, Cell cell, ColumnStructure column
if (subCell instanceof BinaryCell) {
writeBinaryCell(CELL_PREFIX_ARRAY, subCell, column, cellTag);
} else if (subCell instanceof SimpleCell) {
- writeSimpleCell(CELL_PREFIX_ARRAY, subCell, column, cellTag);
+ writeSimpleCell(CELL_PREFIX_ARRAY, subCell, column, columnIndex, cellTag);
} else if (subCell instanceof ComposedCell) {
writeComposedCell(CELL_PREFIX_ARRAY, subCell, column, cellTag);
} else if (subCell instanceof NullCell) {
@@ -344,6 +345,18 @@ protected void writeSimpleCell(String cellPrefix, Cell cell, ColumnStructure col
}
}
+ /**
+  * Writes a simple cell using {@code arrayIndex} as the index handed to the underlying writers
+  * (the visible caller passes the array cell prefix and the element's tag number).
+  * <p>
+  * When the column type is reported as a large type and the value's byte size is above
+  * {@code THRESHOLD_TREAT_STRING_AS_CLOB}, the value is written through
+  * {@code writeLargeObjectData}; otherwise it is written through {@code writeSimpleCellData}.
+  * <p>
+  * NOTE(review): {@code columnIndex} is not read by this method, and {@code arrayIndex} is passed
+  * in the position that {@code writeLargeObjectData} names as its column index - confirm that the
+  * LOB location derived from it is the intended one for array elements.
+  *
+  * @param cellPrefix
+  *          prefix forwarded unchanged to the underlying writer
+  * @param cell
+  *          the cell to write; must be a {@link SimpleCell}
+  * @param column
+  *          structure of the column; only its type is consulted
+  * @param columnIndex
+  *          currently unused here
+  * @param arrayIndex
+  *          index forwarded to the underlying writer
+  * @throws ModuleException
+  *           propagated from the underlying writers
+  * @throws IOException
+  *           propagated from the underlying writers
+  */
+ protected void writeSimpleCell(String cellPrefix, Cell cell, ColumnStructure column, int columnIndex, int arrayIndex)
+   throws ModuleException, IOException {
+   final SimpleCell element = (SimpleCell) cell;
+   // the size is only queried when the type is a large type (short-circuit)
+   final boolean storeAsLob = Sql2008toXSDType.isLargeType(column.getType(), reporter)
+     && element.getBytesSize() > THRESHOLD_TREAT_STRING_AS_CLOB;
+
+   if (!storeAsLob) {
+     writeSimpleCellData(cellPrefix, element, arrayIndex);
+     return;
+   }
+   writeLargeObjectData(cellPrefix, cell, arrayIndex);
+ }
+
protected void writeBinaryCell(String cellPrefix, Cell cell, ColumnStructure column, int columnIndex)
throws ModuleException, IOException {
BinaryCell binaryCell = (BinaryCell) cell;
@@ -389,8 +402,8 @@ protected void writeLargeObjectData(String cellPrefix, Cell cell, int columnInde
WaitingInputStream waitingInputStream = new WaitingInputStream(digest);
InputStream inputStream = new BufferedInputStream(waitingInputStream);
- String lobDir = contentPathStrategy.getAbsoluteInternalLobDirPath(currentSchema.getIndex(), currentTable.getIndex(),
- columnIndex);
+ String lobDir = contentPathStrategy.getAbsoluteInternalLobDirPath(currentSchema.getIndex(),
+ currentTable.getIndex(), columnIndex);
lob = new LargeObject(new InputStreamProviderImpl(inputStream),
contentPathStrategy.getInternalBlobFileName(currentRowIndex + 1));
@@ -431,8 +444,8 @@ protected void writeLargeObjectData(String cellPrefix, Cell cell, int columnInde
final WaitingInputStream waitingInputStream = new WaitingInputStream(digest);
InputStream inputStream = new BufferedInputStream(waitingInputStream);
- String lobDir = contentPathStrategy.getAbsoluteInternalLobDirPath(currentSchema.getIndex(), currentTable.getIndex(),
- columnIndex);
+ String lobDir = contentPathStrategy.getAbsoluteInternalLobDirPath(currentSchema.getIndex(),
+ currentTable.getIndex(), columnIndex);
lob = new LargeObject(new InputStreamProviderImpl(inputStream),
contentPathStrategy.getInternalClobFileName(currentRowIndex + 1));
@@ -558,8 +571,12 @@ private void writeXsd() throws IOException, ModuleException {
//
xsdWriter.openTag(XS_SEQUENCE, 6);
- xsdWriter.beginOpenTag("xs:any", 7).appendAttribute(MIN_OCCURS, "0").appendAttribute("maxOccurs", "unbounded")
- .appendAttribute("processContents", "skip").endShorthandTag();
+ String xsdSubtype = Sql2008toXSDType.convert(col.getType().getSql2008TypeName());
+ for (BigInteger c = BigInteger.valueOf(0); c.compareTo(col.getCardinality()) < 0; c = c.add(BigInteger.ONE)) {
+ xsdWriter.beginOpenTag(XS_ELEMENT, 7).appendAttribute(MIN_OCCURS, "0");
+ xsdWriter.appendAttribute("name", "a" + c.add(BigInteger.ONE)).appendAttribute("type", xsdSubtype)
+ .endShorthandTag();
+ }
//
xsdWriter.closeTag(XS_SEQUENCE, 6);
diff --git a/dbptk-modules/dbptk-module-siard/src/main/java/com/databasepreservation/modules/siard/out/content/SIARD22ContentWithExternalLobsExportStrategy.java b/dbptk-modules/dbptk-module-siard/src/main/java/com/databasepreservation/modules/siard/out/content/SIARD22ContentWithExternalLobsExportStrategy.java
index 9c5486e49..6ff172544 100644
--- a/dbptk-modules/dbptk-module-siard/src/main/java/com/databasepreservation/modules/siard/out/content/SIARD22ContentWithExternalLobsExportStrategy.java
+++ b/dbptk-modules/dbptk-module-siard/src/main/java/com/databasepreservation/modules/siard/out/content/SIARD22ContentWithExternalLobsExportStrategy.java
@@ -92,6 +92,18 @@ protected void writeSimpleCell(String cellPrefix, Cell cell, ColumnStructure col
}
}
+ /**
+  * Writes a simple cell using {@code arrayIndex} as the element index, sending large values to
+  * external LOB storage.
+  * <p>
+  * When the column type is reported as a large type and the value's byte size is above
+  * {@code clobThresholdLimit}, the value is written through {@code writeLargeObjectDataOutside}
+  * (which receives both {@code columnIndex} and {@code arrayIndex}); otherwise it is written
+  * through {@code writeSimpleCellData}.
+  *
+  * @param cellPrefix
+  *          prefix forwarded unchanged to the underlying writer
+  * @param cell
+  *          the cell to write; must be a {@link SimpleCell}
+  * @param column
+  *          structure of the column; only its type is consulted
+  * @param columnIndex
+  *          column index, used only on the external LOB path
+  * @param arrayIndex
+  *          index forwarded to the underlying writer
+  * @throws ModuleException
+  *           propagated from the underlying writers
+  * @throws IOException
+  *           propagated from the underlying writers
+  */
+ @Override
+ protected void writeSimpleCell(String cellPrefix, Cell cell, ColumnStructure column, int columnIndex, int arrayIndex)
+   throws ModuleException, IOException {
+   final SimpleCell element = (SimpleCell) cell;
+   // size is read before the type check, as in the sibling overload
+   final long byteSize = element.getBytesSize();
+   final boolean exceedsThreshold = Sql2008toXSDType.isLargeType(column.getType(), reporter)
+     && byteSize > clobThresholdLimit;
+
+   if (exceedsThreshold) {
+     writeLargeObjectDataOutside(cellPrefix, cell, columnIndex, arrayIndex);
+     return;
+   }
+   writeSimpleCellData(cellPrefix, element, arrayIndex);
+ }
+
@Override
protected void writeBinaryCell(String cellPrefix, Cell cell, ColumnStructure column, int columnIndex)
throws ModuleException, IOException {
@@ -208,7 +220,123 @@ private void writeLargeObjectDataOutside(String cellPrefix, Cell cell, int colum
.get(firstExternalContainer.getPath().getFileName().toString() + File.separator, lobFileParameter).toString());
// write the LOB XML element
- currentWriter.beginOpenTag("c" + columnIndex, 2).appendAttribute("file", lobURI).appendAttribute("length",
+ currentWriter.beginOpenTag(cellPrefix + columnIndex, 2).appendAttribute("file", lobURI).appendAttribute("length",
+ String.valueOf(lobSizeParameter));
+
+ if (lobDigestChecksum != null) {
+ cell.setMessageDigest(lobDigestChecksum);
+ cell.setDigestAlgorithm(messageDigestAlgorithm);
+
+ currentWriter.appendAttribute("digestType", messageDigestAlgorithm.toUpperCase());
+ currentWriter.appendAttribute("digest", MessageDigestUtils.getHexFromMessageDigest(lobDigestChecksum, lowerCase));
+ lobDigestChecksum = null; // reset it to the default value
+ }
+
+ currentWriter.endShorthandTag();
+ }
+
+ private void writeLargeObjectDataOutside(String cellPrefix, Cell cell, int columnIndex, int arrayIndex)
+ throws IOException, ModuleException {
+ String lobFileParameter = null;
+ long lobSizeParameter = 0;
+ LargeObject lob = null;
+
+ // get size
+ if (cell instanceof BinaryCell binCell) {
+ lobSizeParameter = binCell.getSize();
+ } else if (cell instanceof SimpleCell txtCell) {
+ lobSizeParameter = txtCell.getBytesSize();
+ }
+
+ // determine path
+ Triple segmentKey = Triple.of(currentSchema.getIndex(), currentTable.getIndex(),
+ columnIndex);
+ SIARDArchiveContainer currentExternalContainer = currentExternalContainers.getOrDefault(segmentKey, null);
+ if (currentExternalContainer == null) {
+ currentExternalContainer = getAnotherExternalContainer(segmentKey);
+ writeStrategy.setup(currentExternalContainer);
+ currentLobsFolderSize = 0;
+ currentLobsInFolder = 0;
+ } else if ((maximumLobsFolderSize > 0 && lobSizeParameter + currentLobsFolderSize >= maximumLobsFolderSize
+ && (lobSizeParameter <= maximumLobsFolderSize || currentLobsFolderSize >= maximumLobsFolderSize))
+ || currentLobsInFolder >= maximumLobsPerFolder) {
+ writeStrategy.finish(currentExternalContainer);
+ currentExternalContainer = getAnotherExternalContainer(segmentKey);
+ writeStrategy.setup(currentExternalContainer);
+ currentLobsFolderSize = 0;
+ currentLobsInFolder = 0;
+ }
+ currentExternalContainers.put(segmentKey, currentExternalContainer);
+ SIARDArchiveContainer firstExternalContainer = currentExternalContainer;
+
+ // get file xml parameters
+ if (contentPathStrategy instanceof SIARD22ContentWithExternalLobsPathExportStrategy paths) {
+ if (cell instanceof BinaryCell) {
+ lobFileParameter = paths.getBlobOuterFilePath(currentTable.getIndex(), columnIndex, currentRowIndex + 1,
+ arrayIndex);
+ } else if (cell instanceof SimpleCell) {
+ lobFileParameter = paths.getClobOuterFilePath(currentTable.getIndex(), columnIndex, currentRowIndex + 1,
+ arrayIndex);
+ }
+ } else {
+ throw new NotImplementedException("Unsupported ContentPathStrategy");
+ }
+
+ if (lobSizeParameter < 0) {
+ // NULL content
+ writeNullCellData(cellPrefix, new NullCell(cell.getId()), columnIndex);
+ return;
+ }
+
+ // get lob object
+ if (cell instanceof BinaryCell binCell) {
+ lob = new LargeObject(binCell, lobFileParameter);
+ } else if (cell instanceof SimpleCell txtCell) {
+ String data = txtCell.getSimpleData();
+ ByteArrayInputStream inputStream = new ByteArrayInputStream(data.getBytes(StandardCharsets.UTF_8));
+ lob = new LargeObject(new InputStreamProviderImpl(inputStream, data.getBytes().length), lobFileParameter);
+ }
+
+ // write LOB
+ if (writeStrategy.isSimultaneousWritingSupported()) {
+ if (maximumLobsFolderSize > 0 && lobSizeParameter >= maximumLobsFolderSize) {
+ long remainingLobSize = lobSizeParameter;
+ int partSize = (int) (maximumLobsFolderSize - currentLobsFolderSize);
+ int partIndex = 1;
+ try (InputStream lobInputStream = lob.getInputStreamProvider().createInputStream()) {
+ while (remainingLobSize > 0) {
+ writeLOBPartOutside(lob, lobInputStream, currentExternalContainer, partSize, partIndex);
+ currentLobsInFolder++;
+ currentLobsFolderSize += partSize;
+ partIndex++;
+ remainingLobSize -= partSize;
+ partSize = (int) Math.min(maximumLobsFolderSize, remainingLobSize);
+ if (partSize > 0) {
+ writeStrategy.finish(currentExternalContainer);
+ currentExternalContainer = getAnotherExternalContainer(segmentKey);
+ writeStrategy.setup(currentExternalContainer);
+ currentLobsFolderSize = 0;
+ currentLobsInFolder = 0;
+ }
+ }
+ }
+ currentExternalContainers.put(segmentKey, currentExternalContainer);
+ } else {
+ writeLOBOutside(lob, currentExternalContainer);
+ currentLobsFolderSize += lobSizeParameter;
+ currentLobsInFolder++;
+ }
+ } else {
+ throw new NotImplementedException(SIARD22ContentWithExternalLobsExportStrategy.class.getName()
+ + " is not ready to be used with write strategies that don't support simultaneous writing.");
+ }
+
+ // something like "seg_0/t2_c8_r2.bin"
+ String lobURI = FilenameUtils.separatorsToUnix(Paths
+ .get(firstExternalContainer.getPath().getFileName().toString() + File.separator, lobFileParameter).toString());
+
+ // write the LOB XML element
+ currentWriter.beginOpenTag(cellPrefix + arrayIndex, 2).appendAttribute("file", lobURI).appendAttribute("length",
String.valueOf(lobSizeParameter));
if (lobDigestChecksum != null) {
diff --git a/dbptk-modules/dbptk-module-siard/src/main/java/com/databasepreservation/modules/siard/out/path/SIARD22ContentWithExternalLobsPathExportStrategy.java b/dbptk-modules/dbptk-module-siard/src/main/java/com/databasepreservation/modules/siard/out/path/SIARD22ContentWithExternalLobsPathExportStrategy.java
index f5d196693..8cf59368e 100644
--- a/dbptk-modules/dbptk-module-siard/src/main/java/com/databasepreservation/modules/siard/out/path/SIARD22ContentWithExternalLobsPathExportStrategy.java
+++ b/dbptk-modules/dbptk-module-siard/src/main/java/com/databasepreservation/modules/siard/out/path/SIARD22ContentWithExternalLobsPathExportStrategy.java
@@ -47,11 +47,23 @@ public String getClobOuterFilePath(int tableIndex, int columnIndex, int rowIndex
.append(rowIndex).append(FILE_EXTENSION_SEPARATOR).append(CLOB_EXTENSION).toString();
}
+ /**
+  * Builds the file name of a CLOB stored outside the archive for one array element, in the form
+  * {@code t<table>_c<column>_r<row>_a<array>} followed by the extension separator and the CLOB
+  * extension.
+  *
+  * @param tableIndex
+  *          number placed after {@code t}
+  * @param columnIndex
+  *          number placed after {@code _c}
+  * @param rowIndex
+  *          number placed after {@code _r}
+  * @param arrayIndex
+  *          number placed after {@code _a}
+  * @return the assembled file name
+  */
+ public String getClobOuterFilePath(int tableIndex, int columnIndex, int rowIndex, int arrayIndex) {
+   final String baseName = "t" + tableIndex + "_c" + columnIndex + "_r" + rowIndex + "_a" + arrayIndex;
+   return baseName + FILE_EXTENSION_SEPARATOR + CLOB_EXTENSION;
+ }
+
public String getBlobOuterFilePath(int tableIndex, int columnIndex, int rowIndex) {
return new StringBuilder().append("t").append(tableIndex).append("_c").append(columnIndex).append("_r")
.append(rowIndex).append(FILE_EXTENSION_SEPARATOR).append(BLOB_EXTENSION).toString();
}
+ /**
+  * Builds the file name of a BLOB stored outside the archive for one array element, in the form
+  * {@code t<table>_c<column>_r<row>_a<array>} followed by the extension separator and the BLOB
+  * extension.
+  *
+  * @param tableIndex
+  *          number placed after {@code t}
+  * @param columnIndex
+  *          number placed after {@code _c}
+  * @param rowIndex
+  *          number placed after {@code _r}
+  * @param arrayIndex
+  *          number placed after {@code _a}
+  * @return the assembled file name
+  */
+ public String getBlobOuterFilePath(int tableIndex, int columnIndex, int rowIndex, int arrayIndex) {
+   final String baseName = "t" + tableIndex + "_c" + columnIndex + "_r" + rowIndex + "_a" + arrayIndex;
+   return baseName + FILE_EXTENSION_SEPARATOR + BLOB_EXTENSION;
+ }
+
@Override
public String getClobFilePath(int schemaIndex, int tableIndex, int columnIndex, int rowIndex) {
return new StringBuilder().append(CONTENT_DIR).append(FILE_SEPARATOR).append(SCHEMA_DIR).append(schemaIndex)