diff --git a/PCAxis.Serializers/Parquet/ParquetBuilder.cs b/PCAxis.Serializers/Parquet/ParquetBuilder.cs
index 4d847a7..8073cd7 100644
--- a/PCAxis.Serializers/Parquet/ParquetBuilder.cs
+++ b/PCAxis.Serializers/Parquet/ParquetBuilder.cs
@@ -58,7 +58,12 @@ public Table PopulateTable()
int matrixSize = model.Data.MatrixColumnCount * model.Data.MatrixRowCount;
double[] data = new double[matrixSize];
int[] variableValueCounts = GetVariableValueCounts();
- var indices = GenerateDataPointIndices(variableValueCounts);
+ // Build a mask indicating which variables are multi-valued content variables.
+ bool[] isContentMulti = model.Meta.Variables
+ .Select(v => v.IsContentVariable && v.Values.Count > 1)
+ .ToArray();
+
+ var indices = GenerateDataPointIndices(variableValueCounts, isContentMulti);
for (int m = 0; m < matrixSize; m++)
{
@@ -213,23 +218,26 @@ private void PopulateContentVariableRow(int[] index, int[] variableValueCounts,
int columnIndex = dataFieldIndices[columnName];
int symbolColumnIndex = dataFieldIndices[symbolColumnName]; // Get index of the symbol column
- int dataIndex = ParquetBuilder.GetDataIndex(index, variableValueCounts);
- if (dataIndex + j < data.Length)
- {
- dataIndex += j;
- }
+ // Compute the exact data index for this content value by cloning the
+ // multi-dimensional index, setting the content variable coordinate to j,
+ // and converting to the linear data index. This avoids assumptions about
+ // dimension ordering or stride between content values.
+ int varPos = model.Meta.Variables.IndexOf(variable);
+ int[] indexForValue = (int[])index.Clone();
+ indexForValue[varPos] = j;
+ int dataIndexForValue = ParquetBuilder.GetDataIndex(indexForValue, variableValueCounts);
- if (dataIndex >= 0 && dataIndex < data.Length)
+ if (dataIndexForValue >= 0 && dataIndexForValue < data.Length)
{
- if (dataSymbolMap.ContainsKey(data[dataIndex]))
+ if (dataSymbolMap.ContainsKey(data[dataIndexForValue]))
{
- row[symbolColumnIndex] = dataSymbolMap[data[dataIndex]]; // Set the symbol value
+ row[symbolColumnIndex] = dataSymbolMap[data[dataIndexForValue]]; // Set the symbol value
row[columnIndex] = double.NaN; // Replace the value with double.NaN
}
else
{
- row[columnIndex] = data[dataIndex];
+ row[columnIndex] = data[dataIndexForValue];
row[symbolColumnIndex] = null; // No symbol
}
}
@@ -250,8 +258,11 @@ private void PopulateNonContentVariableRow(int[] index, object[] row, Dictionary
var value = variable.Values[index[i]].Code;
if (variable.IsTime)
{
- value = variable.Values[index[i]].TimeValue;
- row[dataFieldIndices[variable.Name]] = value; // Original time-value
+ // Use the VALUES ordering (Code) for the displayed time value so it
+ // matches the order of the data array. Some PX files have a
+ // different TIMEVAL ordering; using Code ensures consistency with
+ // the data which follows VALUES(...).
+ row[dataFieldIndices[variable.Name]] = value; // Original time-value (from VALUES)
row[dataFieldIndices["timestamp"]] = ParseTimeScale(value, variable.TimeScale); // Parsed timestamp
}
else
@@ -412,11 +423,12 @@ static int GetDataIndex(int[] index, int[] variableValueCounts)
for (int i = index.Length - 1; i >= 0; i--)
{
- dataIndex += index[i] * multiplier;
- if (i < variableValueCounts.Length - 1) // Adjusting the condition here
+ if (i < variableValueCounts.Length - 1) // Ensure multiplier equals product of dimensions to the right
{
multiplier *= variableValueCounts[i + 1];
}
+
+ dataIndex += index[i] * multiplier;
}
return dataIndex;
@@ -473,16 +485,22 @@ private static PXModel RearrangeValues(PXModel model)
///
/// An array of integers representing the counts of values for each variable.
/// A list of integer arrays representing the data point indices.
- private static List GenerateDataPointIndices(int[] variableValueCounts)
+ private static List GenerateDataPointIndices(int[] variableValueCounts, bool[] isContentMulti)
{
int variableCount = variableValueCounts.Length;
- int[] variableIndexCounts = new int[variableCount];
+
+ // Effective counts treat multi-valued content variables as having count 1
+ // so that rows correspond to combinations of the other variables only.
+ int[] effectiveCounts = new int[variableCount];
+ for (int i = 0; i < variableCount; i++)
+ {
+ effectiveCounts[i] = (isContentMulti != null && i < isContentMulti.Length && isContentMulti[i]) ? 1 : variableValueCounts[i];
+ }
int totalDataPoints = 1;
for (int i = variableCount - 1; i >= 0; i--)
{
- variableIndexCounts[i] = totalDataPoints;
- totalDataPoints *= variableValueCounts[i];
+ totalDataPoints *= effectiveCounts[i];
}
List dataPointIndices = new List(totalDataPoints);
@@ -494,12 +512,14 @@ private static List GenerateDataPointIndices(int[] variableValueCounts)
int tempIndex = dataIndex;
for (int variableIndex = 0; variableIndex < variableCount; variableIndex++)
{
- indices[variableIndex] = tempIndex % variableValueCounts[variableIndex];
- tempIndex /= variableValueCounts[variableIndex];
+ // For multi-valued content variables effectiveCounts will be 1 so this yields 0.
+ indices[variableIndex] = tempIndex % effectiveCounts[variableIndex];
+ tempIndex /= effectiveCounts[variableIndex];
}
dataPointIndices.Add(indices);
}
+
return dataPointIndices;
}
}
diff --git a/UnitTests/Parquet/ParquetSerializationIntegrationTests.cs b/UnitTests/Parquet/ParquetSerializationIntegrationTests.cs
index df6f488..7c8c2e7 100644
--- a/UnitTests/Parquet/ParquetSerializationIntegrationTests.cs
+++ b/UnitTests/Parquet/ParquetSerializationIntegrationTests.cs
@@ -44,8 +44,12 @@ public void ShouldSerializePxModel(string pxFile)
// Sync wrapper around async call
Table table = ReadBackParquetFileSync(outputFile);
- // Assertion: Ensure that the model's matrix size is equal to the table's count.
- Assert.AreEqual(table.Count, model.Data.MatrixSize, $"Mismatch in matrix size for file {fileNameWithoutExtension}.parquet.");
+ // Assertion: Ensure that the table's row count equals the number of observations
+ // for a single ContentsCode. If the model has multiple contents, the serializer
+ // emits additional content columns rather than duplicating rows.
+ int contentCount = model.Meta.ContentVariable != null ? model.Meta.ContentVariable.Values.Count : 1;
+ int expectedRows = model.Data.MatrixSize / contentCount;
+ Assert.AreEqual(expectedRows, table.Count, $"Mismatch in matrix size for file {fileNameWithoutExtension}.parquet.");
// Assertion: Calculate the amount of columns we should have, based on the metadata
// Number of columns in meta, number of columns in table.
diff --git a/UnitTests/TestFiles/14216.px b/UnitTests/TestFiles/14216.px
new file mode 100644
index 0000000..ded6448
--- /dev/null
+++ b/UnitTests/TestFiles/14216.px
@@ -0,0 +1,62 @@
+CHARSET="ANSI";
+AXIS-VERSION="2010";
+CODEPAGE="iso-8859-1";
+LANGUAGE="no";
+CREATION-DATE="20260223 22:57";
+DECIMALS=2;
+SHOWDECIMALS=0;
+MATRIX="14216";
+COPYRIGHT=NO;
+SUBJECT-CODE="be";
+SUBJECT-AREA="Befolkning";
+TITLE="14216: Areal og befolkning i tettsteder, etter tettsted, statistikkvariabel og år";
+CONTENTS="14216: Areal og befolkning i tettsteder,";
+STUB="tettsted";
+HEADING="statistikkvariabel","år";
+CONTVARIABLE="statistikkvariabel";
+VARIABLECODE("tettsted")="TettSted";
+VALUES("tettsted")="Oslo";
+VARIABLECODE("statistikkvariabel")="ContentsCode";
+VALUES("statistikkvariabel")="Areal av tettsted (km²)","Bosatte";
+VARIABLECODE("år")="Tid";
+VALUES("år")="2025","2024";
+TIMEVAL("år")=TLIST(A1),"2024","2025";
+CODES("tettsted")="0801";
+CODES("statistikkvariabel")="Areal","Bosatte";
+CODES("år")="2025","2024";
+PRESTEXT("tettsted")=2;
+PRESTEXT("år")=0;
+PRECISION("statistikkvariabel","Areal av tettsted (km²)")=2;
+UNITS="km²";
+LAST-UPDATED("Areal av tettsted (km²)")="20251027 08:00";
+STOCKFA("Areal av tettsted (km²)")="S";
+DAYADJ("Areal av tettsted (km²)")=NO;
+SEASADJ("Areal av tettsted (km²)")=NO;
+REFPERIOD("Areal av tettsted (km²)")="01.01";
+UNITS("Areal av tettsted (km²)")="km²";
+CONTACT("Areal av tettsted (km²)")="Bjørn Lie Rapp, Statistisk sentralbyrå# +47 47 97 17 27#rnl@ssb.no##Vilni Verner Holst Bloch, Statistisk sentralbyrå# +47 99 85 23 42#vvh@ssb.no##";
+LAST-UPDATED("Bosatte")="20251027 08:00";
+STOCKFA("Bosatte")="S";
+DAYADJ("Bosatte")=NO;
+SEASADJ("Bosatte")=NO;
+REFPERIOD("Bosatte")="01.01";
+UNITS("Bosatte")="personer";
+CONTACT("Bosatte")="Bjørn Lie Rapp, Statistisk sentralbyrå# +47 47 97 17 27#rnl@ssb.no##Vilni Verner Holst Bloch, Statistisk sentralbyrå# +47 99 85 23 42#vvh@ssb.no##";
+DATABASE="Ekstern PROD database O_STATMETA_24 som 2.4";
+SOURCE="Statistisk sentralbyrå";
+INFOFILE="None";
+NOTE="Ikke medregnet personer uten opplysninger om bostedstrøk.";
+META-ID="KORTNAVN:beftett";
+META-ID("tettsted")="urn:ssb:classification:klass:110,urn:ssb:conceptvariable:vardok:141";
+DATASYMBOL1="..";
+DATASYMBOL2="...";
+DATASYMBOL3=":";
+DATASYMBOLSUM=".";
+DATASYMBOLNIL="-";
+DATANOTESUM=".";
+TABLEID="14216";
+VARIABLE-TYPE("tettsted")="V";
+VARIABLE-TYPE("år")="T";
+DATA=
+275.87 276.30 1110887.00 1098061.00
+;
\ No newline at end of file