feat: add KthLargestMTuple model (#405) (#805)

isPANN · claude · web-flow · commit a4b8656b421e · 2026-03-30T01:42:17.000+08:00
* feat: add KthLargestMTuple model (issue #405) Add the Kth Largest m-Tuple counting problem (Garey & Johnson MP10). This is the first aggregate-only model using Value = Sum<u64>, which required a fix to the example_db model_specs_are_optimal test to gracefully handle models without witness support. Closes #405 Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * Fix formatting after merge conflict resolution Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * Fix paper: correct PP-completeness claim and broken solve command - Replace false NP-completeness claim with accurate PP-completeness description citing Haase & Kiefer (2016) - Fix `pred solve` command to use `--solver brute-force` (no ILP path) - Add haase2016 BibTeX entry Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * Remove unused K field from KthLargestMTuple K was stored but never used in evaluate() — the model is a pure counting problem. The G&J decision version (count >= K?) is noted in the paper but not part of the computational model. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * Simplify paper paragraph: remove K references from counting model Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * Revert K removal: keep K field for G&J decision formulation The K threshold is needed for the standard PARTITION → KthLargestMTuple reduction (G&J R86). Without K, the counting version has no known many-one reductions — only Turing reductions exist. Retains the paper fixes: PP-completeness claim, --solver brute-force. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
diff --git a/docs/paper/reductions.typ b/docs/paper/reductions.typ
@@ -199,6 +199,7 @@
   "SumOfSquaresPartition": [Sum of Squares Partition],
   "TimetableDesign": [Timetable Design],
   "TwoDimensionalConsecutiveSets": [2-Dimensional Consecutive Sets],
+  "KthLargestMTuple": [$K$th Largest $m$-Tuple],
 )
 
 // Definition label: "def:<ProblemName>" — each definition block must have a matching label
@@ -4705,6 +4706,32 @@ A classical NP-complete problem from Garey and Johnson @garey1979[Ch.~3, p.~76],
   ]
 }
 
+#{
+  let x = load-model-example("KthLargestMTuple")
+  let sets = x.instance.sets
+  let k = x.instance.k
+  let bound = x.instance.bound
+  let config = x.optimal_config
+  let m = sets.len()
+  // Size of the Cartesian product (product of the set sizes); the qualifying count itself is not computed here
+  let total = sets.fold(1, (acc, s) => acc * s.len())
+  [
+    #problem-def("KthLargestMTuple")[
+      Given $m$ finite sets $X_1, dots, X_m$ of positive integers, a bound $B in ZZ^+$, and a threshold $K in ZZ^+$, count the number of distinct $m$-tuples $(x_1, dots, x_m) in X_1 times dots.c times X_m$ satisfying $sum_(i=1)^m x_i >= B$. The answer is _yes_ iff this count is at least $K$.
+    ][
+      The $K$th Largest $m$-Tuple problem is MP10 in Garey and Johnson's appendix @garey1979. It is _not known to be in NP_, because a "yes" certificate may need to exhibit $K$ qualifying tuples and $K$ can be exponentially large. The problem is PP-complete under polynomial-time Turing reductions @haase2016 and NP-hard by a many-one reduction from Partition @garey1979; note that the case $K = 1$ is trivial, since it only asks whether the sum of the per-set maxima reaches $B$. In the general case, the only known exact approach is brute-force enumeration of all $product_(i=1)^m |X_i|$ tuples, so the registered catalog complexity is `total_tuples * num_sets`#footnote[No algorithm improving on brute-force is known for the general $K$th Largest $m$-Tuple problem.].
+
+      *Example.* Let $m = #m$, $B = #bound$, and $K = #k$ with sets #sets.enumerate().map(((i, s)) => [$X_#(i+1) = {#s.map(str).join(", ")}$]).join([, ]). The Cartesian product has $#total$ tuples. For instance, the tuple $(#config.enumerate().map(((i, c)) => str(sets.at(i).at(c))).join(", "))$ has sum $#config.enumerate().map(((i, c)) => sets.at(i).at(c)).sum() >= #bound$, contributing 1 to the count. In total, #k of the #total tuples satisfy the bound, so the answer is _yes_ (count $= K$).
+
+      #pred-commands(
+        "pred create --example KthLargestMTuple -o kth-largest-m-tuple.json",
+        "pred solve kth-largest-m-tuple.json --solver brute-force",
+        "pred evaluate kth-largest-m-tuple.json --config " + config.map(str).join(","),
+      )
+    ]
+  ]
+}
+
 #{
   let x = load-model-example("SequencingWithReleaseTimesAndDeadlines")
   let n = x.instance.lengths.len()
diff --git a/docs/paper/references.bib b/docs/paper/references.bib
@@ -1455,6 +1455,16 @@ @techreport{plaisted1976
   year        = {1976}
 }
 
+@article{haase2016,
+  author  = {Haase, Christoph and Kiefer, Stefan},
+  title   = {The Complexity of the {K}th Largest Subset Problem and Related Problems},
+  journal = {Information Processing Letters},
+  volume  = {116},
+  number  = {2},
+  pages   = {111--115},
+  year    = {2016}
+}
+
 @article{Murty1972,
   author  = {Murty, Katta G.},
   title   = {A fundamental problem in linear inequalities with applications to the travelling salesman problem},
diff --git a/problemreductions-cli/src/cli.rs b/problemreductions-cli/src/cli.rs
@@ -249,6 +249,7 @@ Flags by problem type:
   ProductionPlanning             --num-periods, --demands, --capacities, --setup-costs, --production-costs, --inventory-costs, --cost-bound
   SubsetSum                       --sizes, --target
   ThreePartition                  --sizes, --bound
+  KthLargestMTuple                --sets, --k, --bound
   QuadraticDiophantineEquations    --coeff-a, --coeff-b, --coeff-c
   SumOfSquaresPartition           --sizes, --num-groups
   ExpectedRetrievalCost           --probabilities, --num-sectors
diff --git a/problemreductions-cli/src/commands/create.rs b/problemreductions-cli/src/commands/create.rs
@@ -24,9 +24,9 @@ use problemreductions::models::misc::{
     AdditionalKey, BinPacking, BoyceCoddNormalFormViolation, CapacityAssignment, CbqRelation,
     ConjunctiveBooleanQuery, ConsistencyOfDatabaseFrequencyTables, EnsembleComputation,
     ExpectedRetrievalCost, FlowShopScheduling, FrequencyTable, GroupingBySwapping,
-    JobShopScheduling, KnownValue, LongestCommonSubsequence, MinimumTardinessSequencing,
-    MultiprocessorScheduling, PaintShop, PartiallyOrderedKnapsack, ProductionPlanning, QueryArg,
-    RectilinearPictureCompression, ResourceConstrainedScheduling,
+    JobShopScheduling, KnownValue, KthLargestMTuple, LongestCommonSubsequence,
+    MinimumTardinessSequencing, MultiprocessorScheduling, PaintShop, PartiallyOrderedKnapsack,
+    ProductionPlanning, QueryArg, RectilinearPictureCompression, ResourceConstrainedScheduling,
     SchedulingWithIndividualDeadlines, SequencingToMinimizeMaximumCumulativeCost,
     SequencingToMinimizeWeightedCompletionTime, SequencingToMinimizeWeightedTardiness,
     SequencingWithReleaseTimesAndDeadlines, SequencingWithinIntervals, ShortestCommonSupersequence,
@@ -732,6 +732,7 @@ fn example_for(canonical: &str, graph_type: Option<&str>) -> &'static str {
         "IntegerKnapsack" => "--sizes 3,4,5,2,7 --values 4,5,7,3,9 --capacity 15",
         "SubsetSum" => "--sizes 3,7,1,8,2,4 --target 11",
         "ThreePartition" => "--sizes 4,5,6,4,6,5 --bound 15",
+        "KthLargestMTuple" => "--sets \"2,5,8;3,6;1,4,7\" --k 14 --bound 12",
         "QuadraticDiophantineEquations" => "--coeff-a 3 --coeff-b 5 --coeff-c 53",
         "BoyceCoddNormalFormViolation" => {
             "--n 6 --sets \"0,1:2;2:3;3,4:5\" --target 0,1,2,3,4,5"
@@ -2423,6 +2424,40 @@ pub fn create(args: &CreateArgs, out: &OutputConfig) -> Result<()> {
             )
         }
 
+        // KthLargestMTuple
+        "KthLargestMTuple" => {
+            let sets_str = args.sets.as_deref().ok_or_else(|| {
+                anyhow::anyhow!(
+                    "KthLargestMTuple requires --sets, --k, and --bound\n\n\
+                     Usage: pred create KthLargestMTuple --sets \"2,5,8;3,6;1,4,7\" --k 14 --bound 12"
+                )
+            })?;
+            let k_val = args.k.ok_or_else(|| {
+                anyhow::anyhow!(
+                    "KthLargestMTuple requires --k\n\n\
+                     Usage: pred create KthLargestMTuple --sets \"2,5,8;3,6;1,4,7\" --k 14 --bound 12"
+                )
+            })?;
+            let bound = args.bound.ok_or_else(|| {
+                anyhow::anyhow!(
+                    "KthLargestMTuple requires --bound\n\n\
+                     Usage: pred create KthLargestMTuple --sets \"2,5,8;3,6;1,4,7\" --k 14 --bound 12"
+                )
+            })?;
+            let bound = u64::try_from(bound).map_err(|_| {
+                anyhow::anyhow!("KthLargestMTuple requires a positive integer --bound")
+            })?;
+            let sets: Vec<Vec<u64>> = sets_str
+                .split(';')
+                .map(|group| util::parse_comma_list(group))
+                .collect::<Result<_, _>>()?;
+            (
+                ser(KthLargestMTuple::try_new(sets, k_val as u64, bound)
+                    .map_err(anyhow::Error::msg)?)?,
+                resolved_variant.clone(),
+            )
+        }
+
         // QuadraticDiophantineEquations
         "QuadraticDiophantineEquations" => {
             let a = args.coeff_a.ok_or_else(|| {
diff --git a/src/models/misc/kth_largest_m_tuple.rs b/src/models/misc/kth_largest_m_tuple.rs
@@ -0,0 +1,208 @@
+//! Kth Largest m-Tuple problem implementation.
+//!
+//! Given m sets of positive integers and thresholds K and B, count how many
+//! distinct m-tuples (one element per set) have total size at least B.
+//! The answer is YES iff the count is at least K. Garey & Johnson MP10.
+
+use crate::registry::{FieldInfo, ProblemSchemaEntry, ProblemSizeFieldEntry};
+use crate::traits::Problem;
+use crate::types::Sum;
+use serde::de::Error as _;
+use serde::{Deserialize, Deserializer, Serialize};
+
+inventory::submit! {
+    ProblemSchemaEntry {
+        name: "KthLargestMTuple",
+        display_name: "Kth Largest m-Tuple",
+        aliases: &[],
+        dimensions: &[],
+        module_path: module_path!(),
+        description: "Count m-tuples whose total size meets a bound and compare against a threshold K",
+        fields: &[
+            FieldInfo { name: "sets", type_name: "Vec<Vec<u64>>", description: "m sets, each containing positive integer sizes" },
+            FieldInfo { name: "k", type_name: "u64", description: "Threshold K (answer YES iff count >= K)" },
+            FieldInfo { name: "bound", type_name: "u64", description: "Lower bound B on tuple sum" },
+        ],
+    }
+}
+
+inventory::submit! {
+    ProblemSizeFieldEntry {
+        name: "KthLargestMTuple",
+        fields: &["num_sets", "total_tuples"],
+    }
+}
+
+/// The Kth Largest m-Tuple problem.
+///
+/// Given sets `X_1, ..., X_m` of positive integers, a threshold `K`, and a
+/// bound `B`, count how many distinct m-tuples `(x_1, ..., x_m)` in
+/// `X_1 x ... x X_m` satisfy `sum(x_i) >= B`. The answer is YES iff the
+/// count is at least `K`.
+///
+/// # Representation
+///
+/// Variable `i` is an index into `sets[i]` (range `0..sets[i].len()`); `evaluate`
+/// returns `Sum(1)` if the selected sizes total at least `B`, else `Sum(0)`, so the
+/// aggregate over all configurations is the count to compare against `K` (`k()`).
+///
+/// # Example
+///
+/// ```
+/// use problemreductions::models::misc::KthLargestMTuple;
+/// use problemreductions::{Problem, Solver, BruteForce};
+///
+/// let problem = KthLargestMTuple::new(
+///     vec![vec![2, 5, 8], vec![3, 6], vec![1, 4, 7]],
+///     14,
+///     12,
+/// );
+/// let solver = BruteForce::new();
+/// let value = solver.solve(&problem);
+/// // 14 of the 18 tuples have sum >= 12
+/// assert_eq!(value, problemreductions::types::Sum(14));
+/// ```
+#[derive(Debug, Clone, Serialize)]
+pub struct KthLargestMTuple {
+    sets: Vec<Vec<u64>>, // X_1..X_m; non-empty, entries > 0, duplicates are not rejected
+    k: u64, // threshold K; validated as > 0, not read by `evaluate`
+    bound: u64, // lower bound B on the tuple sum; validated as > 0
+}
+
+impl KthLargestMTuple {
+    /// Checks the invariants every instance must satisfy.
+    ///
+    /// # Errors
+    ///
+    /// Returns a message for the first violated rule, checked in this order:
+    /// no sets at all, an empty set, a size of zero, `k == 0`, `bound == 0`.
+    fn validate(sets: &[Vec<u64>], k: u64, bound: u64) -> Result<(), String> {
+        if sets.is_empty() {
+            return Err("KthLargestMTuple requires at least one set".to_string());
+        }
+        if sets.iter().any(|s| s.is_empty()) {
+            return Err("Every set must be non-empty".to_string());
+        }
+        if sets.iter().any(|s| s.contains(&0)) {
+            return Err("All sizes must be positive (> 0)".to_string());
+        }
+        if k == 0 {
+            return Err("Threshold K must be positive".to_string());
+        }
+        if bound == 0 {
+            return Err("Bound B must be positive".to_string());
+        }
+        Ok(())
+    }
+
+    /// Try to create a new KthLargestMTuple instance.
+    ///
+    /// # Errors
+    ///
+    /// Returns the validation message if `sets` is empty, contains an empty
+    /// set or a zero size, or if `k` or `bound` is zero.
+    pub fn try_new(sets: Vec<Vec<u64>>, k: u64, bound: u64) -> Result<Self, String> {
+        Self::validate(&sets, k, bound)?;
+        Ok(Self { sets, k, bound })
+    }
+
+    /// Create a new KthLargestMTuple instance.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the inputs are invalid (see [`KthLargestMTuple::try_new`]).
+    pub fn new(sets: Vec<Vec<u64>>, k: u64, bound: u64) -> Self {
+        Self::try_new(sets, k, bound).unwrap_or_else(|msg| panic!("{msg}"))
+    }
+
+    /// Returns the sets.
+    pub fn sets(&self) -> &[Vec<u64>] {
+        &self.sets
+    }
+
+    /// Returns the threshold K.
+    pub fn k(&self) -> u64 {
+        self.k
+    }
+
+    /// Returns the bound B.
+    pub fn bound(&self) -> u64 {
+        self.bound
+    }
+
+    /// Returns the number of sets (m).
+    pub fn num_sets(&self) -> usize {
+        self.sets.len()
+    }
+
+    /// Returns the total number of m-tuples (product of set sizes).
+    ///
+    /// The product saturates at `usize::MAX` instead of overflowing, so this
+    /// size getter never panics (debug) or wraps to a misleadingly small value
+    /// (release) on instances with very many tuples.
+    pub fn total_tuples(&self) -> usize {
+        self.sets
+            .iter()
+            .fold(1usize, |acc, s| acc.saturating_mul(s.len()))
+    }
+}
+
+/// Unvalidated mirror of [`KthLargestMTuple`] used only as a deserialization target.
+#[derive(Deserialize)]
+struct KthLargestMTupleDef {
+    sets: Vec<Vec<u64>>,
+    k: u64,
+    bound: u64,
+}
+
+impl<'de> Deserialize<'de> for KthLargestMTuple {
+    /// Deserializes the raw fields, then routes them through `try_new` so that
+    /// deserialized instances obey the same invariants as constructed ones.
+    fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
+        let KthLargestMTupleDef { sets, k, bound } =
+            KthLargestMTupleDef::deserialize(deserializer)?;
+        KthLargestMTuple::try_new(sets, k, bound).map_err(D::Error::custom)
+    }
+}
+
+impl Problem for KthLargestMTuple {
+    const NAME: &'static str = "KthLargestMTuple";
+    type Value = Sum<u64>;
+
+    /// This model has no variant parameters.
+    fn variant() -> Vec<(&'static str, &'static str)> {
+        crate::variant_params![]
+    }
+
+    /// One variable per set; variable `i` ranges over the indices of `sets[i]`.
+    fn dims(&self) -> Vec<usize> {
+        self.sets.iter().map(|s| s.len()).collect()
+    }
+
+    /// Returns `Sum(1)` if the sizes selected by `config` total at least the
+    /// bound, and `Sum(0)` otherwise.
+    ///
+    /// A `config` of the wrong length, or one containing an index outside its
+    /// set, also yields `Sum(0)`.
+    fn evaluate(&self, config: &[usize]) -> Sum<u64> {
+        if config.len() != self.sets.len() {
+            return Sum(0);
+        }
+        // Saturating addition keeps the comparison exact: a sum that exceeds
+        // `u64::MAX` is necessarily >= any representable bound, whereas a
+        // wrapping sum could come out small and misreport the tuple.
+        let total = self
+            .sets
+            .iter()
+            .zip(config)
+            .try_fold(0u64, |acc, (set, &choice)| {
+                set.get(choice).map(|&size| acc.saturating_add(size))
+            });
+        match total {
+            Some(sum) if sum >= self.bound => Sum(1),
+            _ => Sum(0),
+        }
+    }
+}
+
+// Best known: brute-force enumeration of all tuples, O(total_tuples * num_sets).
+// No sub-exponential exact algorithm is known for the general case.
+crate::declare_variants! {
+    default KthLargestMTuple => "total_tuples * num_sets",
+}
+
+#[cfg(feature = "example-db")]
+pub(crate) fn canonical_model_example_specs() -> Vec<crate::example_db::specs::ModelExampleSpec> {
+    use crate::example_db::specs::ModelExampleSpec;
+
+    // Canonical instance: m = 3 with X_1 = {2,5,8}, X_2 = {3,6}, X_3 = {1,4,7}, K = 14, B = 12.
+    // Of the 18 tuples, 14 reach the bound; indices [2,1,2] select (8,6,7), whose sum 21 >= 12.
+    let sets = vec![vec![2, 5, 8], vec![3, 6], vec![1, 4, 7]];
+    let instance = KthLargestMTuple::new(sets, 14, 12);
+
+    let spec = ModelExampleSpec {
+        id: "kth_largest_m_tuple",
+        instance: Box::new(instance),
+        optimal_config: vec![2, 1, 2],
+        optimal_value: serde_json::json!(1),
+    };
+    vec![spec]
+}
+
+#[cfg(test)]
+#[path = "../../unit_tests/models/misc/kth_largest_m_tuple.rs"]
+mod tests;
diff --git a/src/models/misc/mod.rs b/src/models/misc/mod.rs
@@ -77,6 +77,7 @@ mod flow_shop_scheduling;
 mod grouping_by_swapping;
 mod job_shop_scheduling;
 mod knapsack;
+mod kth_largest_m_tuple;
 mod longest_common_subsequence;
 mod minimum_tardiness_sequencing;
 mod multiprocessor_scheduling;
@@ -119,6 +120,7 @@ pub use flow_shop_scheduling::FlowShopScheduling;
 pub use grouping_by_swapping::GroupingBySwapping;
 pub use job_shop_scheduling::JobShopScheduling;
 pub use knapsack::Knapsack;
+pub use kth_largest_m_tuple::KthLargestMTuple;
 pub use longest_common_subsequence::LongestCommonSubsequence;
 pub use minimum_tardiness_sequencing::MinimumTardinessSequencing;
 pub use multiprocessor_scheduling::MultiprocessorScheduling;
@@ -186,5 +188,6 @@ pub(crate) fn canonical_model_example_specs() -> Vec<crate::example_db::specs::M
     specs.extend(subset_sum::canonical_model_example_specs());
     specs.extend(three_partition::canonical_model_example_specs());
     specs.extend(cosine_product_integration::canonical_model_example_specs());
+    specs.extend(kth_largest_m_tuple::canonical_model_example_specs());
     specs
 }
diff --git a/src/models/mod.rs b/src/models/mod.rs
@@ -40,14 +40,15 @@ pub use misc::{
     AdditionalKey, BinPacking, CapacityAssignment, CbqRelation, ConjunctiveBooleanQuery,
     ConjunctiveQueryFoldability, ConsistencyOfDatabaseFrequencyTables, CosineProductIntegration,
     EnsembleComputation, ExpectedRetrievalCost, Factoring, FlowShopScheduling, GroupingBySwapping,
-    JobShopScheduling, Knapsack, LongestCommonSubsequence, MinimumTardinessSequencing,
-    MultiprocessorScheduling, PaintShop, Partition, PrecedenceConstrainedScheduling,
-    ProductionPlanning, QueryArg, RectilinearPictureCompression, ResourceConstrainedScheduling,
-    SchedulingWithIndividualDeadlines, SequencingToMinimizeMaximumCumulativeCost,
-    SequencingToMinimizeWeightedCompletionTime, SequencingToMinimizeWeightedTardiness,
-    SequencingWithReleaseTimesAndDeadlines, SequencingWithinIntervals, ShortestCommonSupersequence,
-    StackerCrane, StaffScheduling, StringToStringCorrection, SubsetSum, SumOfSquaresPartition,
-    Term, ThreePartition, TimetableDesign,
+    JobShopScheduling, Knapsack, KthLargestMTuple, LongestCommonSubsequence,
+    MinimumTardinessSequencing, MultiprocessorScheduling, PaintShop, Partition,
+    PrecedenceConstrainedScheduling, ProductionPlanning, QueryArg, RectilinearPictureCompression,
+    ResourceConstrainedScheduling, SchedulingWithIndividualDeadlines,
+    SequencingToMinimizeMaximumCumulativeCost, SequencingToMinimizeWeightedCompletionTime,
+    SequencingToMinimizeWeightedTardiness, SequencingWithReleaseTimesAndDeadlines,
+    SequencingWithinIntervals, ShortestCommonSupersequence, StackerCrane, StaffScheduling,
+    StringToStringCorrection, SubsetSum, SumOfSquaresPartition, Term, ThreePartition,
+    TimetableDesign,
 };
 pub use set::{
     ComparativeContainment, ConsecutiveSets, ExactCoverBy3Sets, IntegerKnapsack, MaximumSetPacking,
diff --git a/src/unit_tests/example_db.rs b/src/unit_tests/example_db.rs
diff --git a/src/unit_tests/models/misc/kth_largest_m_tuple.rs b/src/unit_tests/models/misc/kth_largest_m_tuple.rs