Skip to content
Original file line number Diff line number Diff line change
Expand Up @@ -2013,6 +2013,11 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
}
}

// If PK-FK inference failed, check for NDV=0 on join keys and use joinFactor heuristic
if (inferredRowCount == -1 && hasZeroNdvJoinKey(joinKeys, joinStats)) {
inferredRowCount = computeJoinFactorEstimate(conf, Collections.max(rowCounts), rowCounts.size());
}

List<Long> distinctVals = Lists.newArrayList();

// these ndvs are later used to compute unmatched rows and num of nulls for outer joins
Expand Down Expand Up @@ -2136,7 +2141,6 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
} else {

// worst case when there are no column statistics
float joinFactor = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVE_STATS_JOIN_FACTOR);
int numParents = parents.size();
long crossRowCount = 1;
long crossDataSize = 1;
Expand Down Expand Up @@ -2182,14 +2186,8 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
newNumRows = crossRowCount;
newDataSize = crossDataSize;
} else {
if (numParents > 1) {
newNumRows = StatsUtils.safeMult(StatsUtils.safeMult(maxRowCount, (numParents - 1)), joinFactor);
newDataSize = StatsUtils.safeMult(StatsUtils.safeMult(maxDataSize, (numParents - 1)), joinFactor);
} else {
// MUX operator with 1 parent
newNumRows = StatsUtils.safeMult(maxRowCount, joinFactor);
newDataSize = StatsUtils.safeMult(maxDataSize, joinFactor);
}
newNumRows = computeJoinFactorEstimate(conf, maxRowCount, numParents);
newDataSize = computeJoinFactorEstimate(conf, maxDataSize, numParents);
}

Statistics wcStats = new Statistics(newNumRows, newDataSize, 0, 0);
Expand Down Expand Up @@ -2748,6 +2746,29 @@ private long computeRowCountAssumingInnerJoin(List<Long> rowCountParents, long d
return result;
}

@VisibleForTesting
static long computeJoinFactorEstimate(HiveConf conf, long maxValue, int numParents) {
float joinFactor = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVE_STATS_JOIN_FACTOR);
if (numParents > 1) {
return StatsUtils.safeMult(StatsUtils.safeMult(maxValue, (numParents - 1)), joinFactor);
}
return StatsUtils.safeMult(maxValue, joinFactor);
}

@VisibleForTesting
static boolean hasZeroNdvJoinKey(Map<Integer, List<String>> joinKeys,
Map<Integer, Statistics> joinStats) {
return joinKeys.entrySet().stream().anyMatch(entry -> {
Statistics posStats = joinStats.get(entry.getKey());
if (posStats.getNumRows() <= 1) {
return false;
}
return entry.getValue().stream()
.map(posStats::getColumnStatisticsFromColName)
.anyMatch(cs -> cs != null && cs.getCountDistint() == 0L);
});
}

private void updateJoinColumnsNDV(Map<Integer, List<String>> joinKeys,
Map<Integer, Statistics> joinStats, int numAttr) {
int joinColIdx = 0;
Expand Down
Loading
Loading