Skip to content

Commit 5810073

Browse files
committed
Implement join primary key rules based on functional dependencies
- Modify heading.join() to determine PK based on A → B / B → A rules
  - A → B: PK = PK(A), A's attributes first
  - B → A (not A → B): PK = PK(B), B's attributes first
  - Both: prefer left operand, PK = PK(A)
  - Neither: PK = PK(A) ∪ PK(B)
- Add comprehensive tests for join PK determination
- Test cases cover all four scenarios plus non-commutativity

Co-authored-by: dimitri-yatsenko <dimitri@datajoint.com>
1 parent c548e65 commit 5810073

File tree

2 files changed

+424
-5
lines changed

2 files changed

+424
-5
lines changed

src/datajoint/heading.py

Lines changed: 86 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -471,15 +471,96 @@ def select(self, select_list, rename_map=None, compute_map=None):
471471
def join(self, other):
    """
    Join two headings into a new one.

    The primary key of the result depends on the functional dependency relationship
    (A is self, B is other):
    - A → B (self determines other): PK = PK(self), self's attributes first
    - B → A (other determines self) and not A → B: PK = PK(other), other's attributes first
    - Both A → B and B → A: the left operand is preferred, PK = PK(self)
    - Neither: PK = PK(self) ∪ PK(other), self's attributes first

    A → B holds iff every attribute in PK(B) is either in PK(A) or secondary in A.
    B → A holds iff every attribute in PK(A) is either in PK(B) or secondary in B.

    It assumes that self and other are headings that share no common dependent attributes.

    :param other: the heading to join with self (the right operand).
    :return: a new Heading whose attributes are ordered as: the leading operand's
        primary key, the other operand's primary key, the leading operand's
        secondary attributes, the other operand's secondary attributes. Each
        attribute name appears once, at its first position in that order.
    """

    def determines(a, b):
        # a → b: every primary key attribute of b is an attribute (PK or secondary) of a.
        known = set(a.primary_key) | set(a.secondary_attributes)
        return all(name in known for name in b.primary_key)

    # Pick the leading operand and decide whether the trailing operand's
    # primary key attributes remain part of the resulting primary key.
    # A → B is tested first, so when both directions hold the left operand wins.
    if determines(self, other):
        first, second, second_pk_in_key = self, other, False
    elif determines(other, self):
        first, second, second_pk_in_key = other, self, False
    else:
        first, second, second_pk_in_key = self, other, True

    # (source heading, attribute names, in_key flag), in output order.
    # NOTE: when `first` determines `second`, any PK attribute of `second` not in
    # PK(first) is necessarily secondary in `first`; it is emitted here, from
    # `second`'s definition, ahead of the remaining secondary attributes of `first`.
    groups = (
        (first, first.primary_key, True),
        (second, second.primary_key, second_pk_in_key),
        (first, first.secondary_attributes, False),
        (second, second.secondary_attributes, False),
    )

    seen = set()
    result_attrs = []
    for heading, names, in_key in groups:
        for name in names:
            if name not in seen:
                seen.add(name)
                # Copy the attribute definition, overriding only its key membership.
                result_attrs.append(dict(heading.attributes[name].todict(), in_key=in_key))

    return Heading(result_attrs)
563+
483564
def set_primary_key(self, primary_key):
484565
"""
485566
Create a new heading with the specified primary key.

0 commit comments

Comments
 (0)