diff --git a/scripts/create_addresses_table.py b/scripts/create_addresses_table.py
new file mode 100644
index 0000000..3d8df49
--- /dev/null
+++ b/scripts/create_addresses_table.py
@@ -0,0 +1,593 @@
+import argparse
+import sqlite3
+import logging
+from typing import Dict
+
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+logger = logging.getLogger(__name__)
+
+
+class AddressHierarchyBuilder:
+    """
+    Address hierarchy relationship builder, based on Prof. Michael Fuller's VB code logic.
+    Handles time segment splitting and multi-level belongs relationships, and records
+    gap periods as explicit segments so each address's timeline stays as continuous
+    as possible.
+    """
+
+    def __init__(self, db_path: str = "latest.db"):
+        self.db_path = db_path
+        self.conn = None
+        self.cursor = None
+
+    def __enter__(self):
+        self.conn = sqlite3.connect(self.db_path)
+        self.conn.row_factory = sqlite3.Row
+        self.cursor = self.conn.cursor()
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        if self.conn:
+            if exc_type is None:
+                self.conn.commit()
+            else:
+                self.conn.rollback()
+            self.conn.close()
+
+    def execute(self, sql: str, params: tuple = ()) -> int:
+        """Execute SQL and return affected row count"""
+        self.cursor.execute(sql, params)
+        return self.cursor.rowcount
+
+    def safe_min(self, *values):
+        """Safe min function that ignores None values"""
+        valid_values = [v for v in values if v is not None]
+        return min(valid_values) if valid_values else None
+
+    def safe_max(self, *values):
+        """Safe max function that ignores None values"""
+        valid_values = [v for v in values if v is not None]
+        return max(valid_values) if valid_values else None
+
+    def clean_belongs_data(self):
+        """
+        Clean invalid data in ADDR_BELONGS_DATA.
+        This is a key step in Michael's code.
+        """
+        logger.info("Cleaning belongs data...")
+
+        # Create temporary table to store cleaned data
+        self.execute("DROP TABLE IF EXISTS CLEANED_BELONGS_DATA")
+        self.execute("""
+            CREATE TEMP TABLE CLEANED_BELONGS_DATA (
+                c_addr_id INTEGER,
+                c_belongs_to INTEGER,
+                c_firstyear INTEGER,
+                c_lastyear INTEGER
+            )
+        """)
+
+        # Get all belongs relationships
+        self.cursor.execute("""
+            SELECT abd.*,
+                   ac1.c_firstyear as addr_first,
+                   ac1.c_lastyear as addr_last,
+                   ac2.c_firstyear as belongs_first,
+                   ac2.c_lastyear as belongs_last
+            FROM ADDR_BELONGS_DATA abd
+            JOIN ADDR_CODES ac1 ON abd.c_addr_id = ac1.c_addr_id
+            LEFT JOIN ADDR_CODES ac2 ON abd.c_belongs_to = ac2.c_addr_id
+        """)
+
+        rows = self.cursor.fetchall()
+        valid_count = 0
+        invalid_count = 0
+
+        for row in rows:
+            # Rule 1: Exclude Unknown (c_belongs_to = 0 or NULL)
+            if not row['c_belongs_to'] or row['c_belongs_to'] == 0:
+                invalid_count += 1
+                continue
+
+            # Rule 2: belongs_to unit must exist
+            if row['belongs_first'] is None or row['belongs_last'] is None:
+                logger.warning(f"Belongs_to unit {row['c_belongs_to']} does not exist")
+                invalid_count += 1
+                continue
+
+            # Get time values, handle None cases
+            abd_first = row['c_firstyear'] if row['c_firstyear'] is not None else row['addr_first']
+            abd_last = row['c_lastyear'] if row['c_lastyear'] is not None else row['addr_last']
+
+            # Calculate effective time range
+            effective_first = self.safe_max(abd_first, row['addr_first'], row['belongs_first'])
+            effective_last = self.safe_min(abd_last, row['addr_last'], row['belongs_last'])
+
+            if effective_first is None or effective_last is None:
+                logger.warning(f"Time range contains NULL values: {row['c_addr_id']} -> {row['c_belongs_to']}")
+                invalid_count += 1
+                continue
+
+            if effective_first > effective_last:
+                logger.warning(f"Invalid time range: {row['c_addr_id']} -> {row['c_belongs_to']} "
+                               f"({effective_first} > {effective_last})")
+                invalid_count += 1
+                continue
+
+            # Insert cleaned data
+            self.execute("""
+                INSERT INTO CLEANED_BELONGS_DATA
+                (c_addr_id, c_belongs_to, c_firstyear, c_lastyear)
+                VALUES (?, ?, ?, ?)
+            """, (row['c_addr_id'], row['c_belongs_to'],
+                  effective_first, effective_last))
+            valid_count += 1
+
+        logger.info(f"Data cleaning completed: {valid_count} valid, {invalid_count} invalid")
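+
+    # Illustrative note on the clamping rule above (all ids and years here are
+    # hypothetical, not taken from the database): if ADDR_BELONGS_DATA says
+    # address 100 belongs to unit 200 for 618-907, but ADDR_CODES limits
+    # address 100 to 621-905 and unit 200 to 618-900, the cleaned row is
+    # clamped to 621-900 (safe_max of the first years, safe_min of the last
+    # years).  Rows whose clamped range is empty, whose belongs_to is 0/NULL,
+    # or whose belongs_to unit is missing from ADDR_CODES are dropped.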
+
+    def build_time_segments_with_gaps(self):
+        """
+        Build time segments, recording gap periods as explicit records so that
+        each address's timeline stays as continuous as possible
+        """
+        logger.info("Building time segments with gap filling...")
+
+        # Create result table
+        self.execute("DROP TABLE IF EXISTS TIME_SEGMENTS")
+        self.execute("""
+            CREATE TEMP TABLE TIME_SEGMENTS (
+                c_addr_id INTEGER,
+                segment_start INTEGER,
+                segment_end INTEGER,
+                belongs_chain TEXT,
+                level1_id INTEGER,
+                level1_start INTEGER,
+                level1_end INTEGER,
+                level2_id INTEGER,
+                level2_start INTEGER,
+                level2_end INTEGER,
+                level3_id INTEGER,
+                level3_start INTEGER,
+                level3_end INTEGER,
+                level4_id INTEGER,
+                level4_start INTEGER,
+                level4_end INTEGER,
+                level5_id INTEGER,
+                level5_start INTEGER,
+                level5_end INTEGER
+            )
+        """)
+
+        # Get all addresses with valid year data
+        self.cursor.execute("""
+            SELECT c_addr_id, c_firstyear, c_lastyear
+            FROM ADDR_CODES
+            WHERE c_firstyear IS NOT NULL AND c_lastyear IS NOT NULL
+        """)
+        addresses = self.cursor.fetchall()
+
+        logger.info(f"Processing {len(addresses)} addresses with valid year data...")
+
+        for addr_row in addresses:
+            addr_id = addr_row['c_addr_id']
+            addr_first = addr_row['c_firstyear']
+            addr_last = addr_row['c_lastyear']
+
+            # Skip if years are invalid
+            if addr_first is None or addr_last is None or addr_first > addr_last:
+                logger.warning(f"Skipping address {addr_id} with invalid years: {addr_first}-{addr_last}")
+                continue
+
+            # Get all level 1 belongs relationships for this address
+            self.cursor.execute("""
+                SELECT DISTINCT c_belongs_to, c_firstyear, c_lastyear
+                FROM CLEANED_BELONGS_DATA
+                WHERE c_addr_id = ?
+                ORDER BY c_firstyear
+            """, (addr_id,))
+
+            level1_belongs = self.cursor.fetchall()
+
+            if not level1_belongs:
+                # No belongs relationship for entire period
+                self._insert_segment(addr_id, addr_first, addr_last, {})
+            else:
+                # Process each L1 relationship and fill gaps
+                current_year = addr_first
+
+                for l1 in level1_belongs:
+                    l1_start = l1['c_firstyear']
+                    l1_end = l1['c_lastyear']
+                    l1_id = l1['c_belongs_to']
+
+                    # If there's a gap before this L1 relationship
+                    if current_year < l1_start:
+                        # Insert gap record with only L1 (no deeper levels)
+                        gap_chain = {'level1': {
+                            'id': l1_id,
+                            'start': current_year,
+                            'end': l1_start - 1
+                        }}
+                        self._insert_segment(addr_id, current_year, l1_start - 1, gap_chain)
+
+                    # Process the actual L1 period with its nested relationships
+                    self._process_level1_with_gaps(addr_id, l1_id, l1_start, l1_end)
+
+                    current_year = l1_end + 1
+
+                # Fill gap at the end if needed
+                if addr_last is not None and current_year <= addr_last:
+                    # Use the last L1 belongs for the gap
+                    if level1_belongs:
+                        last_l1 = level1_belongs[-1]
+                        gap_chain = {'level1': {
+                            'id': last_l1['c_belongs_to'],
+                            'start': current_year,
+                            'end': addr_last
+                        }}
+                        self._insert_segment(addr_id, current_year, addr_last, gap_chain)
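+
+    # Worked trace of the gap filling above (all ids and years are made up):
+    # address 1000 is valid 600-900 and has cleaned L1 rows 650-700 -> 2000
+    # and 750-800 -> 3000.  The loop emits the segments
+    #   600-649 -> 2000  (gap before the first L1, borrowed from it)
+    #   650-700 -> 2000  (real period, expanded into deeper levels below)
+    #   701-749 -> 3000  (gap before the second L1, borrowed from it)
+    #   750-800 -> 3000  (real period)
+    #   801-900 -> 3000  (trailing gap, borrowed from the last L1)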
+
+    def _process_level1_with_gaps(self, addr_id: int, l1_id: int, l1_start: int, l1_end: int):
+        """
+        Process a Level 1 belongs period, filling gaps in Level 2+ relationships
+        """
+        if l1_start is None or l1_end is None:
+            return
+
+        # Get Level 2 relationships for this L1
+        self.cursor.execute("""
+            SELECT DISTINCT c_belongs_to, c_firstyear, c_lastyear
+            FROM CLEANED_BELONGS_DATA
+            WHERE c_addr_id = ?
+              AND c_firstyear <= ?
+              AND c_lastyear >= ?
+            ORDER BY c_firstyear
+        """, (l1_id, l1_end, l1_start))
+
+        level2_belongs = self.cursor.fetchall()
+
+        if not level2_belongs:
+            # No Level 2 for entire L1 period
+            chain = {'level1': {'id': l1_id, 'start': l1_start, 'end': l1_end}}
+            self._insert_segment(addr_id, l1_start, l1_end, chain)
+        else:
+            # Process L2 relationships and fill gaps
+            current_year = l1_start
+
+            for l2 in level2_belongs:
+                # Calculate intersection with L1 period
+                l2_effective_start = max(l2['c_firstyear'], l1_start)
+                l2_effective_end = min(l2['c_lastyear'], l1_end)
+
+                if l2_effective_start > l2_effective_end:
+                    continue
+
+                # Fill gap before this L2 if needed
+                if current_year < l2_effective_start:
+                    gap_chain = {
+                        'level1': {'id': l1_id, 'start': current_year, 'end': l2_effective_start - 1}
+                    }
+                    self._insert_segment(addr_id, current_year, l2_effective_start - 1, gap_chain)
+
+                # Process the actual L2 period with deeper levels
+                self._process_level2_with_gaps(addr_id, l1_id, l1_start, l1_end,
+                                               l2['c_belongs_to'], l2_effective_start, l2_effective_end)
+
+                current_year = l2_effective_end + 1
+
+            # Fill gap at the end of L1 period if needed
+            if current_year <= l1_end:
+                gap_chain = {
+                    'level1': {'id': l1_id, 'start': current_year, 'end': l1_end}
+                }
+                self._insert_segment(addr_id, current_year, l1_end, gap_chain)
+
+    def _process_level2_with_gaps(self, addr_id: int, l1_id: int, l1_start: int, l1_end: int,
+                                  l2_id: int, l2_start: int, l2_end: int):
+        """
+        Process Level 2 and deeper, continuing to fill gaps
+        """
+        if l2_start is None or l2_end is None:
+            return
+
+        # Get Level 3 relationships
+        self.cursor.execute("""
+            SELECT DISTINCT c_belongs_to, c_firstyear, c_lastyear
+            FROM CLEANED_BELONGS_DATA
+            WHERE c_addr_id = ?
+              AND c_firstyear <= ?
+              AND c_lastyear >= ?
+            ORDER BY c_firstyear
+        """, (l2_id, l2_end, l2_start))
+
+        level3_belongs = self.cursor.fetchall()
+
+        if not level3_belongs:
+            # No Level 3 for entire L2 period
+            chain = {
+                'level1': {'id': l1_id, 'start': l1_start, 'end': l1_end},
+                'level2': {'id': l2_id, 'start': l2_start, 'end': l2_end}
+            }
+            self._insert_segment(addr_id, l2_start, l2_end, chain)
+        else:
+            # Process L3 relationships and fill gaps
+            current_year = l2_start
+
+            for l3 in level3_belongs:
+                # Calculate intersection
+                l3_effective_start = max(l3['c_firstyear'], l2_start)
+                l3_effective_end = min(l3['c_lastyear'], l2_end)
+
+                if l3_effective_start > l3_effective_end:
+                    continue
+
+                # Fill gap before this L3
+                if current_year < l3_effective_start:
+                    gap_chain = {
+                        'level1': {'id': l1_id, 'start': l1_start, 'end': l1_end},
+                        'level2': {'id': l2_id, 'start': current_year, 'end': l3_effective_start - 1}
+                    }
+                    self._insert_segment(addr_id, current_year, l3_effective_start - 1, gap_chain)
+
+                # Create segment with L3
+                chain = {
+                    'level1': {'id': l1_id, 'start': l1_start, 'end': l1_end},
+                    'level2': {'id': l2_id, 'start': l2_start, 'end': l2_end},
+                    'level3': {'id': l3['c_belongs_to'], 'start': l3_effective_start, 'end': l3_effective_end}
+                }
+
+                # Continue to L4 and L5 if needed
+                self._process_deeper_levels(addr_id, chain, l3['c_belongs_to'],
+                                            l3_effective_start, l3_effective_end, 3)
+
+                current_year = l3_effective_end + 1
+
+            # Fill gap at end of L2 period
+            if current_year <= l2_end:
+                gap_chain = {
+                    'level1': {'id': l1_id, 'start': l1_start, 'end': l1_end},
+                    'level2': {'id': l2_id, 'start': current_year, 'end': l2_end}
+                }
+                self._insert_segment(addr_id, current_year, l2_end, gap_chain)
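+
+    # Note: the WHERE clauses above (c_firstyear <= period_end AND
+    # c_lastyear >= period_start) are the standard interval-overlap test, so a
+    # parent's belongs row is considered only if its years intersect the
+    # child's period; the intersection is then clamped with max()/min() before
+    # recursing to the next level.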
+
+    def _process_deeper_levels(self, addr_id: int, chain: Dict, parent_id: int,
+                               start: int, end: int, current_level: int):
+        """
+        Generic processor for levels 4 and 5
+        """
+        if start is None or end is None:
+            return
+
+        if current_level >= 5:
+            # Already at max depth, save the segment
+            self._insert_segment(addr_id, start, end, chain)
+            return
+
+        next_level = current_level + 1
+
+        # Get next level relationships
+        self.cursor.execute("""
+            SELECT DISTINCT c_belongs_to, c_firstyear, c_lastyear
+            FROM CLEANED_BELONGS_DATA
+            WHERE c_addr_id = ?
+              AND c_firstyear <= ?
+              AND c_lastyear >= ?
+            ORDER BY c_firstyear
+        """, (parent_id, end, start))
+
+        next_belongs = self.cursor.fetchall()
+
+        if not next_belongs:
+            # No deeper level, save current chain
+            self._insert_segment(addr_id, start, end, chain)
+        else:
+            # Process with gaps
+            current_year = start
+
+            for nb in next_belongs:
+                nb_start = max(nb['c_firstyear'], start)
+                nb_end = min(nb['c_lastyear'], end)
+
+                if nb_start > nb_end:
+                    continue
+
+                # Fill gap before
+                if current_year < nb_start:
+                    self._insert_segment(addr_id, current_year, nb_start - 1, chain)
+
+                # Create new chain with next level
+                new_chain = chain.copy()
+                new_chain[f'level{next_level}'] = {
+                    'id': nb['c_belongs_to'],
+                    'start': nb_start,
+                    'end': nb_end
+                }
+
+                # Continue deeper
+                self._process_deeper_levels(addr_id, new_chain, nb['c_belongs_to'],
+                                            nb_start, nb_end, next_level)
+
+                current_year = nb_end + 1
+
+            # Fill gap at end
+            if current_year <= end:
+                self._insert_segment(addr_id, current_year, end, chain)
+
+    def _insert_segment(self, addr_id: int, start: int, end: int, chain: Dict):
+        """Insert a time segment record"""
+        if start is None or end is None:
+            return
+
+        values = [addr_id, start, end, str(chain)]
+
+        # Add level information
+        for i in range(1, 6):
+            if f'level{i}' in chain:
+                values.extend([
+                    chain[f'level{i}']['id'],
+                    chain[f'level{i}'].get('start', start),
+                    chain[f'level{i}'].get('end', end)
+                ])
+            else:
+                values.extend([None, None, None])
+
+        placeholders = ','.join(['?' for _ in values])
+        self.execute(f"""
+            INSERT INTO TIME_SEGMENTS VALUES ({placeholders})
+        """, tuple(values))
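+
+    # Shape of the `chain` dict consumed above (ids and years are illustrative):
+    #   {'level1': {'id': 2000, 'start': 650, 'end': 700},
+    #    'level2': {'id': 5000, 'start': 660, 'end': 690}}
+    # Missing levels pad their three TIME_SEGMENTS columns with NULLs, so every
+    # row carries 4 + 5 * 3 = 19 values, matching the table definition.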
+
+    def build_final_addresses_table(self):
+        """Build final ADDRESSES table"""
+        logger.info("Building final ADDRESSES table...")
+
+        # Drop old table
+        self.execute("DROP TABLE IF EXISTS ADDRESSES")
+
+        # Create new table matching Michael's structure
+        self.execute("""
+            CREATE TABLE ADDRESSES (
+                c_addr_id INTEGER,
+                c_name TEXT,
+                c_name_chn TEXT,
+                c_admin_type TEXT,
+                c_firstyear INTEGER,
+                c_lastyear INTEGER,
+                c_belongs_firstyear INTEGER,
+                c_belongs_lastyear INTEGER,
+                x_coord REAL,
+                y_coord REAL,
+                belongs1_ID INTEGER,
+                belongs1_Name TEXT,
+                belongs1_Name_chn TEXT,
+                belongs2_ID INTEGER,
+                belongs2_Name TEXT,
+                belongs2_Name_chn TEXT,
+                belongs3_ID INTEGER,
+                belongs3_Name TEXT,
+                belongs3_Name_chn TEXT,
+                belongs4_ID INTEGER,
+                belongs4_Name TEXT,
+                belongs4_Name_chn TEXT,
+                belongs5_ID INTEGER,
+                belongs5_Name TEXT,
+                belongs5_Name_chn TEXT
+            )
+        """)
+
+        # Build final data from TIME_SEGMENTS
+        self.execute("""
+            INSERT INTO ADDRESSES
+            SELECT
+                ts.c_addr_id,
+                ac.c_name,
+                ac.c_name_chn,
+                ac.c_admin_type,
+                ac.c_firstyear,
+                ac.c_lastyear,
+                ts.segment_start as c_belongs_firstyear,
+                ts.segment_end as c_belongs_lastyear,
+                ac.x_coord,
+                ac.y_coord,
+                ts.level1_id,
+                a1.c_name,
+                a1.c_name_chn,
+                ts.level2_id,
+                a2.c_name,
+                a2.c_name_chn,
+                ts.level3_id,
+                a3.c_name,
+                a3.c_name_chn,
+                ts.level4_id,
+                a4.c_name,
+                a4.c_name_chn,
+                ts.level5_id,
+                a5.c_name,
+                a5.c_name_chn
+            FROM TIME_SEGMENTS ts
+            JOIN ADDR_CODES ac ON ts.c_addr_id = ac.c_addr_id
+            LEFT JOIN ADDR_CODES a1 ON ts.level1_id = a1.c_addr_id
+            LEFT JOIN ADDR_CODES a2 ON ts.level2_id = a2.c_addr_id
+            LEFT JOIN ADDR_CODES a3 ON ts.level3_id = a3.c_addr_id
+            LEFT JOIN ADDR_CODES a4 ON ts.level4_id = a4.c_addr_id
+            LEFT JOIN ADDR_CODES a5 ON ts.level5_id = a5.c_addr_id
+            ORDER BY ts.c_addr_id, ts.segment_start
+        """)
+
+        count = self.cursor.rowcount
+        logger.info(f"ADDRESSES table created with {count} records")
+
+        # Verify example cases
+        self._verify_example_cases()
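+
+    # Example query against the finished table (sketch only; 100149 is the
+    # Jiangle case verified below):
+    #   SELECT c_belongs_firstyear, c_belongs_lastyear, belongs1_Name_chn
+    #   FROM ADDRESSES
+    #   WHERE c_addr_id = 100149
+    #   ORDER BY c_belongs_firstyear;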
+
+    def _verify_example_cases(self):
+        """Verify the specific cases mentioned in Michael's emails"""
+        # Check Jiangle (100149)
+        logger.info("Verifying Jiangle (100149)...")
+        self.cursor.execute("""
+            SELECT c_belongs_firstyear, c_belongs_lastyear,
+                   belongs1_Name_chn, belongs2_Name_chn, belongs3_Name_chn
+            FROM ADDRESSES
+            WHERE c_addr_id = 100149
+            ORDER BY c_belongs_firstyear
+        """)
+
+        results = self.cursor.fetchall()
+        if results:
+            logger.info(f"Jiangle has {len(results)} records:")
+            for row in results:
+                logger.info(f"  {row['c_belongs_firstyear']}-{row['c_belongs_lastyear']}: "
+                            f"{row['belongs1_Name_chn']} -> {row['belongs2_Name_chn'] or ''} -> "
+                            f"{row['belongs3_Name_chn'] or ''}")
+
+        # Check Jun county (4524) if it exists
+        self.cursor.execute("""
+            SELECT c_belongs_firstyear, c_belongs_lastyear,
+                   belongs1_Name_chn, belongs2_Name_chn, belongs3_Name_chn, belongs4_Name_chn
+            FROM ADDRESSES
+            WHERE c_addr_id = 4524
+            ORDER BY c_belongs_firstyear
+            LIMIT 10
+        """)
+
+        results = self.cursor.fetchall()
+        if results:
+            logger.info(f"\nJun county (4524), showing the first {len(results)} records:")
+            for row in results:
+                logger.info(f"  {row['c_belongs_firstyear']}-{row['c_belongs_lastyear']}: "
+                            f"{row['belongs1_Name_chn']} -> {row['belongs2_Name_chn'] or ''} -> "
+                            f"{row['belongs3_Name_chn'] or ''} -> {row['belongs4_Name_chn'] or ''}")
+
+    def run(self):
+        """Execute complete build process"""
+        try:
+            logger.info("="*60)
+            logger.info("Starting address hierarchy build with gap preservation...")
+            logger.info("="*60)
+
+            # 1. Clean data
+            self.clean_belongs_data()
+
+            # 2. Build time segments with gaps
+            self.build_time_segments_with_gaps()
+
+            # 3. Generate final table
+            self.build_final_addresses_table()
+
+            logger.info("="*60)
+            logger.info("Build completed!")
+            logger.info("="*60)
+
+        except Exception as e:
+            logger.error(f"Build process error: {e}")
+            import traceback
+            traceback.print_exc()
+            raise
+
+
+# Usage
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Build the ADDRESSES table from the CBDB SQLite database.")
+    parser.add_argument("--db", default="latest.db", help="Path to the SQLite database file to process")
+    args = parser.parse_args()
+
+    with AddressHierarchyBuilder(args.db) as builder:
+        builder.run()
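+
+# Example invocations (database paths are illustrative):
+#   python scripts/create_addresses_table.py                 # uses latest.db
+#   python scripts/create_addresses_table.py --db cbdb.db    # a custom database file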