@@ -463,290 +463,35 @@ def resolve_dtype(
463463 return dtype , chain , store_name
464464
465465
466- # =============================================================================
467- # Built-in Attribute Types
468- # =============================================================================
469-
470-
class DJBlobType(AttributeType):
    """
    Attribute type for DataJoint's native object serialization (``<djblob>``).

    Arbitrary Python objects (NumPy arrays, dictionaries, lists, etc.) are
    converted to and from DataJoint's binary blob format, which provides:

    - Protocol headers (``mYm`` for MATLAB-compatible, ``dj0`` for Python-native)
    - Optional compression (zlib)
    - Support for NumPy arrays, datetime objects, UUIDs, and nested structures

    ``<djblob>`` is the explicit way to request this serialization; the
    serialized bytes are kept in a MySQL ``LONGBLOB`` column.

    Example:
        @schema
        class ProcessedData(dj.Manual):
            definition = '''
            data_id : int
            ---
            results : <djblob>      # Serialized Python objects
            raw_bytes : longblob    # Raw bytes (no serialization)
            '''

    Note:
        A plain ``longblob`` column stores and returns raw bytes with no
        serialization step. Choose ``<djblob>`` when Python objects should be
        serialized automatically. Schemas that relied on implicit blob
        serialization should migrate to ``<djblob>`` with
        ``dj.migrate.migrate_blob_columns()``.
    """

    # Name under which the type is referenced in table definitions.
    type_name = "djblob"
    # Underlying column type that holds the serialized bytes.
    dtype = "longblob"

    def encode(self, value: Any, *, key: dict | None = None) -> bytes:
        """
        Pack a Python object into DataJoint's blob format.

        Args:
            value: Any serializable Python object (dict, list, numpy array, etc.)
            key: Primary key values; not used by blob serialization.

        Returns:
            The result of ``blob.pack`` called with compression enabled.
        """
        # Imported at call time, as in the rest of this module's type methods.
        from . import blob

        packed = blob.pack(value, compress=True)
        return packed

    def decode(self, stored: bytes, *, key: dict | None = None) -> Any:
        """
        Unpack DataJoint blob bytes back into a Python object.

        Args:
            stored: Serialized blob bytes.
            key: Primary key values; not used by blob serialization.

        Returns:
            The result of ``blob.unpack`` called with ``squeeze=False``.
        """
        from . import blob

        restored = blob.unpack(stored, squeeze=False)
        return restored
534-
535-
class DJBlobExternalType(AttributeType):
    """
    Attribute type for DataJoint blobs kept in external storage.

    Behaves like ``<djblob>`` with respect to serialization, but the data is
    placed in external blob storage rather than inline in the database, which
    is useful for large objects.

    The store name is given where the column type is declared.

    Example:
        @schema
        class LargeData(dj.Manual):
            definition = '''
            data_id : int
            ---
            large_array : blob@mystore  # External storage with auto-serialization
            '''
    """

    # Note: not directly usable through <djblob_external> syntax;
    # it is used internally when the blob@store syntax is detected.
    type_name = "djblob_external"
    dtype = "blob@store"  # Placeholder - actual store is determined at declaration time

    def encode(self, value: Any, *, key: dict | None = None) -> bytes:
        """Pack a Python object into DataJoint's blob format (compressed)."""
        from . import blob

        packed = blob.pack(value, compress=True)
        return packed

    def decode(self, stored: bytes, *, key: dict | None = None) -> Any:
        """Unpack DataJoint blob bytes back into a Python object."""
        from . import blob

        restored = blob.unpack(stored, squeeze=False)
        return restored


class ContentType(AttributeType):
    """
    Attribute type for content-addressed storage with deduplication (``<content>``).

    Data is identified by its SHA256 hash and laid out in a hierarchical
    directory structure. Storing the same bytes twice results in a single
    stored copy.

    The database column holds JSON metadata (content hash, store name, size);
    the content itself lives in external storage.

    The type mainly serves as a building block for other types such as
    ``<xblob>`` and ``<xattach>``, but may be used directly for raw binary
    content.

    Example:
        @schema
        class RawContent(dj.Manual):
            definition = '''
            content_id : int
            ---
            data : <content@mystore>  # Content-addressed storage
            '''

        # Insert raw bytes
        table.insert1({'content_id': 1, 'data': b'raw binary content'})

        # Fetch returns the original bytes
        data = (table & 'content_id=1').fetch1('data')
        assert data == b'raw binary content'

    Storage Structure:
        Content is stored at: ``_content/{hash[:2]}/{hash[2:4]}/{hash}``
        The hierarchy keeps any single directory from holding too many files.

    Note:
        A store must be given for ``<content>`` unless a default store is
        configured. Use ``<content@store_name>`` syntax to specify the store.
    """

    type_name = "content"
    dtype = "json"

    @staticmethod
    def _require_bytes(value: Any) -> None:
        """Raise ``TypeError`` unless ``value`` is a ``bytes`` instance."""
        if isinstance(value, bytes):
            return
        raise TypeError(f"<content> type expects bytes, got {type(value).__name__}")

    def encode(self, value: bytes, *, key: dict | None = None, store_name: str | None = None) -> dict:
        """
        Store raw bytes and return the resulting metadata.

        The bytes are handed to ``put_content`` from the content registry,
        which performs content-addressed (hash-based, deduplicated) storage.

        Args:
            value: Raw bytes to store.
            key: Primary key values; not used for content storage.
            store_name: Store to use. If None, uses default store from config.

        Returns:
            Metadata dict with keys: hash, store, size

        Raises:
            TypeError: If value is not bytes.
        """
        # Reject non-bytes input before touching the storage layer.
        self._require_bytes(value)

        from .content_registry import put_content

        metadata = put_content(value, store_name=store_name)
        return metadata

    def decode(self, stored: dict, *, key: dict | None = None) -> bytes:
        """
        Fetch content identified by the hash in its metadata.

        Args:
            stored: Metadata dict with 'hash' and optionally 'store' keys.
            key: Primary key values; not used for content retrieval.

        Returns:
            The original bytes.

        Raises:
            MissingExternalFile: If content is not found.
            DataJointError: If hash verification fails.
        """
        from .content_registry import get_content

        # 'hash' is mandatory; 'store' may be absent, in which case None is passed on.
        return get_content(stored["hash"], store_name=stored.get("store"))

    def validate(self, value: Any) -> None:
        """Check that ``value`` is bytes; raise ``TypeError`` otherwise."""
        self._require_bytes(value)


class XBlobType(AttributeType):
    """
    Attribute type for externally stored, deduplicated serialized blobs (``<xblob>``).

    Combines DataJoint's blob serialization with content-addressed storage:
    objects are serialized in the djblob format and the resulting bytes are
    stored externally via content-addressed storage, giving automatic
    deduplication.

    Well suited to large objects (NumPy arrays, pandas DataFrames, etc.) that
    may repeat across rows.

    Example:
        @schema
        class LargeArrays(dj.Manual):
            definition = '''
            array_id : int
            ---
            data : <xblob@mystore>  # External serialized blob with deduplication
            '''

        # Insert NumPy array
        import numpy as np
        table.insert1({'array_id': 1, 'data': np.random.rand(1000, 1000)})

        # Fetch returns the original array
        data = (table & 'array_id=1').fetch1('data')

    Note:
        - For internal storage (in database), use ``<djblob>``
        - For external storage without serialization, use ``<content>``
        - The store parameter is required unless a default store is configured
    """

    type_name = "xblob"
    dtype = "<content>"  # Composition: uses ContentType for storage

    def encode(self, value: Any, *, key: dict | None = None, store_name: str | None = None) -> bytes:
        """
        Serialize a Python object to bytes in DataJoint's blob format.

        The returned bytes are meant to be handed to the underlying
        ``<content>`` type, which takes care of storage.

        Args:
            value: Any serializable Python object.
            key: Primary key values (unused).
            store_name: Store parameter; accepted for the content storage
                layer and not used by the serialization itself.

        Returns:
            Serialized bytes (will be stored by ContentType).
        """
        from . import blob

        packed = blob.pack(value, compress=True)
        return packed

    def decode(self, stored: bytes, *, key: dict | None = None) -> Any:
        """
        Turn serialized bytes back into a Python object.

        Args:
            stored: Serialized bytes retrieved from content storage.
            key: Primary key values (unused).

        Returns:
            The deserialized Python object.
        """
        from . import blob

        restored = blob.unpack(stored, squeeze=False)
        return restored


def get_adapter(context: dict | None, adapter_name: str) -> tuple[AttributeType, str | None]:
    """
    Look up a registered attribute type by name.

    Compatibility entry point used by the heading and declare modules.

    Args:
        context: Ignored (legacy parameter, kept for API compatibility).
        adapter_name: The type name, with or without angle brackets.
            May include store parameter (e.g., "<xblob@cold>").

    Returns:
        Tuple of (AttributeType instance, store_name or None).

    Raises:
        DataJointError: If the type is not found.
    """
    # Split the spec into the bare type name and the optional store part.
    type_name, store_name = parse_type_spec(adapter_name)

    if not is_type_registered(type_name):
        raise DataJointError(f"Attribute type <{type_name}> is not registered. " "Use @dj.register_type to register custom types.")

    return get_type(type_name), store_name
738-
739-
def _register_builtin_types() -> None:
    """
    Register the built-in attribute types shipped with DataJoint.

    Intended to run automatically while the module is being initialized.
    """
    # Registration order matches the original sequence of calls.
    for builtin_type in (DJBlobType, ContentType, XBlobType):
        register_type(builtin_type)
749490
491+ # =============================================================================
492+ # Auto-register built-in types
493+ # =============================================================================
750494
751- # Register built-in types when module is loaded
752- _register_builtin_types ()
495+ # Import builtin_types module to register built-in types (DJBlobType, ContentType, etc.)
496+ # This import has a side effect: it registers the types via @register_type decorators
497+ from . import builtin_types as _builtin_types # noqa: F401, E402
0 commit comments