@@ -43,15 +43,15 @@ class Analysis(dj.Computed):
4343- Path derived from content hash (SHA256)
4444- Many-to-one: multiple rows can reference same object
4545- Reference counted for garbage collection
46- - Returns ` ObjectRef ` for lazy access (same as regular OAS )
46+ - **Transparent access**: Returns the same type as the internal variant (Python object or local file path)
4747
4848``` python
4949class ProcessedData (dj .Computed ):
5050 definition = """
5151 -> RawData
5252 ---
53- features : <djblob@main> # Serialized Python object, deduplicated
54- source_file : <attach@main> # File attachment, deduplicated
53+ features : <djblob@main> # Returns Python object (fetched transparently)
54+ source_file : <attach@main> # Returns local file path (downloaded transparently)
5555 """
5656```
5757
@@ -118,23 +118,27 @@ def garbage_collect(schema):
118118 (ContentRegistry() & {' content_hash' : content_hash}).delete()
119119```
120120
121- ### ObjectRef for Content-Addressed Objects
121+ ### Transparent Access for Content-Addressed Objects
122122
123- Content-addressed objects return ` ObjectRef ` just like regular OAS objects :
123+ Content-addressed types (`<djblob@store>`, `<attach@store>`) return the same types as their internal counterparts (`<djblob>`, `<attach>`):
124124
125125``` python
126126row = (ProcessedData & key).fetch1()
127127
128- # Both return ObjectRef
129- results_ref = row[' features' ] # <djblob@store>
130- file_ref = row[' source_file' ] # <attach@store>
128+ # <djblob@store> returns Python object (like <djblob>)
129+ features = row[' features' ] # dict, array, etc. - fetched and deserialized
131130
132- # Same interface as regular OAS
133- results_ref.download(' /local/path' )
134- data = results_ref.load() # For djblob: deserialize
135- local_path = file_ref.download() # For attach: download, return path
131+ # <attach@store> returns local file path (like <attach>)
132+ local_path = row[' source_file' ] # '/downloads/data.csv' - downloaded automatically
133+
134+ # Only object@store returns ObjectRef for explicit lazy access
135+ ref = row[' results' ] # ObjectRef - user controls when to download
136136```
137137
138+ This makes external storage transparent for `<djblob@store>` and `<attach@store>` - users work
139+ with Python objects and file paths, not storage references. For these two types, the `@store`
140+ suffix only affects where data is stored, not how it's accessed.
141+
138142## AttributeType Implementations
139143
140144### ` <djblob> ` - Internal Serialized Blob
@@ -180,13 +184,12 @@ class DJBlobExternalType(AttributeType):
180184
181185 return content_hash
182186
183- def decode (self , content_hash , * , key = None , store = None ) -> ObjectRef:
184- # Return ObjectRef for lazy access
185- return ObjectRef(
186- path = content_path(content_hash),
187- store = store,
188- loader = blob.unpack # Custom loader for deserialization
189- )
187+ def decode (self , content_hash , * , key = None , store = None ) -> Any:
188+ # Fetch and deserialize - transparent to user
189+ from . import blob
190+ path = content_path(content_hash)
191+ data = store.get(path)
192+ return blob.unpack(data)
190193```
191194
192195### ` <attach> ` - Internal File Attachment
@@ -227,7 +230,7 @@ class AttachExternalType(AttributeType):
227230 path.name.encode() + b " \0 " + data
228231 ).hexdigest()
229232
230- # Store as folder with original filename preserved
233+ # Store with original filename preserved
231234 obj_path = content_path(content_hash)
232235 if not store.exists(obj_path):
233236 store.put(f " { obj_path} / { path.name} " , data)
@@ -239,26 +242,29 @@ class AttachExternalType(AttributeType):
239242
240243 return content_hash
241244
242- def decode (self , content_hash , * , key = None , store = None ) -> ObjectRef:
243- return ObjectRef(
244- path = content_path(content_hash),
245- store = store,
246- # ObjectRef handles file download
247- )
245+ def decode (self , content_hash , * , key = None , store = None ) -> str :
246+ # Download and return local path - transparent to user
247+ obj_path = content_path(content_hash)
248+ # List to get filename (stored as {hash}/{filename})
249+ filename = store.list(obj_path)[0 ]
250+ download_path = Path(dj.config[' download_path' ]) / filename
251+ download_path.parent.mkdir(parents = True , exist_ok = True )
252+ store.download(f " { obj_path} / { filename} " , download_path)
253+ return str (download_path)
248254```
249255
250- ## Unified ObjectRef Interface
256+ ## ObjectRef Interface (for ` object@store ` only)
251257
252- All external storage (both path-addressed and content-addressed) returns ` ObjectRef ` :
258+ Only ` object@store ` returns ` ObjectRef ` for explicit lazy access. This is intentional -
259+ large files and folders (Zarr, HDF5, etc.) benefit from user-controlled download/access.
253260
254261``` python
255262class ObjectRef :
256- """ Lazy reference to stored object."""
263+ """ Lazy reference to stored object (object@store only) ."""
257264
258- def __init__ (self , path , store , loader = None ):
265+ def __init__ (self , path , store ):
259266 self .path = path
260267 self .store = store
261- self ._loader = loader # Optional custom deserializer
262268
263269 def download (self , local_path = None ) -> Path:
264270 """ Download object to local filesystem."""
@@ -267,35 +273,33 @@ class ObjectRef:
267273 self .store.download(self .path, local_path)
268274 return local_path
269275
270- def load (self ) -> Any:
271- """ Load and optionally deserialize object."""
272- data = self .store.get(self .path)
273- if self ._loader:
274- return self ._loader(data)
275- return data
276-
277276 def open (self , mode = ' rb' ):
278- """ Open via fsspec for streaming access."""
277+ """ Open via fsspec for streaming/direct access."""
279278 return self .store.open(self .path, mode)
279+
280+ def exists (self ) -> bool :
281+ """ Check if object exists in store."""
282+ return self .store.exists(self .path)
280283```
281284
282285## Summary
283286
284287| Type | Storage | Column | Dedup | Returns |
285288| ------| ---------| --------| -------| ---------|
286- | ` object@store ` | ` {schema}/{table}/{pk}/ ` | JSON | No | ObjectRef |
289+ | ` object@store ` | ` {schema}/{table}/{pk}/ ` | JSON | No | ObjectRef (lazy) |
287290| ` <djblob> ` | Internal DB | LONGBLOB | No | Python object |
288- | ` <djblob@store> ` | ` _content/{hash}/ ` | char(64) | Yes | ObjectRef |
289- | ` <attach> ` | Internal DB | LONGBLOB | No | Local path |
290- | ` <attach@store> ` | ` _content/{hash}/ ` | char(64) | Yes | ObjectRef |
291+ | ` <djblob@store> ` | ` _content/{hash}/ ` | char(64) | Yes | Python object |
292+ | ` <attach> ` | Internal DB | LONGBLOB | No | Local file path |
293+ | ` <attach@store> ` | ` _content/{hash}/ ` | char(64) | Yes | Local file path |
291294
292295## Key Design Decisions
293296
2942971 . ** Unified OAS paradigm** : All external storage uses OAS infrastructure
2952982 . ** Content-addressed region** : ` _content/ ` folder for deduplicated objects
2962993 . ** Reference counting** : Via ` ContentRegistry ` table + query-based orphan detection
297- 4 . ** ObjectRef everywhere** : External types return ObjectRef for consistent lazy access
298- 5 . ** Deduplication** : Content hash determines identity; identical content stored once
300+ 4. **Transparent access**: `<djblob@store>` and `<attach@store>` return the same types as their internal variants
301+ 5. **Lazy access for objects**: Only `object@store` returns `ObjectRef` (for large files/folders)
302+ 6 . ** Deduplication** : Content hash determines identity; identical content stored once
299303
300304## Migration from Legacy ` ~external_* `
301305
0 commit comments