-
My goal is to get a dataset from a custom io-zarr backend lazy-loaded. But when I declare a [...] While I have a custom backend array inheriting from [...]
import zarr
import xarray as xr
from tempfile import mkdtemp
import numpy as np
from pathlib import Path
from collections import defaultdict
class AccessTrackingStore(zarr.DirectoryStore):
    """Directory store that records reads of keys matching registered trackers.

    Each ``__getitem__`` call is compared against the tracker strings
    registered through ``set_key_trackers``. For every tracker that is
    contained in the requested key, a hit counter is incremented and the
    requested key is remembered.
    """

    def __init__(self, *args, **kwargs):
        """Forward all arguments to the parent store; start with no trackers."""
        super().__init__(*args, **kwargs)
        # tracker -> set of requested keys that contained the tracker
        self._accessed = defaultdict(set)
        # tracker -> number of reads whose key contained the tracker
        self._access_count = {}

    def __getitem__(self, key):
        """Record the read against every matching tracker, then delegate."""
        for name in self._access_count:
            if name not in key:
                continue
            self._access_count[name] += 1
            self._accessed[name].add(key)
        # Bookkeeping happens before the lookup, so a read is recorded even
        # if the parent lookup raises.
        return super().__getitem__(key)

    def get_access_count(self, key):
        """Return the number of recorded reads for the tracker ``key``.

        Raises ``KeyError`` if ``key`` was never registered as a tracker.
        """
        return self._access_count[key]

    def set_key_trackers(self, keys_to_track):
        """Register one tracker (a ``str``) or an iterable of trackers.

        The counter of every given tracker is set to zero, including
        trackers that were already registered.
        """
        names = [keys_to_track] if isinstance(keys_to_track, str) else keys_to_track
        self._access_count.update((name, 0) for name in names)

    def get_subkeys_accessed(self, key):
        """Return the set of requested keys recorded for the tracker ``key``."""
        return self._accessed[key]
# Reproducer: show which store keys are read when a lazily indexed zarr array
# is wrapped first in an xarray Variable and then in a DataArray.

# Write a 1000x1000 random array into a zarr group in a fresh temp directory.
orig_path = Path(mkdtemp())
z = zarr.group(orig_path / "foo.zarr")
z['array'] = np.random.randn(1000, 1000)

# Re-open the same directory through the tracking store so that every read of
# a key containing 'array' is recorded.
store = AccessTrackingStore(orig_path / "foo.zarr")
store.set_key_trackers(['array'])
z = zarr.group(store)

# Wrap the zarr array the way the xarray zarr backend does, then make it lazy.
arr = xr.backends.zarr.ZarrArrayWrapper(z['array'])
lazy_arr = xr.core.indexing.LazilyIndexedArray(arr)

# just `.zarray`
var = xr.Variable(('x', 'y'), lazy_arr)
print('Variable read in ', store.get_subkeys_accessed('array'))

# now everything is read in
da = xr.DataArray(var)
print('DataArray read in ', store.get_subkeys_accessed('array'))
Beta Was this translation helpful? Give feedback.
Replies: 1 comment
-
Thanks for reporting - this is now an issue (#8753) and is fixed in #8754 (unfortunately, GitHub does not seem to cross-link to discussions), so I am closing this here. Feel free to reopen if you disagree. |
Beta Was this translation helpful? Give feedback.
Thanks for reporting - this is now an issue (#8753) and is fixed in #8754 (unfortunately, GitHub does not seem to cross-link to discussions), so I am closing this here. Feel free to reopen if you disagree.