Skip to content

Commit 685b463

Browse files
authored
Merge pull request #35 from rok4/feature/taille_pyramide
Ajout de la fonction de calcul de la taille d'une pyramide
2 parents 576cb52 + d5a1819 commit 685b463

File tree

4 files changed

+130
-51
lines changed

4 files changed

+130
-51
lines changed

CHANGELOG.md

Lines changed: 0 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,43 +1,5 @@
11
## Summary
22

3-
Lecture facilitée de la liste d'une pyramide. Lecture d'informations sur une donnée raster unique depuis un fichier ou une liste de paramètres. Modification de la gestion des vecteurs.
4-
5-
## Changelog
6-
7-
### [Added]
8-
9-
* Raster
10-
* Chargement des informations sur un fichier raster (chemin du fichier, chemin du fichier de masque si applicable, nombre de canaux, boundingbox de l'emprise géographique)
11-
* depuis le fichier raster
12-
* depuis une liste de paramètres provenant d'une utilisation précédente
13-
* Tests unitaires
14-
* Documentation interne des fonctions et classes
15-
16-
* Pyramid
17-
* Fonctions de gestion de la liste : chargement et lecture (via un generator)
18-
* Taille du header d'une dalle stockée dans la variable `ROK4_IMAGE_HEADER_SIZE`
19-
* La propriété `tile_extension` : retourne l'extension d'une tuile de la pyramide en fonction du format
20-
* Des exemples d'utilisation des fonctions principales
21-
22-
### [Changed]
23-
24-
* Vector
25-
* Utilisation de kwargs pour les paramètres du csv
26-
* Gestion des CSV par OGR
27-
* Passage par get_osgeo_path pour la lecture virtuelle
28-
* 2 constructeurs pour les vecteurs : from_file et from_parameters
29-
30-
* README.md
31-
* Modification du bloc code de compilation pour utiliser explicitement python3, et installer certaines dépendances.
32-
* Utils
33-
* Fonction de calcul de la bounding box d'une donnée
34-
* Fonction de détermination du format de variable des couleurs dans une donnée raster
35-
36-
### [Fixed]
37-
38-
* Storage
39-
* Lecture de la taille d'un objet S3 : pas besoin d'enlever des quotes dans le header `Content-Length`
40-
413
<!--
424
### [Added]
435

src/rok4/Pyramid.py

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -302,6 +302,7 @@ def bbox(self) -> Tuple[float, float, float, float]:
302302
Returns:
303303
Tuple[float, float, float, float]: level terrain extent (xmin, ymin, xmax, ymax)
304304
"""
305+
305306
min_bbox = self.__pyramid.tms.get_level(self.__id).tile_to_bbox(
306307
self.__tile_limits["min_col"], self.__tile_limits["max_row"]
307308
)
@@ -548,7 +549,11 @@ def serializable(self) -> Dict:
548549
Returns:
549550
Dict: descriptor structured object description
550551
"""
551-
serialization = {"tile_matrix_set": self.__tms.name, "format": self.__format}
552+
553+
serialization = {
554+
"tile_matrix_set": self.__tms.name,
555+
"format": self.__format
556+
}
552557

553558
serialization["levels"] = []
554559
sorted_levels = sorted(self.__levels.values(), key=lambda l: l.resolution, reverse=True)
@@ -615,6 +620,7 @@ def storage_root(self) -> str:
615620
Returns:
616621
str: Pyramid's storage root
617622
"""
623+
618624
return self.__storage["root"].split("@", 1)[
619625
0
620626
] # Suppression de l'éventuel hôte de spécification du cluster S3
@@ -664,6 +670,7 @@ def format(self) -> str:
664670

665671
@property
666672
def tile_extension(self) -> str:
673+
667674
if self.__format in [
668675
"TIFF_RAW_UINT8",
669676
"TIFF_LZW_UINT8",
@@ -828,7 +835,7 @@ def get_level(self, level_id: str) -> "Level":
828835
Returns:
829836
The corresponding pyramid's level, None if not present
830837
"""
831-
838+
832839
return self.__levels.get(level_id, None)
833840

834841
def get_levels(self, bottom_id: str = None, top_id: str = None) -> List[Level]:
@@ -913,6 +920,7 @@ def get_levels(self, bottom_id: str = None, top_id: str = None) -> List[Level]:
913920

914921
def write_descriptor(self) -> None:
915922
"""Write the pyramid's descriptor to the final location (in the pyramid's storage root)"""
923+
916924
content = json.dumps(self.serializable)
917925
put_data_str(content, self.__descriptor)
918926

@@ -1011,6 +1019,7 @@ def get_slab_path_from_infos(
10111019
else:
10121020
return slab_path
10131021

1022+
10141023
def get_tile_data_binary(self, level: str, column: int, row: int) -> str:
10151024
"""Get a pyramid's tile as binary string
10161025
@@ -1173,6 +1182,7 @@ def get_tile_data_raster(self, level: str, column: int, row: int) -> numpy.ndarr
11731182
level_object = self.get_level(level)
11741183

11751184
if self.__format == "TIFF_JPG_UINT8" or self.__format == "TIFF_JPG90_UINT8":
1185+
11761186
try:
11771187
img = Image.open(io.BytesIO(binary_tile))
11781188
except Exception as e:
@@ -1350,3 +1360,25 @@ def get_tile_indices(
13501360
x, y = reproject_point((x, y), sr, self.__tms.sr)
13511361

13521362
return (level_object.id,) + level_object.tile_matrix.point_to_indices(x, y)
1363+
1364+
@property
def size(self) -> int:
    """Get the size of the pyramid

    The value is obtained from size_path() applied to the path built from the
    pyramid's storage type, storage root and name. It is computed on first
    access only, then kept on the instance and returned as is afterwards.

    Examples:

        from rok4.Pyramid import Pyramid

        try:
            pyramid = Pyramid.from_descriptor("s3://bucket_name/path/to/descriptor.json")
            # size is a property: read it without parentheses
            size = pyramid.size

        except Exception as e:
            print("Cannot load the pyramid from its descriptor and get its size")

    Returns:
        int: size of the pyramid
    """
    # "_Pyramid__size" is the name-mangled form of self.__size
    # (assumes the enclosing class is named Pyramid)
    if hasattr(self, "_Pyramid__size"):
        return self.__size

    storage = self.__storage
    pyramid_path = get_path_from_infos(storage["type"], storage["root"], self.__name)
    self.__size = size_path(pyramid_path)
    return self.__size

src/rok4/Storage.py

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,7 @@ def __get_s3_client(bucket_name: str) -> Tuple[Dict[str, Union["boto3.client", s
128128

129129
def disconnect_s3_clients() -> None:
130130
"""Clean S3 clients"""
131+
131132
global __S3_CLIENTS, __S3_DEFAULT_CLIENT
132133
__S3_CLIENTS = {}
133134
__S3_DEFAULT_CLIENT = None
@@ -180,6 +181,7 @@ def __get_ceph_ioctx(pool: str) -> "rados.Ioctx":
180181

181182
def disconnect_ceph_clients() -> None:
182183
"""Clean CEPH clients"""
184+
183185
global __CEPH_CLIENT, __CEPH_IOCTXS
184186
__CEPH_CLIENT = None
185187
__CEPH_IOCTXS = {}
@@ -907,3 +909,59 @@ def get_osgeo_path(path: str) -> str:
907909

908910
else:
909911
raise NotImplementedError(f"Cannot get a GDAL/OGR compliant path from {path}")
912+
913+
def size_path(path: str) -> int:
    """Return the cumulated size, in bytes, of everything stored under the given path

    * FILE: sizes of the files found in the directory are summed, sub-directories
      being browsed recursively.
    * S3: sizes of all the objects whose key starts with "<base name>/" are summed.
      A prefix matching no object gives a size of 0.
    * CEPH: not handled yet.

    Args:
        path (str): Source path

    Raises:
        StorageError: Directory browsing issue, S3 listing issue or unhandled storage type
        MissingEnvironmentError: Missing object storage informations
        NotImplementedError: Size calculation requested for a CEPH path

    Returns:
        int: size of the path
    """
    storage_type, unprefixed_path, tray_name, base_name = get_infos_from_path(path)

    if storage_type == StorageType.FILE:
        total = 0
        try:
            with os.scandir(unprefixed_path) as entries:
                for entry in entries:
                    if entry.is_file():
                        total += entry.stat().st_size
                    elif entry.is_dir():
                        total += size_path(entry.path)

        except StorageError:
            # Raised by a recursive call: already wrapped, do not wrap it a second time
            raise
        except Exception as e:
            raise StorageError("FILE", e) from e

    elif storage_type == StorageType.S3:
        s3_client, bucket_name = __get_s3_client(tray_name)

        total = 0
        try:
            paginator = s3_client["client"].get_paginator("list_objects_v2")
            pages = paginator.paginate(
                Bucket=bucket_name,
                Prefix=base_name + "/",
                PaginationConfig={
                    "PageSize": 10000,
                },
            )
            for page in pages:
                # "Contents" is missing from a page when no key matches the prefix
                for s3_object in page.get("Contents", []):
                    total += s3_object["Size"]

        except Exception as e:
            raise StorageError("S3", e) from e

    elif storage_type == StorageType.CEPH:
        raise NotImplementedError("Size calculation is not implemented for CEPH storage")
    else:
        raise StorageError("UNKNOWN", "Unhandled storage type to calculate size")

    return total

tests/test_Storage.py

Lines changed: 38 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@ def test_hash_file_ok(mock_file):
2121
except Exception as exc:
2222
assert False, f"FILE md5 sum raises an exception: {exc}"
2323

24-
2524
@mock.patch.dict(os.environ, {}, clear=True)
2625
def test_get_infos_from_path():
2726
assert (StorageType.S3, "toto/titi", "toto", "titi") == get_infos_from_path("s3://toto/titi")
@@ -104,7 +103,6 @@ def test_file_read_ok(mock_file):
104103
except Exception as exc:
105104
assert False, f"FILE read raises an exception: {exc}"
106105

107-
108106
@mock.patch.dict(
109107
os.environ,
110108
{"ROK4_S3_URL": "https://a,https://b", "ROK4_S3_SECRETKEY": "a,b", "ROK4_S3_KEY": "a,b"},
@@ -119,7 +117,6 @@ def test_s3_read_nok(mocked_s3_client):
119117
with pytest.raises(StorageError):
120118
data = get_data_str("s3://bucket/path/to/object")
121119

122-
123120
@mock.patch.dict(
124121
os.environ,
125122
{"ROK4_S3_URL": "https://a,https://b", "ROK4_S3_SECRETKEY": "a,b", "ROK4_S3_KEY": "a,b"},
@@ -165,7 +162,6 @@ def test_ceph_read_ok(mocked_rados_client):
165162

166163
############ put_data_str
167164

168-
169165
@mock.patch.dict(
170166
os.environ,
171167
{"ROK4_S3_URL": "https://a,https://b", "ROK4_S3_SECRETKEY": "a,b", "ROK4_S3_KEY": "a,b"},
@@ -377,7 +373,6 @@ def test_copy_ceph_file_ok(mock_file, mock_makedirs, mocked_rados_client):
377373
except Exception as exc:
378374
assert False, f"CEPH -> FILE copy raises an exception: {exc}"
379375

380-
381376
@mock.patch.dict(
382377
os.environ,
383378
{"ROK4_CEPH_CONFFILE": "a", "ROK4_CEPH_CLUSTERNAME": "b", "ROK4_CEPH_USERNAME": "c"},
@@ -402,7 +397,6 @@ def test_copy_file_ceph_ok(mock_file, mocked_rados_client):
402397
except Exception as exc:
403398
assert False, f"FILE -> CEPH copy raises an exception: {exc}"
404399

405-
406400
@mock.patch.dict(
407401
os.environ,
408402
{"ROK4_CEPH_CONFFILE": "a", "ROK4_CEPH_CLUSTERNAME": "b", "ROK4_CEPH_USERNAME": "c"},
@@ -480,7 +474,6 @@ def test_link_hard_nok():
480474
with pytest.raises(StorageError):
481475
link("ceph://pool1/source.ext", "ceph://pool2/destination.ext", True)
482476

483-
484477
@mock.patch.dict(os.environ, {}, clear=True)
485478
@mock.patch("os.symlink", return_value=None)
486479
def test_link_file_ok(mock_link):
@@ -500,7 +493,6 @@ def test_hlink_file_ok(mock_link):
500493
except Exception as exc:
501494
assert False, f"FILE hard link raises an exception: {exc}"
502495

503-
504496
@mock.patch.dict(
505497
os.environ,
506498
{"ROK4_CEPH_CONFFILE": "a", "ROK4_CEPH_CLUSTERNAME": "b", "ROK4_CEPH_USERNAME": "c"},
@@ -557,7 +549,6 @@ def test_link_s3_nok(mocked_s3_client):
557549

558550
############ get_size
559551

560-
561552
@mock.patch.dict(os.environ, {}, clear=True)
562553
@mock.patch("os.stat")
563554
def test_size_file_ok(mock_stat):
@@ -697,7 +688,6 @@ def test_remove_file_ok(mock_remove):
697688
except Exception as exc:
698689
assert False, f"FILE deletion (not found) raises an exception: {exc}"
699690

700-
701691
@mock.patch.dict(
702692
os.environ,
703693
{"ROK4_CEPH_CONFFILE": "a", "ROK4_CEPH_CLUSTERNAME": "b", "ROK4_CEPH_USERNAME": "c"},
@@ -753,7 +743,6 @@ def test_get_osgeo_path_file_ok():
753743
except Exception as exc:
754744
assert False, f"FILE osgeo path raises an exception: {exc}"
755745

756-
757746
@mock.patch.dict(
758747
os.environ,
759748
{"ROK4_S3_URL": "https://a,https://b", "ROK4_S3_SECRETKEY": "a,b", "ROK4_S3_KEY": "a,b"},
@@ -773,3 +762,41 @@ def test_get_osgeo_path_s3_ok():
773762
def test_get_osgeo_path_nok():
774763
with pytest.raises(NotImplementedError):
775764
get_osgeo_path("ceph://pool/data.ext")
765+
766+
767+
############ size_path
768+
def test_size_path_file_ok():
    """size_path on a local fixture directory returns the expected cumulated size"""
    expected_size = 124838
    try:
        computed_size = size_path("file://tests/fixtures/TIFF_PBF_MVT")
        assert computed_size == expected_size
    except Exception as exc:
        assert False, f"FILE size of the path raises an exception: {exc}"
774+
775+
def test_size_path_file_nok():
    """size_path raises a StorageError for a FILE path which cannot be browsed

    The path used is the name of the fixture directory of the nominal test with
    its end truncated, so it is presumably absent from tests/fixtures.

    Named test_size_path_* like the other size_path tests, so it cannot be
    confused with (or shadow) a test of the get_size section.
    """
    with pytest.raises(StorageError):
        size_path("file://tests/fixtures/TIFF_PBF_M")
778+
779+
@mock.patch.dict(
    os.environ,
    {"ROK4_CEPH_CONFFILE": "a", "ROK4_CEPH_CLUSTERNAME": "b", "ROK4_CEPH_USERNAME": "c"},
    clear=True,
)
def test_size_path_ceph_nok():
    """size_path is not implemented for CEPH paths and must say so"""
    ceph_path = "ceph://pool/path"

    with pytest.raises(NotImplementedError):
        size_path(ceph_path)
784+
785+
@mock.patch.dict(
    os.environ,
    {"ROK4_S3_URL": "https://a,https://b", "ROK4_S3_SECRETKEY": "a,b", "ROK4_S3_KEY": "a,b"},
    clear=True,
)
@mock.patch("rok4.Storage.boto3.client")
def test_size_path_s3_ok(mocked_s3_client):
    """size_path sums the object sizes over every page of the mocked S3 listing"""
    # Drop any client cached by a previous test so that the mocked one is used
    disconnect_s3_clients()

    # Two listing pages: 10 + 20 on the first one, 50 on the second one
    listing_pages = [
        {"Contents": [{"Size": 10}, {"Size": 20}]},
        {"Contents": [{"Size": 50}]},
    ]
    fake_client = MagicMock()
    fake_client.get_paginator.return_value.paginate.return_value = listing_pages
    mocked_s3_client.return_value = fake_client

    try:
        computed_size = size_path("s3://bucket/path")
        assert computed_size == 80
    except Exception as exc:
        assert False, f"S3 size of the path raises an exception: {exc}"
802+

0 commit comments

Comments
 (0)