Skip to content

Commit e0a4772

Browse files
GSK-2788 Refactored artifact upload/download to not be dependent on project (#1797)
* GSK-2788 Problem after migrating to 2.7.0
* GSK-2788 Fixed inspection upload and updated test_ml_worker.py
* GSK-2788 Updated expected artifact upload URL to remove project_key
* GSK-2788 Updated expected artifact upload URL to remove project_key
* GSK-2788 Updated expected artifact upload URL to remove project_key
* GSK-2788 Updated expected artifact upload URL to remove project_key

---------

Co-authored-by: Hartorn <bazire@giskard.ai>
1 parent a2ae079 commit e0a4772

16 files changed

Lines changed: 161 additions & 179 deletions

giskard/core/savable.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -101,14 +101,14 @@ def upload(
101101

102102
name = self._get_name()
103103

104-
local_dir = settings.home_dir / settings.cache_dir / (project_key or "global") / name / self.meta.uuid
104+
local_dir = settings.home_dir / settings.cache_dir / name / self.meta.uuid
105105

106106
if not local_dir.exists():
107107
os.makedirs(local_dir)
108108
self.save(local_dir)
109109
logger.debug(f"Saved {name}.{self.meta.uuid}")
110110

111-
client.log_artifacts(local_dir, posixpath.join(project_key or "global", self._get_name(), self.meta.uuid))
111+
client.log_artifacts(local_dir, posixpath.join(self._get_name(), self.meta.uuid))
112112
self.meta = client.save_meta(self._get_meta_endpoint(self.meta.uuid, project_key), self.meta)
113113

114114
return self.meta.uuid
@@ -134,7 +134,7 @@ def download(cls, uuid: str, client: Optional[GiskardClient], project_key: Optio
134134
"""
135135
name = cls._get_name()
136136

137-
local_dir = settings.home_dir / settings.cache_dir / (project_key or "global") / name / uuid
137+
local_dir = settings.home_dir / settings.cache_dir / name / uuid
138138

139139
if client is None:
140140
meta = cls._load_meta_locally(local_dir, uuid)
@@ -148,7 +148,7 @@ def download(cls, uuid: str, client: Optional[GiskardClient], project_key: Optio
148148

149149
if data is None:
150150
assert client is not None, f"Cannot find existing {name} {uuid}"
151-
client.load_artifact(local_dir, posixpath.join(project_key or "global", name, uuid))
151+
client.load_artifact(local_dir, posixpath.join(name, uuid))
152152
data = cls.load(local_dir, uuid, meta)
153153

154154
return data

giskard/datasets/base/__init__.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -497,7 +497,7 @@ def upload(self, client: GiskardClient, project_key: str):
497497

498498
with tempfile.TemporaryDirectory(prefix="giskard-dataset-") as local_path:
499499
original_size_bytes, compressed_size_bytes = self.save(Path(local_path), dataset_id)
500-
client.log_artifacts(local_path, posixpath.join(project_key, "datasets", dataset_id))
500+
client.log_artifacts(local_path, posixpath.join("datasets", dataset_id))
501501
client.save_dataset_meta(
502502
project_key,
503503
dataset_id,
@@ -573,7 +573,7 @@ def download(cls, client: Optional[GiskardClient], project_key, dataset_id, samp
573573
Returns:
574574
Dataset: A Dataset object that represents the downloaded dataset.
575575
"""
576-
local_dir = settings.home_dir / settings.cache_dir / project_key / "datasets" / dataset_id
576+
local_dir = settings.home_dir / settings.cache_dir / "datasets" / dataset_id
577577

578578
if client is None:
579579
# internal worker case, no token based http client
@@ -589,7 +589,7 @@ def download(cls, client: Optional[GiskardClient], project_key, dataset_id, samp
589589
category_features=saved_meta["category_features"],
590590
)
591591
else:
592-
client.load_artifact(local_dir, posixpath.join(project_key, "datasets", dataset_id))
592+
client.load_artifact(local_dir, posixpath.join("datasets", dataset_id))
593593
meta: DatasetMeta = client.load_dataset_meta(project_key, dataset_id)
594594

595595
df = cls.load(local_dir / get_file_name("data", "csv.zst", sample))

giskard/ml_worker/websocket/listener.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -354,25 +354,25 @@ def run_model(client: Optional[GiskardClient], params: websocket.RunModelParam,
354354
if client:
355355
client.log_artifact(
356356
tmp_dir / predictions_csv,
357-
f"{params.project_key}/models/inspections/{params.inspectionId}",
357+
f"models/inspections/{params.inspectionId}",
358358
)
359359
else:
360360
log_artifact_local(
361361
tmp_dir / predictions_csv,
362-
f"{params.project_key}/models/inspections/{params.inspectionId}",
362+
f"models/inspections/{params.inspectionId}",
363363
)
364364

365365
calculated_csv = get_file_name("calculated", "csv", params.dataset.sample)
366366
calculated.to_csv(index=False, path_or_buf=tmp_dir / calculated_csv)
367367
if client:
368368
client.log_artifact(
369369
tmp_dir / calculated_csv,
370-
f"{params.project_key}/models/inspections/{params.inspectionId}",
370+
f"models/inspections/{params.inspectionId}",
371371
)
372372
else:
373373
log_artifact_local(
374374
tmp_dir / calculated_csv,
375-
f"{params.project_key}/models/inspections/{params.inspectionId}",
375+
f"models/inspections/{params.inspectionId}",
376376
)
377377
return websocket.Empty()
378378

giskard/ml_worker/websocket/utils.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434
)
3535
from giskard.ml_worker.websocket.action import MLWorkerAction
3636
from giskard.models.base import BaseModel
37-
from giskard.path_utils import projects_dir
37+
from giskard.path_utils import artifacts_dir
3838
from giskard.registry.registry import tests_registry
3939
from giskard.registry.slicing_function import SlicingFunction
4040
from giskard.registry.transformation_function import TransformationFunction
@@ -133,9 +133,9 @@ def log_artifact_local(local_file, artifact_path=None):
133133
file_name = os.path.basename(local_file)
134134

135135
if artifact_path:
136-
artifact_file = projects_dir / artifact_path / file_name
136+
artifact_file = artifacts_dir / artifact_path / file_name
137137
else:
138-
artifact_file = projects_dir / file_name
138+
artifact_file = artifacts_dir / file_name
139139
artifact_file.parent.mkdir(parents=True, exist_ok=True)
140140

141141
shutil.copy(local_file, artifact_file)

giskard/models/base/model.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -459,7 +459,7 @@ def upload(self, client: GiskardClient, project_key, validate_ds=None, *_args, *
459459
self.save(f)
460460

461461
if client is not None:
462-
client.log_artifacts(f, posixpath.join(project_key, "models", str(self.id)))
462+
client.log_artifacts(f, posixpath.join("models", str(self.id)))
463463
client.save_model_meta(project_key, self.id, self.meta, platform.python_version(), get_size(f))
464464
return str(self.id)
465465

@@ -479,13 +479,13 @@ def download(cls, client: Optional[GiskardClient], project_key, model_id, *_args
479479
Raises:
480480
AssertionError: If the local directory where the model should be saved does not exist.
481481
"""
482-
local_dir = settings.home_dir / settings.cache_dir / project_key / "models" / model_id
482+
local_dir = settings.home_dir / settings.cache_dir / "models" / model_id
483483
if client is None:
484484
# internal worker case, no token based http client [deprecated, to be removed]
485485
assert local_dir.exists(), f"Cannot find existing model {project_key}.{model_id} in {local_dir}"
486486
meta_response, meta = cls.read_meta_from_local_dir(local_dir)
487487
else:
488-
client.load_artifact(local_dir, posixpath.join(project_key, "models", model_id))
488+
client.load_artifact(local_dir, posixpath.join("models", model_id))
489489
meta_response: ModelMetaInfo = client.load_model_meta(project_key, model_id)
490490
# internal worker case, no token based http client
491491
if not local_dir.exists():

giskard/path_utils.py

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,7 @@
33
from giskard.settings import settings
44

55
run_dir = settings.home_dir / "run"
6-
projects_dir = settings.home_dir / "projects"
7-
8-
9-
def model_path(project_key: str, file_name: str) -> Path:
10-
return projects_dir / project_key / "models" / file_name
11-
12-
13-
def dataset_path(project_key: str, file_name: str) -> Path:
14-
return projects_dir / project_key / "datasets" / file_name
6+
artifacts_dir = settings.home_dir / "artifacts"
157

168

179
def get_size(path: str):

tests/communications/test_websocket_actor.py

Lines changed: 27 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -131,10 +131,10 @@ def test_websocket_actor_run_model_internal(data, model, sample, request):
131131
project_key = str(uuid.uuid4()) # Use a UUID to separate the resources used by the tests
132132
inspection_id = 0
133133

134-
with utils.MockedProjectCacheDir(project_key):
134+
with utils.MockedProjectCacheDir():
135135
# Prepare dataset and model
136-
utils.local_save_model_under_giskard_home_cache(model, project_key)
137-
utils.local_save_dataset_under_giskard_home_cache(dataset, project_key)
136+
utils.local_save_model_under_giskard_home_cache(model)
137+
utils.local_save_dataset_under_giskard_home_cache(dataset)
138138

139139
params = websocket.RunModelParam(
140140
model=websocket.ArtifactRef(project_key=project_key, id=str(model.id)),
@@ -146,7 +146,7 @@ def test_websocket_actor_run_model_internal(data, model, sample, request):
146146
reply = listener.run_model(client=None, params=params)
147147
assert isinstance(reply, websocket.Empty)
148148
# Inspections are logged locally
149-
inspection_path = settings.home_dir / "projects" / project_key / "models" / "inspections" / str(inspection_id)
149+
inspection_path = settings.home_dir / "artifacts" / "models" / "inspections" / str(inspection_id)
150150
assert (inspection_path / get_file_name("predictions", "csv", sample)).exists()
151151
assert (inspection_path / get_file_name("calculated", "csv", sample)).exists()
152152
# Clean up
@@ -170,7 +170,7 @@ def test_websocket_actor_run_model(data, model, sample, request):
170170
project_key = str(uuid.uuid4()) # Use a UUID to separate the resources used by the tests
171171
inspection_id = 0
172172

173-
with utils.MockedProjectCacheDir(project_key):
173+
with utils.MockedProjectCacheDir():
174174
params = websocket.RunModelParam(
175175
model=websocket.ArtifactRef(project_key=project_key, id=str(model.id)),
176176
dataset=websocket.ArtifactRef(project_key=project_key, id=str(dataset.id), sample=sample),
@@ -199,10 +199,10 @@ def test_websocket_actor_run_model_for_data_frame_regression(internal, request):
199199

200200
project_key = str(uuid.uuid4()) # Use a UUID to separate the resources used by the tests
201201

202-
with utils.MockedProjectCacheDir(project_key), utils.MockedClient(mock_all=False) as (client, mr):
202+
with utils.MockedProjectCacheDir(), utils.MockedClient(mock_all=False) as (client, mr):
203203
# Prepare model
204204
if internal:
205-
utils.local_save_model_under_giskard_home_cache(model, project_key)
205+
utils.local_save_model_under_giskard_home_cache(model)
206206
else:
207207
utils.register_uri_for_model_meta_info(mr, model, project_key)
208208
utils.register_uri_for_model_artifact_info(mr, model, project_key, register_file_contents=True)
@@ -239,10 +239,10 @@ def test_websocket_actor_run_model_for_data_frame_classification(internal, reque
239239

240240
project_key = str(uuid.uuid4()) # Use a UUID to separate the resources used by the tests
241241

242-
with utils.MockedProjectCacheDir(project_key), utils.MockedClient(mock_all=False) as (client, mr):
242+
with utils.MockedProjectCacheDir(), utils.MockedClient(mock_all=False) as (client, mr):
243243
# Prepare model
244244
if internal:
245-
utils.local_save_model_under_giskard_home_cache(model, project_key)
245+
utils.local_save_model_under_giskard_home_cache(model)
246246
else:
247247
utils.register_uri_for_model_meta_info(mr, model, project_key)
248248
utils.register_uri_for_model_artifact_info(mr, model, project_key, register_file_contents=True)
@@ -279,10 +279,10 @@ def test_websocket_actor_explain_ws_internal(data, model, request):
279279

280280
project_key = str(uuid.uuid4()) # Use a UUID to separate the resources used by the tests
281281

282-
with utils.MockedProjectCacheDir(project_key):
282+
with utils.MockedProjectCacheDir():
283283
# Prepare model and dataset
284-
utils.local_save_model_under_giskard_home_cache(model, project_key)
285-
utils.local_save_dataset_under_giskard_home_cache(dataset, project_key)
284+
utils.local_save_model_under_giskard_home_cache(model)
285+
utils.local_save_dataset_under_giskard_home_cache(dataset)
286286

287287
params = websocket.ExplainParam(
288288
model=websocket.ArtifactRef(project_key=project_key, id=str(model.id)),
@@ -301,7 +301,7 @@ def test_websocket_actor_explain_ws(data, model, request):
301301

302302
project_key = str(uuid.uuid4()) # Use a UUID to separate the resources used by the tests
303303

304-
with utils.MockedProjectCacheDir(project_key), utils.MockedClient(mock_all=False) as (client, mr):
304+
with utils.MockedProjectCacheDir(), utils.MockedClient(mock_all=False) as (client, mr):
305305
# Prepare model and dataset
306306
utils.register_uri_for_model_meta_info(mr, model, project_key)
307307
utils.register_uri_for_model_artifact_info(mr, model, project_key, register_file_contents=True)
@@ -324,10 +324,10 @@ def test_websocket_actor_explain_text_ws_regression(internal, request):
324324

325325
project_key = str(uuid.uuid4()) # Use a UUID to separate the resources used by the tests
326326

327-
with utils.MockedProjectCacheDir(project_key), utils.MockedClient(mock_all=False) as (client, mr):
327+
with utils.MockedProjectCacheDir(), utils.MockedClient(mock_all=False) as (client, mr):
328328
# Prepare model and dataset
329329
if internal:
330-
utils.local_save_model_under_giskard_home_cache(model, project_key)
330+
utils.local_save_model_under_giskard_home_cache(model)
331331
else:
332332
utils.register_uri_for_model_meta_info(mr, model, project_key)
333333
utils.register_uri_for_model_artifact_info(mr, model, project_key, register_file_contents=True)
@@ -357,9 +357,9 @@ def test_websocket_actor_explain_text_ws_not_text(request):
357357

358358
project_key = str(uuid.uuid4()) # Use a UUID to separate the resources used by the tests
359359

360-
with utils.MockedProjectCacheDir(project_key), utils.MockedClient(mock_all=False) as (client, mr):
360+
with utils.MockedProjectCacheDir(), utils.MockedClient(mock_all=False) as (client, mr):
361361
# Prepare model and dataset
362-
utils.local_save_model_under_giskard_home_cache(model, project_key)
362+
utils.local_save_model_under_giskard_home_cache(model)
363363

364364
not_text_feature_name = None
365365
for col_name, col_type in dataset.column_types.items():
@@ -385,10 +385,10 @@ def test_websocket_actor_explain_text_ws_classification(internal, request):
385385

386386
project_key = str(uuid.uuid4()) # Use a UUID to separate the resources used by the tests
387387

388-
with utils.MockedProjectCacheDir(project_key), utils.MockedClient(mock_all=False) as (client, mr):
388+
with utils.MockedProjectCacheDir(), utils.MockedClient(mock_all=False) as (client, mr):
389389
# Prepare model and dataset
390390
if internal:
391-
utils.local_save_model_under_giskard_home_cache(model, project_key)
391+
utils.local_save_model_under_giskard_home_cache(model)
392392
else:
393393
utils.register_uri_for_model_meta_info(mr, model, project_key)
394394
utils.register_uri_for_model_artifact_info(mr, model, project_key, register_file_contents=True)
@@ -419,10 +419,10 @@ def test_websocket_actor_dataset_processing_empty(internal, request):
419419

420420
project_key = str(uuid.uuid4()) # Use a UUID to separate the resources used by the tests
421421

422-
with utils.MockedProjectCacheDir(project_key), utils.MockedClient(mock_all=False) as (client, mr):
422+
with utils.MockedProjectCacheDir(), utils.MockedClient(mock_all=False) as (client, mr):
423423
# Prepare dataset
424424
if internal:
425-
utils.local_save_dataset_under_giskard_home_cache(dataset, project_key)
425+
utils.local_save_dataset_under_giskard_home_cache(dataset)
426426
else:
427427
utils.register_uri_for_dataset_meta_info(mr, dataset, project_key)
428428
utils.register_uri_for_dataset_artifact_info(mr, dataset, project_key, register_file_contents=True)
@@ -460,9 +460,9 @@ def test_websocket_actor_dataset_processing_head_slicing_with_cache(callable_und
460460
project_key = str(uuid.uuid4()) # Use a UUID to separate the resources used by the tests
461461
callable_function_project_key = project_key if callable_under_project else None
462462

463-
with utils.MockedProjectCacheDir(project_key):
463+
with utils.MockedProjectCacheDir():
464464
# Prepare dataset
465-
utils.local_save_dataset_under_giskard_home_cache(dataset, project_key)
465+
utils.local_save_dataset_under_giskard_home_cache(dataset)
466466

467467
head_slice.meta.uuid = str(uuid.uuid4())
468468

@@ -510,9 +510,9 @@ def test_websocket_actor_dataset_processing_do_nothing_transform_with_cache(call
510510

511511
project_key = str(uuid.uuid4()) # Use a UUID to separate the resources used by the tests
512512

513-
with utils.MockedProjectCacheDir(project_key):
513+
with utils.MockedProjectCacheDir():
514514
# Prepare dataset
515-
utils.local_save_dataset_under_giskard_home_cache(dataset, project_key)
515+
utils.local_save_dataset_under_giskard_home_cache(dataset)
516516
callable_function_project_key = project_key if callable_under_project else None
517517

518518
do_nothing.meta.uuid = str(uuid.uuid4())
@@ -552,9 +552,9 @@ def test_websocket_actor_create_sub_dataset(request):
552552
dataset: Dataset = request.getfixturevalue("enron_data")
553553
project_key = str(uuid.uuid4())
554554

555-
with utils.MockedProjectCacheDir(project_key=project_key), utils.MockedClient(mock_all=False) as (client, mr):
555+
with utils.MockedProjectCacheDir(), utils.MockedClient(mock_all=False) as (client, mr):
556556
# Prepare dataset
557-
utils.local_save_dataset_under_giskard_home_cache(dataset, project_key)
557+
utils.local_save_dataset_under_giskard_home_cache(dataset)
558558
utils.register_uri_for_dataset_meta_info(mr, dataset, project_key)
559559

560560
# Prepare dataset upload requests

0 commit comments

Comments
 (0)