Skip to content

Commit f73c2ac

Browse files
committed
Fixing to_mlflow and to_wandb (avoid multiple opened streams on a file)
1 parent 2ad46e2 commit f73c2ac

2 files changed

Lines changed: 63 additions & 35 deletions

File tree

python-client/giskard/datasets/base/__init__.py

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -659,16 +659,27 @@ def copy(self):
659659
def to_mlflow(self, mlflow_client: MlflowClient = None, mlflow_run_id: str = None):
660660
import mlflow
661661

662-
with tempfile.NamedTemporaryFile(prefix="dataset-", suffix=".csv") as f:
662+
# To avoid file being open in write mode and read at the same time,
663+
# First, we'll write it, then make sure to remove it
664+
with tempfile.NamedTemporaryFile(
665+
prefix="dataset-", suffix=".csv", delete=False
666+
) as f:
667+
# Get file path
663668
local_path = f.name
664-
artifact_name = local_path.split("/")[-1]
665-
with open(local_path, "wb") as fw:
666-
uncompressed_bytes = save_df(self.df)
667-
fw.write(uncompressed_bytes)
669+
# Get name from file
670+
artifact_name = Path(f.name).name
671+
# Write the file on disk
672+
f.write(save_df(self.df))
673+
try:
668674
if mlflow_client is None and mlflow_run_id is None:
669675
mlflow.log_artifact(local_path)
670676
elif mlflow_client and mlflow_run_id:
671677
mlflow_client.log_artifact(mlflow_run_id, local_path=local_path)
678+
# Note(Bazire): Should we have a raising else here ?
679+
finally:
680+
# Force deletion of the temps file
681+
Path(f.name).unlink(missing_ok=True)
682+
672683
return artifact_name
673684

674685
def to_wandb(self, **kwargs) -> None:

python-client/giskard/scanner/report.py

Lines changed: 47 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
from pathlib import Path
2+
import random
3+
import string
14
import tempfile
25

36
import mlflow
@@ -130,13 +133,19 @@ def to_mlflow(
130133
model_artifact_path = "-for-" + model_artifact_path
131134

132135
with tempfile.NamedTemporaryFile(
133-
prefix="giskard-scan-results" + model_artifact_path + "-", suffix=".html"
136+
prefix="giskard-scan-results" + model_artifact_path + "-", suffix=".html", delete=False
134137
) as f:
138+
# Get file path
135139
scan_results_local_path = f.name
136-
scan_results_artifact_name = scan_results_local_path.split("/")[-1]
137-
scan_summary_artifact_name = "scan-summary" + model_artifact_path + ".json" if summary else None
140+
# Get name from file
141+
scan_results_artifact_name = Path(f.name).name
142+
scan_summary_artifact_name = (
143+
"scan-summary" + model_artifact_path + ".json" if summary else None
144+
)
145+
# Write the file on disk
138146
self.to_html(scan_results_local_path)
139147

148+
try:
140149
if mlflow_client is None and mlflow_run_id is None:
141150
mlflow.log_artifact(scan_results_local_path)
142151
if summary:
@@ -145,6 +154,10 @@ def to_mlflow(
145154
mlflow_client.log_artifact(mlflow_run_id, scan_results_local_path)
146155
if summary:
147156
mlflow_client.log_table(mlflow_run_id, results_df, artifact_file=scan_summary_artifact_name)
157+
finally:
158+
# Force deletion of the temps file
159+
Path(f.name).unlink(missing_ok=True)
160+
148161
return scan_results_artifact_name, scan_summary_artifact_name
149162

150163
def to_wandb(self, **kwargs):
@@ -165,30 +178,34 @@ def to_wandb(self, **kwargs):
165178
from ..utils.analytics_collector import analytics
166179

167180
with wandb_run(**kwargs) as run:
168-
with tempfile.NamedTemporaryFile(prefix="giskard-scan-results-", suffix=".html") as f:
169-
try:
170-
self.to_html(filename=f.name)
171-
wandb_artifact_name = "Vulnerability scan results/" + f.name.split("/")[-1].split(".html")[0]
172-
analytics.track(
173-
"wandb_integration:scan_result",
174-
{
175-
"wandb_run_id": run.id,
176-
"has_issues": self.has_issues(),
177-
"issues_cnt": len(self.issues),
178-
},
179-
)
180-
except Exception as e:
181-
analytics.track(
182-
"wandb_integration:scan_result:error:unknown",
183-
{
184-
"wandb_run_id": run.id,
185-
"error": str(e),
186-
},
187-
)
188-
raise ValueError(
189-
"An error occurred while logging the scan results into wandb. "
190-
"Please submit the traceback as a GitHub issue in the following "
191-
"repository for further assistance: https://github.com/Giskard-AI/giskard."
192-
) from e
193-
194-
run.log({wandb_artifact_name: wandb.Html(open(f.name), inject=False)})
181+
try:
182+
html = self.to_html()
183+
suffix = "".join(
184+
random.choices(string.ascii_lowercase + string.digits, k=8)
185+
)
186+
wandb_artifact_name = (
187+
f"Vulnerability scan results/giskard-scan-results-{suffix}"
188+
)
189+
analytics.track(
190+
"wandb_integration:scan_result",
191+
{
192+
"wandb_run_id": run.id,
193+
"has_issues": self.has_issues(),
194+
"issues_cnt": len(self.issues),
195+
},
196+
)
197+
except Exception as e:
198+
analytics.track(
199+
"wandb_integration:scan_result:error:unknown",
200+
{
201+
"wandb_run_id": run.id,
202+
"error": str(e),
203+
},
204+
)
205+
raise ValueError(
206+
"An error occurred while logging the scan results into wandb. "
207+
"Please submit the traceback as a GitHub issue in the following "
208+
"repository for further assistance: https://github.com/Giskard-AI/giskard."
209+
) from e
210+
211+
run.log({wandb_artifact_name: wandb.Html(html, inject=False)})

0 commit comments

Comments
 (0)