Skip to content

Commit 18a0c60

Browse files
committed
Allow latest pandas version
1 parent d10b897 commit 18a0c60

4 files changed

Lines changed: 215 additions & 181 deletions

File tree

.github/workflows/build-python.yml

Lines changed: 20 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -56,21 +56,29 @@ jobs:
5656
python-version: [ "3.8", "3.9", "3.10", "3.11" ]
5757
os: [ubuntu-latest]
5858
pydantic_v2: [false]
59+
pandas_v1: [false]
5960
# https://docs.github.com/en/actions/using-github-hosted-runners/about-github-hosted-runners#supported-runners-and-hardware-resources
6061
include:
6162
- python-version: "3.10"
6263
os: windows-2019
6364
pydantic_v2: false
65+
pandas_v1: false
6466
- python-version: "3.10"
6567
os: windows-2022
6668
pydantic_v2: false
69+
pandas_v1: false
6770
- python-version: "3.10"
6871
os: macos-latest
6972
pydantic_v2: false
73+
pandas_v1: false
7074
- python-version: "3.10"
7175
os: ubuntu-latest
7276
pydantic_v2: true
73-
77+
pandas_v1: false
78+
- python-version: "3.10"
79+
os: ubuntu-latest
80+
pydantic_v2: false
81+
pandas_v1: true
7482
continue-on-error: false # https://ncorti.com/blog/howto-github-actions-build-matrix
7583
steps:
7684
- name: Checkout code
@@ -112,6 +120,17 @@ jobs:
112120
pdm run pip freeze | grep '^pydantic'
113121
pdm run pip freeze | grep -q '^pydantic==${{ matrix.pydantic_v2 && '2' || '1' }}\.'
114122
123+
- name: Install pandas v1
124+
if: ${{ matrix.pandas_v1 }}
125+
run: |
126+
pdm run pip uninstall pandas -y
127+
pdm run pip install "pandas<2"
128+
129+
- name: Check Pandas installed version
130+
run: |
131+
pdm run pip freeze | grep '^pandas'
132+
pdm run pip freeze | grep -q '^pandas==${{ matrix.pandas_v1 && '1' || '2' }}\.'
133+
115134
- name: Test code
116135
env:
117136
PYTEST_XDIST_AUTO_NUM_WORKERS: ${{ startsWith(matrix.os,'windows-') && 1 || 2 }}

giskard/testing/tests/drift.py

Lines changed: 22 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -83,20 +83,19 @@ def _calculate_drift_psi(actual_series, reference_series, max_categories):
8383
expected_distribution = expected_frequencies / len(reference_series)
8484
actual_distribution = actual_frequencies / len(actual_series)
8585
total_psi = 0
86-
output_data = pd.DataFrame(columns=["Modality", "Reference_distribution", "Actual_distribution", "Psi"])
87-
for category in range(len(all_modalities)):
86+
output_data = []
87+
for category, modality in enumerate(all_modalities):
8888
modality_psi = _calculate_psi(category, actual_distribution, expected_distribution)
89-
9089
total_psi += modality_psi
91-
row = {
92-
"Modality": all_modalities[category],
93-
"Reference_distribution": expected_distribution[category],
94-
"Actual_distribution": expected_distribution[category],
95-
"Psi": modality_psi,
96-
}
97-
98-
output_data = output_data.append(pd.Series(row), ignore_index=True)
99-
return total_psi, output_data
90+
output_data.append(
91+
{
92+
"Modality": modality,
93+
"Reference_distribution": expected_distribution[category],
94+
"Actual_distribution": expected_distribution[category],
95+
"Psi": modality_psi,
96+
}
97+
)
98+
return total_psi, pd.DataFrame(output_data)
10099

101100

102101
def _calculate_ks(actual_series, reference_series) -> Ks_2sampResult:
@@ -131,28 +130,29 @@ def _calculate_chi_square(actual_series, reference_series, max_categories):
131130
# so that reference and actual has the same size
132131
# See https://github.com/scipy/scipy/blob/v1.8.0/scipy/stats/_stats_py.py#L6787
133132
k_norm = actual_series.shape[0] / reference_series.shape[0]
134-
output_data = pd.DataFrame(columns=["Modality", "Reference_frequencies", "Actual_frequencies", "Chi_square"])
135-
for i in range(len(all_modalities)):
133+
output_data = []
134+
for i, modality in enumerate(all_modalities):
136135
chi_square_value = (actual_frequencies[i] - expected_frequencies[i] * k_norm) ** 2 / (
137136
expected_frequencies[i] * k_norm
138137
)
139138
chi_square += chi_square_value
140139

141-
row = {
142-
"Modality": all_modalities[i],
143-
"Reference_frequencies": expected_frequencies[i],
144-
"Actual_frequencies": actual_frequencies[i],
145-
"Chi_square": chi_square_value,
146-
}
140+
output_data.append(
141+
{
142+
"Modality": modality,
143+
"Reference_frequencies": expected_frequencies[i],
144+
"Actual_frequencies": actual_frequencies[i],
145+
"Chi_square": chi_square_value,
146+
}
147+
)
147148

148-
output_data = output_data.append(pd.Series(row), ignore_index=True)
149149
# if reference_series and actual_series has only one modality it turns nan (len(all_modalities)=1)
150150
if len(all_modalities) > 1:
151151
chi_cdf = chi2.cdf(chi_square, len(all_modalities) - 1)
152152
p_value = 1 - chi_cdf if chi_cdf != 0 else 0
153153
else:
154154
p_value = 0
155-
return chi_square, p_value, output_data
155+
return chi_square, p_value, pd.DataFrame(output_data)
156156

157157

158158
def _validate_feature_type(gsk_dataset, column_name, feature_type):

0 commit comments

Comments
 (0)