|
6 | 6 | "collapsed": false |
7 | 7 | }, |
8 | 8 | "source": [ |
9 | | - "# German credit scoring [scikit-learn]\n", |
| 9 | + "# Wage classification [scikit-learn]\n", |
10 | 10 | "\n", |
11 | 11 | "Giskard is an open-source framework for testing all ML models, from LLMs to tabular models. Don’t hesitate to give the project a [star on GitHub](https://github.com/Giskard-AI/giskard) ⭐️ if you find it useful!\n", |
12 | 12 | "\n", |
|
75 | 75 | "from urllib.request import urlretrieve\n", |
76 | 76 | "\n", |
77 | 77 | "import pandas as pd\n", |
78 | | - "from sklearn.pipeline import Pipeline\n", |
79 | | - "from sklearn.metrics import accuracy_score\n", |
80 | 78 | "from sklearn.compose import ColumnTransformer\n", |
81 | 79 | "from sklearn.ensemble import RandomForestClassifier\n", |
| 80 | + "from sklearn.metrics import accuracy_score\n", |
82 | 81 | "from sklearn.model_selection import train_test_split\n", |
| 82 | + "from sklearn.pipeline import Pipeline\n", |
83 | 83 | "from sklearn.preprocessing import StandardScaler, OneHotEncoder\n", |
84 | 84 | "\n", |
85 | 85 | "from giskard import Model, Dataset, scan, testing, GiskardClient, Suite" |
|
111 | 111 | "TEST_RATIO = 0.2\n", |
112 | 112 | "\n", |
113 | 113 | "DROP_FEATURES = [\n", |
114 | | - " 'education', \n", |
115 | | - " 'native-country', \n", |
| 114 | + " 'education',\n", |
| 115 | + " 'native-country',\n", |
116 | 116 | " 'occupation',\n", |
117 | 117 | " 'marital-status',\n", |
118 | 118 | " 'educational-num'\n", |
|
229 | 229 | }, |
230 | 230 | "outputs": [], |
231 | 231 | "source": [ |
232 | | - "X_train, X_test, y_train, y_test = train_test_split(income_df.drop(columns=TARGET_COLUMN), income_df[TARGET_COLUMN], \n", |
| 232 | + "X_train, X_test, y_train, y_test = train_test_split(income_df.drop(columns=TARGET_COLUMN), income_df[TARGET_COLUMN],\n", |
233 | 233 | " test_size=TEST_RATIO, random_state=RANDOM_SEED)" |
234 | 234 | ] |
235 | 235 | }, |
|
257 | 257 | "source": [ |
258 | 258 | "raw_data = pd.concat([X_test, y_test], axis=1)\n", |
259 | 259 | "giskard_dataset = Dataset(\n", |
260 | | - " df=raw_data, # A pandas.DataFrame that contains the raw data (before all the pre-processing steps) and the actual ground truth variable (target).\n", |
| 260 | + " df=raw_data,\n", |
| 261 | + " # A pandas.DataFrame that contains the raw data (before all the pre-processing steps) and the actual ground truth variable (target).\n", |
261 | 262 | " target=TARGET_COLUMN, # Ground truth variable.\n", |
262 | 263 | " name=\"salary_data\", # Optional.\n", |
263 | | - " cat_columns=CATEGORICAL_FEATURES # List of categorical columns. Optional, but is a MUST if available. Inferred automatically if not.\n", |
| 264 | + " cat_columns=CATEGORICAL_FEATURES\n", |
| 265 | + " # List of categorical columns. Optional, but is a MUST if available. Inferred automatically if not.\n", |
264 | 266 | ")" |
265 | 267 | ] |
266 | 268 | }, |
|
351 | 353 | "outputs": [], |
352 | 354 | "source": [ |
353 | 355 | "giskard_model = Model(\n", |
354 | | - " model=pipeline, # A prediction function that encapsulates all the data pre-processing steps and that could be executed with the dataset used by the scan.\n", |
| 356 | + " model=pipeline,\n", |
| 357 | + " # A prediction function that encapsulates all the data pre-processing steps and that could be executed with the dataset used by the scan.\n", |
355 | 358 | " model_type=\"classification\", # Either regression, classification or text_generation.\n", |
356 | 359 | " name=\"salary_cls\", # Optional.\n", |
357 | 360 | " classification_labels=pipeline.classes_, # Their order MUST be identical to the prediction_function's output order.\n", |
|
618 | 621 | "outputs": [], |
619 | 622 | "source": [ |
620 | 623 | "# Create a Giskard client after having installed the Giskard server (see documentation)\n", |
621 | | - "api_key = \"<Giskard API key>\" #This can be found in the Settings tab of the Giskard hub\n", |
| 624 | + "api_key = \"<Giskard API key>\" #This can be found in the Settings tab of the Giskard Hub\n", |
622 | 625 | "#hf_token = \"<Your Giskard Space token>\" #If the Giskard Hub is installed on HF Space, this can be found on the Settings tab of the Giskard Hub\n", |
623 | 626 | "\n", |
624 | 627 | "client = GiskardClient(\n", |
|
0 commit comments