Skip to content

Commit ec0f740

Browse files
Merge pull request #1615 from sagar118/GSK-1567-add-number-to-word-transformation
[GSK-2280] Feature: Added Number-to-Words Transformation
2 parents af8dcbd + 1a52ffb commit ec0f740

5 files changed

Lines changed: 479 additions & 786 deletions

File tree

giskard/scanner/robustness/text_perturbation_detector.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -27,6 +27,7 @@ def _get_default_transformations(self, model: BaseModel, dataset: Dataset) -> Se
2727
from .text_transformations import (
2828
TextAccentRemovalTransformation,
2929
TextLowercase,
30+
TextNumberToWordTransformation,
3031
TextPunctuationRemovalTransformation,
3132
TextTitleCase,
3233
TextTypoTransformation,
@@ -39,5 +40,6 @@ def _get_default_transformations(self, model: BaseModel, dataset: Dataset) -> Se
3940
TextTitleCase,
4041
TextTypoTransformation,
4142
TextPunctuationRemovalTransformation,
43+
TextNumberToWordTransformation,
4244
TextAccentRemovalTransformation,
4345
]

giskard/scanner/robustness/text_transformations.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
import numpy as np
88
import pandas as pd
9+
from num2words import num2words
910

1011
from ...core.core import DatasetProcessFunctionMeta
1112
from ...datasets import Dataset
@@ -273,6 +274,18 @@ def _switch(self, word, language):
273274
return None
274275

275276

277+
class TextNumberToWordTransformation(TextLanguageBasedTransformation):
    """Perturb text by spelling out the numbers it contains.

    Each number matched by the compiled pattern is replaced with the words
    produced by ``num2words`` for the language stored in the row's
    ``language__gsk__meta`` field.
    """

    name = "Transform numbers to words"

    def _load_dictionaries(self):
        """Compile the pattern used to locate numbers in the text."""
        # Matches an integer or a decimal (``\d+`` optionally followed by
        # ``.\d+``). The lookarounds skip digits that belong to a larger
        # slash- or dot-separated group (such as dates or version strings)
        # and numbers that directly precede an ``@``.
        self._regex = re.compile(r"(?<!\d/)(?<!\d\.)\b\d+(?:\.\d+)?\b(?!(?:\.\d+)?@|\d?/?\d)")

    def make_perturbation(self, row):
        """Return the row's text with every matched number written as words.

        Parameters
        ----------
        row :
            Mapping-like row providing the text under ``self.column`` and the
            language code under ``"language__gsk__meta"``.

        Returns
        -------
        The perturbed text. The original value is returned untouched when it
        is not a string, when the language value is not a string, or when
        ``num2words`` does not support the language.
        """
        text = row[self.column]
        language = row["language__gsk__meta"]

        # Missing values (e.g. NaN) cannot be searched by the regex or used as
        # a language code; leave such rows unperturbed instead of raising.
        if not isinstance(text, str) or not isinstance(language, str):
            return text

        try:
            return self._regex.sub(lambda match: num2words(match.group(), lang=language), text)
        except NotImplementedError:
            # num2words raises NotImplementedError for languages it has no
            # converter for; in that case no perturbation is applied.
            return text
287+
288+
276289
class TextReligionTransformation(TextLanguageBasedTransformation):
277290
name = "Switch Religion"
278291

0 commit comments

Comments
 (0)