|
3 | 3 | import random |
4 | 4 | import re |
5 | 5 | from pathlib import Path |
6 | | -from num2words import num2words |
7 | 6 |
|
8 | 7 | import numpy as np |
9 | 8 | import pandas as pd |
| 9 | +from num2words import num2words |
10 | 10 |
|
11 | 11 | from ...core.core import DatasetProcessFunctionMeta |
12 | 12 | from ...datasets import Dataset |
@@ -148,22 +148,6 @@ def make_perturbation(self, text): |
148 | 148 | return "".join(pieces) |
149 | 149 |
|
150 | 150 |
|
151 | | -class TextNumberToWordTransformation(TextTransformation): |
152 | | - name = "Transform numbers to words" |
153 | | - |
154 | | - def __init__(self, column, lang="en"): |
155 | | - super().__init__(column) |
156 | | - # Target language |
157 | | - self.lang = lang |
158 | | - |
159 | | - # Regex to match numbers in text |
160 | | - self._regex = re.compile(r"(?<!\d/)(?<!\d\.)\b\d+(?:\.\d+)?\b(?!(?:\.\d+)?@|\d?/?\d)") |
161 | | - |
162 | | - def make_perturbation(self, text): |
163 | | - # Replace numbers with words |
164 | | - return self._regex.sub(lambda x: num2words(x.group(), lang=self.lang), text) |
165 | | - |
166 | | - |
167 | 151 | class TextLanguageBasedTransformation(TextTransformation): |
168 | 152 | needs_dataset = True |
169 | 153 |
|
@@ -226,6 +210,22 @@ def _switch(self, word, language): |
226 | 210 | return None |
227 | 211 |
|
228 | 212 |
|
| 213 | +class TextNumberToWordTransformation(TextLanguageBasedTransformation): |
| 214 | + name = "Transform numbers to words" |
| 215 | + |
| 216 | + def __init__(self, column, lang="en"): |
| 217 | + super().__init__(column) |
| 218 | + # Target language |
| 219 | + self.lang = lang |
| 220 | + |
| 221 | + # Regex to match numbers in text |
| 222 | + self._regex = re.compile(r"(?<!\d/)(?<!\d\.)\b\d+(?:\.\d+)?\b(?!(?:\.\d+)?@|\d?/?\d)") |
| 223 | + |
| 224 | + def make_perturbation(self, row): |
| 225 | + # Replace numbers with words |
| 226 | + return self._regex.sub(lambda x: num2words(x.group(), lang=row["language__gsk__meta"]), row[self.column]) |
| 227 | + |
| 228 | + |
229 | 229 | class TextReligionTransformation(TextLanguageBasedTransformation): |
230 | 230 | name = "Switch Religion" |
231 | 231 |
|
|
0 commit comments