How to use HolaVPN to resolve googletrans's JSON Decode error: line 1 column 1 (char 0)
How to use HolaVPN to resolve googletrans's JSON Decode error: line 1 column 1 (char 0)
我有一个 18k 行的越南语数据集,我正在尝试使用 googletrans 模块将其翻译成英语。
from googletrans import Translator
translator = Translator()
def trans_text(df, text_field):
df[text_field] = df[text_field].apply(translator.translate, src='vi', dest='en').apply(getattr, args=('text',))
return df
trans_text(df_train.sample(1), "question")
我最终遇到以下 JSONDecode 错误:
JSONDecodeError Traceback (most recent call last)
<ipython-input-21-d6791d78575e> in <module>()
24 df[text_field] = df[text_field].apply(translator.translate, src='vi', dest='en').apply(getattr, args=('text',))
25 return df
---> 26 trans_text(df_train.sample(1), "question")
27
28
<ipython-input-21-d6791d78575e> in trans_text(df, text_field)
22
23 def trans_text(df, text_field):
---> 24 df[text_field] = df[text_field].apply(translator.translate, src='vi', dest='en').apply(getattr, args=('text',))
25 return df
26 trans_text(df_train.sample(1), "question")
/opt/anaconda3/envs/sam-pycaret/lib/python3.6/site-packages/pandas/core/series.py in apply(self, func, convert_dtype, args, **kwds)
4198 else:
4199 values = self.astype(object)._values
-> 4200 mapped = lib.map_infer(values, f, convert=convert_dtype)
4201
4202 if len(mapped) and isinstance(mapped[0], Series):
pandas/_libs/lib.pyx in pandas._libs.lib.map_infer()
/opt/anaconda3/envs/sam-pycaret/lib/python3.6/site-packages/pandas/core/series.py in f(x)
4183
4184 def f(x):
-> 4185 return func(x, *args, **kwds)
4186
4187 else:
/opt/anaconda3/envs/sam-pycaret/lib/python3.6/site-packages/googletrans/client.py in translate(self, text, dest, src)
170
171 origin = text
--> 172 data = self._translate(text, dest, src)
173
174 # this code will be updated when the format is changed.
/opt/anaconda3/envs/sam-pycaret/lib/python3.6/site-packages/googletrans/client.py in _translate(self, text, dest, src)
79 r = self.session.get(url, params=params)
80
---> 81 data = utils.format_json(r.text)
82 return data
83
/opt/anaconda3/envs/sam-pycaret/lib/python3.6/site-packages/googletrans/utils.py in format_json(original)
60 converted = json.loads(original)
61 except ValueError:
---> 62 converted = legacy_format_json(original)
63
64 return converted
/opt/anaconda3/envs/sam-pycaret/lib/python3.6/site-packages/googletrans/utils.py in legacy_format_json(original)
52 text = text[:p] + states[j][1] + text[nxt:]
53
---> 54 converted = json.loads(text)
55 return converted
56
/opt/anaconda3/envs/sam-pycaret/lib/python3.6/json/__init__.py in loads(s, encoding, cls, object_hook, parse_float, parse_int, parse_constant, object_pairs_hook, **kw)
352 parse_int is None and parse_float is None and
353 parse_constant is None and object_pairs_hook is None and not kw):
--> 354 return _default_decoder.decode(s)
355 if cls is None:
356 cls = JSONDecoder
/opt/anaconda3/envs/sam-pycaret/lib/python3.6/json/decoder.py in decode(self, s, _w)
337
338 """
--> 339 obj, end = self.raw_decode(s, idx=_w(s, 0).end())
340 end = _w(s, end).end()
341 if end != len(s):
/opt/anaconda3/envs/sam-pycaret/lib/python3.6/json/decoder.py in raw_decode(self, s, idx)
355 obj, end = self.scan_once(s, idx)
356 except StopIteration as err:
--> 357 raise JSONDecodeError("Expecting value", s, err.value) from None
358 return obj, end
JSONDecodeError: Expecting value: line 1 column 1 (char 0)
据我了解,这是由于我的 IP 被封禁而引起的。我查找了规避此问题的方法,发现使用 VPN 值得一试。我已经安装了 HolaVPN。但是,作为这方面的新手,我不确定如何在 Hola 中重现同样的步骤。如果能指点一下明确的操作流程,将会有很大帮助。谢谢。
库发出请求并且不检查状态代码就假定请求成功:
79 r = self.session.get(url, params=params)
80
---> 81 data = utils.format_json(r.text)
现在,Google 可能出于任何原因不接受您的请求,并返回一条带有相应状态码 (4xx, 5xx) 的错误消息。该库仍然尝试将响应正文解析为 JSON,但这行不通,因为响应没有正文,于是抛出一个与原始问题无关的 JSONDecodeError。结果就是您看不到真正的原因。
结论:googletrans 库缺少关键的错误处理。您可能需要修改该库,自行添加错误处理。
我有一个 18k 行的越南语数据集,我正在尝试使用 googletrans 模块将其翻译成英语。
from googletrans import Translator
translator = Translator()
def trans_text(df, text_field):
df[text_field] = df[text_field].apply(translator.translate, src='vi', dest='en').apply(getattr, args=('text',))
return df
trans_text(df_train.sample(1), "question")
我最终遇到以下 JSONDecode 错误:
JSONDecodeError Traceback (most recent call last)
<ipython-input-21-d6791d78575e> in <module>()
24 df[text_field] = df[text_field].apply(translator.translate, src='vi', dest='en').apply(getattr, args=('text',))
25 return df
---> 26 trans_text(df_train.sample(1), "question")
27
28
<ipython-input-21-d6791d78575e> in trans_text(df, text_field)
22
23 def trans_text(df, text_field):
---> 24 df[text_field] = df[text_field].apply(translator.translate, src='vi', dest='en').apply(getattr, args=('text',))
25 return df
26 trans_text(df_train.sample(1), "question")
/opt/anaconda3/envs/sam-pycaret/lib/python3.6/site-packages/pandas/core/series.py in apply(self, func, convert_dtype, args, **kwds)
4198 else:
4199 values = self.astype(object)._values
-> 4200 mapped = lib.map_infer(values, f, convert=convert_dtype)
4201
4202 if len(mapped) and isinstance(mapped[0], Series):
pandas/_libs/lib.pyx in pandas._libs.lib.map_infer()
/opt/anaconda3/envs/sam-pycaret/lib/python3.6/site-packages/pandas/core/series.py in f(x)
4183
4184 def f(x):
-> 4185 return func(x, *args, **kwds)
4186
4187 else:
/opt/anaconda3/envs/sam-pycaret/lib/python3.6/site-packages/googletrans/client.py in translate(self, text, dest, src)
170
171 origin = text
--> 172 data = self._translate(text, dest, src)
173
174 # this code will be updated when the format is changed.
/opt/anaconda3/envs/sam-pycaret/lib/python3.6/site-packages/googletrans/client.py in _translate(self, text, dest, src)
79 r = self.session.get(url, params=params)
80
---> 81 data = utils.format_json(r.text)
82 return data
83
/opt/anaconda3/envs/sam-pycaret/lib/python3.6/site-packages/googletrans/utils.py in format_json(original)
60 converted = json.loads(original)
61 except ValueError:
---> 62 converted = legacy_format_json(original)
63
64 return converted
/opt/anaconda3/envs/sam-pycaret/lib/python3.6/site-packages/googletrans/utils.py in legacy_format_json(original)
52 text = text[:p] + states[j][1] + text[nxt:]
53
---> 54 converted = json.loads(text)
55 return converted
56
/opt/anaconda3/envs/sam-pycaret/lib/python3.6/json/__init__.py in loads(s, encoding, cls, object_hook, parse_float, parse_int, parse_constant, object_pairs_hook, **kw)
352 parse_int is None and parse_float is None and
353 parse_constant is None and object_pairs_hook is None and not kw):
--> 354 return _default_decoder.decode(s)
355 if cls is None:
356 cls = JSONDecoder
/opt/anaconda3/envs/sam-pycaret/lib/python3.6/json/decoder.py in decode(self, s, _w)
337
338 """
--> 339 obj, end = self.raw_decode(s, idx=_w(s, 0).end())
340 end = _w(s, end).end()
341 if end != len(s):
/opt/anaconda3/envs/sam-pycaret/lib/python3.6/json/decoder.py in raw_decode(self, s, idx)
355 obj, end = self.scan_once(s, idx)
356 except StopIteration as err:
--> 357 raise JSONDecodeError("Expecting value", s, err.value) from None
358 return obj, end
JSONDecodeError: Expecting value: line 1 column 1 (char 0)
据我了解,这是由于我的 IP 被封禁而引起的。我查找了规避此问题的方法,发现使用 VPN 值得一试。我已经安装了 HolaVPN。但是,作为这方面的新手,我不确定如何在 Hola 中重现同样的步骤。如果能指点一下明确的操作流程,将会有很大帮助。谢谢。
库发出请求并且不检查状态代码就假定请求成功:
79 r = self.session.get(url, params=params)
80
---> 81 data = utils.format_json(r.text)
现在,Google 可能出于任何原因不接受您的请求,并返回一条带有相应状态码 (4xx, 5xx) 的错误消息。该库仍然尝试将响应正文解析为 JSON,但这行不通,因为响应没有正文,于是抛出一个与原始问题无关的 JSONDecodeError。结果就是您看不到真正的原因。
结论:googletrans 库缺少关键的错误处理。您可能需要修改该库,自行添加错误处理。