使用 torchtext.data.TabularDataset 将存储在 Google Drive 中的 CSV 数据加载为 torchtext 格式时出现 "No such file" 错误
"No such file" error when loading CSV data stored in Google Drive into torchtext format using torchtext.data.TabularDataset
我在 Google Drive(谷歌云端硬盘)中存储了一个 CSV 文件,并尝试用 torchtext 的 data.TabularDataset 加载它。错误信息是 "FileNotFoundError: [Errno 2] No such file or directory: 'https://.....'"
是否无法从 Google Drive 直接将 CSV 文件加载到 torchtext 的 TabularDataset?
代码如下。我还把 Colab notebook 和数据都设为了公开(public),可供查看。
# Question's original attempt (Colab notebook cell): authenticate against
# Google Drive, then pass the Drive share URL straight to TabularDataset.
import torch
from torchtext import data, datasets
# Notebook shell escape (not plain Python): quietly installs/upgrades PyDrive.
!pip install -U -q PyDrive
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials
# Authenticate the Colab user and reuse the application-default credentials
# for the PyDrive client.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
# NOTE: `drive` is created here but never used anywhere in this snippet.
drive = GoogleDrive(gauth)
TEXT = data.Field(tokenize = 'spacy', batch_first = True, lower=False)
LABEL = data.LabelField(sequential=False, dtype = torch.float)
# `path` is an https URL here rather than a local file; this is the call that
# reports "FileNotFoundError: [Errno 2] No such file or directory: 'https://.....'".
train = data.TabularDataset(path = 'https://drive.google.com/open?id=1eWMjusU3H34m0uml5SdJvYX6gQuB8zta',
format = 'csv',
# (None, None) stands in for the second CSV column — presumably a column to be
# ignored; confirm against the torchtext TabularDataset documentation.
fields = [('Insult', LABEL), (None, None), ('Comment', TEXT)],
skip_header=False)
假设您有权限下载此 CSV 文件,我建议使用 torchtext 内置的函数 download_from_url 先把文件下载到本地,再进行加载。
import os
import torch
from torchtext import data, datasets
from torchtext.utils import download_from_url

# Download the CSV from Google Drive to a local file first; TabularDataset is
# then given that local path instead of a URL.
CSV_FILENAME = 'data.csv'
# Direct-download form of the share link (uc?export=download&id=...).
CSV_GDRIVE_URL = 'https://drive.google.com/uc?export=download&id=1eWMjusU3H34m0uml5SdJvYX6gQuB8zta'
# Resolve ONE absolute path and use it for both the download and the load, so
# the two steps can never point at different files. Previously the file was
# saved relative to the current directory but read from a hard-coded '/content'.
# NOTE(review): on Colab the working directory is normally /content, so this
# still resolves to /content/data.csv — confirm if the notebook changes directory.
CSV_PATH = os.path.abspath(CSV_FILENAME)
download_from_url(CSV_GDRIVE_URL, CSV_PATH)

TEXT = data.Field(tokenize='spacy', batch_first=True, lower=False)
LABEL = data.LabelField(sequential=False, dtype=torch.float)

# Columns are mapped positionally: first -> 'Insult', second -> (None, None)
# placeholder (presumably ignored; confirm in the torchtext docs), third -> 'Comment'.
# NOTE(review): with skip_header=False the first CSV row is read as data; if the
# file starts with a header row, this likely needs skip_header=True — confirm.
train = data.TabularDataset(
    path=CSV_PATH,
    format='csv',
    fields=[('Insult', LABEL), (None, None), ('Comment', TEXT)],
    skip_header=False,
)
注意:Google Drive 的链接不应使用带有 open?id= 的分享形式,而应将其改为 uc?export=download&id= 的直接下载形式。
我在 Google Drive(谷歌云端硬盘)中存储了一个 CSV 文件,并尝试用 torchtext 的 data.TabularDataset 加载它。错误信息是 "FileNotFoundError: [Errno 2] No such file or directory: 'https://.....'"
是否无法从 Google Drive 直接将 CSV 文件加载到 torchtext 的 TabularDataset?
代码如下。我还把 Colab notebook 和数据都设为了公开(public),可供查看。
# Question's original attempt (Colab notebook cell): authenticate against
# Google Drive, then pass the Drive share URL straight to TabularDataset.
import torch
from torchtext import data, datasets
# Notebook shell escape (not plain Python): quietly installs/upgrades PyDrive.
!pip install -U -q PyDrive
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials
# Authenticate the Colab user and reuse the application-default credentials
# for the PyDrive client.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
# NOTE: `drive` is created here but never used anywhere in this snippet.
drive = GoogleDrive(gauth)
TEXT = data.Field(tokenize = 'spacy', batch_first = True, lower=False)
LABEL = data.LabelField(sequential=False, dtype = torch.float)
# `path` is an https URL here rather than a local file; this is the call that
# reports "FileNotFoundError: [Errno 2] No such file or directory: 'https://.....'".
train = data.TabularDataset(path = 'https://drive.google.com/open?id=1eWMjusU3H34m0uml5SdJvYX6gQuB8zta',
format = 'csv',
# (None, None) stands in for the second CSV column — presumably a column to be
# ignored; confirm against the torchtext TabularDataset documentation.
fields = [('Insult', LABEL), (None, None), ('Comment', TEXT)],
skip_header=False)
假设您有权限下载此 CSV 文件,我建议使用 torchtext 内置的函数 download_from_url 先把文件下载到本地,再进行加载。
import os
import torch
from torchtext import data, datasets
from torchtext.utils import download_from_url

# Download the CSV from Google Drive to a local file first; TabularDataset is
# then given that local path instead of a URL.
CSV_FILENAME = 'data.csv'
# Direct-download form of the share link (uc?export=download&id=...).
CSV_GDRIVE_URL = 'https://drive.google.com/uc?export=download&id=1eWMjusU3H34m0uml5SdJvYX6gQuB8zta'
# Resolve ONE absolute path and use it for both the download and the load, so
# the two steps can never point at different files. Previously the file was
# saved relative to the current directory but read from a hard-coded '/content'.
# NOTE(review): on Colab the working directory is normally /content, so this
# still resolves to /content/data.csv — confirm if the notebook changes directory.
CSV_PATH = os.path.abspath(CSV_FILENAME)
download_from_url(CSV_GDRIVE_URL, CSV_PATH)

TEXT = data.Field(tokenize='spacy', batch_first=True, lower=False)
LABEL = data.LabelField(sequential=False, dtype=torch.float)

# Columns are mapped positionally: first -> 'Insult', second -> (None, None)
# placeholder (presumably ignored; confirm in the torchtext docs), third -> 'Comment'.
# NOTE(review): with skip_header=False the first CSV row is read as data; if the
# file starts with a header row, this likely needs skip_header=True — confirm.
train = data.TabularDataset(
    path=CSV_PATH,
    format='csv',
    fields=[('Insult', LABEL), (None, None), ('Comment', TEXT)],
    skip_header=False,
)
注意:Google Drive 的链接不应使用带有 open?id= 的分享形式,而应将其改为 uc?export=download&id= 的直接下载形式。