08-加载数据集
在 PyTorch 里面,Dataset 用来构造数据集(定义如何按下标取出每一个样本),DataLoader 用来按批次加载数据集。
构造数据集
Dataset 是一个抽象类,不能直接实例化,需要定义一个子类来继承它。DataLoader 是用来导入数据的:它把 Dataset 中的样本打乱并组合成 mini-batch。
构造框架
import torch
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
class DiabetesDataset(Dataset):
    """Skeleton of a map-style dataset.

    The three methods below are placeholders that show which hooks a
    ``Dataset`` subclass has to provide; they do nothing useful yet.
    """

    def __init__(self):
        """Prepare the data source (placeholder: stores nothing)."""
        pass

    def __getitem__(self, index):  # dataset[index]
        """Return the sample at ``index`` (placeholder: returns None)."""
        return None

    def __len__(self):  # number of samples
        """Return the number of samples (placeholder: returns None)."""
        pass
# Instantiate the dataset, then wrap it in a loader that serves shuffled
# mini-batches of 32 samples.
dataset = DiabetesDataset()
train_loader = DataLoader(
    dataset=dataset,
    batch_size=32,
    shuffle=True,
    num_workers=2,  # batches are fetched by two DataLoader workers
)
定义数据集
import torch
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import numpy as np
class DiabetesDataset(Dataset):
    """Map-style dataset backed by a comma-separated numeric text file.

    Each row of the file is one sample: every column except the last is an
    input feature and the last column is the target.
    """

    def __init__(self, filepath):
        """Load the whole file into memory as float32 tensors.

        Args:
            filepath: Path of a comma-delimited file that ``numpy.loadtxt``
                can read.
        """
        # ndmin=2 keeps a single-row file two-dimensional so the column
        # slicing below still works.
        xy = np.loadtxt(filepath, delimiter=',', dtype=np.float32, ndmin=2)
        self.len = xy.shape[0]
        # Features: all columns but the last -> shape (N, n_features).
        self.x_data = torch.from_numpy(xy[:, :-1])
        # Target: last column, selected with a list so it stays 2-D (N, 1).
        self.y_data = torch.from_numpy(xy[:, [-1]])

    def __getitem__(self, index):  # dataset[index]
        """Return the ``(features, target)`` pair stored at ``index``."""
        return self.x_data[index], self.y_data[index]

    def __len__(self):  # number of samples
        """Return the number of rows that were loaded."""
        return self.len
# Load the diabetes data from disk and serve it in shuffled mini-batches
# of 32 samples.
dataset = DiabetesDataset('./数据集/diabetes/diabetes.csv.gz')
train_loader = DataLoader(
    dataset=dataset,
    batch_size=32,
    shuffle=True,
    num_workers=2,  # batches are fetched by two DataLoader workers
)
训练数据
class Model(torch.nn.Module):
    """Fully connected 8 -> 6 -> 4 -> 1 network with a sigmoid after each layer."""

    def __init__(self):
        super().__init__()
        self.linear1 = torch.nn.Linear(8, 6)
        self.linear2 = torch.nn.Linear(6, 4)
        self.linear3 = torch.nn.Linear(4, 1)
        # One stateless activation module shared by all three layers.
        self.sigmoid = torch.nn.Sigmoid()

    def forward(self, x):
        """Map a batch of 8-feature rows to one value in (0, 1) per row.

        Args:
            x: Tensor whose last dimension has size 8.

        Returns:
            Tensor with the same leading dimensions and a last dimension
            of size 1.
        """
        x = self.sigmoid(self.linear1(x))
        x = self.sigmoid(self.linear2(x))
        x = self.sigmoid(self.linear3(x))
        return x
model = Model()
# Binary cross-entropy, summed (not averaged) over each mini-batch.
criterion = torch.nn.BCELoss(reduction='sum')
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

# NOTE: this loop runs at module level. With a multi-worker DataLoader it
# fails on platforms that start worker processes by re-importing the main
# module (e.g. Windows) unless it is placed under
# `if __name__ == '__main__':`.
for epoch in range(100):
    for i, data in enumerate(train_loader, 0):
        # prepare data: each batch is an (inputs, labels) pair
        x, y = data
        # forward
        y_pred = model(x)
        loss = criterion(y_pred, y)
        print(epoch, i, loss.item())
        # backward: clear old gradients before accumulating new ones
        optimizer.zero_grad()
        loss.backward()
        # update
        optimizer.step()
运行后出现如下错误:
RuntimeError: DataLoader worker (pid(s) 20700, 7588) exited unexpectedly
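因为 torch.utils.data.DataLoader 中设置了 num_workers=2,也就是用 2 个子进程(多进程,而不是多线程)读取数据。在 Windows 下,子进程以 spawn 方式启动,会重新导入主模块;如果训练循环直接写在模块顶层,子进程就会异常退出。解决办法有两个:将 num_workers 改为 0(只在主进程中读取),或者像下面这样把训练循环放到 if __name__ == '__main__': 之下。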
if __name__ == '__main__':
    # Only the process that runs this file as a script trains; a DataLoader
    # worker that re-imports the module skips this block.
    for epoch in range(10):
        for i, data in enumerate(train_loader, 0):
            # prepare data: each batch is an (inputs, labels) pair
            x, y = data
            # forward
            y_pred = model(x)
            loss = criterion(y_pred, y)
            print(epoch, i, loss.item())
            # backward: clear old gradients before accumulating new ones
            optimizer.zero_grad()
            loss.backward()
            # update
            optimizer.step()
例子
手写数字集
最后更新于
这有帮助吗?