Introduction
Convolution mainly performs spatial transformations; a convolutional neural network uses it to extract features automatically.
Convolving an image essentially operates on all channels of a local patch of the image at once. The operation changes the image's $c, w, h$ values (channels, width, height).
Channels
Single channel
Slide the kernel matrix over the input matrix; at each position, multiply the overlapping entries element-wise and sum them to obtain one element of the output matrix.
The red box in the figure is called the "window", and its defining behavior is sliding: after the multiply-and-sum at one position, the window slides to the right by an amount determined by the stride.
import torch
inputs = torch.Tensor([
    [3,4,6,5,7],
    [2,4,6,8,2],
    [1,6,7,8,4],
    [9,7,4,6,2],
    [3,7,5,4,1]
])
kernel = torch.Tensor([
    [1,2,3],
    [4,5,6],
    [7,8,9]
])
m,n = inputs.shape
mk,nk = kernel.shape
# with no padding and stride 1, the output is (m-mk+1) x (n-nk+1)
outputs = torch.zeros((m-mk+1,n-nk+1))
mo,no = outputs.shape
for i in range(mo):
    for j in range(no):
        # multiply the current window element-wise with the kernel, then sum
        outputs[i,j] = torch.mul(inputs[i:i+mk,j:j+nk],kernel).sum()
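As a quick check, the hand-rolled loop should agree with PyTorch's built-in convolution once the tensors are reshaped into the (batch, channels, height, width) layout it expects; a minimal sketch continuing from the variables above:
import torch.nn.functional as F
# F.conv2d expects input (N, C_in, H, W) and weight (C_out, C_in, kH, kW)
check = F.conv2d(inputs.view(1,1,5,5), kernel.view(1,1,3,3))
print(torch.allclose(outputs, check.view(3,3)))  # True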
Three channels
Essentially, this accumulates single-channel convolutions over the channels. Note that the kernel must have as many channels as the input. The operation turns a 3-channel input into a 1-channel output.
import torch
# the input must have the same number of channels as the kernel;
# for brevity we stack the same 5x5 plane three times
plane = torch.Tensor([
    [3,4,6,5,7],
    [2,4,6,8,2],
    [1,6,7,8,4],
    [9,7,4,6,2],
    [3,7,5,4,1]
])
inputs = torch.stack([plane,plane,plane])  # shape (3,5,5)
kernel = torch.Tensor([
    [[1,2,3],
     [4,5,6],
     [7,8,9]],
    [[9,8,7],
     [6,5,4],
     [3,2,1]],
    [[1,4,7],
     [2,5,8],
     [3,6,9]],
])
zi,m,n = inputs.shape
zk,mk,nk = kernel.shape
assert zi == zk  # kernel channels must match input channels
outputs = torch.zeros((m-mk+1,n-nk+1))
mo,no = outputs.shape
# accumulate the per-channel convolutions into a single output plane
for k in range(zk):
    for i in range(mo):
        for j in range(no):
            outputs[i,j] += torch.mul(inputs[k,i:i+mk,j:j+nk],kernel[k]).sum()
n channels
To obtain an $m$-channel output, prepare $m$ such kernel stacks. The weights then form a four-dimensional tensor of shape $m \times n \times k_{width} \times k_{height}$ (output channels, input channels, kernel width, kernel height).
Convolutional layer
import torch
in_channels,out_channels = 5,10
width,height = 100,100
kernel_size = 3
batch_size = 1
inputs = torch.randn(batch_size,in_channels,width,height)
conv_layer = torch.nn.Conv2d(in_channels,out_channels,
                             kernel_size=kernel_size)
outputs = conv_layer(inputs)
# the weight tensor has shape (out_channels, in_channels, kernel, kernel)
print("inputs.shape = {};\noutputs.shape = {};"
      "\nconv_layer.weight.shape = {}".format(inputs.shape,
                                              outputs.shape,
                                              conv_layer.weight.shape))
Output spatial size: $n_{in}-kernel+1$ (with no padding and stride 1).
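More generally, with padding $p$ and stride $s$, each spatial dimension of the output is $n_{out} = \lfloor (n_{in}+2p-kernel)/s \rfloor + 1$; setting $p=0$ and $s=1$ recovers the expression above.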
Feature maps
After the kernel has slid over every position, multiplying and summing at each, a complete feature map is filled in.
A feature map is the "feature" that one kernel extracts from the original image. For its values: the closer to 1, the more completely the corresponding location matches the feature; the closer to -1, the more completely it matches the feature's negative; values near 0 indicate no match, i.e. little relation to the feature at that location.
One feature applied to the image produces one feature map; the figure uses 3 features, so 3 feature maps are produced in the end.
Padding
Padding, i.e. filling in the image borders, comes in four kinds: zero padding, constant padding, reflection padding, and replication padding.
import torch
inputs = torch.Tensor([
    3,4,6,5,7,
    2,4,6,8,2,
    1,6,7,8,4,
    9,7,4,6,2,
    3,7,5,4,1])
inputs = inputs.view(1,1,5,5)  # (batch, channels, height, width)
# padding=1 adds a ring of zeros, so the 5x5 input yields a 5x5 output
conv_layer = torch.nn.Conv2d(1,1,kernel_size=3,padding=1,bias=False)
kernel = torch.Tensor(list(range(1,10))).view(1,1,3,3)
conv_layer.weight.data = kernel.data
outputs = conv_layer(inputs)
print(outputs)
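The layer above uses zero padding. A minimal sketch of the other kinds via torch.nn.functional.pad (the constant 9 is an arbitrary value chosen for illustration):
import torch
import torch.nn.functional as F
x = torch.arange(9,dtype=torch.float32).view(1,1,3,3)
# the pad tuple is (left, right, top, bottom)
print(F.pad(x,(1,1,1,1),mode='constant',value=0))  # zero padding
print(F.pad(x,(1,1,1,1),mode='constant',value=9))  # constant padding
print(F.pad(x,(1,1,1,1),mode='reflect'))           # reflection padding
print(F.pad(x,(1,1,1,1),mode='replicate'))         # replication padding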
Taking the low-light enhancement task as an example, the final comparison is shown in the figure below: zero padding produces edge artifacts, whereas reflection padding alleviates this effect well.
Stride
That is, how far the window moves at each step. With stride set to 2, for example, the 5×5 input and 3×3 kernel below give $\lfloor(5-3)/2\rfloor+1=2$, i.e. a 2×2 output:
import torch
inputs = torch.Tensor([
    3,4,6,5,7,
    2,4,6,8,2,
    1,6,7,8,4,
    9,7,4,6,2,
    3,7,5,4,1])
inputs = inputs.view(1,1,5,5)
# stride=2 moves the window two rows/columns at a time -> 2x2 output
conv_layer = torch.nn.Conv2d(1,1,kernel_size=3,
                             padding=0,stride=2,bias=False)
kernel = torch.Tensor(list(range(1,10))).view(1,1,3,3)
conv_layer.weight.data = kernel.data
outputs = conv_layer(inputs)
print(outputs)
Pooling layer
After convolution we have a stack of feature maps with various values. Although they hold much less data than the original image, the amount is still large (after all, deep learning routinely uses hundreds of thousands of training images), so the pooling step comes next; its main goal is to reduce the amount of data.
Pooling comes in two kinds: max pooling and average pooling. As the names suggest, max pooling takes the maximum of each window and average pooling takes the mean.
import torch
inputs = torch.Tensor([
    3,4,6,5,7,
    2,4,6,8,2,
    1,6,7,8,4,
    9,7,4,6,2,
    3,7,5,4,1])
inputs = inputs.view(1,1,5,5)
pool_layer = torch.nn.MaxPool2d(kernel_size=2,padding=0,stride=2)
outputs = pool_layer(inputs)
print(outputs)  # tensor([[[[4., 8.], [9., 8.]]]])
In PyTorch, the stride of MaxPool2d defaults to kernel_size, so MaxPool2d(2) already uses a stride of 2.
import torch
inputs = torch.Tensor([
    3,4,6,5,7,
    2,4,6,8,2,
    1,6,7,8,4,
    9,7,4,6,2,
    3,7,5,4,1])
inputs = inputs.view(1,1,5,5)
pool_layer = torch.nn.AvgPool2d(kernel_size=2)  # stride defaults to kernel_size
outputs = pool_layer(inputs)
print(outputs)  # tensor([[[[3.2500, 6.2500], [5.7500, 6.2500]]]])
A simple convolutional neural network
Rewriting the fully connected network as a convolutional one:
import torch
import torch.nn.functional as F  # for the activation function

class Net(torch.nn.Module):
    def __init__(self):
        super(Net,self).__init__()
        self.conv1 = torch.nn.Conv2d(1,10,kernel_size=5)
        self.conv2 = torch.nn.Conv2d(10,20,kernel_size=5)
        self.pooling = torch.nn.MaxPool2d(2)
        self.linear = torch.nn.Linear(320,10)

    def forward(self,x):
        batch_size = x.size(0)
        # (n,1,28,28) -> (n,10,12,12) -> (n,20,4,4)
        x = F.relu(self.pooling(self.conv1(x)))
        x = F.relu(self.pooling(self.conv2(x)))
        # flatten (n,20,4,4) to (n,320) for the linear layer
        x = x.view(batch_size,-1)
        x = self.linear(x)
        return x
model = Net()
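A quick sanity check with a dummy MNIST-sized batch (the 28×28 input size is assumed, as in the shape comments above):
x = torch.randn(8,1,28,28)  # a dummy batch of eight 28x28 grayscale images
print(model(x).shape)       # torch.Size([8, 10])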
How to use the GPU
Moving the model to the GPU
Select the device:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
Move the model's parameters and buffers to the GPU:
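model.to(device)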
Moving tensors to the GPU
inputs,target = inputs.to(device),target.to(device)
The data is moved to the GPU with .to(device), as in the training loop below:
def train(epoch):
    running_loss = 0.0
    for batch_idx,data in enumerate(train_loader,0):
        inputs,target = data
        inputs,target = inputs.to(device),target.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs,target)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        if batch_idx % 300 == 299:
            print('[{:d},{:5d}] loss:{:.3f}'.format(epoch+1,batch_idx+1,
                                                    running_loss/300))
            running_loss = 0.0
def test():
    correct = 0
    total = 0
    with torch.no_grad():
        for data in test_loader:
            inputs,target = data
            inputs,target = inputs.to(device),target.to(device)
            outputs = model(inputs)
            _,predicted = torch.max(outputs.data,dim=1)
            total += target.size(0)
            correct += (predicted == target).sum().item()
    print("Accuracy on test set:{:.2%}".format(correct/total))
Assignment 1
import torch
from torchvision import transforms
from torchvision import datasets
from torch.utils.data import DataLoader
import torch.nn.functional as F
import torch.optim as optim
batch_size = 64
# preprocessing: convert to tensor, then normalize with the MNIST mean/std
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,),(0.3081,))
])
# load the training data
train_dataset = datasets.MNIST(root='./数据集/mnist/',train=True,download=True,
                               transform=transform)
# shuffle the training set
train_loader = DataLoader(train_dataset,
                          shuffle=True,
                          batch_size=batch_size)
# load the test data
test_dataset = datasets.MNIST(root='./数据集/mnist/',train=False,download=True,
                              transform=transform)
# the test set does not need shuffling
test_loader = DataLoader(test_dataset,
                         shuffle=False,
                         batch_size=batch_size)
class ImageNet(torch.nn.Module):
    def __init__(self):
        super(ImageNet,self).__init__()
        # shape trace: (batch,1,28,28) -> conv1 -> (batch,10,24,24) -> pool -> (batch,10,12,12)
        # -> conv2 -> (batch,20,10,10) -> pool -> (batch,20,5,5)
        # -> conv3 -> (batch,30,4,4) -> pool -> (batch,30,2,2) -> flatten -> (batch,120)
        self.conv1 = torch.nn.Conv2d(1,10,kernel_size=5)
        self.conv2 = torch.nn.Conv2d(10,20,kernel_size=3)
        self.conv3 = torch.nn.Conv2d(20,30,kernel_size=2)
        self.pooling = torch.nn.MaxPool2d(2)
        self.linear = torch.nn.Linear(120,10)

    def forward(self,x):
        batch_size = x.size(0)
        x = self.pooling(F.relu(self.conv1(x)))
        x = self.pooling(F.relu(self.conv2(x)))
        x = self.pooling(F.relu(self.conv3(x)))
        x = x.view(batch_size,-1)
        x = self.linear(x)
        return x
model = ImageNet()
criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(),lr=0.01,momentum=0.5)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model.to(device)
def train(epoch):
    running_loss = 0.0
    for batch_idx,data in enumerate(train_loader,0):
        inputs,target = data
        inputs,target = inputs.to(device),target.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs,target)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        if batch_idx % 300 == 299:
            print('[{:d},{:5d}] loss:{:.3f}'.format(epoch+1,batch_idx+1,
                                                    running_loss/300))
            running_loss = 0.0
def test():
    correct = 0
    total = 0
    with torch.no_grad():
        for data in test_loader:
            inputs,target = data
            inputs,target = inputs.to(device),target.to(device)
            outputs = model(inputs)
            _,predicted = torch.max(outputs.data,dim=1)
            total += target.size(0)
            correct += (predicted == target).sum().item()
    print("Accuracy on test set:{:.2%}".format(correct/total))
if __name__ == '__main__':
    for epoch in range(10):
        train(epoch)
        test()
Advanced topics
Review
This figure contains many repeated structures, which make the network tedious to construct by hand; to reduce code redundancy, the repeated parts can be defined as functions or classes.
Inception Module: like nesting dolls, the parts that are reused are defined as a module.
Inception Module
The original motivation: it is hard to know in advance which kernel_size is appropriate, so the module simply includes all the candidate kernel sizes and lets training learn how to weight them against each other. Concatenate then splices the branch outputs together along the channel dimension.
What is a 1×1 convolution?
A $1\times 1$ convolution mixes information across the different channels at each spatial position.
Using $1\times 1$ convolutions also reduces the amount of computation.
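A rough count with assumed sizes (the classic GoogLeNet-style illustration: a $28\times 28$ feature map, 192 input channels, 32 output channels) shows why. A direct $5\times 5$ convolution costs about $5^2\times 28^2\times 192\times 32\approx 1.2\times 10^8$ multiplications; first reducing to 16 channels with a $1\times 1$ convolution costs $1^2\times 28^2\times 192\times 16 + 5^2\times 28^2\times 16\times 32\approx 1.2\times 10^7$, roughly a tenfold saving.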
Inception module
import torch
from torchvision import transforms
from torchvision import datasets
from torch.utils.data import DataLoader
import torch.nn.functional as F
import torch.optim as optim

class InceptionModule(torch.nn.Module):
    def __init__(self,in_channels):
        super(InceptionModule,self).__init__()
        self.branch1x1 = torch.nn.Conv2d(in_channels,16,kernel_size=1)
        self.branch5x5_1 = torch.nn.Conv2d(in_channels,16,kernel_size=1)
        self.branch5x5_2 = torch.nn.Conv2d(16,24,kernel_size=5,padding=2)
        self.branch3x3_1 = torch.nn.Conv2d(in_channels,16,kernel_size=1)
        self.branch3x3_2 = torch.nn.Conv2d(16,24,kernel_size=3,padding=1)
        self.branch3x3_3 = torch.nn.Conv2d(24,24,kernel_size=3,padding=1)
        self.branch_pool = torch.nn.Conv2d(in_channels,24,kernel_size=1)

    def forward(self,x):
        branch1x1 = self.branch1x1(x)
        branch5x5 = self.branch5x5_1(x)
        branch5x5 = self.branch5x5_2(branch5x5)
        branch3x3 = self.branch3x3_1(x)
        branch3x3 = self.branch3x3_2(branch3x3)
        branch3x3 = self.branch3x3_3(branch3x3)
        branch_pool = F.avg_pool2d(x,kernel_size=3,stride=1,padding=1)
        branch_pool = self.branch_pool(branch_pool)
        # concatenate along the channel dimension: 16+24+24+24 = 88 channels
        outputs = [branch1x1,branch5x5,branch3x3,branch_pool]
        return torch.cat(outputs,dim=1)
Using the Inception Module
class Net(torch.nn.Module):
    def __init__(self):
        super(Net,self).__init__()
        self.conv1 = torch.nn.Conv2d(1,10,kernel_size=5)
        # each InceptionModule outputs 24x3+16 = 88 channels
        self.conv2 = torch.nn.Conv2d(88,20,kernel_size=5)
        self.incep1 = InceptionModule(in_channels=10)
        self.incep2 = InceptionModule(in_channels=20)
        self.pool = torch.nn.MaxPool2d(2)
        self.linear = torch.nn.Linear(1408,10)

    def forward(self,x):
        in_size = x.size(0)
        x = F.relu(self.pool(self.conv1(x)))
        x = self.incep1(x)
        x = F.relu(self.pool(self.conv2(x)))
        x = self.incep2(x)
        # 88 channels x 4 x 4 = 1408
        x = x.view(in_size,-1)
        x = self.linear(x)
        return x
The 1408 in self.linear = torch.nn.Linear(1408,10) can be computed in code: run a forward pass once and inspect the size of x before the linear layer.
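A minimal sketch of that trick (assuming MNIST-sized 28×28 input): replay forward() up to the flatten and read off the size.
dummy = torch.randn(1,1,28,28)
net = Net()
x = F.relu(net.pool(net.conv1(dummy)))
x = net.incep1(x)
x = F.relu(net.pool(net.conv2(x)))
x = net.incep2(x)
print(x.view(1,-1).shape)  # torch.Size([1, 1408]) -> 88 channels x 4 x 4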
Results
model = Net()
criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(),lr=0.01,momentum=0.5)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model.to(device)
def train(epoch):
    running_loss = 0.0
    for batch_idx,data in enumerate(train_loader,0):
        inputs,target = data
        inputs,target = inputs.to(device),target.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs,target)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        if batch_idx % 300 == 299:
            print('[{:d},{:5d}] loss:{:.3f}'.format(epoch+1,batch_idx+1,
                                                    running_loss/300))
            running_loss = 0.0
def test():
    correct = 0
    total = 0
    with torch.no_grad():
        for data in test_loader:
            inputs,target = data
            inputs,target = inputs.to(device),target.to(device)
            outputs = model(inputs)
            _,predicted = torch.max(outputs.data,dim=1)
            total += target.size(0)
            correct += (predicted == target).sum().item()
    print("Accuracy on test set:{:.2%}".format(correct/total))
if __name__ == '__main__':
    for epoch in range(10):
        train(epoch)
        test()
Deep Residual Learning
Because deep networks may stack very many layers, gradients can vanish during backpropagation. The relu activation is commonly used to mitigate this; the residual connections introduced below go further, adding a skip path so that gradients can also flow through the identity branch.
Residual Network
A simple residual network
class ResidualBlock(torch.nn.Module):
    def __init__(self,channels):
        super(ResidualBlock,self).__init__()
        self.channels = channels
        self.conv1 = torch.nn.Conv2d(channels,channels,kernel_size=3,padding=1)
        self.conv2 = torch.nn.Conv2d(channels,channels,kernel_size=3,padding=1)

    def forward(self,x):
        y = F.relu(self.conv1(x))
        y = self.conv2(y)
        # skip connection: add the input before the final activation
        return F.relu(x+y)
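Because the skip connection computes x + y, the block must preserve both the channel count and the spatial size (hence kernel_size=3 with padding=1); a quick check:
block = ResidualBlock(16)
x = torch.randn(1,16,12,12)
print(block(x).shape)  # torch.Size([1, 16, 12, 12])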
class Net(torch.nn.Module):
    def __init__(self):
        super(Net,self).__init__()
        self.conv1 = torch.nn.Conv2d(1,16,kernel_size=5)
        self.conv2 = torch.nn.Conv2d(16,32,kernel_size=5)
        self.mp = torch.nn.MaxPool2d(2)
        self.rblock1 = ResidualBlock(16)
        self.rblock2 = ResidualBlock(32)
        self.linear = torch.nn.Linear(512,10)

    def forward(self,x):
        in_size = x.size(0)
        x = self.mp(F.relu(self.conv1(x)))
        x = self.rblock1(x)
        x = self.mp(F.relu(self.conv2(x)))
        x = self.rblock2(x)
        # 32 channels x 4 x 4 = 512
        x = x.view(in_size,-1)
        x = self.linear(x)
        return x
model = Net()
criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(),lr=0.01,momentum=0.5)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model.to(device)
def train(epoch):
    running_loss = 0.0
    for batch_idx,data in enumerate(train_loader,0):
        inputs,target = data
        inputs,target = inputs.to(device),target.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs,target)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        if batch_idx % 300 == 299:
            print('[{:d},{:5d}] loss:{:.3f}'.format(epoch+1,batch_idx+1,
                                                    running_loss/300))
            running_loss = 0.0
def test():
    correct = 0
    total = 0
    with torch.no_grad():
        for data in test_loader:
            inputs,target = data
            inputs,target = inputs.to(device),target.to(device)
            outputs = model(inputs)
            _,predicted = torch.max(outputs.data,dim=1)
            total += target.size(0)
            correct += (predicted == target).sum().item()
    print("Accuracy on test set:{:.2%}".format(correct/total))
if __name__ == '__main__':
    for epoch in range(10):
        train(epoch)
        test()
Assignment 2
Learning goals
Reproduce classic work (writing code and reading code form a loop; merely running code only proves that your environment is configured correctly).
Further reading
CNN:https://zhuanlan.zhihu.com/p/27908027