【pytorch-learning】(二) 模型搭建-训练-测试

模型搭建

自定义模型

基本骨架为 torch.nn.Module

实现一个继承自Module的类,该类中主要包含 `__init__` 和 `forward` 方法。

通过forward方法完成网络的前向传递。

详细讲解见我记的B站小土堆的笔记:神经网络搭建

一个简单的代码示例:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
import torch
import torchvision
from torch import nn
from torch.nn import Conv2d
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

# CIFAR10 with train=False (the test split); every image is converted to a tensor by ToTensor().
dataset = torchvision.datasets.CIFAR10("dataset/",train=False,transform=torchvision.transforms.ToTensor(),download= True)
# Serve the dataset in batches of 64 samples.
dataloader = DataLoader(dataset,batch_size=64)

class Model_conv(nn.Module):
    """Minimal custom model: a single 3x3 convolution that keeps 3 channels."""

    def __init__(self):
        super().__init__()
        # No padding and stride 1, so height and width each shrink by 2.
        self.conv1 = Conv2d(in_channels=3, out_channels=3, kernel_size=3, stride=1, padding=0)

    def forward(self, x):
        """Run the forward pass.

        :param x: batch of images shaped [b, 3, h, w]
        :return: convolved batch shaped [b, 3, h - 2, w - 2]
        """
        return self.conv1(x)

model = Model_conv()
print(model)
# Push every batch through the model and compare input and output shapes.
for data in dataloader:
    imgs, target = data
    print(imgs.shape)
    output = model(imgs)
    print(output.shape)

torch.nn.Sequential使用示例

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
import torch
from torch import nn
from torch.nn import Conv2d, MaxPool2d, Linear, Sequential
class Model(nn.Module):
    """Small CIFAR10-style classifier assembled with ``torch.nn.Sequential``."""

    def __init__(self):
        super().__init__()
        # Three conv(5x5, padding=2) + 2x2 max-pool stages, then two linear layers.
        # Linear(1024, 64) expects 64 channels * 4 * 4, i.e. a 32x32 input image.
        self.model1 = Sequential(
            Conv2d(3, 32, kernel_size=5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 64, 5, padding=2),
            MaxPool2d(2),
            nn.Flatten(),
            Linear(1024, 64),
            Linear(64, 10),
        )

    def forward(self, x):
        """Run the forward pass.

        :param x: batch of images shaped [b, 3, 32, 32]
        :return: class scores shaped [b, 10]
        """
        return self.model1(x)


model = Model()

迁移学习

修改已有网络模型用于自己的任务

实例一:将VGG16用于数据集CIFAR10的分类

VGG16 模型的训练数据集是 ImageNet

通过torchvision.models.vgg16(pretrained=True)(pretrained=True会将其权重也下载下来)将模型下载下来之后,默认保存路径是C:\Users\[username]\.cache\torch\hub\checkpoints

VGG16用于1000分类(最后全连接层的输出是1000),使用数据集CIFAR10是需要10分类的。因此难点在于如何使用该VGG模型进行迁移。 下文代码展现了两种主要方式

方式一

1
2
3
4
5
6
7
8
9
10
11
import  torchvision
from torch import nn
# vgg16_false = torchvision.models.vgg16(pretrained=False)
# Build VGG16 with pretrained=True so the pretrained weights are downloaded too.
vgg16_true = torchvision.models.vgg16(pretrained=True)

train_data = torchvision.datasets.CIFAR10("dataset",train=True,transform=torchvision.transforms.ToTensor(),download=True)
## train_data has 10 classes, whereas vgg16 is a 1000-class classifier

# Register an extra 1000 -> 10 linear layer on the top-level module under the name 'add_linear'.
# NOTE(review): registering a child module does not by itself change the model's forward pass;
# presumably VGG.forward still returns the 1000-way classifier output -- verify before training.
vgg16_true.add_module('add_linear',nn.Linear(1000,10))
print(vgg16_true)

VGG模型添加了一层之后的结构

多了一层添加的:

(add_linear): Linear(in_features=1000, out_features=10, bias=True)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
VGG(
(features): Sequential(
(0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): ReLU(inplace=True)
(2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(3): ReLU(inplace=True)
(4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(6): ReLU(inplace=True)
(7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(8): ReLU(inplace=True)
(9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(11): ReLU(inplace=True)
(12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(13): ReLU(inplace=True)
(14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(15): ReLU(inplace=True)
(16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(17): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(18): ReLU(inplace=True)
(19): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(20): ReLU(inplace=True)
(21): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(22): ReLU(inplace=True)
(23): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(24): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(25): ReLU(inplace=True)
(26): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(27): ReLU(inplace=True)
(28): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(29): ReLU(inplace=True)
(30): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)
(avgpool): AdaptiveAvgPool2d(output_size=(7, 7))
(classifier): Sequential(
(0): Linear(in_features=25088, out_features=4096, bias=True)
(1): ReLU(inplace=True)
(2): Dropout(p=0.5, inplace=False)
(3): Linear(in_features=4096, out_features=4096, bias=True)
(4): ReLU(inplace=True)
(5): Dropout(p=0.5, inplace=False)
(6): Linear(in_features=4096, out_features=1000, bias=True)
)
(add_linear): Linear(in_features=1000, out_features=10, bias=True)
)

方式二

想要加到其中的classifier里面

1
# Append the 1000 -> 10 linear layer inside the existing `classifier` sub-module rather than at the top level.
vgg16_true.classifier.add_module('add_linear',nn.Linear(1000,10))

实例二:resnet18

获取网络中的模型,通过Sequential完成新模型的组建

1
2
3
4
5
6
7
from torchvision.models import resnet18 ## fetch the model from torchvision
trained_model = resnet18(pretrained=True)
# `*` unpacks the list so every child module is passed to Sequential as its own argument
model = nn.Sequential(*list(trained_model.children())[:-1],# all children except the last one; output [b,512,1,1]
nn.Flatten(),# [b,512,1,1]=>[b,512]
nn.Linear(512,5)
).to(device)

模型保存和读取

方式一

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
import  torchvision
import torch

# Model
vgg16 = torchvision.models.vgg16(pretrained=False)

# ## Save method 1: model structure + model parameters
# ### Arguments: model, path

torch.save(vgg16,"vgg16_modelsave_1.pth")

### Load method 1 (pairs with save method 1)
## Argument: path
# NOTE(review): this loads a whole pickled model object; newer PyTorch releases may need
# weights_only=False for this call -- confirm against the installed version.
model = torch.load("vgg16_modelsave_1.pth")
print(model)

方式二(推荐)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
import  torchvision
import torch

# Model
vgg16 = torchvision.models.vgg16(pretrained=False)
# Save method 2: model parameters only (officially recommended)
## Arguments: model.state_dict(), path
## 527M (presumably the size of the saved file)
torch.save(vgg16.state_dict(),"vgg16_modelsave_2.pth")

## Load method 2 (pairs with save method 2)
# Rebuild the architecture first, then load the saved parameters into it.
vgg16 = torchvision.models.vgg16(pretrained=False)
vgg16.load_state_dict(torch.load("vgg16_modelsave_2.pth"))
print(vgg16)

方式一的报错实例

报错过程:

  1. a.py中创建模型,并使用torch.save(model, "model path")

  2. b.py中加载该模型:model = torch.load("model path")

解决方案:要让该文件能访问到该模型的定义。

  • 在直接加载前重新定义一下该模型

  • 或者 from model_save import * 即在头文件import一下该模型定义文件

模型训练

随机梯度下降

详细理论

损失函数

通过损失函数计算经过训练的模型其预测结果与真实值的误差,loss越小说明二者越接近。

通过梯度下降搜索极值点。计算loss,通过loss.backward()反向传播进行自动求导获得梯度。

对于模型而言,变化的是各个权重参数w。

CrossEntropyLoss

使用实例

1
2
3
4
5
6
7
8
9
import  torch
from torch import nn
# Scores of one sample over three classes, reshaped to (batch=1, classes=3).
x = torch.tensor([0.1, 0.8, 0.1]).reshape(1, 3)
# Index of the correct class for that sample.
y = torch.tensor([1])

# Cross-entropy between the scores and the target class index.
loss_cross = nn.CrossEntropyLoss()
result_cross = loss_cross(x, y)
print(result_cross)

优化器

决定梯度下降的方式。

使用主要关联四行代码:

  • optim = torch.optim.SGD(model.parameters(),lr=0.01) (SGD为一种优化器)

epoch 内

  • optim.zero_grad() ##将优化器梯度清零,每一次循环注意清零
  • result_loss.backward()
  • optim.step()

训练模型

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
for i in range(epoch):
    print("----epoch {} starting----".format(i))
    # Put the model in training mode for this epoch.
    model.train()
    # Training step starts here.
    for data in train_dataloader:
        imgs, targets = data
        outputs = model(imgs)
        loss = loss_fn(outputs, targets)

        # Clear stale gradients, back-propagate, then update the weights.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_train_step += 1
        # Report the loss every 100 batches.
        if total_train_step % 100 == 0:
            print("batch {}---loss:{}".format(total_train_step, loss.item()))


模型测试评估

将测试集传入训练好的模型得到预测输出。

将预测输出和测试集的label进行比较以评估该模型。

有标签的评估指标有:

  • 准确率
  • 召回率
  • 精确率
  • F1-score

另外,如ROC曲线,AUC,AP等也是常见的评估

自定义数据集实战

数据预处理与加载

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
# -*- coding:UTF-8 -*-


import torch
import os,glob
import random,csv
import time
from torch.utils.data import Dataset,DataLoader
from PIL import Image
from torchvision import transforms

class Pokemon(Dataset):
    """Image-folder dataset: one sub-directory per class under ``root``.

    The (image_path, label) pairs are cached in ``<root>/image.csv`` in a
    shuffled order, and the train/val/test splits are 60% / 20% / 20% slices
    of that cached list. Delete ``image.csv`` to regenerate it after the
    class folders change.
    """

    def __init__(self, root, resize, mode):
        """
        :param root: dataset root directory; each sub-directory holds the
            images of one class and the directory name is the class name
        :param resize: side length of the square image returned by __getitem__
        :param mode: 'train', 'val' or 'test'; any other value keeps all samples
        """
        super().__init__()
        self.root = root
        self.resize = resize

        # Map class name -> integer label, numbered in sorted directory order.
        self.name2label = {}
        for name in sorted(os.listdir(root)):
            # Hidden folders such as '.ipynb_checkpoints' are not classes;
            # counting them would shift every label by one.
            if name.startswith('.'):
                continue
            if not os.path.isdir(os.path.join(root, name)):
                continue
            self.name2label[name] = len(self.name2label)
        print(self.name2label)

        # Read the image_path-label pairs (the csv is created on first use).
        self.images, self.labels = self.load_csv("image.csv")

        # Split the dataset.
        total = len(self.images)
        train_end = int(0.6 * total)
        val_end = int(0.8 * total)
        if mode == 'train':  # first 60%
            self.images = self.images[:train_end]
            self.labels = self.labels[:train_end]
        elif mode == 'val':  # next 20%
            self.images = self.images[train_end:val_end]
            self.labels = self.labels[train_end:val_end]
        elif mode == 'test':  # last 20%
            self.images = self.images[val_end:]
            self.labels = self.labels[val_end:]

    def load_csv(self, filename):
        """Create the image_path-label csv if needed, then load it.

        :param filename: csv file name, relative to the dataset root
        :return: (images, labels) -- list of image paths and list of int labels
        """
        savepath = os.path.join(self.root, filename)
        if not os.path.exists(savepath):
            images = []
            for name in self.name2label:
                # Collect the image files of this class folder.
                for pattern in ('*.png', '*.jpg', '*.gif'):
                    images += glob.glob(os.path.join(self.root, name, pattern))
            random.shuffle(images)
            with open(savepath, mode='w', newline='') as f:
                writer = csv.writer(f)
                for img in images:
                    # The parent directory name is the class name; this works
                    # whichever path separator appears in the path.
                    name = os.path.basename(os.path.dirname(img))
                    writer.writerow([img, self.name2label[name]])
            print('written into csv file:', savepath)

        # Load from the csv file.
        images, labels = [], []
        with open(savepath, newline='') as f:
            for row in csv.reader(f):
                if not row:  # tolerate blank lines
                    continue
                img, label = row
                images.append(img)
                labels.append(int(label))

        return images, labels

    def __len__(self):
        """
        :return: number of samples in the selected split
        """
        return len(self.images)

    def __getitem__(self, item):
        """
        :param item: sample index in [0, len(self))
        :return: (image tensor shaped [3, resize, resize], label as a 0-dim tensor)
        """
        path, label = self.images[item], self.labels[item]

        tf = transforms.Compose([
            transforms.Resize((int(self.resize * 1.25), int(self.resize * 1.25))),
            transforms.RandomRotation(15),
            transforms.CenterCrop(self.resize),
            transforms.ToTensor(),
            # Mean/std computed on ImageNet; reused here for this dataset.
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])

        # Open inside a context manager so the file handle is closed promptly;
        # convert() returns a fully loaded RGB copy.
        with Image.open(path) as raw:
            img = raw.convert('RGB')

        img = tf(img)
        label = torch.tensor(label)
        return img, label


# Backward-compatible alias for the original (misspelled) class name.
Pokemen = Pokemon
# Quick check: build the training split and look at the shapes of one sample.
db = Pokemon('./pokeman', 224, 'train')
x,y = next(iter(db))
print(x.shape,y.shape)
{'.ipynb_checkpoints': 0, 'bulbasaur': 1, 'charmander': 2, 'mewtwo': 3, 'pikachu': 4, 'squirtle': 5}
written into csv file:  ./pokeman\image.csv
torch.Size([3, 224, 224]) torch.Size([])

数据集加载:DataLoader

1
2
3
4
5
6
7
import  visdom
import torchvision
import os
# Build the training split and look at the shapes of one sample.
db = Pokemon('./pokeman', 224, 'train')
x,y = next(iter(db))
print(x.shape,y.shape)
# Wrap the dataset in a DataLoader: shuffled batches of 32 samples.
loader = DataLoader(db,batch_size=32,shuffle=True)

网络创建

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
#Resnet.py
import torch
from torch import nn
from torch.nn import functional as F
class ResBlk(nn.Module):
    """Residual block: two 3x3 conv + batch-norm layers plus a shortcut connection."""

    def __init__(self, ch_in, ch_out, stride=1):
        """
        :param ch_in: number of input channels
        :param ch_out: number of output channels
        :param stride: stride of the first convolution and of the shortcut projection
        """
        super().__init__()
        self.conv1 = nn.Conv2d(ch_in, ch_out, kernel_size=3, stride=stride, padding=1)
        self.bn1 = nn.BatchNorm2d(ch_out)
        self.conv2 = nn.Conv2d(ch_out, ch_out, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(ch_out)

        # Identity shortcut by default (an empty Sequential returns its input).
        self.extra = nn.Sequential()

        # A projection is needed whenever the main path changes the shape:
        # a different channel count OR a spatial down-sampling (stride != 1).
        if stride != 1 or ch_out != ch_in:
            self.extra = nn.Sequential(
                nn.Conv2d(ch_in, ch_out, kernel_size=1, stride=stride),
                nn.BatchNorm2d(ch_out),
            )

    def forward(self, x):
        """
        :param x: input shaped [b, ch_in, h, w]
        :return: output shaped [b, ch_out, h', w'] (h', w' depend on the stride)
        """
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        # Shortcut: project x to the shape of `out`, then add element-wise.
        out = self.extra(x) + out
        return out


class ResNet18(nn.Module):
    """Small ResNet-style classifier: a stem convolution, four residual blocks and a linear head."""

    def __init__(self, num_class):
        """
        :param num_class: number of output classes
        """
        super().__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=3, stride=3, padding=0),
            nn.BatchNorm2d(16),
        )
        # Followed by 4 blocks; every block widens the channels and down-samples.
        # [b,16,h,w] => [b,32,h',w']
        self.blk1 = ResBlk(16, 32, stride=3)
        # [b,32,h,w] => [b,64,h',w']
        self.blk2 = ResBlk(32, 64, stride=3)
        # [b,64,h,w] => [b,128,h',w']
        self.blk3 = ResBlk(64, 128, stride=2)
        # [b,128,h,w] => [b,256,h',w']
        self.blk4 = ResBlk(128, 256, stride=2)
        self.flatten = nn.Flatten()
        # The head consumes a 256-channel 3x3 feature map.
        self.outlayer = nn.Linear(256 * 3 * 3, num_class)

    def forward(self, x):
        """
        :param x: batch of images shaped [b, 3, h, w]
        :return: class scores shaped [b, num_class]
        """
        x = F.relu(self.conv1(x))
        # [b,16,h,w] => [b,256,h',w']
        x = self.blk1(x)
        x = self.blk2(x)
        x = self.blk3(x)
        x = self.blk4(x)
        # Pool to the 3x3 map the head expects. For 224x224 inputs the map is
        # already 3x3, so this is the identity there; other input sizes now
        # work instead of failing in the linear layer.
        x = F.adaptive_max_pool2d(x, [3, 3])
        x = self.flatten(x)
        x = self.outlayer(x)

        return x


def main():
    """Smoke-test ResBlk and ResNet18 on random input and print shapes and parameter count."""
    blk = ResBlk(64, 128)
    tmp = torch.rand(2, 64, 224, 224)
    out = blk(tmp)
    print('block', out.shape)

    x = torch.rand(2, 3, 224, 224)
    model = ResNet18(5)
    out = model(x)
    print(out.shape)

    # Total number of elements over all parameter tensors.
    p = sum(map(lambda p: p.numel(), model.parameters()))
    print('parameters size:', p)


if __name__ == '__main__':
    main()

Train and test

1
2
3
4
5
6
7
8
9
10
11
12
for epoch in range(epochs):
    train(train_db)
    if epoch%10==0:
        val_acc = evaluate(val_db)

        if val_acc is the best:
            save_ckpt()
        if out_of_patience():
            break

load_ckpt()# checkpoint model
test_acc = evaluate(test_db)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
#train_scratch.py
import torch
from torch import optim,nn
import torchvision
from torch.utils.data import DataLoader

from pokeman import Pokemon
from Resnet import ResNet18

batchsz = 32
lr = 1e-2
epochs = 10

# Prefer the GPU, but fall back to the CPU so the script still runs without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
torch.manual_seed(1234)  # fixed random seed


# The three splits are read from the same dataset root.
train_db = Pokemon('pokeman', 224, mode='train')
val_db = Pokemon('pokeman', 224, mode='val')
test_db = Pokemon('pokeman', 224, mode='test')
train_loader = DataLoader(train_db, batch_size=batchsz, shuffle=True, num_workers=4)
val_loader = DataLoader(val_db, batch_size=batchsz, shuffle=True, num_workers=2)
test_loader = DataLoader(test_db, batch_size=batchsz, shuffle=True, num_workers=2)

def evaluate(model, loader):
    """Return the classification accuracy of ``model`` over ``loader``.

    :param model: network returning class scores shaped [b, num_class]
    :param loader: DataLoader yielding (inputs, labels) batches
    :return: fraction of correctly classified samples (0.0 for an empty dataset)
    """
    total = len(loader.dataset)
    if total == 0:
        return 0.0

    # Evaluate in eval mode so BatchNorm/Dropout behave deterministically,
    # then restore whatever mode the caller had set.
    was_training = model.training
    model.eval()
    correct = 0
    try:
        with torch.no_grad():
            for x, y in loader:
                # `device` is the module-level device the model lives on.
                x, y = x.to(device), y.to(device)
                pred = model(x).argmax(dim=1)
                correct += torch.eq(pred, y).sum().item()
    finally:
        model.train(was_training)
    return correct / total


def main():
    """Train ResNet18 from scratch, keep the best checkpoint by validation accuracy, then test it."""
    model = ResNet18(5).to(device)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    criteon = nn.CrossEntropyLoss()

    best_acc, best_epoch = 0, 0
    for epoch in range(epochs):
        # Make sure the model is in training mode at the start of every epoch.
        model.train()
        for img, label in train_loader:
            # img: [b,3,224,224], label: [b]
            img, label = img.to(device), label.to(device)

            logits = model(img)
            loss = criteon(logits, label)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # Validate every epoch and checkpoint whenever the accuracy improves.
        if epoch % 1 == 0:
            val_acc = evaluate(model, val_loader)
            if val_acc > best_acc:
                best_epoch = epoch
                best_acc = val_acc

                torch.save(model.state_dict(), 'best.mdl')
            # This script never creates a visdom client, so the validation
            # accuracy is reported on stdout instead of being plotted.
            print('epoch', epoch, 'val acc', val_acc)
    print('best acc:', best_acc, 'best epoch', best_epoch)

    # Evaluate the best checkpoint, not the weights of the last epoch.
    model.load_state_dict(torch.load('best.mdl'))
    print('loaded from checkpoint!')

    test_acc = evaluate(model, test_loader)
    print('test acc', test_acc)


if __name__ == '__main__':
    main()

best acc: 0.8583690987124464 best epoch 5

test acc 0.8497854077253219

optional: 迁移学习

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
import  torch

from torch import optim,nn
import visdom

import torchvision
from torch.utils.data import DataLoader

from pokeman import Pokemon
from torchvision.models import resnet18 ##从网络获取模型

batchsz = 32
lr = 1e-2
epochs = 10

# Prefer the GPU, but fall back to the CPU so the script still runs without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
torch.manual_seed(1234)  # fixed random seed


# The three splits are read from the same dataset root.
train_db = Pokemon('pokeman', 224, mode='train')
val_db = Pokemon('pokeman', 224, mode='val')
test_db = Pokemon('pokeman', 224, mode='test')
train_loader = DataLoader(train_db, batch_size=batchsz, shuffle=True, num_workers=4)
val_loader = DataLoader(val_db, batch_size=batchsz, shuffle=True, num_workers=2)
test_loader = DataLoader(test_db, batch_size=batchsz, shuffle=True, num_workers=2)

# Visdom client used for the loss / val_acc curves.
viz = visdom.Visdom()

def evaluate(model, loader):
    """Return the classification accuracy of ``model`` over ``loader``.

    :param model: network returning class scores shaped [b, num_class]
    :param loader: DataLoader yielding (inputs, labels) batches
    :return: fraction of correctly classified samples (0.0 for an empty dataset)
    """
    total = len(loader.dataset)
    if total == 0:
        return 0.0

    # Evaluate in eval mode so BatchNorm/Dropout behave deterministically,
    # then restore whatever mode the caller had set.
    was_training = model.training
    model.eval()
    correct = 0
    try:
        with torch.no_grad():
            for x, y in loader:
                # `device` is the module-level device the model lives on.
                x, y = x.to(device), y.to(device)
                pred = model(x).argmax(dim=1)
                # Accumulate over batches; a plain assignment here would keep
                # only the hits of the last batch.
                correct += torch.eq(pred, y).sum().item()
    finally:
        model.train(was_training)
    return correct / total


def main():
    """Fine-tune a pretrained torchvision ResNet-18 on the 5-class dataset and test the best checkpoint."""
    # model = ResNet18(5).to(device)
    trained_model = resnet18(pretrained=True)
    # `*` unpacks the child modules; the last child is replaced by a new 5-way linear head.
    model = nn.Sequential(
        *list(trained_model.children())[:-1],  # [b,512,1,1]
        nn.Flatten(),  # [b,512,1,1] => [b,512]
        nn.Linear(512, 5),
    ).to(device)

    optimizer = optim.Adam(model.parameters(), lr=lr)
    criteon = nn.CrossEntropyLoss()

    best_acc, best_epoch = 0, 0
    # Create the two visdom windows with a starting point.
    viz.line([0], [-1], win='loss', opts=dict(title='loss'))
    viz.line([0], [-1], win='val_acc', opts=dict(title='val_acc'))
    global_step = 0
    for epoch in range(epochs):
        # Make sure the model is in training mode at the start of every epoch.
        model.train()
        for img, label in train_loader:
            # img: [b,3,224,224], label: [b]
            img, label = img.to(device), label.to(device)

            logits = model(img)
            loss = criteon(logits, label)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            viz.line([loss.item()], [global_step], win='loss', update='append')
            global_step += 1

        # Validate every epoch and checkpoint whenever the accuracy improves.
        if epoch % 1 == 0:
            val_acc = evaluate(model, val_loader)
            if val_acc > best_acc:
                best_epoch = epoch
                best_acc = val_acc

                torch.save(model.state_dict(), 'best.mdl')
            # Plot against the current epoch (not the constant `epochs`),
            # otherwise every point lands on the same x position.
            viz.line([val_acc], [epoch], win='val_acc', update='append')
    print('best acc:', best_acc, 'best epoch', best_epoch)

    # Evaluate the best checkpoint, not the weights of the last epoch.
    model.load_state_dict(torch.load('best.mdl'))
    print('loaded from checkpoint!')

    test_acc = evaluate(model, test_loader)
    print('test acc', test_acc)


if __name__ == '__main__':
    main()

best acc: 0.8412017167381974 best epoch 8

test acc 0.8025751072961373