Handwritten Digit Recognition

This handwritten digit recognition project uses the PyTorch library. There are two implementations: the first is a simple (fully connected) neural network, and the second is a convolutional neural network.

Implementing a Simple Neural Network

Loading the libraries

# Load the required libraries
import torch
# The nn module contains the building blocks for models
import torch.nn as nn
# nn.functional contains loss functions, activation functions, etc.
import torch.nn.functional as F
# Optimizers
import torch.optim as optim
# Data loading and preprocessing
from torchvision import datasets, transforms

Defining the hyperparameters

# Define the hyperparameters
# Parameters: the θ in a model f(x, θ); they are learned by the optimization algorithm
# Hyperparameters: values that define the model structure or the optimization strategy
BATCH_SIZE = 16 # number of samples processed per batch
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") # compute on the GPU if available, otherwise on the CPU
EPOCHS = 10 # number of passes over the training data

Image preprocessing:

# Build a pipeline that preprocesses the images
pipeline = transforms.Compose([
    transforms.ToTensor(),                     # convert the image to a tensor
    transforms.Normalize((0.1307,), (0.3081,)) # standardize with the MNIST mean and std
])
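
The constants 0.1307 and 0.3081 are the mean and standard deviation of the MNIST training pixels. As a quick check (a sketch; it downloads the dataset itself, the same download is done again in the next step), they can be recomputed like this:

import torch
from torchvision import datasets, transforms

raw_train = datasets.MNIST("data", train=True, download=True,
                           transform=transforms.ToTensor())
# Stack all 60000 images into one (60000, 1, 28, 28) tensor and compute the statistics
pixels = torch.stack([img for img, _ in raw_train])
print(pixels.mean().item(), pixels.std().item())  # roughly 0.1307 and 0.3081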

Downloading the dataset

# Download the dataset
from torch.utils.data import DataLoader

# Download MNIST via datasets.MNIST and apply the preprocessing defined in pipeline
train_set = datasets.MNIST("data", train=True, download=True, transform=pipeline)
test_set = datasets.MNIST("data", train=False, download=True, transform=pipeline)

# Wrap the datasets in DataLoader so the data is read one BATCH at a time; shuffle=True randomizes the order
train_loader = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_set, batch_size=BATCH_SIZE, shuffle=True)

Let's first look at what a sample in train_set looks like:

for data, target in train_set:
    print(data)
    print(target)
    print(len(data))
    break

The output is:

tensor([[[-0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242,
-0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242,
-0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242,
-0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242],
[-0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242,
-0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242,
-0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242,
-0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242],
.
.
.
[-0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242,
-0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242,
-0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242,
-0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242],
[-0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242,
-0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242,
-0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242,
-0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242],
[-0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242,
-0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242,
-0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242,
-0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242]]])
# the digit this sample represents
5
# len(data): the sample has a single channel
1
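
Why is the background value −0.4242? The raw background pixels are 0, and Normalize maps each pixel x to (x − 0.1307) / 0.3081, so the background becomes (0 − 0.1307) / 0.3081 ≈ −0.4242.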

Now let's look at the data format after it has gone through the DataLoader:

for data, target in train_loader:
    print(data)
    print(target)
    print(len(data))
    break

The output is shown below. Each data batch now contains 16 samples, the BATCH_SIZE we set earlier:

tensor([[[[-0.4242, -0.4242, -0.4242,  ..., -0.4242, -0.4242, -0.4242],
[-0.4242, -0.4242, -0.4242, ..., -0.4242, -0.4242, -0.4242],
[-0.4242, -0.4242, -0.4242, ..., -0.4242, -0.4242, -0.4242],
...,
[-0.4242, -0.4242, -0.4242, ..., -0.4242, -0.4242, -0.4242],
[-0.4242, -0.4242, -0.4242, ..., -0.4242, -0.4242, -0.4242],
[-0.4242, -0.4242, -0.4242, ..., -0.4242, -0.4242, -0.4242]]],


[[[-0.4242, -0.4242, -0.4242, ..., -0.4242, -0.4242, -0.4242],
[-0.4242, -0.4242, -0.4242, ..., -0.4242, -0.4242, -0.4242],
[-0.4242, -0.4242, -0.4242, ..., -0.4242, -0.4242, -0.4242],
...,
[-0.4242, -0.4242, -0.4242, ..., -0.4242, -0.4242, -0.4242],
[-0.4242, -0.4242, -0.4242, ..., -0.4242, -0.4242, -0.4242],
[-0.4242, -0.4242, -0.4242, ..., -0.4242, -0.4242, -0.4242]]],


[[[-0.4242, -0.4242, -0.4242, ..., -0.4242, -0.4242, -0.4242],
[-0.4242, -0.4242, -0.4242, ..., -0.4242, -0.4242, -0.4242],
[-0.4242, -0.4242, -0.4242, ..., -0.4242, -0.4242, -0.4242],
...,
[-0.4242, -0.4242, -0.4242, ..., -0.4242, -0.4242, -0.4242],
[-0.4242, -0.4242, -0.4242, ..., -0.4242, -0.4242, -0.4242],
[-0.4242, -0.4242, -0.4242, ..., -0.4242, -0.4242, -0.4242]]],


...,


[[[-0.4242, -0.4242, -0.4242, ..., -0.4242, -0.4242, -0.4242],
[-0.4242, -0.4242, -0.4242, ..., -0.4242, -0.4242, -0.4242],
[-0.4242, -0.4242, -0.4242, ..., -0.4242, -0.4242, -0.4242],
...,
[-0.4242, -0.4242, -0.4242, ..., -0.4242, -0.4242, -0.4242],
[-0.4242, -0.4242, -0.4242, ..., -0.4242, -0.4242, -0.4242],
[-0.4242, -0.4242, -0.4242, ..., -0.4242, -0.4242, -0.4242]]],


[[[-0.4242, -0.4242, -0.4242, ..., -0.4242, -0.4242, -0.4242],
[-0.4242, -0.4242, -0.4242, ..., -0.4242, -0.4242, -0.4242],
[-0.4242, -0.4242, -0.4242, ..., -0.4242, -0.4242, -0.4242],
...,
[-0.4242, -0.4242, -0.4242, ..., -0.4242, -0.4242, -0.4242],
[-0.4242, -0.4242, -0.4242, ..., -0.4242, -0.4242, -0.4242],
[-0.4242, -0.4242, -0.4242, ..., -0.4242, -0.4242, -0.4242]]],


[[[-0.4242, -0.4242, -0.4242, ..., -0.4242, -0.4242, -0.4242],
[-0.4242, -0.4242, -0.4242, ..., -0.4242, -0.4242, -0.4242],
[-0.4242, -0.4242, -0.4242, ..., -0.4242, -0.4242, -0.4242],
...,
[-0.4242, -0.4242, -0.4242, ..., -0.4242, -0.4242, -0.4242],
[-0.4242, -0.4242, -0.4242, ..., -0.4242, -0.4242, -0.4242],
[-0.4242, -0.4242, -0.4242, ..., -0.4242, -0.4242, -0.4242]]]])
tensor([9, 1, 1, 3, 4, 9, 0, 4, 2, 7, 4, 1, 0, 5, 3, 7])
16
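
Instead of printing the full tensors, a more compact sanity check (sketch) is to print just the shapes of one batch:

data, target = next(iter(train_loader))
print(data.shape)    # torch.Size([16, 1, 28, 28]): BATCH_SIZE images, 1 channel, 28 * 28 pixels
print(target.shape)  # torch.Size([16]): one label per image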

Displaying the data

Read one image from the raw files we just downloaded:

with open("./data/mnist/raw/train-images-idx3-ubyte", "rb") as f:
    file = f.read()

Print the pixel values as a 28 × 28 grid:

# In Python 3, iterating over bytes already yields integers, so the first image's
# 784 pixel values can be read directly (the file starts with a 16-byte header)
image1 = [item for item in file[16:16 + 784]]
m = 0
for i in image1:
    m += 1
    print("%3d" % (i), end="")
    if m % 28 == 0:
        print()

image-20220719152703501

Displaying the image:

import numpy as np
import matplotlib.pyplot as plt

# Reshape the flat pixel list into a 28 * 28 array, the size of one image
image1_np = np.array(image1, dtype=np.uint8).reshape(28, 28)
# Show the image with imshow
plt.imshow(image1_np, cmap="gray")

image-20220719153120794
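
As an alternative sketch (assuming the train_set loaded earlier is still in scope), the same image can be displayed directly from the dataset without parsing the raw IDX file by hand:

img, label = train_set[0]                  # img is a normalized tensor of shape (1, 28, 28)
plt.imshow(img.squeeze(), cmap="gray")     # imshow rescales the values, so the normalization does not matter here
plt.title("label = {}".format(label))
plt.show()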

Building the neural network

We build the network following the architecture shown below:

image-20220719153349262

# Network model 1: a plain fully connected neural network
# Create a class that inherits from nn.Module

class Digit1(nn.Module):
    def __init__(self):
        super().__init__()
        # hidden layer 1 and its weight matrix
        self.hidden1 = nn.Linear(784, 128)
        # hidden layer 2 and its weight matrix
        self.hidden2 = nn.Linear(128, 256)
        # the input image belongs to one of 10 classes, so the output size is 10
        self.out = nn.Linear(256, 10)

    # Define the forward pass
    def forward(self, x):
        # An activation function is applied between layers to introduce non-linearity.
        # The data arrives as n * channels * width * height; a fully connected network
        # needs flat inputs, so each image is first reshaped into a 784-dimensional vector.
        x = x.reshape(-1, 784)
        x = F.relu(self.hidden1(x))
        x = F.relu(self.hidden2(x))
        x = self.out(x)
        return x
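
A quick sanity check (sketch) is to push a dummy batch through the network and confirm the output has one score per class:

model_check = Digit1()
dummy = torch.randn(4, 1, 28, 28)   # a fake batch of 4 grayscale 28 * 28 images
print(model_check(dummy).shape)     # expected: torch.Size([4, 10])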

Defining the optimizer

# Define the model and the optimizer
model = Digit1().to(DEVICE)
optimizer = optim.Adam(model.parameters())

Defining the training and testing methods

# Define the training method
def train_model(model, device, train_loader, optimizer, epoch):
    # Put the model into training mode before training
    model.train()
    # Iterate over the training data
    for batch_index, (data, target) in enumerate(train_loader):
        # Move the batch to DEVICE
        data, target = data.to(device), target.to(device)
        # Zero the gradients
        optimizer.zero_grad()
        # Forward pass
        output = model(data)
        # Compute the loss (the difference between predictions and true labels)
        loss = F.cross_entropy(output, target)
        # Backpropagation
        loss.backward()
        # Update the parameters
        optimizer.step()

        # Print the loss every 3000 batches
        if batch_index % 3000 == 0:
            print("Train Epoch: {}\t Loss: {:.6f}".format(epoch, loss.item()))


# Define the testing method
def test_model(model, device, test_loader):
    # Put the model into evaluation mode before validation
    model.eval()
    # Number of correct predictions
    correct = 0.0
    # Accumulated test loss
    test_loss = 0.0
    # The model is already trained, so no gradients or backpropagation are needed here
    with torch.no_grad():  # disables gradient computation
        for data, target in test_loader:
            # Move the batch to device
            data, target = data.to(device), target.to(device)
            # Forward pass on the test data
            output = model(data)
            # Accumulate the test loss
            test_loss += F.cross_entropy(output, target).item()
            # Index of the largest value, i.e. the predicted class
            pred = output.max(1, keepdim=True)[1]

            # Count the correct predictions
            correct += pred.eq(target.view_as(pred)).sum().item()
        # Average the test loss over the dataset
        test_loss /= len(test_loader.dataset)
        print("Test - Average loss : {:.4f}, Accuracy : {:.3f}\n".format(
            test_loss, 100.0 * correct / len(test_loader.dataset)))
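
The two-line prediction/accuracy logic in test_model is easiest to see on a tiny example (sketch with made-up numbers):

logits = torch.tensor([[0.1, 2.0, -1.0],
                       [1.5, 0.2,  0.3]])
target = torch.tensor([1, 2])
pred = logits.max(1, keepdim=True)[1]          # index of the largest logit per row -> tensor([[1], [0]])
correct = pred.eq(target.view_as(pred)).sum()  # only the first prediction matches -> tensor(1)
print(pred.squeeze(), correct.item())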

Running training and evaluation

for epoch in range(1, EPOCHS + 1):
    train_model(model, DEVICE, train_loader, optimizer, epoch)
    test_model(model, DEVICE, test_loader)

The training output is:

Train Epoch: 1	 Loss: 0.046003
Train Epoch: 1 Loss: 0.026450
Test - Average loss : 0.0066, Accuracy : 96.790

Train Epoch: 2 Loss: 0.265133
Train Epoch: 2 Loss: 0.016022
Test - Average loss : 0.0073, Accuracy : 96.610

Train Epoch: 3 Loss: 0.456771
Train Epoch: 3 Loss: 0.009684
Test - Average loss : 0.0063, Accuracy : 97.180

Train Epoch: 4 Loss: 0.030720
Train Epoch: 4 Loss: 0.016792
Test - Average loss : 0.0059, Accuracy : 97.470

Train Epoch: 5 Loss: 0.007621
Train Epoch: 5 Loss: 0.072565
Test - Average loss : 0.0063, Accuracy : 97.490

Train Epoch: 6 Loss: 0.325914
Train Epoch: 6 Loss: 0.000457
Test - Average loss : 0.0092, Accuracy : 96.870

Train Epoch: 7 Loss: 0.054087
Train Epoch: 7 Loss: 0.010863
Test - Average loss : 0.0070, Accuracy : 97.730

Train Epoch: 8 Loss: 0.029681
Train Epoch: 8 Loss: 0.000008
Test - Average loss : 0.0072, Accuracy : 97.560

Train Epoch: 9 Loss: 0.001351
Train Epoch: 9 Loss: 0.106437
Test - Average loss : 0.0076, Accuracy : 97.640

Train Epoch: 10 Loss: 0.191070
Train Epoch: 10 Loss: 0.001624
Test - Average loss : 0.0078, Accuracy : 97.820

After 10 epochs the final test accuracy reaches 97.82%, which is not especially high; training for more epochs can push it further.

Implementing a Convolutional Neural Network

The difference from the simple network is that a convolutional neural network consumes the image itself rather than a flat feature vector, so no reshape is needed before feeding data into the network, as the short sketch below shows.
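
A minimal sketch of that difference in terms of tensor shapes:

batch = torch.randn(16, 1, 28, 28)   # a fake batch as produced by the DataLoader
flat = batch.reshape(-1, 784)        # what the fully connected Digit1 works on: torch.Size([16, 784])
print(flat.shape, batch.shape)       # the CNN below consumes the (16, 1, 28, 28) batch as-is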

Loading the libraries

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline

First, load the data

  • Build the training set and the test set (validation set) separately
  • Use DataLoader to iterate over the data in batches
# Define the hyperparameters
input_size = 28   # the image is 28 * 28
num_classes = 10  # number of label classes
num_epochs = 3    # number of training epochs
batch_size = 64   # batch size: 64 images per batch

# Training set
train_dataset = datasets.MNIST(root="./data",
                               train=True,
                               transform=transforms.ToTensor(),
                               download=True)

# Test set
test_dataset = datasets.MNIST(root="./data",
                              train=False,
                              transform=transforms.ToTensor())

# Build the batch iterators
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=True)

DataLoader plays the same role here as in the simple neural network above.

Building the convolutional neural network

  • A convolution layer, a ReLU layer, and a pooling layer are usually grouped into one block
  • Note that the result of the convolutions is still a feature map; it has to be flattened into a vector before it can be used for classification or regression
  • nn.Sequential() builds a Sequential container; modules are added in the order they are passed to the constructor

Let's look at the difference between building a network with and without nn.Sequential().

Without nn.Sequential() the network is built like this:

import torch
import torch.nn as nn
import torch.nn.functional as F  # needed for F.relu below

class Net(nn.Module):
    def __init__(self, n_feature, n_hidden, n_output):
        super(Net, self).__init__()
        self.hidden = nn.Linear(n_feature, n_hidden)
        self.predict = nn.Linear(n_hidden, n_output)

    def forward(self, x):
        x = F.relu(self.hidden(x))  # ReLU applied after the hidden layer
        x = self.predict(x)
        return x

model_1 = Net(1, 10, 1)
print(model_1)
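
print(model_1) should produce roughly the following; note that the ReLU does not appear, because it is only applied inside forward():

Net(
  (hidden): Linear(in_features=1, out_features=10, bias=True)
  (predict): Linear(in_features=10, out_features=1, bias=True)
)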

With nn.Sequential() the network looks like this:

import torch
import torch.nn as nn

class Net(nn.Module):
    def __init__(self, n_feature, n_hidden, n_output):
        super(Net, self).__init__()
        self.net_1 = nn.Sequential(
            nn.Linear(n_feature, n_hidden),
            nn.ReLU(),
            nn.Linear(n_hidden, n_output)
        )

    def forward(self, x):
        x = self.net_1(x)
        return x

model_2 = Net(1, 10, 1)
print(model_2)
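
print(model_2), by contrast, should show the ReLU registered inside the Sequential container, roughly:

Net(
  (net_1): Sequential(
    (0): Linear(in_features=1, out_features=10, bias=True)
    (1): ReLU()
    (2): Linear(in_features=10, out_features=1, bias=True)
  )
)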

With torch.nn.Sequential the activation function is registered as part of the container, whereas in model_1 the activation is only applied inside forward() and does not show up as a module. That is the main convenience of using Sequential.

Below we use nn.Sequential to build the CNN:
# As before, a network model must inherit from nn.Module
class CNN(nn.Module):
    def __init__(self):
        super().__init__()

        # First block
        self.conv1 = nn.Sequential(       # input size: 1 * 28 * 28
            # Convolution layer
            nn.Conv2d(
                in_channels=1,    # grayscale image, number of input feature maps
                out_channels=16,  # number of output feature maps
                kernel_size=5,    # kernel size
                stride=1,         # stride
                padding=2,        # to keep the output the same size as the input, set padding=(kernel_size-1)/2 when stride=1
            ),                    # output feature maps: (16, 28, 28), since 28 = 28 - 5 + 2*2 + 1

            # ReLU activation for the non-linear transformation
            nn.ReLU(),

            # Max pooling reduces the spatial size, saving computation and making the extracted features more robust
            nn.MaxPool2d(kernel_size=2)   # pool over 2 * 2 regions; output: (16, 14, 14)
        )

        # Second block
        self.conv2 = nn.Sequential(       # input to this block: (16, 14, 14)
            # Convolution layer
            nn.Conv2d(16, 32, 5, 1, 2),   # output: (32, 14, 14)
            # ReLU activation
            nn.ReLU(),
            # Max pooling
            nn.MaxPool2d(2)               # output: (32, 7, 7)
        )

        # Fully connected layer producing the final predictions
        self.out = nn.Linear(32 * 7 * 7, 10)


    # Define the forward pass
    def forward(self, x):
        # First block
        x = self.conv1(x)

        # Second block
        x = self.conv2(x)

        # The convolutions work on image-shaped tensors, so before the fully connected layer
        # the feature maps have to be flattened into one-dimensional vectors
        x = x.view(x.size(0), -1)   # flatten; result: (batch_size, 32*7*7)
        output = self.out(x)
        return output
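
The output sizes in the comments follow the usual formula (W − K + 2P) / S + 1, e.g. (28 − 5 + 2·2) / 1 + 1 = 28 for conv1. A quick shape check (sketch) confirms them:

cnn_check = CNN()
dummy = torch.randn(2, 1, 28, 28)
x1 = cnn_check.conv1(dummy)
x2 = cnn_check.conv2(x1)
print(x1.shape)                # expected: torch.Size([2, 16, 14, 14])
print(x2.shape)                # expected: torch.Size([2, 32, 7, 7])
print(cnn_check(dummy).shape)  # expected: torch.Size([2, 10])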

Computing accuracy as the evaluation metric

def accuracy(predictions, labels):
    # Take the index of the largest value in each prediction, i.e. the predicted class
    pred = torch.max(predictions.data, 1)[1]
    # Count how many predicted labels match the true labels
    rights = pred.eq(labels.data.view_as(pred)).sum()

    # Return the number of correct predictions and the batch size
    return rights, len(labels)
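
A small usage example (sketch with made-up logits) of the accuracy helper:

fake_logits = torch.tensor([[0.2, 0.8], [0.9, 0.1], [0.3, 0.7]])
fake_labels = torch.tensor([1, 0, 0])
rights, total = accuracy(fake_logits, fake_labels)
print(rights.item(), total)   # expected: 2 3  (two of the three predictions are correct)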

Training the network

# Instantiate the model
net = CNN()

# Loss function
criterion = nn.CrossEntropyLoss()

# Optimizer
optimizer = optim.Adam(net.parameters(), lr=0.001)  # Adam optimizer with learning rate 0.001

# Train for num_epochs epochs
for epoch in range(num_epochs):
    # Keep the results of the current epoch:
    # the number of correct predictions for each batch
    train_rights = []

    # Loop over every batch in the training set
    for batch_idx, (data, target) in enumerate(train_loader):  # iterate over the batches produced by the DataLoader
        # Switch the model to training mode before training
        net.train()

        # Forward pass
        output = net(data)

        # Compute the loss
        loss = criterion(output, target)

        # Zero the gradients
        optimizer.zero_grad()

        # Backpropagate to compute the gradients
        loss.backward()

        # Update the parameters
        optimizer.step()

        # Number of correct predictions in this batch
        right = accuracy(output, target)

        # Store it
        train_rights.append(right)

        # Every 100 batches, evaluate and print the results
        if batch_idx % 100 == 0:
            # Switch to evaluation mode before running on the test set
            net.eval()
            # Correct predictions on the test set
            val_rights = []

            # Loop over the test set
            for (data, target) in test_loader:
                # Forward pass
                output = net(data)

                # Number of correct predictions
                right = accuracy(output, target)

                # Store it
                val_rights.append(right)

            # Compute the accuracies:
            # the first element is the number of correct predictions, the second the number of samples counted
            train_r = (sum([tup[0] for tup in train_rights]), sum([tup[1] for tup in train_rights]))
            val_r = (sum([tup[0] for tup in val_rights]), sum([tup[1] for tup in val_rights]))

            print("Epoch: {} [{}/{} ({:.0f} %)]\tLoss: {:.6f}\tTrain accuracy: {:.2f}%\tTest accuracy: {:.2f}%".format(
                epoch, batch_idx * batch_size, len(train_loader.dataset),
                100. * batch_idx / len(train_loader),
                loss.data,
                100. * train_r[0].numpy() / train_r[1],
                100. * val_r[0].numpy() / val_r[1]
            ))

The final results

Epoch: 0 [0/60000 (0 %)]	Loss: 2.306798	Train accuracy: 7.81%	Test accuracy: 10.78%
Epoch: 0 [6400/60000 (11 %)]	Loss: 0.315599	Train accuracy: 76.72%	Test accuracy: 92.71%
Epoch: 0 [12800/60000 (21 %)]	Loss: 0.171727	Train accuracy: 84.92%	Test accuracy: 95.23%
Epoch: 0 [19200/60000 (32 %)]	Loss: 0.077342	Train accuracy: 88.35%	Test accuracy: 96.22%
Epoch: 0 [25600/60000 (43 %)]	Loss: 0.104684	Train accuracy: 90.36%	Test accuracy: 97.07%
Epoch: 0 [32000/60000 (53 %)]	Loss: 0.029630	Train accuracy: 91.62%	Test accuracy: 97.42%
Epoch: 0 [38400/60000 (64 %)]	Loss: 0.029886	Train accuracy: 92.58%	Test accuracy: 97.52%
Epoch: 0 [44800/60000 (75 %)]	Loss: 0.021757	Train accuracy: 93.34%	Test accuracy: 97.95%
Epoch: 0 [51200/60000 (85 %)]	Loss: 0.055081	Train accuracy: 93.84%	Test accuracy: 97.98%
Epoch: 0 [57600/60000 (96 %)]	Loss: 0.013972	Train accuracy: 94.25%	Test accuracy: 98.01%
Epoch: 1 [0/60000 (0 %)]	Loss: 0.026483	Train accuracy: 98.44%	Test accuracy: 98.03%
Epoch: 1 [6400/60000 (11 %)]	Loss: 0.054051	Train accuracy: 98.21%	Test accuracy: 98.42%
Epoch: 1 [12800/60000 (21 %)]	Loss: 0.043712	Train accuracy: 98.23%	Test accuracy: 98.23%
Epoch: 1 [19200/60000 (32 %)]	Loss: 0.078410	Train accuracy: 98.17%	Test accuracy: 98.44%
Epoch: 1 [25600/60000 (43 %)]	Loss: 0.162021	Train accuracy: 98.13%	Test accuracy: 98.51%
Epoch: 1 [32000/60000 (53 %)]	Loss: 0.036997	Train accuracy: 98.08%	Test accuracy: 97.98%
Epoch: 1 [38400/60000 (64 %)]	Loss: 0.052898	Train accuracy: 98.12%	Test accuracy: 98.09%
Epoch: 1 [44800/60000 (75 %)]	Loss: 0.038637	Train accuracy: 98.12%	Test accuracy: 98.50%
Epoch: 1 [51200/60000 (85 %)]	Loss: 0.026636	Train accuracy: 98.19%	Test accuracy: 98.57%
Epoch: 1 [57600/60000 (96 %)]	Loss: 0.021691	Train accuracy: 98.21%	Test accuracy: 98.54%
Epoch: 2 [0/60000 (0 %)]	Loss: 0.118587	Train accuracy: 96.88%	Test accuracy: 98.70%
Epoch: 2 [6400/60000 (11 %)]	Loss: 0.016604	Train accuracy: 99.04%	Test accuracy: 98.74%
Epoch: 2 [12800/60000 (21 %)]	Loss: 0.006324	Train accuracy: 98.90%	Test accuracy: 98.66%
Epoch: 2 [19200/60000 (32 %)]	Loss: 0.003277	Train accuracy: 98.81%	Test accuracy: 98.73%
Epoch: 2 [25600/60000 (43 %)]	Loss: 0.008369	Train accuracy: 98.74%	Test accuracy: 98.74%
Epoch: 2 [32000/60000 (53 %)]	Loss: 0.014443	Train accuracy: 98.75%	Test accuracy: 98.70%
Epoch: 2 [38400/60000 (64 %)]	Loss: 0.026617	Train accuracy: 98.77%	Test accuracy: 98.76%
Epoch: 2 [44800/60000 (75 %)]	Loss: 0.038173	Train accuracy: 98.77%	Test accuracy: 98.83%
Epoch: 2 [51200/60000 (85 %)]	Loss: 0.135648	Train accuracy: 98.77%	Test accuracy: 98.92%
Epoch: 2 [57600/60000 (96 %)]	Loss: 0.006428	Train accuracy: 98.78%	Test accuracy: 98.87%

The convolutional network clearly outperforms the plain fully connected network: it reaches about 98.9% test accuracy after only 3 epochs, compared with 97.82% after 10 epochs for the simple network.