Python RuntimeError: Expected 4-dimensional input for 4-dimensional weight [32, 1, 5, 5]

1. Problem introduction

Today, while training a model with PyTorch, I read and preprocessed the data set with the utilities that ship with PyTorch and used a custom CNN as the network. When I ran the code, it failed with the small error shown in the title.

2. Operation error

As follows:

RuntimeError: Expected 4-dimensional input for 4-dimensional weight [32, 1, 5, 5], but got 2-dimensional input of size [32, 784] instead

3. Code

First of all, my own customized CNN network is as follows:

class MNIST_Model(nn.Module):
    """Small convolutional classifier producing 10 class scores per image.

    Layout: conv 5x5 (32) -> max-pool 2x2 -> conv 5x5 (64) -> max-pool 2x2
    -> linear (64*5*5 -> 200) -> ReLU -> linear (200 -> 10).

    ``forward`` expects a 4-D tensor ``(batch, n_in, H, W)``. The first linear
    layer has ``64 * 5 * 5`` input features, which corresponds to 28x28 input
    images (28 -> 28 -> 14 -> 10 -> 5 through the conv/pool stack).

    The output is raw, unbounded class scores (logits). No activation is
    applied after the last linear layer: ``nn.CrossEntropyLoss`` applies
    log-softmax itself, and a trailing ReLU would clamp every negative score
    to zero. The ReLU is therefore placed after the hidden layer ``fc1``,
    where a non-linearity is needed so that ``fc1``/``fc2`` do not collapse
    into a single linear map.

    Args:
        n_in: number of channels of the input images (1 for grayscale).
    """

    def __init__(self, n_in):
        super().__init__()

        # Each layer stays wrapped in nn.Sequential so parameter names
        # ("conv1.0.weight", "fc2.0.bias", ...) remain unchanged.
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels=n_in,
                      out_channels=32,
                      kernel_size=(5, 5),
                      padding=2,  # keeps the spatial size for a 5x5 kernel
                      stride=1),
        )

        self.maxp1 = nn.MaxPool2d(kernel_size=(2, 2))

        self.conv2 = nn.Sequential(
            nn.Conv2d(in_channels=32,
                      out_channels=64,
                      kernel_size=(5, 5),
                      padding=0,  # no padding: spatial size shrinks by 4
                      stride=1),
        )

        self.maxp2 = nn.MaxPool2d(kernel_size=(2, 2))

        self.fc1 = nn.Sequential(
            nn.Linear(in_features=64 * 5 * 5, out_features=200),  # Mnist
            nn.ReLU(),
        )

        # Output layer: plain linear map to 10 logits, no activation.
        self.fc2 = nn.Sequential(
            nn.Linear(in_features=200, out_features=10),
        )

    def forward(self, x):
        """Return class logits of shape ``(batch, 10)`` for a 4-D image batch ``x``."""
        x = self.conv1(x)
        x = self.maxp1(x)
        x = self.conv2(x)
        x = self.maxp2(x)
        # Flatten (batch, 64, 5, 5) feature maps to (batch, 1600) for the linear layers.
        x = x.flatten(start_dim=1)
        x = self.fc1(x)
        x = self.fc2(x)
        return x

Next is the code that trains the model:

# Instantiate the network with 1 input channel and move it to `device`.
# Note: this rebinds the name `model` from the module to the network instance;
# `net` and `model` refer to the same object afterwards.
model = model.MNIST_Model(1)
net = model.to(device)
# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
# momentum: the SGD momentum factor (what is it used for?)
# SGD over all network parameters, using the `lr` and `momentum` hyperparameters.
optimizer = optim.SGD(model.parameters(),lr=lr,momentum=momentum)


# Per-epoch history of the training loss and training accuracy
losses = []
acces = []
# Per-epoch history of the loss and accuracy on the test set
eval_losses = []
eval_acces = []

for epoch in range(nums_epoches):
    # Reset the running totals at the start of every epoch
    train_loss = 0
    train_acc = 0
    # Set the model to training mode
    model.train()
    # Step learning-rate decay: multiply lr by 0.1 every 5 epochs.
    # The condition is also true for epoch 0, so the first decay happens
    # before any training step.
    if epoch%5 == 0:
        optimizer.param_groups[0]['lr'] *= 0.1
    for img,label in train_loader:
        # Forward pass: feed the image batch to the model.
        # `out` holds 10 scores per sample, one per class.
        out = model(img)
        # `out` is (batch, 10); `label` is a 1-D tensor of length batch
        loss = criterion(out,label)
        # Backward pass.
        # optimizer.zero_grad() clears the gradients left over from the previous step
        optimizer.zero_grad()
        loss.backward()
        # Update all parameters from the gradients computed by backward()
        optimizer.step()
        
        # Accumulate the batch loss
        train_loss += loss.item()
        
        # Predicted class = index of the highest score along dim 1
        _,pred = out.max(1)
        num_correct = (pred == label).sum().item()  # number of correctly predicted labels
        acc = num_correct/img.shape[0]
        train_acc += acc
    # Store the mean of the per-batch loss / accuracy for this epoch
    losses.append(train_loss/len(train_loader))
    acces.append(train_acc/len(train_loader))
    
    eval_loss = 0
    eval_acc = 0
    model.eval()
    for img,label in test_loader:
        # NOTE(review): this flattens every image into one row, giving a 2-D
        # (batch, features) tensor. The model starts with nn.Conv2d, which
        # rejects 2-D input -- this is the kind of reshape that produces the
        # "[32, 784]" RuntimeError discussed in this article. The traceback
        # points at the training-loop call, so the original training loop
        # presumably flattened as well -- verify.
        img = img.view(img.size(0),-1)
        
        out = model(img)
        loss = criterion(out,label)
        
        # NOTE(review): these three calls update the weights using test data
        # even though the model is in eval mode; evaluation normally must not
        # call backward()/step().
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        eval_loss += loss.item()
        
        _,pred = out.max(1)
        num_correct = (pred == label).sum().item()
        acc = num_correct/img.shape[0]
        eval_acc += acc
    eval_losses.append(eval_loss/len(test_loader))
    eval_acces.append(eval_acc/len(test_loader))
    

    # One summary line per epoch
    print('epoch:{},Train Loss:{:.4f},Train Acc:{:.4f},Test Loss:{:.4f},Test Acc:{:.4f}'
             .format(epoch,train_loss/len(train_loader),train_acc/len(train_loader),
                    eval_loss/len(test_loader),eval_acc/len(test_loader)))

4. Analyze the reasons

Locate error location

Traceback (most recent call last):
  File "train.py", line 73, in <module>
    out = model(img)
  File "/home/gzdx/anaconda3/envs/Torch/lib/python3.7/site-packages/torch/nn/modules/module.py", line 889, in _call_impl
    result = self.forward(*input, **kwargs)
  File "/home/gzdx/wyf/PARAD/model.py", line 48, in forward
    x = self.conv1(x)
  File "/home/gzdx/anaconda3/envs/Torch/lib/python3.7/site-packages/torch/nn/modules/module.py", line 889, in _call_impl
    result = self.forward(*input, **kwargs)
  File "/home/gzdx/anaconda3/envs/Torch/lib/python3.7/site-packages/torch/nn/modules/container.py", line 119, in forward
    input = module(input)
  File "/home/gzdx/anaconda3/envs/Torch/lib/python3.7/site-packages/torch/nn/modules/module.py", line 889, in _call_impl
    result = self.forward(*input, **kwargs)
  File "/home/gzdx/anaconda3/envs/Torch/lib/python3.7/site-packages/torch/nn/modules/conv.py", line 399, in forward
    return self._conv_forward(input, self.weight, self.bias)
  File "/home/gzdx/anaconda3/envs/Torch/lib/python3.7/site-packages/torch/nn/modules/conv.py", line 396, in _conv_forward
    self.padding, self.dilation, self.groups)
RuntimeError: Expected 4-dimensional input for 4-dimensional weight [32, 1, 5, 5], but got 2-dimensional input of size [32, 784] instead

As you can see, the error is raised when our data is passed into the CNN, and it is caused by mismatched dimensions: the convolution layer expects a 4-dimensional input, but it received a 2-dimensional tensor of size [32, 784].

  File "train.py", line 73, in <module>
    out = model(img)

5. Solutions

There are many different solutions to this kind of problem on the Internet. I referred to some of the ideas given by others, adapted them to my own code, and the error was solved, as shown below:

for inputs, labels in train_loader:
    # Forward pass: feed the image batch to the model exactly as the
    # DataLoader yields it -- do not flatten it first.
    # Move both tensors to the device the model lives on.
    inputs, labels = inputs.to(device), labels.to(device)
    # inputs: torch.Size([32, 1, 28, 28])
    # `out` holds 10 scores per sample, one per class.
    out = model(inputs)

The solution is very simple: read each batch from the data loader as shown above and pass it to the model unchanged, so that the input keeps its 4-dimensional shape [32, 1, 28, 28] instead of being flattened to [32, 784]. With this change the error above no longer occurs.

6. Complete code

import numpy as np
import model
import torch

#Importing PyTorch's built-in mnist data
from torchvision.datasets import mnist

#Import pre-processing module
from torchvision import transforms
from torch.utils.data import DataLoader

#Importing neural network tools
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# Hyperparameters used below
train_batch_size = 32
test_batch_size = 32

# Number of training epochs.
# NOTE: `learning_rate` is not read anywhere in this script; the optimizer
# is configured from `lr` below.
learning_rate = 0.01
nums_epoches = 50

# Optimizer settings
lr = 0.1
momentum = 0.5

# Preprocessing: convert images to tensors, then normalize with mean 0.5 / std 0.5
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize([0.5], [0.5])])

# Load MNIST from ./data. With download=False the dataset must already be
# present in that folder (pass download=True to fetch it).
train_dataset = mnist.MNIST('./data', train=True, transform=transform, target_transform=None, download=False)
test_dataset = mnist.MNIST('./data', train=False, transform=transform, target_transform=None, download=False)

# Batch iterators: shuffled for training, fixed order for testing
train_loader = DataLoader(train_dataset, batch_size=train_batch_size, shuffle=True, num_workers=0)
test_loader = DataLoader(test_dataset, batch_size=test_batch_size, shuffle=False)

# Use the first GPU when available, otherwise the CPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Instantiate the network (1 input channel) on the selected device.
# The instance is named `net` so that it does not shadow the `model` module.
net = model.MNIST_Model(1).to(device)
# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
# momentum: the SGD momentum factor
optimizer = optim.SGD(net.parameters(), lr=lr, momentum=momentum)

# Per-epoch history of the training loss and training accuracy
losses = []
acces = []
# Per-epoch history of the loss and accuracy on the test set
eval_losses = []
eval_acces = []

# Checkpoint file; the ./model directory must already exist.
PATH = './model/mnist_net.pth'

for epoch in range(nums_epoches):
    # ---- training ----
    # Reset the running totals at the start of every epoch
    train_loss = 0
    train_acc = 0
    # Set the model to training mode
    net.train()

    # Step learning-rate decay: multiply lr by 0.1 every 5 epochs.
    # The condition is also true for epoch 0, so the first decay happens
    # before any training step.
    if epoch % 5 == 0:
        optimizer.param_groups[0]['lr'] *= 0.1

    for inputs, labels in train_loader:
        # Feed the batch to the model as loaded, (batch, 1, 28, 28); the
        # convolution layers need 4-D input, so it must not be flattened.
        inputs, labels = inputs.to(device), labels.to(device)
        # `out` is (batch, 10): one score per class; `labels` is (batch,)
        out = net(inputs)
        loss = criterion(out, labels)

        # Backward pass: clear stale gradients, back-propagate, update weights
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Accumulate the batch loss
        train_loss += loss.item()

        # Predicted class = index of the highest score along dim 1
        _, pred = out.max(1)
        num_correct = (pred == labels).sum().item()  # number of correct labels
        train_acc += num_correct / inputs.shape[0]

    # Store the mean of the per-batch loss / accuracy for this epoch
    losses.append(train_loss / len(train_loader))
    acces.append(train_acc / len(train_loader))

    # Save a checkpoint of the weights after every epoch
    torch.save(net.state_dict(), PATH)

    # ---- evaluation ----
    eval_loss = 0
    eval_acc = 0
    net.eval()
    # Evaluation must not change the weights: no backward()/step() here, and
    # no_grad() avoids building the autograd graph.
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            out = net(inputs)
            loss = criterion(out, labels)

            eval_loss += loss.item()

            _, pred = out.max(1)
            num_correct = (pred == labels).sum().item()
            eval_acc += num_correct / inputs.shape[0]

    eval_losses.append(eval_loss / len(test_loader))
    eval_acces.append(eval_acc / len(test_loader))

    # One summary line per epoch
    print('epoch:{},Train Loss:{:.4f},Train Acc:{:.4f},Test Loss:{:.4f},Test Acc:{:.4f}'
          .format(epoch, train_loss / len(train_loader), train_acc / len(train_loader),
                  eval_loss / len(test_loader), eval_acc / len(test_loader)))

# Printed once, after the last epoch
print('Finished Training')


Read More: