[Solved] RuntimeError: Input type (torch.FloatTensor) and weight type (torch.cuda.FloatTensor) should be the same

1. Problems

While practicing with PyTorch today, I tried to train on the GPU and hit the following error:

RuntimeError: Input type (torch.FloatTensor) and weight type (torch.cuda.FloatTensor) should be the same
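
The message means the model's weights live on the GPU while the input tensors are still on the CPU. A minimal sketch that reproduces the mismatch (assuming a CUDA-capable machine; the layer and input shapes are arbitrary):

import torch
from torch import nn

conv = nn.Conv2d(3, 8, 3).cuda()  # weights become torch.cuda.FloatTensor
x = torch.randn(1, 3, 32, 32)     # input stays a CPU torch.FloatTensor
conv(x)                           # raises the RuntimeError above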

 

2. Code (adjusted so that it runs correctly)

import torch.optim
import torchvision.datasets

# Preparing the data set
from torch import nn
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from time import time

print(torch.cuda.is_available())

train_data = torchvision.datasets.CIFAR10(root="./dataset",
                                          train=True,
                                          transform=torchvision.transforms.ToTensor(),
                                          download=True)

test_data = torchvision.datasets.CIFAR10(root="./dataset",
                                         train=False,
                                         transform=torchvision.transforms.ToTensor(),
                                         download=True)
print("training_set_data_length: %d" % len(train_data))
print("Test set data length: %d" % len(test_data))

# Load data using DataLoader
train_data_loader = DataLoader(train_data, batch_size=64)
test_data_loader = DataLoader(test_data, batch_size=64)


# Build the neural network (often kept in a separate .py file; shown inline here)
class Net(nn.Module):
    def __init__(self) -> None:
        super(Net, self).__init__()
        self.model = nn.Sequential(
            nn.Conv2d(3, 32, 5, 1, 2),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(32, 32, 5, 1, 2),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(32, 64, 5, 1, 2),
            nn.MaxPool2d(2, 2),
            nn.Flatten(),
            nn.Linear(1024, 64),  # 64 channels * 4 * 4 spatial after three 2x2 poolings of a 32x32 input
            nn.Linear(64, 10)

        )

    def forward(self, x):
        x = self.model(x)
        return x


# Create a network model
net = Net()
# Only the model, the data, and the loss function need to be moved to the GPU
# if torch.cuda.is_available():
#     net = net.cuda()
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
net.to(device)
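# Note: nn.Module.to(device) moves the parameters in place, so reassignment
# (net = net.to(device)) is optional for modules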


# Loss function
loss_fn = nn.CrossEntropyLoss()
# if torch.cuda.is_available():
#     loss_fn = loss_fn.cuda()
loss_fn.to(device)
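# CrossEntropyLoss has no learnable parameters here, so moving it is optional,
# but doing so keeps the code uniform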

# Optimizer
learning_rate = 1e-2 # 0.01
optimizer = torch.optim.SGD(net.parameters(), lr=learning_rate)

# Set the parameters of the training network
# Record the number of training steps
total_train_step = 0
# Record the number of test steps
total_test_step = 0
# Number of training rounds
epoch = 10

start_time = time()
writer = SummaryWriter("./logs/train")
for i in range(epoch):
    print("------Round %d of training ------" % (i + 1))

    # Training steps
    for data in train_data_loader:
        imgs, targets = data
        if torch.cuda.is_available():
            imgs, targets = imgs.cuda(), targets.cuda()
        # imgs.to(device)
        # targets.to(device)
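        # Note: the two commented-out lines above were the original bug.
        # Tensor.to() is NOT in-place: it returns a new tensor, so calling
        # imgs.to(device) without reassigning leaves imgs on the CPU.
        # The working form would be: imgs = imgs.to(device)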
        outputs = net(imgs)
        loss = loss_fn(outputs, targets)

        # Optimizer optimization model
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_train_step += 1
        if total_train_step % 100 == 0:
            end_time = time()
            print(end_time - start_time)
            print("training_step: {}, loss: {}".format(total_train_step, loss.item())) # .item() can convert the tensor type to a number
            writer.add_scalar("train_loss", loss.item(), total_train_step)

    # Test Steps
    total_test_loss = 0
    total_accuracy = 0
    with torch.no_grad(): # Disable gradient tracking during evaluation; no gradients are needed since we are not optimizing
        for data in test_data_loader:
            imgs, targets = data
            if torch.cuda.is_available():
                imgs, targets = imgs.cuda(), targets.cuda()
            # imgs.to(device)
            # targets.to(device)
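            # (same pitfall as in the training loop: Tensor.to() is not in-place)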
            outputs = net(imgs)
            loss = loss_fn(outputs, targets)

            total_test_loss += loss.item()
            accuracy = (outputs.argmax(1) == targets).sum().item() # number of correct predictions in this batch
            total_accuracy += accuracy
    print("loss on the overall test set: {}".format(total_test_loss))
    print("Percent correct on the overall test set: {}".format(total_accuracy/len(test_data)))

    writer.add_scalar("test_loss", total_test_loss, total_test_step)
    writer.add_scalar("test_accuracy", total_accuracy/len(test_data), total_test_step)
    total_test_step += 1

    # Save the model
    # torch.save(net.state_dict(), "model_{}.pth".format(i))
    # print("Round {} training model saved".format(i))

writer.close()
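
A quick sanity check (not part of the original script): printing where the weights and a batch actually live should report cuda:0 for both on a GPU machine.

print(next(net.parameters()).device)   # cuda:0 if the model was moved
imgs, _ = next(iter(train_data_loader))
print(imgs.to(device).device)          # cuda:0 after moving the batch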

3. Solutions

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
for data in train_data_loader:
    imgs, targets = data
    if torch.cuda.is_available():
        imgs, targets = imgs.cuda(), targets.cuda()
    # imgs.to(device)
    # targets.to(device)
    outputs = net(imgs)

In the code above, imgs and targets cannot be moved with a bare imgs.to(device): unlike nn.Module.to(), torch.Tensor.to() is not in-place, so it returns a new tensor and leaves the original on the CPU. The input then remains a torch.FloatTensor while the weights are torch.cuda.FloatTensor, which is exactly the mismatch in the error message. Moving the tensors with .cuda() and reassigning them works:

if torch.cuda.is_available():
    imgs, targets = imgs.cuda(), targets.cuda()

This resolves the mismatch between the input and weight types.
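
Equivalently, .to(device) works as long as its result is assigned back; a minimal device-agnostic sketch of the same fix:

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
imgs, targets = imgs.to(device), targets.to(device)  # the reassignment is what matters
outputs = net(imgs)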

4. Reference

https://stackoverflow.com/questions/59013109/runtimeerror-input-type-torch-floattensor-and-weight-type-torch-cuda-floatte
