Implementing the principle of an RNN in Python
I have tweaked the code a little so that gradient descent is possible: the weight matrices are created as tensors with requires_grad=True. A short sketch of an actual parameter-update step follows the code.
import numpy as np
import torch
from torch import nn
class Rnn(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, bidirectional=False):
        super(Rnn, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.bidirectional = bidirectional

    def forward(self, x):
        '''
        :param x: [seq, batch_size, embedding]
        :return: out, hidden
        '''
        # x.shape      [seq, batch, feature]
        # hidden.shape [hidden_size, batch] for each layer
        # Whh0.shape [hidden_size, hidden_size]   Wih0.shape [hidden_size, feature]
        # Whh1.shape [hidden_size, hidden_size]   Wih1.shape [hidden_size, hidden_size]
        out = []
        x = torch.from_numpy(np.array(x))
        # Keep one hidden state per layer in a Python list, so each assignment below rebinds
        # the entry instead of writing in place into one shared tensor (which would make
        # every element of `out` alias the final hidden state).
        hidden = [torch.from_numpy(np.zeros((self.hidden_size, x.shape[1]))) for _ in range(self.num_layers)]
        # Wih0 maps the input features; the deeper Wih map the hidden state of the layer below.
        Wih0 = torch.tensor(np.random.random((self.hidden_size, x.shape[2])), requires_grad=True)
        Wih = [torch.tensor(np.random.random((self.hidden_size, self.hidden_size)), requires_grad=True)
               for _ in range(1, self.num_layers)]
        Whh = [torch.tensor(np.random.random((self.hidden_size, self.hidden_size)), requires_grad=True)
               for _ in range(self.num_layers)]
        time = x.shape[0]
        for i in range(time):
            # the first layer reads the input at time step i
            hidden[0] = torch.tanh(torch.matmul(Wih0, torch.transpose(x[i, ...], 1, 0)) +
                                   torch.matmul(Whh[0], hidden[0]))
            # every deeper layer reads the hidden state of the layer below it
            for j in range(1, self.num_layers):
                hidden[j] = torch.tanh(torch.matmul(Wih[j - 1], hidden[j - 1]) +
                                       torch.matmul(Whh[j], hidden[j]))
            out.append(hidden[self.num_layers - 1])
        # A list of tensors cannot be converted with torch.tensor() (it raises an error),
        # so stack them instead.
        return torch.stack(out), torch.stack(hidden)
def sigmoid(x):
    # 1 / (1 + 1/e^x) is the same as the usual 1 / (1 + e^(-x))
    return 1.0 / (1.0 + 1.0 / np.exp(x))
if __name__ == '__main__':
    # quick check that torch is working and whether CUDA is available
    a = torch.tensor([1, 2, 3])
    print(torch.cuda.is_available(), type(a))
    rnn = Rnn(1, 5, 4)
    input = np.random.random((6, 2, 1))
    out, h = rnn(input)
    print(f'seq is {input.shape[0]}, batch_size is {input.shape[1]} ', 'out.shape ', out.shape, ' h.shape ', h.shape)
# print(sigmoid(np.random.random((2, 3))))
#
# element-wise multiplication
# print(np.array([1, 2])*np.array([2, 1]))
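Because the weights above are created inside forward(), an optimizer cannot hold on to them between calls. Below is a minimal sketch, my own assumption rather than the original author's code, of how the same recurrence could register its weights as nn.Parameter in __init__ so a single SGD step can update them; the class name RnnTrainable, the toy loss, and the learning rate are illustrative choices only.

import torch
from torch import nn

class RnnTrainable(nn.Module):
    # hypothetical variant: weights live in __init__ as nn.Parameter so an optimizer can see them
    def __init__(self, input_size, hidden_size, num_layers):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # layer 0 maps the input features, deeper layers map the hidden state of the layer below
        self.Wih = nn.ParameterList(
            [nn.Parameter(torch.rand(hidden_size, input_size if l == 0 else hidden_size))
             for l in range(num_layers)])
        self.Whh = nn.ParameterList(
            [nn.Parameter(torch.rand(hidden_size, hidden_size)) for _ in range(num_layers)])

    def forward(self, x):  # x: [seq, batch, feature]
        hidden = [torch.zeros(self.hidden_size, x.shape[1]) for _ in range(self.num_layers)]
        out = []
        for t in range(x.shape[0]):
            hidden[0] = torch.tanh(self.Wih[0] @ x[t].T + self.Whh[0] @ hidden[0])
            for l in range(1, self.num_layers):
                hidden[l] = torch.tanh(self.Wih[l] @ hidden[l - 1] + self.Whh[l] @ hidden[l])
            out.append(hidden[-1])
        return torch.stack(out), torch.stack(hidden)

model = RnnTrainable(1, 5, 4)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)  # lr is an arbitrary choice
x = torch.rand(6, 2, 1)
out, h = model(x)
loss = out.pow(2).mean()  # toy loss, only to drive one update
optimizer.zero_grad()
loss.backward()
optimizer.step()
print(loss.item(), out.shape, h.shape)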
Below is the original version of the code. First of all, it is only meant to aid understanding: the gradient-descent part is not written, and the parameters are left at their default (fixed) random values, which does not affect understanding. The code implements the principle of an RNN using only the NumPy library, so it cannot use GPU acceleration. A shape comparison against torch.nn.RNN follows the code.
import numpy as np
class Rnn():
    def __init__(self, input_size, hidden_size, num_layers, bidirectional=False):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.bidirectional = bidirectional

    def feed(self, x):
        '''
        :param x: [seq, batch_size, embedding]
        :return: out, hidden
        '''
        # x.shape      [seq, batch, feature]
        # hidden.shape [hidden_size, batch] for each layer
        # Whh0.shape [hidden_size, hidden_size]   Wih0.shape [hidden_size, feature]
        # Whh1.shape [hidden_size, hidden_size]   Wih1.shape [hidden_size, hidden_size]
        out = []
        x = np.array(x)
        # one hidden state per layer, initialised to zeros
        hidden = [np.zeros((self.hidden_size, x.shape[1])) for _ in range(self.num_layers)]
        # Wih[0] maps the input features; the deeper Wih map the hidden state of the layer below
        Wih = [np.random.random((self.hidden_size, self.hidden_size)) for _ in range(1, self.num_layers)]
        Wih.insert(0, np.random.random((self.hidden_size, x.shape[2])))
        Whh = [np.random.random((self.hidden_size, self.hidden_size)) for _ in range(self.num_layers)]
        time = x.shape[0]
        for i in range(time):
            # the first layer reads the input at time step i
            hidden[0] = np.tanh(np.dot(Wih[0], np.transpose(x[i, ...], (1, 0))) +
                                np.dot(Whh[0], hidden[0]))
            # every deeper layer reads the hidden state of the layer below it
            for j in range(1, self.num_layers):
                hidden[j] = np.tanh(np.dot(Wih[j], hidden[j - 1]) +
                                    np.dot(Whh[j], hidden[j]))
            out.append(hidden[self.num_layers - 1])
        return np.array(out), np.array(hidden)
def sigmoid(x):
    # 1 / (1 + 1/e^x) is the same as the usual 1 / (1 + e^(-x))
    return 1.0 / (1.0 + 1.0 / np.exp(x))
if __name__ == '__main__':
    rnn = Rnn(1, 5, 4)
    input = np.random.random((6, 2, 1))
    out, h = rnn.feed(input)
    print(f'seq is {input.shape[0]}, batch_size is {input.shape[1]} ', 'out.shape ', out.shape, ' h.shape ', h.shape)
# print(sigmoid(np.random.random((2, 3))))
#
# element-wise multiplication
# print(np.array([1, 2])*np.array([2, 1]))
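As a sanity check on the shapes, this is how PyTorch's built-in nn.RNN behaves with the same sizes. This comparison is my addition, not part of the original post; the only difference to watch for is that the hand-rolled version keeps each hidden state as [hidden_size, batch], so its last two axes come out transposed relative to nn.RNN.

import torch
from torch import nn

rnn = nn.RNN(input_size=1, hidden_size=5, num_layers=4)  # default layout: input is [seq, batch, feature]
x = torch.rand(6, 2, 1)
out, h = rnn(x)
print(out.shape, h.shape)  # torch.Size([6, 2, 5]) torch.Size([4, 2, 5])
# The NumPy implementation above returns (6, 5, 2) and (4, 5, 2): the same information,
# with the batch and hidden axes swapped because hidden is stored as [hidden_size, batch].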