Some RNN demos for learning

GRU

# GRU
import torch
import torch.nn as nn

class netA(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size, bidirectional):
        '''
        input_size: embedding_dim
        bidirectional: True means a bidirectional GRU, whose output size is hidden_size * 2
        '''
        super(netA, self).__init__()

        # self.embedding = nn.Embedding(20, input_size)

        self.gru = nn.GRU(input_size=input_size, hidden_size=hidden_size, bidirectional=bidirectional,
                          batch_first=True, num_layers=num_layers, dropout=0.1)

        if bidirectional:
            self.in_features = hidden_size * 2
        else:
            self.in_features = hidden_size

        self.fc = nn.Sequential(
            nn.Dropout(0.2),
            nn.Tanh(),
            nn.Linear(in_features=self.in_features, out_features=output_size),
            nn.Tanh()
        )

    def forward(self, x):
        # x: [batch_size, seq_len, input_size] because batch_first=True
        x, h = self.gru(x)
        out = self.fc(x)

        return x, h, out
  • Data preparation
vocab_size = 100
embedding_dim = 10
hidden_size = 30
num_layers = 3
output_size = 6
embedding = nn.Embedding(vocab_size, embedding_dim)
# batch_size = 5, seq_len = 3
input_seq = torch.randn(5, 3)
input_seq
>>>
tensor([[ 0.1037, -0.9751, -0.5190],
        [ 0.1117,  1.2962,  0.8708],
        [-0.3048, -0.5265, -0.4555],
        [-0.3947,  1.5599,  1.0967],
        [ 0.0376,  0.7062,  0.2559]])

# cast the floats to integer token ids (truncation toward zero); note that any
# value <= -1 would become a negative index and break nn.Embedding
input_seq = input_seq.long()
embedded_seq = embedding(input_seq)
embedded_seq.shape  # [batch_size, seq_len, embedding_dim]
>>>
torch.Size([5, 3, 10])
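
A more robust way to build the toy token ids (an alternative sketch, not what the demo above uses) is to sample integers directly, which guarantees every index is valid for the embedding:

# alternative: sample valid token ids directly from [0, vocab_size)
input_ids = torch.randint(0, vocab_size, (5, 3))
embedding(input_ids).shape   # torch.Size([5, 3, 10])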
# bidirectional = True
net1 = netA(input_size=embedding_dim, hidden_size=hidden_size, bidirectional=True,
            num_layers=num_layers, output_size=output_size)

x, h, out = net1(embedded_seq)
print(x.shape)    # [batch_size, seq_len, hidden_size*2] because bidirectional=True
print(h.shape)    # [num_layers*2, batch_size, hidden_size]
print(out.shape)  # [batch_size, seq_len, output_size]
>>>
torch.Size([5, 3, 60])
torch.Size([6, 5, 30])
torch.Size([5, 3, 6])
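
If a single fixed-size vector per sequence is needed (e.g. for classification), one common pattern, sketched here and not part of the original demo, is to concatenate the last layer's forward and backward final hidden states from h:

# h is ordered by layer, then direction:
# h[-2] is the last layer's forward final state, h[-1] the backward one
sentence_vec = torch.cat([h[-2], h[-1]], dim=-1)   # [batch_size, hidden_size*2]
print(sentence_vec.shape)                          # torch.Size([5, 60])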
# bidirectional = False
net1 = netA(input_size=embedding_dim, hidden_size=hidden_size, bidirectional=False,
            num_layers=num_layers, output_size=output_size)

x, h, out = net1(embedded_seq)
print(x.shape)    # [batch_size, seq_len, hidden_size]
print(h.shape)    # [num_layers, batch_size, hidden_size]
print(out.shape)  # [batch_size, seq_len, output_size]
>>>
torch.Size([5, 3, 30])
torch.Size([3, 5, 30])
torch.Size([5, 3, 6])
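
As a quick sanity check (a small addition, not in the original demo): for a unidirectional GRU with batch_first=True, the output at the last time step is exactly the last layer's final hidden state.

# last time step of the output == final hidden state of the top layer
print(torch.allclose(x[:, -1, :], h[-1]))   # True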

LSTM

# LSTM
class netB(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size, bidirectional):
        '''
        input_size: embedding_dim
        bidirectional: bidirectional network; its output size is hidden_size * 2
        '''
        super(netB, self).__init__()

        # self.embedding = nn.Embedding(20, input_size)

        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers,
                            bidirectional=bidirectional, batch_first=True, dropout=0.1)

        if bidirectional:
            self.in_features = hidden_size * 2
        else:
            self.in_features = hidden_size

        # softmax over the class dimension (the last dimension of the fc output)
        self.softmax = nn.Softmax(dim=-1)

        self.fc = nn.Sequential(
            nn.Dropout(0.2),
            nn.Tanh(),
            nn.Linear(in_features=self.in_features, out_features=output_size)
        )

    def forward(self, x):
        # hn (final hidden state) and cn (final cell state), each of shape
        # (num_layers * num_directions, batch_size, hidden_size)
        x, (hn, cn) = self.lstm(x)
        out = self.fc(x)
        out = self.softmax(out)

        return x, (hn, cn), out

net2 = netB(input_size=embedding_dim, hidden_size=hidden_size, bidirectional=False,
            num_layers=num_layers, output_size=output_size)

x, (h, c), out = net2(embedded_seq)
print(x.shape)
print(out.shape)
print(h.shape)
print(c.shape)
>>>
torch.Size([5, 3, 30])
torch.Size([5, 3, 6])
torch.Size([3, 5, 30])  # (num_layers * num_directions, batch_size, hidden_size)
torch.Size([3, 5, 30])
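
If the goal is one label per sequence rather than one per time step, a common variant (sketched here with the same hyperparameters, not part of the original demo) classifies the last layer's final hidden state instead of the full output sequence:

# per-sequence classification head on the top LSTM layer's final hidden state
clf = nn.Linear(hidden_size, output_size)
logits = clf(h[-1])                      # [batch_size, output_size] -> [5, 6]
probs = torch.softmax(logits, dim=-1)
print(probs.shape)                       # torch.Size([5, 6])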

Combining GRU with LSTM

class netA(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size, bidirectional):
        '''
        input_size: embedding_dim
        bidirectional: True means a bidirectional GRU, whose output size is hidden_size * 2
        '''
        super(netA, self).__init__()

        # self.embedding = nn.Embedding(20, input_size)

        self.gru = nn.GRU(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers,
                          bidirectional=bidirectional, batch_first=True, dropout=0.1)

        if bidirectional:
            self.in_features = hidden_size * 2
        else:
            self.in_features = hidden_size

        self.fc = nn.Sequential(
            nn.Dropout(0.2),
            nn.Tanh(),
            nn.Linear(in_features=self.in_features, out_features=output_size),
            nn.Tanh()
        )

    def forward(self, x):
        x, h = self.gru(x)
        out = self.fc(x)

        return out


class netB(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size, bidirectional):
        '''
        input_size: embedding_dim
        bidirectional: bidirectional network; its output size is hidden_size * 2
        '''
        super(netB, self).__init__()

        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers,
                            bidirectional=bidirectional, batch_first=True, dropout=0.1)

        if bidirectional:
            self.in_features = hidden_size * 2
        else:
            self.in_features = hidden_size

        # softmax over the class dimension
        self.softmax = nn.Softmax(dim=-1)

        self.fc = nn.Sequential(
            nn.Dropout(0.2),
            nn.Tanh(),
            nn.Linear(in_features=self.in_features, out_features=output_size)
        )

    def forward(self, x):
        x, (hn, cn) = self.lstm(x)
        out = self.fc(x)
        out = self.softmax(out)

        return out


class netC(nn.Module):
    def __init__(self, vocab_size, input_size, hidden_size, num_layers, output_size, bidirectional):
        super(netC, self).__init__()

        self.embedding = nn.Embedding(vocab_size, input_size)
        # compared with the versions above, netA and netB now return only out
        self.net1 = netA(input_size, hidden_size, num_layers, output_size, bidirectional)
        self.net2 = netB(input_size, hidden_size, num_layers, output_size, bidirectional)

    def forward(self, x):
        x = self.embedding(x)

        x_A = self.net1(x)   # [5, 3, 6]
        x_B = self.net2(x)   # [5, 3, 6]

        # concatenate along the sequence dimension -> [5, 6, 6]
        out = torch.cat([x_A, x_B], dim=1)

        return out
net3 = netC(vocab_size=vocab_size, input_size=embedding_dim, hidden_size=hidden_size,
            bidirectional=False, num_layers=num_layers, output_size=output_size)
input_seq = torch.randn(5, 3)
input_seq = input_seq.long()   # see the note on building integer token ids above

out = net3(input_seq)
out.shape
>>>
torch.Size([5, 6, 6])
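
The [5, 6, 6] shape comes from concatenating the two [5, 3, 6] branch outputs along dim=1, the sequence dimension. If the intent were to fuse the two branches per time step instead, the concatenation would go along the feature dimension (an alternative sketch, not what the demo above does):

# alternative fusion: concatenate features per time step
emb = net3.embedding(input_seq)                                   # [5, 3, 10]
out_feat = torch.cat([net3.net1(emb), net3.net2(emb)], dim=-1)    # [5, 3, 12]
print(out_feat.shape)                                             # torch.Size([5, 3, 12])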