Does a clean and extendable LSTM implementation exist in PyTorch?
I would like to create my own LSTM class, but I don't want to rewrite the classic LSTM function from scratch yet again.
Digging through PyTorch's code, I only found a messy implementation that involves at least 3-4 classes with inheritance:
- https://github.com/pytorch/pytorch/blob/98c24fae6b6400a7d1e13610b20aa05f86f77070/torch/nn/modules/rnn.py#L323
- https://github.com/pytorch/pytorch/blob/98c24fae6b6400a7d1e13610b20aa05f86f77070/torch/nn/modules/rnn.py#L12
- https://github.com/pytorch/pytorch/blob/98c24fae6b6400a7d1e13610b20aa05f86f77070/torch/nn/_functions/rnn.py#L297
Does a clean PyTorch implementation exist somewhere? Any link would help.
For example, I know that clean LSTM implementations exist in TensorFlow, but I would need to derive a PyTorch one.
For a clear example, what I'm searching for is an implementation as clean as this, but in PyTorch:
The best implementation I found is here:
https://github.com/pytorch/benchmark/blob/master/rnns/benchmarks/lstm_variants/lstm.py
It even implements four different variants of recurrent dropout, which is very useful!
If you strip away the dropout parts, you get:
import math
import torch as th
import torch.nn as nn


class LSTM(nn.Module):

    def __init__(self, input_size, hidden_size, bias=True):
        super(LSTM, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.bias = bias
        self.i2h = nn.Linear(input_size, 4 * hidden_size, bias=bias)
        self.h2h = nn.Linear(hidden_size, 4 * hidden_size, bias=bias)
        self.reset_parameters()

    def reset_parameters(self):
        std = 1.0 / math.sqrt(self.hidden_size)
        for w in self.parameters():
            w.data.uniform_(-std, std)

    def forward(self, x, hidden):
        h, c = hidden
        h = h.view(h.size(1), -1)
        c = c.view(c.size(1), -1)
        x = x.view(x.size(1), -1)

        # Linear mappings
        preact = self.i2h(x) + self.h2h(h)

        # activations
        gates = preact[:, :3 * self.hidden_size].sigmoid()
        g_t = preact[:, 3 * self.hidden_size:].tanh()      # cell candidate
        i_t = gates[:, :self.hidden_size]                   # input gate
        f_t = gates[:, self.hidden_size:2 * self.hidden_size]  # forget gate
        o_t = gates[:, -self.hidden_size:]                  # output gate

        c_t = th.mul(c, f_t) + th.mul(i_t, g_t)
        h_t = th.mul(o_t, c_t.tanh())

        h_t = h_t.view(1, h_t.size(0), -1)
        c_t = c_t.view(1, c_t.size(0), -1)
        return h_t, (h_t, c_t)
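For reference, here is a minimal, untested usage sketch of a single step through this cell. The shapes (1, batch, input_size) for the input and (1, batch, hidden_size) for the hidden states are assumptions read off the .view calls above (the repository does not document them), and the snippet reuses the LSTM class and the import torch as th from the block above:

batch, input_size, hidden_size = 3, 10, 20
cell = LSTM(input_size, hidden_size)

x = th.randn(1, batch, input_size)      # a single time step
h0 = th.zeros(1, batch, hidden_size)
c0 = th.zeros(1, batch, hidden_size)

h1, (h1_out, c1) = cell(x, (h0, c0))
print(h1.size())                        # torch.Size([1, 3, 20])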
PS: The repository contains even more variants of LSTMs and other RNNs:
https://github.com/pytorch/benchmark/tree/master/rnns/benchmarks
Check it out, maybe the extension you have in mind already exists there!
Edit:
As mentioned in the comments, you can wrap the LSTM cell above to process sequential output:
import math
import torch as th
import torch.nn as nn


class LSTMCell(nn.Module):

    def __init__(self, input_size, hidden_size, bias=True):
        # As before
        ...

    def reset_parameters(self):
        # As before
        ...

    def forward(self, x, hidden):
        if hidden is None:
            hidden = self._init_hidden(x)
        # Rest as before
        ...

    @staticmethod
    def _init_hidden(input_):
        h = th.zeros_like(input_.view(1, input_.size(1), -1))
        c = th.zeros_like(input_.view(1, input_.size(1), -1))
        return h, c


class LSTM(nn.Module):

    def __init__(self, input_size, hidden_size, bias=True):
        super().__init__()
        self.lstm_cell = LSTMCell(input_size, hidden_size, bias)

    def forward(self, input_, hidden=None):
        # input_ is of dimensionality (1, time, input_size, ...)
        outputs = []
        for x in th.unbind(input_, dim=1):
            hidden = self.lstm_cell(x, hidden)
            outputs.append(hidden[0].clone())
        return th.stack(outputs, dim=1)
I haven't tested the code, since I'm working with a convLSTM implementation instead. Please let me know if anything is wrong.
Update: fixed the link.
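For comparison, here is a minimal sketch of the same "loop a cell over the time dimension" pattern built on PyTorch's own torch.nn.LSTMCell rather than the custom cell above; it is only meant as a known-good, runnable reference for the wrapping idea, and the batch-first (batch, time, input_size) layout is my own assumption, not taken from the code above:

import torch
import torch.nn as nn


class WrappedLSTM(nn.Module):
    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.cell = nn.LSTMCell(input_size, hidden_size)

    def forward(self, x, hidden=None):
        # x: (batch, time, input_size)
        if hidden is None:
            h = x.new_zeros(x.size(0), self.hidden_size)
            c = x.new_zeros(x.size(0), self.hidden_size)
        else:
            h, c = hidden
        outputs = []
        for x_t in torch.unbind(x, dim=1):   # x_t: (batch, input_size)
            h, c = self.cell(x_t, (h, c))
            outputs.append(h)
        return torch.stack(outputs, dim=1), (h, c)


lstm = WrappedLSTM(10, 20)
out, (h_n, c_n) = lstm(torch.randn(3, 5, 10))
print(out.size())                            # torch.Size([3, 5, 20])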
I made a simple and general framework for customizing LSTMs:
https://github.com/daehwannam/pytorch-rnn-util
You can implement custom LSTMs by designing LSTM cells and providing them to LSTMFrame.
An example of a custom LSTM in the package is LayerNormLSTM:
# snippet from rnn_util/seq.py
class LayerNormLSTM(LSTMFrame):
    def __init__(self, input_size, hidden_size, num_layers=1, dropout=0,
                 r_dropout=0, bidirectional=False, layer_norm_enabled=True):
        r_dropout_layer = nn.Dropout(r_dropout)
        rnn_cells = tuple(
            tuple(
                LayerNormLSTMCell(
                    input_size if layer_idx == 0 else hidden_size * (2 if bidirectional else 1),
                    hidden_size,
                    dropout=r_dropout_layer,
                    layer_norm_enabled=layer_norm_enabled)
                for _ in range(2 if bidirectional else 1))
            for layer_idx in range(num_layers))

        super().__init__(rnn_cells, dropout, bidirectional)
LayerNormLSTM has the key options of PyTorch's standard LSTM, plus two additional options, r_dropout and layer_norm_enabled:
# example.py
import torch
import rnn_util


bidirectional = True
num_directions = 2 if bidirectional else 1

rnn = rnn_util.LayerNormLSTM(10, 20, 2, dropout=0.3, r_dropout=0.25,
                             bidirectional=bidirectional, layer_norm_enabled=True)
# rnn = torch.nn.LSTM(10, 20, 2, bidirectional=bidirectional)

input = torch.randn(5, 3, 10)
h0 = torch.randn(2 * num_directions, 3, 20)
c0 = torch.randn(2 * num_directions, 3, 20)
output, (hn, cn) = rnn(input, (h0, c0))
print(output.size())
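Assuming LSTMFrame follows the same (seq_len, batch, features) convention as torch.nn.LSTM (which the commented-out line suggests), the printed size should be torch.Size([5, 3, 40]) here, i.e. hidden_size * num_directions in the last dimension for the bidirectional case.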