[Natural Language Processing] RNN

Time: 2024-10-10 07:33:12

1 The Traditional RNN Model and LSTM

import torch
import torch.nn as nn

torch.manual_seed(8)


def dm01():
    '''
    nn.RNN arguments:
    arg 1: input_size, the dimensionality of each input vector
    arg 2: hidden_size, the number of neurons in the hidden layer
    arg 3: num_layers, the number of hidden layers
    :return:
    '''
    rnn = nn.RNN(5, 6, 1)
    '''
    Shape of the input tensor:
    dim 1: sentence length (sequence_length)
    dim 2: number of samples in a batch (batch_size)
    dim 3: dimensionality of each word vector (vector_dim)
    '''
    input = torch.randn(1, 3, 5)
    '''
    Shape of the initial hidden state h0:
    dim 1: number of hidden layers (num_layers)
    dim 2: number of samples in a batch (batch_size)
    dim 3: number of neurons in the hidden layer (hidden_size)
    '''
    h0 = torch.randn(1, 3, 6)
    output, hn = rnn(input, h0)
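    # Expected shapes with the default batch_first=False:
    # output: (seq_len, batch_size, hidden_size) = (1, 3, 6), hidden states for every time step
    # hn:     (num_layers, batch_size, hidden_size) = (1, 3, 6), hidden state after the last step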
    print(f'output-->{output.shape} {output}')
    print(f'hn-->{hn.shape} {hn}')
    print(f'rnn model-->{rnn}')


def dm02():
    rnn = nn.RNN(5, 6, 1)
    input = torch.randn(4, 3, 5)
    h0 = torch.randn(1, 3, 6)
    output, hn = rnn(input, h0)
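    # With seq_len=4: output is (4, 3, 6) and hn stays (1, 3, 6); for a single-layer
    # RNN the last time step of output equals the final hidden state
    print(torch.allclose(output[-1], hn[0]))  # expected: True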
    print(f'output-->{output.shape} {output}')
    print(f'hn-->{hn.shape} {hn}')
    print(f'rnn model-->{rnn}')


def dm03():
    rnn = nn.RNN(5, 6, 1)
    input = torch.randn(4, 1, 5)
    print(f'input {input}')
    hidden = torch.zeros(1, 1, 6)
    # Feed the sequence to the RNN one time step at a time
    for i in range(4):
        tmp = input[i][0]
        print(f'tmp.shape {tmp.shape}')
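        # tmp has shape (5,); two unsqueeze(0) calls reshape it to (1, 1, 5),
        # i.e. the (seq_len, batch_size, vector_dim) layout the RNN expects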
        output, hidden = rnn(tmp.unsqueeze(0).unsqueeze(0), hidden)
        print(f'{i} {output}')
        print(f'{i} {hidden}')
        print('*' * 80)
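
    # Keep the loop's final hidden state so we can compare it with the one-shot run below
    step_hidden = hidden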

    hidden = torch.zeros(1, 1, 6)
    output, hn = rnn(input, hidden)
    print(f'output2 {output} {output.shape}')
    print(f'hn {hn} {hn.shape}')
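    # Sanity check: both runs start from zeros and share the same weights, so the
    # per-step loop and the whole-sequence call should produce the same final state
    print(torch.allclose(hn, step_hidden))  # expected: True (up to float tolerance)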


# Change the number of hidden layers
def dm04():
    rnn = nn.RNN(5, 6, 2)
    input = torch.randn(4, 3, 5)
    h0 = torch.randn(2, 3, 6)
    output, hn = rnn(input, h0)
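    # With num_layers=2: output is still (4, 3, 6) and holds only the top layer's
    # hidden states; hn is (2, 3, 6), one final state per layer, so output[-1] == hn[-1]
    print(torch.allclose(output[-1], hn[-1]))  # expected: True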
    print(f'output-->{output.shape} {output}')
    print(f'hn-->{hn.shape} {hn}')
    print(f'rnn model-->{rnn}')


# Set batch_first=True so the batch dimension comes first in input and output
def dm05():
    rnn = nn.RNN(5, 6, 1, batch_first=True)
    input = torch.randn(3, 4, 5)
    h0 = torch.randn(1, 3, 6)
    output, hn = rnn(input, h0)
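    # With batch_first=True the input layout is (batch_size, seq_len, vector_dim) = (3, 4, 5)
    # and output becomes (3, 4, 6); h0 and hn keep the (num_layers, batch_size, hidden_size) layout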
    print(f'output-->{output.shape} {output}')
    print(f'hn-->{hn.shape} {hn}')
    print(f'rnn model-->{rnn}')


# LSTM
def dm06():
    rnn = nn.LSTM(5, 6, 2)
    input = torch.randn(1, 3, 5)
    h0 = torch.randn(2, 3, 6)
    c0 = torch.randn(2, 3, 6)
    output, (hn, cn) = rnn(input, (h0, c0))
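    # The LSTM also carries a cell state: hn and cn are both (num_layers, batch_size,
    # hidden_size) = (2, 3, 6), while output is (seq_len, batch_size, hidden_size) = (1, 3, 6)
    # and contains only the top layer's hidden states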
    print(f'output {output}')
    print(f'hn {hn}')
    print(f'cn {cn}')


if __name__ == '__main__':
    # dm01()
    # dm02()
    # dm03()
    # dm04()
    # dm05()
    dm06()

D:\nlplearning\nlpbase\python.exe D:\nlpcoding\rnncode.py
output tensor([[[ 0.0207, -0.1121, -0.0706,  0.1167, -0.3322, -0.0686],
         [ 0.1256,  0.1328,  0.2361,  0.2237, -0.0203, -0.2709],
         [-0.2668, -0.2721, -0.2168,  0.4734,  0.2420,  0.0349]]],
       grad_fn=<MkldnnRnnLayerBackward0>)
hn tensor([[[ 0.1501, -0.2106,  0.0213,  0.1309,  0.3074, -0.2038],
         [ 0.3639, -0.0394, -0.1912,  0.1282,  0.0369, -0.1094],
         [ 0.1217, -0.0517,  0.1884, -0.1100, -0.5018, -0.4512]],

        [[ 0.0207, -0.1121, -0.0706,  0.1167, -0.3322, -0.0686],
         [ 0.1256,  0.1328,  0.2361,  0.2237, -0.0203, -0.2709],
         [-0.2668, -0.2721, -0.2168,  0.4734,  0.2420,  0.0349]]],
       grad_fn=<StackBackward0>)
cn tensor([[[ 0.2791, -0.7362,  0.0501,  0.2612,  0.4655, -0.2338],
         [ 0.7902, -0.0920, -0.4955,  0.3865,  0.0868, -0.1612],
         [ 0.2312, -0.3736,  0.4033, -0.1386, -1.0151, -0.5971]],

        [[ 0.0441, -0.2279, -0.1483,  0.3397, -0.5597, -0.4339],
         [ 0.2154,  0.4119,  0.4723,  0.4731, -0.0284, -1.1095],
         [-0.5016, -0.5146, -0.4286,  1.5299,  0.5992,  0.1224]]],
       grad_fn=<StackBackward0>)

Process finished with exit code 0