# nn.Embedding 理解及其参数 padding_idx 含义
import torch
import torch.nn as nn
# --- Part 1: nn.Embedding as a lookup table ---------------------------------
# Embedding matrix with 10 rows (num_embeddings) of 3 values each
# (embedding_dim), i.e. a 10 x 3 weight matrix.
embed = nn.Embedding(10, 3)

# The input to an Embedding layer must be a tensor of integer indices.
input1 = torch.tensor(1)
print(input1)  # tensor(1)
input2 = torch.tensor([1, 1])
print(input2)  # tensor([1, 1])
input3 = torch.tensor([1, 2])
print(input3)  # tensor([1, 2])
input4 = torch.tensor([1, 10])  # index 10 is deliberately out of range
print(input4)  # tensor([ 1, 10])

# NOTE: the weights are randomly initialised, so the sample outputs quoted
# below change from run to run (the sample for out1 was captured in a different
# run than those for out2/out3). Within a single run, the same index always
# returns the same row: out1 equals out2[0], out2[1] and out3[0].
out1 = embed(input1)
print(out1)
# e.g. tensor([ 0.1294, -0.1507, -0.0476], grad_fn=<EmbeddingBackward0>)
out2 = embed(input2)
print(out2)
# e.g. tensor([[-0.4178, 0.8059, 0.0863],
#              [-0.4178, 0.8059, 0.0863]], grad_fn=<EmbeddingBackward0>)
out3 = embed(input3)
print(out3)
# e.g. tensor([[-0.4178, 0.8059, 0.0863],
#              [ 0.9092, -0.8834, -0.5366]], grad_fn=<EmbeddingBackward0>)

# Looking up index 10 in a 10-row table fails. Catch the error so that the
# failure is demonstrated without aborting the rest of the script.
try:
    out4 = embed(input4)
    print(out4)
except IndexError as err:
    print(f"IndexError: {err}")
    # IndexError: index out of range in self

# Summary: in nn.Embedding(10, 3), 10 is num_embeddings and 3 is embedding_dim,
# i.e. there are 10 embedding vectors and each vector has 3 dimensions
# (length 3).
# The layer can be understood as looking up rows of the embedding matrix by
# index: an input index of 0 returns the first row of the matrix.
# An input index of 10 fails because the matrix is 10 x 3 and therefore only
# supports indices 0-9 (see out4 above).
# Another way to look at it: every input number can be represented as a one-hot
# vector of dimension 10. For the number 2 (index 2) that vector is
# [0 0 1 0 0 ... 0]; multiplying this one-hot vector by the embedding matrix
# again yields the third row of the matrix.
# Hence 10 bounds the values the input may take. Normally the vocabulary size
# is used as num_embeddings, so that each word's index can be used to look up
# its vector; 3 is the dimension of the output vectors.
import torch.nn as nn
# --- Part 2: the padding_idx parameter ---------------------------------------
# All sample outputs below come from randomly initialised weights and will
# differ between runs; only the position of the all-zero row is deterministic.

# Without padding_idx every row is randomly initialised.
embed1 = nn.Embedding(3, 3)
print(embed1.weight)
# e.g. tensor([[ 1.0503, 1.2954, 0.0826],
#              [ 1.3010, -0.1322, 2.4299],
#              [ 0.2982, -0.0534, -0.0754]], requires_grad=True)

# padding_idx=0: row 0 is all zeros.
embed2 = nn.Embedding(3, 3, padding_idx=0)
print(embed2.weight)
# e.g. tensor([[ 0.0000, 0.0000, 0.0000],
#              [ 1.1654, 1.5345, 0.9253],
#              [ 1.0780, -1.8185, -1.4120]], requires_grad=True)

# padding_idx=1: row 1 is all zeros.
embed3 = nn.Embedding(3, 3, padding_idx=1)
print(embed3.weight)
# e.g. tensor([[-0.4296, 0.3443, -0.3189],
#              [ 0.0000, 0.0000, 0.0000],
#              [-0.8069, 0.9383, 0.9449]], requires_grad=True)

# padding_idx=2: row 2 is all zeros.
embed4 = nn.Embedding(3, 3, padding_idx=2)
print(embed4.weight)
# e.g. tensor([[-0.8485, 1.5352, 1.1185],
#              [-0.6012, -1.5501, -0.2466],
#              [ 0.0000, 0.0000, 0.0000]], requires_grad=True)

# Summary: padding_idx initialises the given row of the embedding matrix to
# zeros. Per the PyTorch documentation, that row also receives no gradient, so
# it is not updated during training and stays a fixed "pad" vector.

# Every occurrence of the padding index (2) in the input maps to the zero row.
input1 = torch.tensor([0, 1, 2, 2, 1, 0])
print(embed4(input1))
# e.g. (captured in a different run than the embed4.weight sample above)
# tensor([[ 0.4167, -0.5717, -0.9844],
#         [ 1.1028, -0.3473, 0.5762],
#         [ 0.0000, 0.0000, 0.0000],
#         [ 0.0000, 0.0000, 0.0000],
#         [ 1.1028, -0.3473, 0.5762],
#         [ 0.4167, -0.5717, -0.9844]], grad_fn=<EmbeddingBackward0>)