- 并行乘法器,也就是用乘法运算符实现,下面的代码实现8bit无符号数的乘法。
代码:
// Parallel multiplier: unsigned DATA_SIZE x DATA_SIZE -> 2*DATA_SIZE bits,
// built on the `*` operator and wrapped in an input and an output register.
//
// Ports:
//   rst_n : synchronous reset, active low (sampled on the rising edge of clk)
//   clk   : clock
//   a, b  : unsigned operands
//   p     : product, valid two rising clock edges after a and b are applied
//
// The multiplication is performed on the registered operands a_r / b_r, so the
// input register stage is actually part of the data path.
// To multiply signed numbers the operands must be declared `signed`.
module mult_parrell #(
    parameter DATA_SIZE = 8
) (
    input                              rst_n,
    input                              clk,
    input      [DATA_SIZE - 1 : 0]     a,
    input      [DATA_SIZE - 1 : 0]     b,
    output reg [2*DATA_SIZE - 1 : 0]   p
);

    // Registered copies of the operands.
    reg  [DATA_SIZE - 1 : 0]   a_r;
    reg  [DATA_SIZE - 1 : 0]   b_r;

    // Combinational product of the registered operands.
    wire [2*DATA_SIZE - 1 : 0] p_tmp;

    // Input register stage.
    always @(posedge clk)
        if (!rst_n)
            begin
                a_r <= {DATA_SIZE{1'b0}};
                b_r <= {DATA_SIZE{1'b0}};
            end
        else
            begin
                a_r <= a;
                b_r <= b;
            end

    // The assignment target is 2*DATA_SIZE bits wide, so the operands are
    // extended to that width before multiplying and no product bit is lost.
    assign p_tmp = a_r * b_r;

    // Output register stage.
    always @(posedge clk)
        if (!rst_n)
            p <= {(2*DATA_SIZE){1'b0}};
        else
            p <= p_tmp;

endmodule
- 移位相加乘法器,下面的代码可实现8bit有符号数的相乘,注意符号扩展以及MSB位的处理:
//输入数据取反
assign a_r_inv = ~a_r + 1;
assign a_shift0 = b_r[0] ? {{8{a_r[7]}},a_r} : 0;
assign a_shift1 = b_r[1] ? {{7{a_r[7]}},a_r,1'b0} : 0;
assign a_shift2 = b_r[2] ? {{6{a_r[7]}},a_r,2'b0} : 0;
assign a_shift3 = b_r[3] ? {{5{a_r[7]}},a_r,3'b0} : 0;
assign a_shift4 = b_r[4] ? {{4{a_r[7]}},a_r,4'b0} : 0;
assign a_shift5 = b_r[5] ? {{3{a_r[7]}},a_r,5'b0} : 0;
assign a_shift6 = b_r[6] ? {{2{a_r[7]}},a_r,6'b0} : 0;
assign a_shift7 = b_r[7] ? {{1{a_r_inv[7]}},a_r_inv,7'b0} : 0; //乘数b为负数(符号位b_r[7]=1)时,最高位的权值为-2^7,因此这一项部分积取被乘数的相反数a_r_inv
代码:
// Shift-and-add multiplier for 8-bit two's-complement operands.
//
// Ports:
//   rst_n : synchronous reset, active low
//   clk   : clock
//   a, b  : signed operands (two's complement)
//   p     : low 2*DATA_SIZE-1 bits of the product, valid two rising clock
//           edges after a and b are applied
//
// One sign-extended, shifted copy of a_r is produced for every bit of b_r and
// the copies are summed. The partial-product list is written out explicitly
// for DATA_SIZE = 8.
//
// p is one bit narrower than the full 16-bit sum: every product except
// (-128) * (-128) = 16384 fits in 15 bits; that single case overflows p.
module mult_shift_add #(
    parameter DATA_SIZE = 8
) (
    input                              rst_n,
    input                              clk,
    input      [DATA_SIZE - 1 : 0]     a,
    input      [DATA_SIZE - 1 : 0]     b,
    output reg [2*DATA_SIZE - 2 : 0]   p   // same range as the port, declared once
);

    // Registered copies of the operands.
    reg  [DATA_SIZE - 1 : 0]   a_r;
    reg  [DATA_SIZE - 1 : 0]   b_r;

    // Two's-complement negation of the registered multiplicand.
    wire [DATA_SIZE - 1 : 0]   a_r_inv;

    // Partial products, one per multiplier bit.
    wire [2*DATA_SIZE - 1 : 0] a_shift0;
    wire [2*DATA_SIZE - 1 : 0] a_shift1;
    wire [2*DATA_SIZE - 1 : 0] a_shift2;
    wire [2*DATA_SIZE - 1 : 0] a_shift3;
    wire [2*DATA_SIZE - 1 : 0] a_shift4;
    wire [2*DATA_SIZE - 1 : 0] a_shift5;
    wire [2*DATA_SIZE - 1 : 0] a_shift6;
    wire [2*DATA_SIZE - 1 : 0] a_shift7;

    // Sum of all partial products.
    wire [2*DATA_SIZE - 1 : 0] p_tmp;

    // Input register stage.
    always @(posedge clk)
        if (!rst_n)
            begin
                a_r <= {DATA_SIZE{1'b0}};
                b_r <= {DATA_SIZE{1'b0}};
            end
        else
            begin
                a_r <= a;
                b_r <= b;
            end

    assign a_r_inv = ~a_r + 1;

    // Each partial product is sign-extended to the full 16 bits: every bit
    // above the shifted operand is filled with the sign bit, not only the MSB.
    assign a_shift0 = b_r[0] ? {{8{a_r[7]}},a_r}      : 0;
    assign a_shift1 = b_r[1] ? {{7{a_r[7]}},a_r,1'b0} : 0;
    assign a_shift2 = b_r[2] ? {{6{a_r[7]}},a_r,2'b0} : 0;
    assign a_shift3 = b_r[3] ? {{5{a_r[7]}},a_r,3'b0} : 0;
    assign a_shift4 = b_r[4] ? {{4{a_r[7]}},a_r,4'b0} : 0;
    assign a_shift5 = b_r[5] ? {{3{a_r[7]}},a_r,5'b0} : 0;
    assign a_shift6 = b_r[6] ? {{2{a_r[7]}},a_r,6'b0} : 0;
    // b_r[7] is the sign bit of the multiplier and has weight -2^7, so this
    // term adds the negated multiplicand instead of the multiplicand itself.
    assign a_shift7 = b_r[7] ? {{1{a_r_inv[7]}},a_r_inv,7'b0} : 0;

    assign p_tmp = a_shift0 + a_shift1 + a_shift2 + a_shift3 + a_shift4
                 + a_shift5 + a_shift6 + a_shift7;

    // Output register stage: keep the low 2*DATA_SIZE-1 bits of the sum.
    always @(posedge clk)
        if (!rst_n)
            p <= {(2*DATA_SIZE - 1){1'b0}};
        else
            p <= p_tmp[2*DATA_SIZE - 2 : 0];

endmodule
testbench:
// Testbench for mult_shift_add: holds reset low for 100 time units, then
// drives a with an incrementing and b with a decrementing 8-bit pattern.
module mult_shift_add_tb;

    // Inputs
    reg rst_n;
    reg clk;
    reg [7:0] a;
    reg [7:0] b;

    // Outputs
    wire [14:0] p;

    // Instantiate the Unit Under Test (UUT)
    mult_shift_add uut (
        .rst_n(rst_n),
        .clk(clk),
        .a(a),
        .b(b),
        .p(p)
    );

    parameter CLK_PERIOD = 10;

    initial begin
        rst_n = 0;
        clk = 0;

        #100;
        rst_n = 1;
    end

    always #(CLK_PERIOD/2) clk = ~clk;

    // Stimulus generator. Non-blocking assignments are used so that a and b
    // change after the UUT has sampled them on the same clock edge; blocking
    // assignments here would race with the UUT's posedge-clk process.
    always @(posedge clk)
        if (!rst_n)
            begin
                a <= 8'd0;
                b <= 8'd0;
            end
        else
            begin
                a <= a + 1;
                b <= b - 1;
            end

endmodule
ISIM仿真结果:
- 移位相加乘法器树:
将
assign p_tmp = a_shift0 + a_shift1 + a_shift2 + a_shift3 + a_shift4 + a_shift5 + a_shift6 + a_shift7;
换为:
assign sum_01 = a_shift0 + a_shift1;
assign sum_23 = a_shift2 + a_shift3;
assign sum_45 = a_shift4 + a_shift5;
assign sum_67 = a_shift6 + a_shift7;
assign sum_0123 = sum_01 + sum_23;
assign sum_4567 = sum_45 + sum_67;
assign p_tmp = sum_0123 + sum_4567;
就成为乘法器树。
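原理是把7级串行加法改为3级树形加法,减少关键路径上的加法器级数,从而缩短关键路径,提高电路的运行频率。注意sum_01、sum_23、sum_45、sum_67、sum_0123、sum_4567需要先声明为16位的wire(wire [2*DATA_SIZE - 1 : 0]),否则会被当作1位的隐式线网。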
- LUT乘法,下面的代码利用2bit的LUT实现4bit无符号数的乘法。
代码:
// 4-bit unsigned multiplier assembled from four 2-bit LUT multipliers.
//
// The registered operands are split into 2-bit halves; the four partial
// products of the halves are weighted by 1, 4, 4 and 16 and added, and the sum
// is registered into p.
//
// Ports:
//   rst_n : synchronous reset, active low
//   clk   : clock
//   a, b  : unsigned operands
//   p     : registered product
module mult_lut #(
    parameter DATA_SIZE = 4
) (
    input                              rst_n,
    input                              clk,
    input      [DATA_SIZE - 1 : 0]     a,
    input      [DATA_SIZE - 1 : 0]     b,
    output reg [2*DATA_SIZE - 1 : 0]   p
);

    // Registered copies of the operands.
    reg  [DATA_SIZE - 1 : 0]   a_r, b_r;

    // Partial products of the operand halves: p_tmp<a half><b half>,
    // where 0 selects bits [1:0] and 1 selects bits [3:2].
    wire [DATA_SIZE - 1 : 0]   p_tmp00, p_tmp01;
    wire [DATA_SIZE - 1 : 0]   p_tmp10, p_tmp11;

    // Weighted sum of the partial products.
    wire [2*DATA_SIZE - 1 : 0] p_tmp;

    // Input register stage.
    always @(posedge clk) begin
        if (!rst_n) begin
            a_r <= 4'd0;
            b_r <= 4'd0;
        end
        else begin
            a_r <= a;
            b_r <= b;
        end
    end

    // a[1:0] * b[1:0]
    mult_lut_2bit u0_mult_lut_2bit (
        .rst_n (rst_n),
        .clk   (clk),
        .a     (a_r[1:0]),
        .b     (b_r[1:0]),
        .p     (p_tmp00)
    );

    // a[1:0] * b[3:2]
    mult_lut_2bit u1_mult_lut_2bit (
        .rst_n (rst_n),
        .clk   (clk),
        .a     (a_r[1:0]),
        .b     (b_r[3:2]),
        .p     (p_tmp01)
    );

    // a[3:2] * b[1:0]
    mult_lut_2bit u2_mult_lut_2bit (
        .rst_n (rst_n),
        .clk   (clk),
        .a     (a_r[3:2]),
        .b     (b_r[1:0]),
        .p     (p_tmp10)
    );

    // a[3:2] * b[3:2]
    mult_lut_2bit u3_mult_lut_2bit (
        .rst_n (rst_n),
        .clk   (clk),
        .a     (a_r[3:2]),
        .b     (b_r[3:2]),
        .p     (p_tmp11)
    );

    // The weights are applied by concatenating zeros rather than by writing
    // "p_tmp00 + p_tmp01<<2 + ...": "+" binds tighter than "<<", so that
    // unparenthesised form does not compute the intended sum.
    assign p_tmp = p_tmp00
                 + {p_tmp01, 2'b00}
                 + {p_tmp10, 2'b00}
                 + {p_tmp11, 4'b0000};

    // Output register stage.
    always @(posedge clk) begin
        if (!rst_n)
            p <= 8'd0;
        else
            p <= p_tmp;
    end

endmodule
2bitLUT乘法器:
// 2-bit x 2-bit unsigned multiplier implemented as a 16-entry lookup table,
// with a register on the inputs and a register on the output.
//
// Ports:
//   rst_n : synchronous reset, active low
//   clk   : clock
//   a, b  : unsigned 2-bit operands
//   p     : 4-bit product, valid two rising clock edges after a and b are applied
//
// The table is written out for DATA_SIZE = 2.
module mult_lut_2bit #(
    parameter DATA_SIZE = 2
) (
    input                              rst_n,
    input                              clk,
    input      [DATA_SIZE - 1 : 0]     a,
    input      [DATA_SIZE - 1 : 0]     b,
    output reg [2*DATA_SIZE - 1 : 0]   p
);

    // Registered copies of the operands.
    reg [DATA_SIZE - 1 : 0]   a_r;
    reg [DATA_SIZE - 1 : 0]   b_r;

    // Combinational table output.
    reg [2*DATA_SIZE - 1 : 0] p_tmp;

    // Input register stage; the reset value is sized to the registers.
    always @(posedge clk)
        if (!rst_n)
            begin
                a_r <= {DATA_SIZE{1'b0}};
                b_r <= {DATA_SIZE{1'b0}};
            end
        else
            begin
                a_r <= a;
                b_r <= b;
            end

    // Product table indexed by {a_r, b_r}.
    always @(*)
        begin
            case ({a_r, b_r})
                4'b0000 : p_tmp = 4'b0000;   // 0 * 0
                4'b0001 : p_tmp = 4'b0000;   // 0 * 1
                4'b0010 : p_tmp = 4'b0000;   // 0 * 2
                4'b0011 : p_tmp = 4'b0000;   // 0 * 3
                4'b0100 : p_tmp = 4'b0000;   // 1 * 0
                4'b0101 : p_tmp = 4'b0001;   // 1 * 1
                4'b0110 : p_tmp = 4'b0010;   // 1 * 2
                4'b0111 : p_tmp = 4'b0011;   // 1 * 3

                4'b1000 : p_tmp = 4'b0000;   // 2 * 0
                4'b1001 : p_tmp = 4'b0010;   // 2 * 1
                4'b1010 : p_tmp = 4'b0100;   // 2 * 2
                4'b1011 : p_tmp = 4'b0110;   // 2 * 3
                4'b1100 : p_tmp = 4'b0000;   // 3 * 0
                4'b1101 : p_tmp = 4'b0011;   // 3 * 1
                4'b1110 : p_tmp = 4'b0110;   // 3 * 2
                4'b1111 : p_tmp = 4'b1001;   // 3 * 3
                // Only reached when an operand bit is x/z: drive x instead of
                // silently holding the previous table value.
                default : p_tmp = {(2*DATA_SIZE){1'bx}};
            endcase
        end

    // Output register stage.
    always @(posedge clk)
        if (!rst_n)
            p <= {(2*DATA_SIZE){1'b0}};
        else
            p <= p_tmp;

endmodule
仿真结果与并行乘法一致。
上面的LUT乘法器求p_tmp的组合逻辑时延比较大,可以通过加入寄存器的方法进行拆分,将
assign p_tmp = p_tmp00 + {p_tmp01,2'b00} + {p_tmp10,2'b00} + {p_tmp11,4'b00};
替换为:
always@(posedge clk)
if(!rst_n)
begin
sum01 <= 8'd0;
sum23 <= 8'd0;
end
else
begin
sum01 <= p_tmp00 + {p_tmp01,2'b00};
sum23 <= {p_tmp10,2'b00} + {p_tmp11,4'b00};
end
assign p_tmp = sum01 + sum23;
这样就分割了组合逻辑,切断关键路径,从而提高电路的运行速度。虽然加入寄存器,对中间结果缓存,使得乘法器的输出对于输入的延时增加,但是提高了电路的整体运行频率,这是更重要的。
如下:
// 4-bit unsigned multiplier assembled from four 2-bit LUT multipliers, with an
// extra register stage inside the adder.
//
// The four partial products are first added in pairs and registered
// (sum01, sum23); the two registered sums are then added and registered
// into p.
//
// Ports:
//   rst_n : synchronous reset, active low
//   clk   : clock
//   a, b  : unsigned operands
//   p     : registered product
module mult_lut_reg #(
    parameter DATA_SIZE = 4
) (
    input                              rst_n,
    input                              clk,
    input      [DATA_SIZE - 1 : 0]     a,
    input      [DATA_SIZE - 1 : 0]     b,
    output reg [2*DATA_SIZE - 1 : 0]   p
);

    // Registered copies of the operands.
    reg  [DATA_SIZE - 1 : 0]   a_r, b_r;

    // Partial products of the operand halves: p_tmp<a half><b half>,
    // where 0 selects bits [1:0] and 1 selects bits [3:2].
    wire [DATA_SIZE - 1 : 0]   p_tmp00, p_tmp01;
    wire [DATA_SIZE - 1 : 0]   p_tmp10, p_tmp11;

    // Registered pairwise sums of the weighted partial products.
    reg  [2*DATA_SIZE - 1 : 0] sum01, sum23;

    // Final sum.
    wire [2*DATA_SIZE - 1 : 0] p_tmp;

    // Input register stage.
    always @(posedge clk) begin
        if (!rst_n) begin
            a_r <= 4'd0;
            b_r <= 4'd0;
        end
        else begin
            a_r <= a;
            b_r <= b;
        end
    end

    // a[1:0] * b[1:0]
    mult_lut_2bit u0_mult_lut_2bit (
        .rst_n (rst_n),
        .clk   (clk),
        .a     (a_r[1:0]),
        .b     (b_r[1:0]),
        .p     (p_tmp00)
    );

    // a[1:0] * b[3:2]
    mult_lut_2bit u1_mult_lut_2bit (
        .rst_n (rst_n),
        .clk   (clk),
        .a     (a_r[1:0]),
        .b     (b_r[3:2]),
        .p     (p_tmp01)
    );

    // a[3:2] * b[1:0]
    mult_lut_2bit u2_mult_lut_2bit (
        .rst_n (rst_n),
        .clk   (clk),
        .a     (a_r[3:2]),
        .b     (b_r[1:0]),
        .p     (p_tmp10)
    );

    // a[3:2] * b[3:2]
    mult_lut_2bit u3_mult_lut_2bit (
        .rst_n (rst_n),
        .clk   (clk),
        .a     (a_r[3:2]),
        .b     (b_r[3:2]),
        .p     (p_tmp11)
    );

    // Intermediate register stage: the weights 4 and 16 are applied by
    // appending two and four zero bits.
    always @(posedge clk) begin
        if (!rst_n) begin
            sum01 <= 8'd0;
            sum23 <= 8'd0;
        end
        else begin
            sum01 <= p_tmp00 + {p_tmp01, 2'b00};
            sum23 <= {p_tmp10, 2'b00} + {p_tmp11, 4'b0000};
        end
    end

    assign p_tmp = sum01 + sum23;

    // Output register stage.
    always @(posedge clk) begin
        if (!rst_n)
            p <= 8'd0;
        else
            p <= p_tmp;
    end

endmodule