ConvNetJS源代码解析第二篇

时间:2021-01-18 09:16:51


上一篇主要介绍了本次代码的数据结构Vol。第二篇将紧接着第一篇的内容介绍Layer(convnet_layers_dotproducts.js;convnet_layers_dropout.js;convnet_layers_input.js;convnet_layers_loss.js;

convnet_layers_nonlinearities.js;convnet_layers_normalization.js;convnet_layers_pool.js)

在正式分析源代码之前,我们有必要先完整地使用一遍作者提供的API,跑一个神经网络的分类任务。这个任务可以让我们对API的功能有一个整体的理解,便于从宏观上把握代码的功能。

<html>
<Script src="C:\Users\Administrator\Desktop\JS\convnetjs\convnetjs-master\build\convnet.js"></Script>
<head> <Script Language="JavaScript">//JavaScriptAppearshere.
// Minimal end-to-end demo: describe a small classifier, build it, run one
// forward pass, do one training step, then run the forward pass again.

// ordered list of layer definitions, consumed by net.makeLayers() below
var layer_defs = [];
// input layer of size 1x1x2 (all volumes are 3D)
layer_defs.push({type:'input', out_sx:1, out_sy:1, out_depth:2});
// two fully connected hidden layers, 20 neurons each, ReLU activation
layer_defs.push({type:'fc', num_neurons:20, activation:'relu'});
layer_defs.push({type:'fc', num_neurons:20, activation:'relu'});
// a softmax classifier predicting probabilities for two classes: 0,1
layer_defs.push({type:'softmax', num_classes:2});

// create a net out of the layer definitions
var net = new convnetjs.Net();
net.makeLayers(layer_defs);

// the network always works on Vol() elements. These are essentially
// simple wrappers around lists, but also contain gradients and dimensions
// line below will create a 1x1x2 volume and fill it with 0.5 and -1.3
var x = new convnetjs.Vol([0.5, -1.3]);
// show the Vol object itself in a dialog
alert(x);
// forward pass before any training; the result is read through its .w array
var probability_volume = net.forward(x);
alert('probability that x is class 0: ' + probability_volume.w[0]);

// one training step on the single example x with class label 0
var trainer = new convnetjs.Trainer(net, {learning_rate:0.01, l2_decay:0.001});
trainer.train(x, 0);
// forward pass again so the class-0 probability can be compared with the first alert
var probability_volume2 = net.forward(x);
alert('probability that x is class 0: ' + probability_volume2.w[0]);
</Script>
</Head>
</Html>
以上代码中的第二行“C:\Users\Administrator\Desktop\JS\convnetjs\convnetjs-master\build\convnet.js”中的文件convnet.js,是按照作者GitHub主页上的说明编译之后产生的文件,其实就是把src目录中的代码整理到一个文件中。上面的路径是我的本地路径。因此运行这个程序首先要生成convnet.js文件(要么自己编译,要么自己动手复制src中的代码,要么去GitHub上下载)。然后将上面的代码复制,保存为html文件,使用IE浏览器打开就可以运行了。代码的意思是首先建立array类型变量layer_defs,然后使用push方法,将
{type:'fc', num_neurons:20, activation:'relu'}
这种形式的变量添加进去array中。上面的layer部分定义了神经网络的结构,第一层是输入层,从中可知输入数据的维度为2.然后建立了两个全连接类型的隐藏层,隐藏节点分别为20,激活函数为relu.最后由于是要分类,因此最后一层的输出层是softmax,是对其二分类。

有了各个层的变量定义layer_defs。然后将其聚合在一起是通过

                                                         var net = new convnetjs.Net();
                                                         net.makeLayers(layer_defs);

将上面定义的layer层统一为一个神经网络Net.

紧接着,var x = new convnetjs.Vol([0.5, -1.3]); 建立了一个Vol类型的数据,[0.5,-1.3]是数据点。之所以这样做,是因为Vol是代码的基本数据结构,网络始终以Vol作为输入。var probability_volume = net.forward(x);使用net的前向传播算法,给出数据点x的输出。alert('probability that x is class 0: ' + probability_volume.w[0]);显示的是网络的输出,从引用方式可以看出probability_volume也是一个Vol类型的变量。probability_volume.w[0]是指数据点x被分到第一类(类别0)的概率。这里再次印证了作者的编码风格:使用Vol作为数据的基本结构,神经网络各层的处理仅仅是一个“transformer”。随后

                                var trainer = new convnetjs.Trainer(net, {learning_rate:0.01, l2_decay:0.001});
                                 trainer.train(x, 0);

使用类Trainer定义了训练的相关参数,使用其方法train对数据点x(类别标签为0)进行训练。再次调用

                                                        var probability_volume2 = net.forward(x);
                                                       alert('probability that x is class 0: ' + probability_volume2.w[0]);

应该可以发现,第二次输出的概率比第一次变大了,这正是这次训练导致的结果。


2.5. convnet_layers_dotproducts.js

这是合并后的convnet.js文件中属于layer级别的第一个文件的代码,其中定义了两个特殊的Layer,一个是ConvLayer卷积层,一个是FullyConnLayer全连接层。读者可能会问:这两个层的定义有什么必要性吗?在正式进行convnet_layers_dotproducts.js分析之前,我们首先回答这个问题,即搞清楚在整个程序中使用Layer的上下游程序接口。上游接口就是在上面的整体代码说明中下面这种形式的数据结构:

                    {type:'fc', num_neurons:20, activation:'relu'}

下游程序接口隐藏在convnet_net.js的80-90之间,就是如下的截图。详尽的关于convnet_net.js的说明会在后面进行更新。在这个下游程序接口中,def变量就是{type:'fc', num_neurons:20, activation:'relu'}。下面这个程序会根据def中的type属性,选择调用不同的Layer生成器,对于这个def的type='fc',会调用global.FullyConnLayer(def).并将结果返回给net的一个属性layers.另外补充定义了def.in_sx,def.in_sy,def.in_depth的属性,记住这一点,这在后面会用到。有了这个认识,我们会在分析convnet_layers_dotproducts.js中更加得心应手。

ConvNetJS源代码解析第二篇

ConvNetJS源代码解析第二篇

图.convnet_net.js代码的部分截图


下面是convnet_layers_dotproducts.js的全部代码,其中中文注释是我自己加的,英文注释是作者加的。我们先分析文件的后一部分,即FullyConnLayer类的定义。代码中使用到了BP算法,不熟悉的可以参考维基百科的Backpropagation词条。分析ConvLayer类的定义则需要对CNN的架构有一定的了解,可以参看维基百科,搞清楚什么是filters、stride、padding。


(function(global) {
"use strict";
var Vol = global.Vol; // convenience

// This file contains all layers that do dot products with input,
// but usually in a different connectivity pattern and weight sharing
// schemes:
// - FullyConn is fully connected dot products
// - ConvLayer does convolutions (so weight sharing spatially)
// putting them together in one file because they are very similar
/**
 * Convolution layer: applies `filters` filters of size sx * sy * in_depth to
 * the input volume and produces one output depth slice per filter.
 *
 * @param {Object} [opt] Layer definition.
 *   Required keys: filters, sx, in_depth, in_sx, in_sy.
 *   Optional keys (default): sy (sx), stride (1), pad (0),
 *   l1_decay_mul (0.0), l2_decay_mul (1.0), bias_pref (0.0).
 */
var ConvLayer = function(opt) {
  var cfg = opt || {};
  // Returns `value` unless it is undefined, in which case `fallback` is used.
  var orDefault = function(value, fallback) {
    return typeof value !== 'undefined' ? value : fallback;
  };

  // required settings
  this.out_depth = cfg.filters;
  this.sx = cfg.sx; // filter width; odd sizes are cleaner
  this.in_depth = cfg.in_depth;
  this.in_sx = cfg.in_sx;
  this.in_sy = cfg.in_sy;

  // optional settings
  this.sy = orDefault(cfg.sy, this.sx); // filter height
  this.stride = orDefault(cfg.stride, 1); // step between filter applications
  this.pad = orDefault(cfg.pad, 0); // zero padding added around the input borders
  this.l1_decay_mul = orDefault(cfg.l1_decay_mul, 0.0);
  this.l2_decay_mul = orDefault(cfg.l2_decay_mul, 1.0);

  // Derived output size. Math.floor trims the output when the strided filter
  // does not fit the padded input exactly, dropping the incomplete last step.
  var padded_sx = this.in_sx + this.pad * 2;
  var padded_sy = this.in_sy + this.pad * 2;
  this.out_sx = Math.floor((padded_sx - this.sx) / this.stride + 1);
  this.out_sy = Math.floor((padded_sy - this.sy) / this.stride + 1);
  this.layer_type = 'conv';

  // One weight volume per output depth slice, then a shared bias volume
  // filled with the preferred initial bias.
  var initial_bias = orDefault(cfg.bias_pref, 0.0);
  this.filters = [];
  var k = 0;
  while (k < this.out_depth) {
    this.filters.push(new Vol(this.sx, this.sy, this.in_depth));
    k++;
  }
  this.biases = new Vol(1, 1, this.out_depth, initial_bias);
};

ConvLayer.prototype = {
  /**
   * Forward pass: convolves every filter over V and adds the filter's bias.
   * Stores V in this.in_act and the result in this.out_act.
   *
   * @param {Vol} V input volume
   * @param {boolean} is_training unused by this layer
   * @returns {Vol} the output volume (also kept as this.out_act)
   */
  forward: function(V, is_training) {
    this.in_act = V;
    var out = new Vol(this.out_sx | 0, this.out_sy | 0, this.out_depth | 0, 0.0);

    var in_w = V.sx | 0;
    var in_h = V.sy | 0;
    var step = this.stride | 0;
    var origin = -this.pad | 0; // top-left input coordinate of the first window

    for (var d = 0; d < this.out_depth; d++) {
      var filt = this.filters[d];
      var y = origin;
      for (var ay = 0; ay < this.out_sy; ay++, y += step) {
        var x = origin;
        for (var ax = 0; ax < this.out_sx; ax++, x += step) {
          // dot product of the filter with the input window at (x, y)
          var acc = 0.0;
          for (var fy = 0; fy < filt.sy; fy++) {
            var oy = y + fy; // row in input coordinates
            if (!(oy >= 0 && oy < in_h)) { continue; } // row lies in the zero padding
            for (var fx = 0; fx < filt.sx; fx++) {
              var ox = x + fx; // column in input coordinates
              if (!(ox >= 0 && ox < in_w)) { continue; } // column lies in the zero padding
              // flat offsets computed inline instead of through Vol accessors, for speed
              var f_base = ((filt.sx * fy) + fx) * filt.depth;
              var v_base = ((in_w * oy) + ox) * V.depth;
              for (var fd = 0; fd < filt.depth; fd++) {
                acc += filt.w[f_base + fd] * V.w[v_base + fd];
              }
            }
          }
          acc += this.biases.w[d];
          out.set(ax, ay, d, acc);
        }
      }
    }

    this.out_act = out;
    return this.out_act;
  },

  /**
   * Backward pass: resets the input gradient, then accumulates gradients
   * with respect to the filters, the biases and the input, using the
   * gradient stored on this.out_act.
   */
  backward: function() {
    var V = this.in_act;
    V.dw = global.zeros(V.w.length); // input gradient is rebuilt from scratch

    var in_w = V.sx | 0;
    var in_h = V.sy | 0;
    var step = this.stride | 0;
    var origin = -this.pad | 0;

    for (var d = 0; d < this.out_depth; d++) {
      var filt = this.filters[d];
      var y = origin;
      for (var ay = 0; ay < this.out_sy; ay++, y += step) {
        var x = origin;
        for (var ax = 0; ax < this.out_sx; ax++, x += step) {
          // gradient arriving from the layer above for this output cell
          var chain_grad = this.out_act.get_grad(ax, ay, d);
          for (var fy = 0; fy < filt.sy; fy++) {
            var oy = y + fy;
            if (!(oy >= 0 && oy < in_h)) { continue; }
            for (var fx = 0; fx < filt.sx; fx++) {
              var ox = x + fx;
              if (!(ox >= 0 && ox < in_w)) { continue; }
              var v_base = ((in_w * oy) + ox) * V.depth;
              var f_base = ((filt.sx * fy) + fx) * filt.depth;
              for (var fd = 0; fd < filt.depth; fd++) {
                var vi = v_base + fd;
                var fi = f_base + fd;
                filt.dw[fi] += V.w[vi] * chain_grad;
                V.dw[vi] += filt.w[fi] * chain_grad;
              }
            }
          }
          this.biases.dw[d] += chain_grad;
        }
      }
    }
  },

  /**
   * Lists the parameter/gradient arrays of this layer together with their
   * regularization multipliers: one entry per filter, then one for the
   * biases (whose multipliers are both 0.0).
   *
   * @returns {Array<Object>} entries of {params, grads, l1_decay_mul, l2_decay_mul}
   */
  getParamsAndGrads: function() {
    var entries = [];
    for (var k = 0; k < this.out_depth; k++) {
      var filt = this.filters[k];
      entries.push({
        params: filt.w,
        grads: filt.dw,
        l2_decay_mul: this.l2_decay_mul,
        l1_decay_mul: this.l1_decay_mul
      });
    }
    entries.push({
      params: this.biases.w,
      grads: this.biases.dw,
      l1_decay_mul: 0.0,
      l2_decay_mul: 0.0
    });
    return entries;
  },

  /**
   * Serializes the layer settings, filters and biases to a plain object.
   *
   * @returns {Object} JSON-ready description of the layer
   */
  toJSON: function() {
    var filter_json = [];
    for (var k = 0; k < this.filters.length; k++) {
      filter_json.push(this.filters[k].toJSON());
    }
    return {
      sx: this.sx, // filter size in x, y dims
      sy: this.sy,
      stride: this.stride,
      in_depth: this.in_depth,
      out_depth: this.out_depth,
      out_sx: this.out_sx,
      out_sy: this.out_sy,
      layer_type: this.layer_type,
      l1_decay_mul: this.l1_decay_mul,
      l2_decay_mul: this.l2_decay_mul,
      pad: this.pad,
      filters: filter_json,
      biases: this.biases.toJSON()
    };
  },

  /**
   * Restores the layer from an object produced by toJSON. When absent from
   * the JSON, l1_decay_mul and l2_decay_mul fall back to 1.0 and pad to 0.
   *
   * @param {Object} json serialized layer
   */
  fromJSON: function(json) {
    var l1 = json.l1_decay_mul;
    var l2 = json.l2_decay_mul;
    var pad = json.pad;

    this.out_depth = json.out_depth;
    this.out_sx = json.out_sx;
    this.out_sy = json.out_sy;
    this.layer_type = json.layer_type;
    this.sx = json.sx; // filter size in x, y dims
    this.sy = json.sy;
    this.stride = json.stride;
    this.in_depth = json.in_depth; // depth of input volume
    this.filters = [];
    this.l1_decay_mul = typeof l1 !== 'undefined' ? l1 : 1.0;
    this.l2_decay_mul = typeof l2 !== 'undefined' ? l2 : 1.0;
    this.pad = typeof pad !== 'undefined' ? pad : 0;

    // each Vol is created empty and then filled from its serialized form
    for (var k = 0; k < json.filters.length; k++) {
      var restored = new Vol(0, 0, 0, 0);
      restored.fromJSON(json.filters[k]);
      this.filters.push(restored);
    }
    this.biases = new Vol(0, 0, 0, 0);
    this.biases.fromJSON(json.biases);
  }
};
// Keep in mind that opt here is {type:'fc', num_neurons:20, activation:'relu'}; the analysis below uses it as the running example.
/**
 * Fully connected layer: every output neuron takes a dot product with the
 * whole (flattened) input volume and adds its own bias. No activation
 * function is applied here.
 *
 * @param {Object} [opt] Layer definition.
 *   Required keys: num_neurons (or filters as an alias), in_sx, in_sy, in_depth.
 *   Optional keys (default): l1_decay_mul (0.0), l2_decay_mul (1.0), bias_pref (0.0).
 */
var FullyConnLayer = function(opt) {
  var cfg = opt || {};
  // Returns `value` unless it is undefined, in which case `fallback` is used.
  var orDefault = function(value, fallback) {
    return typeof value !== 'undefined' ? value : fallback;
  };

  // number of neurons; 'filters' is accepted as an alternative key
  this.out_depth = orDefault(cfg.num_neurons, cfg.filters);

  // optional regularization multipliers
  this.l1_decay_mul = orDefault(cfg.l1_decay_mul, 0.0);
  this.l2_decay_mul = orDefault(cfg.l2_decay_mul, 1.0);

  // derived: the input volume is treated as one flat vector, and the
  // output is always a 1 x 1 x out_depth volume
  this.num_inputs = cfg.in_sx * cfg.in_sy * cfg.in_depth;
  this.out_sx = 1;
  this.out_sy = 1;
  this.layer_type = 'fc';

  // one weight volume per neuron, then the bias volume filled with the
  // preferred initial bias
  var initial_bias = orDefault(cfg.bias_pref, 0.0);
  this.filters = [];
  var k = 0;
  while (k < this.out_depth) {
    this.filters.push(new Vol(1, 1, this.num_inputs));
    k++;
  }
  this.biases = new Vol(1, 1, this.out_depth, initial_bias);
};

// Public methods of FullyConnLayer: forward, backward and the
// parameter / serialization helpers.
FullyConnLayer.prototype = {
  /**
   * Forward pass: out[i] = sum_d V.w[d] * filters[i].w[d] + biases.w[i].
   * Stores V in this.in_act and the result in this.out_act.
   *
   * @param {Vol} V input volume
   * @param {boolean} is_training unused by this layer
   * @returns {Vol} 1 x 1 x out_depth output volume (also kept as this.out_act)
   */
  forward: function(V, is_training) {
    this.in_act = V;
    var out = new Vol(1, 1, this.out_depth, 0.0);
    var input = V.w; // raw arrays are used directly for efficiency
    for (var i = 0; i < this.out_depth; i++) {
      var weights = this.filters[i].w;
      var sum = 0.0;
      for (var d = 0; d < this.num_inputs; d++) {
        sum += input[d] * weights[d];
      }
      sum += this.biases.w[i];
      out.w[i] = sum;
    }
    this.out_act = out;
    return this.out_act;
  },

  /**
   * Backward pass for this single layer: resets the input gradient, then
   * accumulates gradients with respect to the input, the weights and the
   * biases from the gradient stored in this.out_act.dw (chain rule).
   */
  backward: function() {
    var V = this.in_act;
    V.dw = global.zeros(V.w.length); // input gradient is rebuilt from scratch

    for (var i = 0; i < this.out_depth; i++) {
      var neuron = this.filters[i];
      var chain_grad = this.out_act.dw[i]; // gradient arriving from the layer above
      for (var d = 0; d < this.num_inputs; d++) {
        V.dw[d] += neuron.w[d] * chain_grad; // gradient wrt input data
        neuron.dw[d] += V.w[d] * chain_grad; // gradient wrt weights
      }
      this.biases.dw[i] += chain_grad;
    }
  },

  /**
   * Lists the parameter/gradient arrays of this layer together with their
   * regularization multipliers: one entry per neuron, then one for the
   * biases (whose multipliers are both 0.0).
   *
   * @returns {Array<Object>} entries of {params, grads, l1_decay_mul, l2_decay_mul}
   */
  getParamsAndGrads: function() {
    var entries = [];
    for (var k = 0; k < this.out_depth; k++) {
      var neuron = this.filters[k];
      entries.push({
        params: neuron.w,
        grads: neuron.dw,
        l1_decay_mul: this.l1_decay_mul,
        l2_decay_mul: this.l2_decay_mul
      });
    }
    entries.push({
      params: this.biases.w,
      grads: this.biases.dw,
      l1_decay_mul: 0.0,
      l2_decay_mul: 0.0
    });
    return entries;
  },

  /**
   * Serializes the layer settings, weights and biases to a plain object.
   *
   * @returns {Object} JSON-ready description of the layer
   */
  toJSON: function() {
    var filter_json = [];
    for (var k = 0; k < this.filters.length; k++) {
      filter_json.push(this.filters[k].toJSON());
    }
    return {
      out_depth: this.out_depth,
      out_sx: this.out_sx,
      out_sy: this.out_sy,
      layer_type: this.layer_type,
      num_inputs: this.num_inputs,
      l1_decay_mul: this.l1_decay_mul,
      l2_decay_mul: this.l2_decay_mul,
      filters: filter_json,
      biases: this.biases.toJSON()
    };
  },

  /**
   * Restores the layer from an object produced by toJSON. When absent from
   * the JSON, l1_decay_mul and l2_decay_mul both fall back to 1.0.
   *
   * @param {Object} json serialized layer
   */
  fromJSON: function(json) {
    var l1 = json.l1_decay_mul;
    var l2 = json.l2_decay_mul;

    this.out_depth = json.out_depth;
    this.out_sx = json.out_sx;
    this.out_sy = json.out_sy;
    this.layer_type = json.layer_type;
    this.num_inputs = json.num_inputs;
    this.l1_decay_mul = typeof l1 !== 'undefined' ? l1 : 1.0;
    this.l2_decay_mul = typeof l2 !== 'undefined' ? l2 : 1.0;
    this.filters = [];

    // each Vol is created empty and then filled from its serialized form
    for (var k = 0; k < json.filters.length; k++) {
      var restored = new Vol(0, 0, 0, 0);
      restored.fromJSON(json.filters[k]);
      this.filters.push(restored);
    }
    this.biases = new Vol(0, 0, 0, 0);
    this.biases.fromJSON(json.biases);
  }
};

global.ConvLayer = ConvLayer;
global.FullyConnLayer = FullyConnLayer;

})(convnetjs);