前端编译原理 parser.js源码解读

时间:2021-05-19 09:04:52

前面已经介绍了一个jison的使用,在正常开发中其实已经够用下,下面主要是看了下parser.js代码解读下,作为一些了解。

下面以最简单的文法产生的parser做一些代码注释

前端编译原理  parser.js源码解读

下面是一些注释,标示了编译过程,能够了解jison产生的编译器有哪些处理

var a = (function() {
var o = function(k, v, o, l) {
for (o = o || {}, l = k.length; l--; o[k[l]] = v);
return o;
};
var parser = {
trace: function trace() {},
//编译期共享对象
yy: {},
//标示符,$accept: 0, $end: 1,error: 2是默认的符号,E: 3, NAT: 4是分析文法获得的
symbols_: { error: 2, E: 3, NAT: 4, $accept: 0, $end: 1 },
//终结符
terminals_: { 2: 'error', 4: 'NAT' },
//对应文法规则
productions_: [
0, // e` => E $end
[
3, //规约成符号2(E)
1 //堆栈移除一个状态即移除NAT,压入E
] // E => NAT
],
//规约的时候执行的代码
performAction: function anonymous(
yytext,
yyleng,
yylineno,
yy,
yystate /* action[1] */,
$$ /* vstack */,
_$ /* lstack */
) {
var $0 = $$.length - 1;
switch (yystate) {
case 1:
this.$ = $$[$0] * 1;
break;
}
},
//移入规约自动机 lalr(1)分析表,每一值表示一个状态
table: [
/*状态0 I0*/{ 3: 1/**goto 遇到符号3(E),跳转到状态1 */, 4: [1, 2] /**移入,遇到符号4(NAT),移入进入状态2 */},
/*状态1 I1*/{ 1: [3] /**接受 遇到符号1($end),结束程序,完成编译 */},
/*状态2 I2*/{ 1: [2, 1] /**规约 遇到符号1($end),按照productions[1],进行规则 */ }
],
defaultActions: {
2: [2, 1] //状态2进行规则规约,不用向前看符号是什么(逻辑优化)
},
//错误提示
parseError: function parseError(str, hash) {
if (hash.recoverable) {
this.trace(str);
} else {
var error = new Error(str);
error.hash = hash;
throw error;
}
},
//编译 input为处理的字符串
parse: function parse(input) {
var self = this,
stack = [0],
tstack = [],
vstack = [null],
lstack = [],
table = this.table,
yytext = '',
yylineno = 0,
yyleng = 0,
recovering = 0,
TERROR = 2,
EOF = 1;
var args = lstack.slice.call(arguments, 1);
var lexer = Object.create(this.lexer);
var sharedState = { yy: {} };
for (var k in this.yy) {
if (Object.prototype.hasOwnProperty.call(this.yy, k)) {
sharedState.yy[k] = this.yy[k];
}
}
lexer.setInput(input, sharedState.yy);
sharedState.yy.lexer = lexer;
sharedState.yy.parser = this;
if (typeof lexer.yylloc === 'undefined') {
lexer.yylloc = {};
}
var yyloc = lexer.yylloc;
lstack.push(yyloc);
//位置信息采用区间的形式显示
var ranges = lexer.options && lexer.options.ranges;
if (typeof sharedState.yy.parseError === 'function') {
this.parseError = sharedState.yy.parseError;
} else {
this.parseError = Object.getPrototypeOf(this).parseError;
}
//规约的时候需要弹出堆栈
function popStack(n) {
//stack [0,标识符,状态,标识符,状态...],所以出栈是2倍
stack.length = stack.length - 2 * n;
vstack.length = vstack.length - n;
lstack.length = lstack.length - n;
}
//词法分析,返回token即标示符
var lex = function() {
var token;
token = lexer.lex() || EOF;
if (typeof token !== 'number') {
token = self.symbols_[token] || token;
}
return token;
};
var symbol,
preErrorSymbol,
state,
action,
a,
r,
yyval = {},
p,
len,
newState,
expected;
while (true) {
state = stack[stack.length - 1];
if (this.defaultActions[state]) {
action = this.defaultActions[state];
} else {
if (symbol === null || typeof symbol === 'undefined') {
//通过正则获取下一个标示符
symbol = lex();
}
//action对应的动作
action = table[state] && table[state][symbol];
}
//动作错误,语法分析错误
if (typeof action === 'undefined' || !action.length || !action[0]) {
var errStr = '';
expected = [];
for (p in table[state]) {
if (this.terminals_[p] && p > TERROR) {
expected.push("'" + this.terminals_[p] + "'");
}
}
if (lexer.showPosition) {
errStr =
'Parse error on line ' +
(yylineno + 1) +
':\n' +
lexer.showPosition() +
'\nExpecting ' +
expected.join(', ') +
", got '" +
(this.terminals_[symbol] || symbol) +
"'";
} else {
errStr =
'Parse error on line ' +
(yylineno + 1) +
': Unexpected ' +
(symbol == EOF ? 'end of input' : "'" + (this.terminals_[symbol] || symbol) + "'");
}
this.parseError(errStr, {
text: lexer.match,
token: this.terminals_[symbol] || symbol,
line: lexer.yylineno,
loc: yyloc,
expected: expected
});
}
//规约-规约冲突,移入规约冲突
if (action[0] instanceof Array && action.length > 1) {
throw new Error('Parse Error: multiple actions possible at state: ' + state + ', token: ' + symbol);
}
switch (action[0]) {
//移入
case 1:
stack.push(symbol);
vstack.push(lexer.yytext);
lstack.push(lexer.yylloc);
stack.push(action[1]);
symbol = null;
if (!preErrorSymbol) {
yyleng = lexer.yyleng;
yytext = lexer.yytext;
yylineno = lexer.yylineno;
yyloc = lexer.yylloc;
if (recovering > 0) {
recovering--;
}
} else {
symbol = preErrorSymbol;
preErrorSymbol = null;
}
break;
//规约
case 2:
len = this.productions_[action[1]][1];
yyval.$ = vstack[vstack.length - len];
yyval._$ = {
first_line: lstack[lstack.length - (len || 1)].first_line,
last_line: lstack[lstack.length - 1].last_line,
first_column: lstack[lstack.length - (len || 1)].first_column,
last_column: lstack[lstack.length - 1].last_column
};
if (ranges) {
yyval._$.range = [
lstack[lstack.length - (len || 1)].range[0],
lstack[lstack.length - 1].range[1]
];
}
//执行文法定义的代码
r = this.performAction.apply(
yyval,
[yytext, yyleng, yylineno, sharedState.yy, action[1], vstack, lstack].concat(args)
);
if (typeof r !== 'undefined') {
return r;
}
if (len) {
stack = stack.slice(0, -1 * len * 2);
vstack = vstack.slice(0, -1 * len);
lstack = lstack.slice(0, -1 * len);
}
stack.push(this.productions_[action[1]][0]);
vstack.push(yyval.$);
lstack.push(yyval._$);
newState = table[stack[stack.length - 2]][stack[stack.length - 1]];
stack.push(newState);
break;
//接受
case 3:
return true;
}
}
return true;
}
};
/* 词法分析 */
var lexer = (function() {
var lexer = {
EOF: 1, parseError: function parseError(str, hash) {
if (this.yy.parser) {
this.yy.parser.parseError(str, hash);
} else {
throw new Error(str);
}
}, // resets the lexer, sets new input
setInput: function(input, yy) {
this.yy = yy || this.yy || {};
this._input = input;
this._more = this._backtrack = this.done = false;
this.yylineno = this.yyleng = 0;
this.yytext = this.matched = this.match = '';
this.conditionStack = ['INITIAL'];
this.yylloc = {
first_line: 1,
first_column: 0,
last_line: 1,
last_column: 0
};
if (this.options.ranges) {
this.yylloc.range = [0, 0];
}
this.offset = 0;
return this;
},
input: function() {
var ch = this._input[0];
this.yytext += ch;
this.yyleng++;
this.offset++;
this.match += ch;
this.matched += ch;
var lines = ch.match(/(?:\r\n?|\n).*/g);
if (lines) {
this.yylineno++;
this.yylloc.last_line++;
} else {
this.yylloc.last_column++;
}
if (this.options.ranges) {
this.yylloc.range[1]++;
} this._input = this._input.slice(1);
return ch;
},
unput: function(ch) {
var len = ch.length;
var lines = ch.split(/(?:\r\n?|\n)/g); this._input = ch + this._input;
this.yytext = this.yytext.substr(0, this.yytext.length - len);
//this.yyleng -= len;
this.offset -= len;
var oldLines = this.match.split(/(?:\r\n?|\n)/g);
this.match = this.match.substr(0, this.match.length - 1);
this.matched = this.matched.substr(0, this.matched.length - 1); if (lines.length - 1) {
this.yylineno -= lines.length - 1;
}
var r = this.yylloc.range; this.yylloc = {
first_line: this.yylloc.first_line,
last_line: this.yylineno + 1,
first_column: this.yylloc.first_column,
last_column: lines
? (lines.length === oldLines.length ? this.yylloc.first_column : 0) +
oldLines[oldLines.length - lines.length].length -
lines[0].length
: this.yylloc.first_column - len
}; if (this.options.ranges) {
this.yylloc.range = [r[0], r[0] + this.yyleng - len];
}
this.yyleng = this.yytext.length;
return this;
},
more: function() {
this._more = true;
return this;
},
reject: function() {
if (this.options.backtrack_lexer) {
this._backtrack = true;
} else {
return this.parseError(
'Lexical error on line ' +
(this.yylineno + 1) +
'. You can only invoke reject() in the lexer when the lexer is of the backtracking persuasion (options.backtrack_lexer = true).\n' +
this.showPosition(),
{
text: '',
token: null,
line: this.yylineno
}
);
}
return this;
},
less: function(n) {
this.unput(this.match.slice(n));
},
pastInput: function() {
var past = this.matched.substr(0, this.matched.length - this.match.length);
return (past.length > 20 ? '...' : '') + past.substr(-20).replace(/\n/g, '');
},
upcomingInput: function() {
var next = this.match;
if (next.length < 20) {
next += this._input.substr(0, 20 - next.length);
}
return (next.substr(0, 20) + (next.length > 20 ? '...' : '')).replace(/\n/g, '');
},
showPosition: function() {
var pre = this.pastInput();
var c = new Array(pre.length + 1).join('-');
return pre + this.upcomingInput() + '\n' + c + '^';
},
test_match: function(match, indexed_rule) {
var token, lines, backup; if (this.options.backtrack_lexer) {
// save context
backup = {
yylineno: this.yylineno,
yylloc: {
first_line: this.yylloc.first_line,
last_line: this.last_line,
first_column: this.yylloc.first_column,
last_column: this.yylloc.last_column
},
yytext: this.yytext,
match: this.match,
matches: this.matches,
matched: this.matched,
yyleng: this.yyleng,
offset: this.offset,
_more: this._more,
_input: this._input,
yy: this.yy,
conditionStack: this.conditionStack.slice(0),
done: this.done
};
if (this.options.ranges) {
backup.yylloc.range = this.yylloc.range.slice(0);
}
} lines = match[0].match(/(?:\r\n?|\n).*/g);
if (lines) {
this.yylineno += lines.length;
}
this.yylloc = {
first_line: this.yylloc.last_line,
last_line: this.yylineno + 1,
first_column: this.yylloc.last_column,
last_column: lines
? lines[lines.length - 1].length - lines[lines.length - 1].match(/\r?\n?/)[0].length
: this.yylloc.last_column + match[0].length
};
this.yytext += match[0];
this.match += match[0];
this.matches = match;
this.yyleng = this.yytext.length;
if (this.options.ranges) {
this.yylloc.range = [this.offset, (this.offset += this.yyleng)];
}
this._more = false;
this._backtrack = false;
this._input = this._input.slice(match[0].length);
this.matched += match[0];
token = this.performAction.call(
this,
this.yy,
this,
indexed_rule,
this.conditionStack[this.conditionStack.length - 1]
);
if (this.done && this._input) {
this.done = false;
}
if (token) {
return token;
} else if (this._backtrack) {
for (var k in backup) {
this[k] = backup[k];
}
return false;
}
return false;
},
next: function() {
if (this.done) {
return this.EOF;
}
if (!this._input) {
this.done = true;
} var token, match, tempMatch, index;
if (!this._more) {
this.yytext = '';
this.match = '';
}
//获取词法规则
//没看出来conditionStack有什么用,可能跟我的文法定义比较简单,没有使用一些内置的begin,pushState, popState之类的有点关系
var rules = this._currentRules();
for (var i = 0; i < rules.length; i++) {
tempMatch = this._input.match(this.rules[rules[i]]);
if (tempMatch && (!match || tempMatch[0].length > match[0].length)) {
match = tempMatch;
index = i;
//backtrack_lexer,会先尝试返回token,如果想尝试下面的规则的话,通过reject()方法
if (this.options.backtrack_lexer) {
token = this.test_match(tempMatch, rules[i]);
if (token !== false) {
return token;
} else if (this._backtrack) {
match = false;
continue;
} else {
return false;
}
//flex 设置true 会匹配符合正则多的文本
//flex 设置false 根据文法顺序返回结果
} else if (!this.options.flex) {
break;
}
}
}
if (match) {
token = this.test_match(match, rules[index]);
if (token !== false) {
return token;
}
return false;
}
if (this._input === '') {
return this.EOF;
} else {
//输入字符串不存在词法对应
return this.parseError(
'Lexical error on line ' + (this.yylineno + 1) + '. Unrecognized text.\n' + this.showPosition(),
{
text: '',
token: null,
line: this.yylineno
}
);
}
},
lex: function lex() {
var r = this.next();
if (r) {
return r;
} else {
return this.lex();
}
},
begin: function begin(condition) {
this.conditionStack.push(condition);
},
popState: function popState() {
var n = this.conditionStack.length - 1;
if (n > 0) {
return this.conditionStack.pop();
} else {
return this.conditionStack[0];
}
},
_currentRules: function _currentRules() {
if (this.conditionStack.length && this.conditionStack[this.conditionStack.length - 1]) {
return this.conditions[this.conditionStack[this.conditionStack.length - 1]].rules;
} else {
return this.conditions['INITIAL'].rules;
}
},
topState: function topState(n) {
n = this.conditionStack.length - 1 - Math.abs(n || 0);
if (n >= 0) {
return this.conditionStack[n];
} else {
return 'INITIAL';
}
},
pushState: function pushState(condition) {
this.begin(condition);
},
stateStackSize: function stateStackSize() {
return this.conditionStack.length;
},
options: {},
performAction: function anonymous(yy, yy_, $avoiding_name_collisions, YY_START) {
var YYSTATE = YY_START;
switch ($avoiding_name_collisions) {
case 0 /* skip whitespace */:
break;
case 1:
return 4;
break;
case 2:
return '+';
break;
}
},
//词法中的规则
rules: [
/^(?:\s+)/,
/^(?:[0-9]+)/,
/^(?:\+)/
],
conditions: {
INITIAL: { rules: [0, 1, 2], inclusive: true }
}
};
return lexer;
})();
parser.lexer = lexer;
function Parser() {
this.yy = {};
}
Parser.prototype = parser;
parser.Parser = Parser;
return new Parser();
})();