前端编译原理 parser.js源码解读

前面已经介绍了一个jison的使用，在正常开发中其实已经够用下，下面主要是看了下parser.js代码解读下，作为一些了解。
下面以最简单的文法产生的parser做一些代码注释
下面是一些注释，标示了编译过程，能够了解jison产生的编译器有哪些处理
var a = (function() {

    var o = function(k, v, o, l) {

        for (o = o || {}, l = k.length; l--; o[k[l]] = v);

        return o;

    };

    var parser = {

        trace: function trace() {},

        //编译期共享对象

        yy: {},

        //标示符，$accept: 0, $end: 1，error: 2是默认的符号，E: 3, NAT: 4是分析文法获得的

        symbols_: { error: 2, E: 3, NAT: 4, $accept: 0, $end: 1 },

        //终结符

        terminals_: { 2: 'error', 4: 'NAT' },

        //对应文法规则

        productions_: [

            0,  // e` => E $end

            [

                3, //规约成符号2（E）

                1 //堆栈移除一个状态即移除NAT，压入E

            ] // E => NAT

        ],

        //规约的时候执行的代码

        performAction: function anonymous(

            yytext,

            yyleng,

            yylineno,

            yy,

            yystate /* action[1] */,

            $$ /* vstack */,

            _$ /* lstack */

        ) {

            var $0 = $$.length - 1;

            switch (yystate) {

                case 1:

                    this.$ = $$[$0] * 1;

                    break;

            }

        },

        //移入规约自动机 lalr(1)分析表，每一值表示一个状态

        table: [

            /*状态0 I0*/{ 3: 1/**goto 遇到符号3(E),跳转到状态1 */, 4: [1, 2] /**移入，遇到符号4（NAT），移入进入状态2 */},

            /*状态1 I1*/{ 1: [3] /**接受 遇到符号1($end),结束程序，完成编译 */},

            /*状态2 I2*/{ 1: [2, 1] /**规约 遇到符号1($end),按照productions[1],进行规则 */ }

        ],

        defaultActions: {

            2: [2, 1] //状态2进行规则规约，不用向前看符号是什么(逻辑优化)

        },

        //错误提示

        parseError: function parseError(str, hash) {

            if (hash.recoverable) {

                this.trace(str);

            } else {

                var error = new Error(str);

                error.hash = hash;

                throw error;

            }

        },

        //编译 input为处理的字符串

        parse: function parse(input) {

            var self = this,

                stack = [0],

                tstack = [],

                vstack = [null],

                lstack = [],

                table = this.table,

                yytext = '',

                yylineno = 0,

                yyleng = 0,

                recovering = 0,

                TERROR = 2,

                EOF = 1;

            var args = lstack.slice.call(arguments, 1);

            var lexer = Object.create(this.lexer);

            var sharedState = { yy: {} };

            for (var k in this.yy) {

                if (Object.prototype.hasOwnProperty.call(this.yy, k)) {

                    sharedState.yy[k] = this.yy[k];

                }

            }

            lexer.setInput(input, sharedState.yy);

            sharedState.yy.lexer = lexer;

            sharedState.yy.parser = this;

            if (typeof lexer.yylloc === 'undefined') {

                lexer.yylloc = {};

            }

            var yyloc = lexer.yylloc;

            lstack.push(yyloc);

            //位置信息采用区间的形式显示

            var ranges = lexer.options && lexer.options.ranges;

            if (typeof sharedState.yy.parseError === 'function') {

                this.parseError = sharedState.yy.parseError;

            } else {

                this.parseError = Object.getPrototypeOf(this).parseError;

            }

            //规约的时候需要弹出堆栈

            function popStack(n) {

                //stack [0,标识符,状态,标识符,状态...],所以出栈是2倍

                stack.length = stack.length - 2 * n;

                vstack.length = vstack.length - n;

                lstack.length = lstack.length - n;

            }

            //词法分析,返回token即标示符

            var lex = function() {

                var token;

                token = lexer.lex() || EOF;

                if (typeof token !== 'number') {

                    token = self.symbols_[token] || token;

                }

                return token;

            };

            var symbol,

                preErrorSymbol,

                state,

                action,

                a,

                r,

                yyval = {},

                p,

                len,

                newState,

                expected;

            while (true) {

                state = stack[stack.length - 1];

                if (this.defaultActions[state]) {

                    action = this.defaultActions[state];

                } else {

                    if (symbol === null || typeof symbol === 'undefined') {

                        //通过正则获取下一个标示符

                        symbol = lex();

                    }

                    //action对应的动作

                    action = table[state] && table[state][symbol];

                }

                //动作错误，语法分析错误

                if (typeof action === 'undefined' || !action.length || !action[0]) {

                    var errStr = '';

                    expected = [];

                    for (p in table[state]) {

                        if (this.terminals_[p] && p > TERROR) {

                            expected.push("'" + this.terminals_[p] + "'");

                        }

                    }

                    if (lexer.showPosition) {

                        errStr =

                            'Parse error on line ' +

                            (yylineno + 1) +

                            ':\n' +

                            lexer.showPosition() +

                            '\nExpecting ' +

                            expected.join(', ') +

                            ", got '" +

                            (this.terminals_[symbol] || symbol) +

                            "'";

                    } else {

                        errStr =

                            'Parse error on line ' +

                            (yylineno + 1) +

                            ': Unexpected ' +

                            (symbol == EOF ? 'end of input' : "'" + (this.terminals_[symbol] || symbol) + "'");

                    }

                    this.parseError(errStr, {

                        text: lexer.match,

                        token: this.terminals_[symbol] || symbol,

                        line: lexer.yylineno,

                        loc: yyloc,

                        expected: expected

                    });

                }

                //规约-规约冲突，移入规约冲突

                if (action[0] instanceof Array && action.length > 1) {

                    throw new Error('Parse Error: multiple actions possible at state: ' + state + ', token: ' + symbol);

                }

                switch (action[0]) {

                    //移入

                    case 1:

                        stack.push(symbol);

                        vstack.push(lexer.yytext);

                        lstack.push(lexer.yylloc);

                        stack.push(action[1]);

                        symbol = null;

                        if (!preErrorSymbol) {

                            yyleng = lexer.yyleng;

                            yytext = lexer.yytext;

                            yylineno = lexer.yylineno;

                            yyloc = lexer.yylloc;

                            if (recovering > 0) {

                                recovering--;

                            }

                        } else {

                            symbol = preErrorSymbol;

                            preErrorSymbol = null;

                        }

                        break;

                    //规约

                    case 2:

                        len = this.productions_[action[1]][1];

                        yyval.$ = vstack[vstack.length - len];

                        yyval._$ = {

                            first_line: lstack[lstack.length - (len || 1)].first_line,

                            last_line: lstack[lstack.length - 1].last_line,

                            first_column: lstack[lstack.length - (len || 1)].first_column,

                            last_column: lstack[lstack.length - 1].last_column

                        };

                        if (ranges) {

                            yyval._$.range = [

                                lstack[lstack.length - (len || 1)].range[0],

                                lstack[lstack.length - 1].range[1]

                            ];

                        }

                        //执行文法定义的代码

                        r = this.performAction.apply(

                            yyval,

                            [yytext, yyleng, yylineno, sharedState.yy, action[1], vstack, lstack].concat(args)

                        );

                        if (typeof r !== 'undefined') {

                            return r;

                        }

                        if (len) {

                            stack = stack.slice(0, -1 * len * 2);

                            vstack = vstack.slice(0, -1 * len);

                            lstack = lstack.slice(0, -1 * len);

                        }

                        stack.push(this.productions_[action[1]][0]);

                        vstack.push(yyval.$);

                        lstack.push(yyval._$);

                        newState = table[stack[stack.length - 2]][stack[stack.length - 1]];

                        stack.push(newState);

                        break;

                    //接受

                    case 3:

                        return true;

                }

            }

            return true;

        }

    };

    /* 词法分析 */

    var lexer = (function() {

        var lexer = {

            EOF: 1,

            parseError: function parseError(str, hash) {

                if (this.yy.parser) {

                    this.yy.parser.parseError(str, hash);

                } else {

                    throw new Error(str);

                }

            },

            // resets the lexer, sets new input

            setInput: function(input, yy) {

                this.yy = yy || this.yy || {};

                this._input = input;

                this._more = this._backtrack = this.done = false;

                this.yylineno = this.yyleng = 0;

                this.yytext = this.matched = this.match = '';

                this.conditionStack = ['INITIAL'];

                this.yylloc = {

                    first_line: 1,

                    first_column: 0,

                    last_line: 1,

                    last_column: 0

                };

                if (this.options.ranges) {

                    this.yylloc.range = [0, 0];

                }

                this.offset = 0;

                return this;

            },

            input: function() {

                var ch = this._input[0];

                this.yytext += ch;

                this.yyleng++;

                this.offset++;

                this.match += ch;

                this.matched += ch;

                var lines = ch.match(/(?:\r\n?|\n).*/g);

                if (lines) {

                    this.yylineno++;

                    this.yylloc.last_line++;

                } else {

                    this.yylloc.last_column++;

                }

                if (this.options.ranges) {

                    this.yylloc.range[1]++;

                }

                this._input = this._input.slice(1);

                return ch;

            },

            unput: function(ch) {

                var len = ch.length;

                var lines = ch.split(/(?:\r\n?|\n)/g);

                this._input = ch + this._input;

                this.yytext = this.yytext.substr(0, this.yytext.length - len);

                //this.yyleng -= len;

                this.offset -= len;

                var oldLines = this.match.split(/(?:\r\n?|\n)/g);

                this.match = this.match.substr(0, this.match.length - 1);

                this.matched = this.matched.substr(0, this.matched.length - 1);

                if (lines.length - 1) {

                    this.yylineno -= lines.length - 1;

                }

                var r = this.yylloc.range;

                this.yylloc = {

                    first_line: this.yylloc.first_line,

                    last_line: this.yylineno + 1,

                    first_column: this.yylloc.first_column,

                    last_column: lines

                        ? (lines.length === oldLines.length ? this.yylloc.first_column : 0) +

                          oldLines[oldLines.length - lines.length].length -

                          lines[0].length

                        : this.yylloc.first_column - len

                };

                if (this.options.ranges) {

                    this.yylloc.range = [r[0], r[0] + this.yyleng - len];

                }

                this.yyleng = this.yytext.length;

                return this;

            },

            more: function() {

                this._more = true;

                return this;

            },

            reject: function() {

                if (this.options.backtrack_lexer) {

                    this._backtrack = true;

                } else {

                    return this.parseError(

                        'Lexical error on line ' +

                            (this.yylineno + 1) +

                            '. You can only invoke reject() in the lexer when the lexer is of the backtracking persuasion (options.backtrack_lexer = true).\n' +

                            this.showPosition(),

                        {

                            text: '',

                            token: null,

                            line: this.yylineno

                        }

                    );

                }

                return this;

            },

            less: function(n) {

                this.unput(this.match.slice(n));

            },

            pastInput: function() {

                var past = this.matched.substr(0, this.matched.length - this.match.length);

                return (past.length > 20 ? '...' : '') + past.substr(-20).replace(/\n/g, '');

            },

            upcomingInput: function() {

                var next = this.match;

                if (next.length < 20) {

                    next += this._input.substr(0, 20 - next.length);

                }

                return (next.substr(0, 20) + (next.length > 20 ? '...' : '')).replace(/\n/g, '');

            },

            showPosition: function() {

                var pre = this.pastInput();

                var c = new Array(pre.length + 1).join('-');

                return pre + this.upcomingInput() + '\n' + c + '^';

            },

            test_match: function(match, indexed_rule) {

                var token, lines, backup;

                if (this.options.backtrack_lexer) {

                    // save context

                    backup = {

                        yylineno: this.yylineno,

                        yylloc: {

                            first_line: this.yylloc.first_line,

                            last_line: this.last_line,

                            first_column: this.yylloc.first_column,

                            last_column: this.yylloc.last_column

                        },

                        yytext: this.yytext,

                        match: this.match,

                        matches: this.matches,

                        matched: this.matched,

                        yyleng: this.yyleng,

                        offset: this.offset,

                        _more: this._more,

                        _input: this._input,

                        yy: this.yy,

                        conditionStack: this.conditionStack.slice(0),

                        done: this.done

                    };

                    if (this.options.ranges) {

                        backup.yylloc.range = this.yylloc.range.slice(0);

                    }

                }

                lines = match[0].match(/(?:\r\n?|\n).*/g);

                if (lines) {

                    this.yylineno += lines.length;

                }

                this.yylloc = {

                    first_line: this.yylloc.last_line,

                    last_line: this.yylineno + 1,

                    first_column: this.yylloc.last_column,

                    last_column: lines

                        ? lines[lines.length - 1].length - lines[lines.length - 1].match(/\r?\n?/)[0].length

                        : this.yylloc.last_column + match[0].length

                };

                this.yytext += match[0];

                this.match += match[0];

                this.matches = match;

                this.yyleng = this.yytext.length;

                if (this.options.ranges) {

                    this.yylloc.range = [this.offset, (this.offset += this.yyleng)];

                }

                this._more = false;

                this._backtrack = false;

                this._input = this._input.slice(match[0].length);

                this.matched += match[0];

                token = this.performAction.call(

                    this,

                    this.yy,

                    this,

                    indexed_rule,

                    this.conditionStack[this.conditionStack.length - 1]

                );

                if (this.done && this._input) {

                    this.done = false;

                }

                if (token) {

                    return token;

                } else if (this._backtrack) {

                    for (var k in backup) {

                        this[k] = backup[k];

                    }

                    return false;

                }

                return false;

            },

            next: function() {

                if (this.done) {

                    return this.EOF;

                }

                if (!this._input) {

                    this.done = true;

                }

                var token, match, tempMatch, index;

                if (!this._more) {

                    this.yytext = '';

                    this.match = '';

                }

                //获取词法规则

                //没看出来conditionStack有什么用，可能跟我的文法定义比较简单，没有使用一些内置的begin，pushState, popState之类的有点关系

                var rules = this._currentRules();

                for (var i = 0; i < rules.length; i++) {

                    tempMatch = this._input.match(this.rules[rules[i]]);

                    if (tempMatch && (!match || tempMatch[0].length > match[0].length)) {

                        match = tempMatch;

                        index = i;

                        //backtrack_lexer，会先尝试返回token,如果想尝试下面的规则的话，通过reject()方法

                        if (this.options.backtrack_lexer) {

                            token = this.test_match(tempMatch, rules[i]);

                            if (token !== false) {

                                return token;

                            } else if (this._backtrack) {

                                match = false;

                                continue;

                            } else {

                                return false;

                            }

                        //flex 设置true 会匹配符合正则多的文本

                        //flex 设置false 根据文法顺序返回结果

                        } else if (!this.options.flex) {

                            break;

                        }

                    }

                }

                if (match) {

                    token = this.test_match(match, rules[index]);

                    if (token !== false) {

                        return token;

                    }

                    return false;

                }

                if (this._input === '') {

                    return this.EOF;

                } else {

                    //输入字符串不存在词法对应

                    return this.parseError(

                        'Lexical error on line ' + (this.yylineno + 1) + '. Unrecognized text.\n' + this.showPosition(),

                        {

                            text: '',

                            token: null,

                            line: this.yylineno

                        }

                    );

                }

            },

            lex: function lex() {

                var r = this.next();

                if (r) {

                    return r;

                } else {

                    return this.lex();

                }

            },

            begin: function begin(condition) {

                this.conditionStack.push(condition);

            },

            popState: function popState() {

                var n = this.conditionStack.length - 1;

                if (n > 0) {

                    return this.conditionStack.pop();

                } else {

                    return this.conditionStack[0];

                }

            },

            _currentRules: function _currentRules() {

                if (this.conditionStack.length && this.conditionStack[this.conditionStack.length - 1]) {

                    return this.conditions[this.conditionStack[this.conditionStack.length - 1]].rules;

                } else {

                    return this.conditions['INITIAL'].rules;

                }

            },

            topState: function topState(n) {

                n = this.conditionStack.length - 1 - Math.abs(n || 0);

                if (n >= 0) {

                    return this.conditionStack[n];

                } else {

                    return 'INITIAL';

                }

            },

            pushState: function pushState(condition) {

                this.begin(condition);

            },

            stateStackSize: function stateStackSize() {

                return this.conditionStack.length;

            },

            options: {},

            performAction: function anonymous(yy, yy_, $avoiding_name_collisions, YY_START) {

                var YYSTATE = YY_START;

                switch ($avoiding_name_collisions) {

                    case 0 /* skip whitespace */:

                        break;

                    case 1:

                        return 4;

                        break;

                    case 2:

                        return '+';

                        break;

                }

            },

            //词法中的规则

            rules: [

                /^(?:\s+)/,

                /^(?:[0-9]+)/,

                /^(?:\+)/

            ],

            conditions: {

                INITIAL: { rules: [0, 1, 2], inclusive: true }

            }

        };

        return lexer;

    })();

    parser.lexer = lexer;

    function Parser() {

        this.yy = {};

    }

    Parser.prototype = parser;

    parser.Parser = Parser;

    return new Parser();

})();
秒客网

前端编译原理 parser.js源码解读

相关文章