# A lexer / syntax parser written in Python.
#
# Date: 2025-04-06 07:14:03

"""
Lexer (syntax parser).
"""

class Lexer:
    """Tokenizer: scans an input text and produces a list of Tokens.

    Relies on names defined elsewhere in this module: ``Position``,
    ``Token``, ``IllegalCharError``, ``DIGITS`` and the ``TT_*``
    token-type constants.
    """

    def __init__(self, in_fn, text):
        """
        :param in_fn: where the text came from — effectively a file name
            (stdin/stdout are treated as files too).
        :param text: the text to tokenize.
        """
        self.in_fn = in_fn
        self.text = text
        # Position tracks (index, line, column); start one before the
        # first character so the initial advance() lands on index 0.
        self.pos = Position(-1, 0, -1)
        self.cur_char = None
        self.advance()
        # Single-character tokens: direct char -> token-type lookup.
        self.char_pro_base = {
            '+': TT_PLUS,
            '-': TT_MINUS,
            '*': TT_MUL,
            '/': TT_DIV,
            '^': TT_POWER,
            '(': TT_LPAREN,
            ')': TT_RPAREN,
        }

    def advance(self):
        """Move to the next character; cur_char becomes None at end of text."""
        # NOTE(review): reconstructed from a corrupted line — assumes
        # Position.advance(cur_char) updates idx/ln/col; confirm against
        # the Position class definition.
        self.pos.advance(self.cur_char)
        self.cur_char = self.text[self.pos.idx] if self.pos.idx < len(self.text) else None

    def __char_process(self, tokens, TT):
        """Append a single-character Token of type TT and consume the char.

        :param tokens: the token list being built by make_tokens().
        :param TT: the token-type constant for the current character.
        :return: None (mutates ``tokens`` in place).
        """
        # Bug fix: the original built Token(TT) and discarded it.
        tokens.append(Token(TT))
        self.advance()

    def make_tokens(self):
        """Tokenize the whole input text.

        Wraps each recognized lexeme in a Token (much like Spring wraps
        objects in a Wrapper for later processing).

        :return: ``(tokens, None)`` on success, or
            ``([], IllegalCharError)`` on the first unrecognized character.
        """
        tokens = []
        while self.cur_char is not None:
            if self.cur_char in ' \t':
                # Whitespace carries no meaning; just move on.
                self.advance()
            elif self.cur_char in DIGITS:
                # Numbers span multiple characters, so they get their own
                # scanner that also decides int vs float (keywords will
                # later be handled the same way).
                # Bug fix: the original discarded the Token returned here.
                tokens.append(self.make_number())
            else:
                TT = self.char_pro_base.get(self.cur_char)
                if TT:
                    self.__char_process(tokens, TT)
                else:
                    char = self.cur_char
                    self.advance()
                    return [], IllegalCharError(self.pos, self.in_fn, "'" + char + "'")
        return tokens, None

    def make_number(self):
        """Scan an integer or float literal starting at the current char.

        :return: ``Token(TT_INT, int)`` if no dot was seen, otherwise
            ``Token(TT_FLOAT, float)``.
        """
        num_str = ''
        dot_count = 0
        while self.cur_char is not None and self.cur_char in DIGITS + '.':
            if self.cur_char == '.':
                if dot_count == 1:
                    break  # a second dot ends the number
                dot_count += 1
                num_str += '.'
            else:
                num_str += self.cur_char
            self.advance()
        if dot_count == 0:
            return Token(TT_INT, int(num_str))
        else:
            return Token(TT_FLOAT, float(num_str))