python 中的re模块,正则表达式

时间:2023-01-09 22:35:32
一、re模块

re模块中常用的方法。

  • match: 默认从字符串开头开始匹配,re.match('fun', 'funny') 可以匹配出来 'fun'
    match(pattern, string, flags=0):
    """Try to apply the pattern at the start of the string, returning
    a match object, or None if no match was found."""
    return _compile(pattern, flags).match(string)

  • search: 扫描整个字符串,返回匹配到的第一个结果,否则返回None,re.search('f\d*', 'sdf23kf2') 匹配到 'f23'
    search(pattern, string, flags=0):
    """Scan through string looking for a match to the pattern, returning
    a match object, or None if no match was found."""
    return _compile(pattern, flags).search(string)

  • findall: 匹配字符串中所有符合pattern的字符,并返回一个列表
    findall(pattern, string, flags=0):
    """Return a list of all non-overlapping matches in the string.
    If one or more capturing groups are present in the pattern, return
    a list of groups; this will be a list of tuples if the pattern
    has more than one group.
    Empty matches are included in the result."""
    return _compile(pattern, flags).findall(string)

  • sub: 第一个参数pattern为正则表达式,第二个参数repl为替换内容(字符串或函数),第三个参数string为原字符串,第四个参数count表示最多替换几次(默认0表示全部替换)。用法类似 str.replace(old, new, count),区别是按正则匹配而不是按固定子串匹配
    sub(pattern, repl, string, count=0, flags=0):
    """Return the string obtained by replacing the leftmost
    non-overlapping occurrences of the pattern in string by the
    replacement repl. repl can be either a string or a callable;
    if a string, backslash escapes in it are processed. If it is
    a callable, it's passed the match object and must return
    a replacement string to be used."""
    return _compile(pattern, flags).sub(repl, string, count)

  • split: 按照匹配字符分割字符串,返回由子字符串组成的列表。re.split('a', '1a2a3a4') --> ['1', '2', '3', '4']
    split(pattern, string, maxsplit=0, flags=0):
    """Split the source string by the occurrences of the pattern,
    returning a list containing the resulting substrings. If
    capturing parentheses are used in pattern, then the text of all
    groups in the pattern are also returned as part of the resulting
    list. If maxsplit is nonzero, at most maxsplit splits occur,
    and the remainder of the string is returned as the final element
    of the list."""
    return _compile(pattern, flags).split(string, maxsplit)

二、一个计算题的实现

用户输入 1 - 2 * ( (60-30 +(-40/5) * (9-2*5/3 + 7 /3*99/4*2998 +10 * 568/14 )) - (-4*3)/ (16-3*2) ) 等类似公式后,自己解析里面的(),+,-,*,/符号和公式(不调用eval等类似功能实现),运算后得出结果,结果必须与真实的计算器所得出的结果一致

#!/usr/bin/env python
# -*- coding:utf-8 -*-

import re


# Sample expression; the script evaluates it with both eval() and calculate()
# so the two results can be compared.
parser_str = '1 - 2 * ( (60-30 +(-40/5) * (9-2*5/3 + 7 /3*99/4*2998 +10 * 568/14 )) - (-4*3)/ (16-3*2) )'
# done

def deal_mul_divi(string):
    """Evaluate one multiplication or division and return the result as text.

    The first ``<number><*|/><number>`` term found in *string* is evaluated
    with float arithmetic. Each operand may carry its own leading minus sign,
    so ``25/-5``, ``34*3`` and ``-3*-2`` are all handled with the correct
    sign. No spaces are allowed between the operands and the operator.

    :param string: text containing a single binary ``*`` or ``/`` operation
    :return: the result as a plain decimal string (never exponent notation,
        so the digit-based regexes used elsewhere can re-read it), or None
        when no complete operation is found
    :raises ZeroDivisionError: when the right operand of ``/`` is zero
    """
    # Local import: only needed to expand exponent-form floats below.
    from decimal import Decimal

    term = re.search(r'(-?\d+\.?\d*)([*/])(-?\d+\.?\d*)', string)
    if term is None:
        # Covers incomplete input such as '25/-' as well as unrelated text.
        print('格式有误:{}'.format(string))
        return None

    left, operator, right = term.groups()
    if operator == '/':
        value = float(left) / float(right)
    else:
        value = float(left) * float(right)

    text = str(value)
    if 'e' in text:
        # str(float) switches to forms like '1e-05' for very small or large
        # values; expand them to fixed-point digits without losing precision.
        text = format(Decimal(text), 'f')
    return text

#完成
def deal_plus_minus(string):
    """Evaluate an expression that contains only additions and subtractions.

    Brackets and any other non-numeric characters are ignored. A number
    directly preceded by ``-`` counts as negative, every other number counts
    as positive. The magnitudes of the negative numbers are summed, the
    positive numbers are summed, and the difference is returned. All
    arithmetic is done with floats.

    :param string: expression using only ``+`` and ``-`` (brackets allowed),
        e.g. ``(40-4+34)``
    :return: the result as a string, or None if the expression still
        contains ``*`` or ``/``
    """
    if re.search(r'[*/]', string):  # multiplication/division must be resolved first
        return None

    num_plus = 0
    num_minus = 0
    # Classify each number by its own sign in a single pass. Replacing digits
    # in the text to "hide" negative numbers would also corrupt positive
    # numbers that contain the same digits (e.g. the '4' in '40-4+34').
    # The optional exponent lets float text such as '1e-05' be read as one number.
    number_pattern = r'(-?)(\d+\.?\d*(?:[eE][+-]?\d+)?)'
    for sign, digits in re.findall(number_pattern, string):
        if sign:
            num_minus += float(digits)
        else:
            num_plus += float(digits)
    return str(num_plus - num_minus)
#完成

def match_brackets(string):
    """Replace every bracketed sub-expression with its computed value.

    Repeatedly looks for a bracket pair whose content consists only of
    digits, dots and the four operators (so it contains no nested brackets),
    evaluates it with deal_brackets, and substitutes the result back into
    the expression until no such pair is left.

    :param string: expression text; the bracket pattern does not allow
        spaces inside the brackets
    :return: the expression with those bracket pairs replaced by their values
    """
    innermost = r'\((\+|\-|\.|\/|\*|\d)+\)'
    while True:
        found = re.search(innermost, string)
        if not found:
            return string
        bracket_text = found.group()
        value = deal_brackets(bracket_text)
        # Substitute the computed value for the bracket it came from.
        string = string.replace(bracket_text, value, 1)
        # A negative value may now sit next to another sign: fold '+-' and
        # '-+' into '-', then '--' into '+'.
        string = re.sub(r'(\+\-)|(\-\+)', '-', string)
        string = re.sub('--', '+', string)

def deal_brackets(string):
    """Evaluate an expression that contains no nested brackets.

    All multiplications and divisions are resolved first, left to right, by
    passing one ``a*b`` / ``a/b`` term at a time to deal_mul_divi and
    splicing the result back into the text. What remains is handed to
    deal_plus_minus.

    :param string: expression such as ``(-3.2/-1.6-2-3*-2)``; surrounding
        brackets are allowed
    :return: whatever deal_plus_minus returns for the reduced expression
    """
    # Unsigned left operand, operator, right operand with at most one leading
    # minus. The right operand must end in digits: a pattern that lets it end
    # with '-' would swallow a following binary minus (the '-' in '3*2-5').
    term_pattern = r'\d+\.?\d*[*/]-?\d+\.?\d*'

    while True:
        term = re.search(term_pattern, string)
        if term is None:
            break
        term_value = deal_mul_divi(term.group())
        # Splice by position so exactly the matched span is replaced.
        string = string[:term.start()] + term_value + string[term.end():]
        # A negative result may now sit next to another sign: fold '+-' and
        # '-+' into '-', then '--' into '+'.
        string = re.sub(r'(\+\-)|(\-\+)', '-', string)
        string = re.sub('--', '+', string)
    return deal_plus_minus(string)

#calculate函数就可以计算任意一个表达式的值了
def calculate(string):
    """Evaluate an arithmetic expression given as text.

    Spaces are removed, every bracketed sub-expression is reduced by
    match_brackets, and the remaining bracket-free expression is evaluated
    by deal_brackets.

    :param string: the expression to evaluate
    :return: the value returned by deal_brackets for the reduced expression
    """
    compact = string.replace(' ', '')  # drop every space
    bracket_free = match_brackets(compact)
    return deal_brackets(bracket_free)

# Print the eval() result next to the calculate() result so they can be compared.
print('\033[31;1meval:\033[0m{: >22}'.format(eval(parser_str)))  # reference result computed by eval
print('\033[32;2mcalculate:\033[0m{}'.format(calculate(parser_str)))  # result from the regex-based calculate()

#运算结果为
eval:    2776672.6952380957
calculate:2776672.6952380957