第6章 字符串与正则表达式

时间:2021-02-23 18:45:45

本章知识点:

1、字符串的格式化;

2、字符串的截取、合并、过滤等操作;

3、字符串的查找;

4、正则表达式的语法;

5、Python的正则表达式模式;

6.1 常见的字符串操作

6.1.1 字符串的格式化

## Basic %-style formatting
str1 = "version"
num = 1.0
# A single value does not have to be wrapped in a tuple.
format = "%s" % str1
print(format)
# Several values are supplied as a tuple; %d renders the float 1.0 as 1.
format = "%s %d" % (str1, num)
print(format)
# Output:
# version
# version 1

## Formatting with an explicit precision
print("浮点类型数字:%f" % 1.25)  # %f alone prints six decimal places
print("浮点类型数字:%.1f" % 1.25)  # one digit after the decimal point
print("浮点类型数字:%.2f" % 1.254)  # two digits after the decimal point
# Output:
# 浮点类型数字:1.250000
# 浮点类型数字:1.2
# 浮点类型数字:1.25

## Formatting from a dictionary: %(key)s looks the value up by name
values = {"version": "version", "num": 2}
print("%(version)s: %(num).1f" % values)
# Output:
# version: 2.0

## Alignment inside a fixed-width field
word = "version3.0"
print(word.center(20))
print(word.center(20, "*"))
print(word.ljust(20))
print(word.rjust(20))
print("%30s" % word)
# Output (quotes added here only to make the padding visible):
# "     version3.0     "
# "*****version3.0*****"
# "version3.0          "
# "          version3.0"
# "                    version3.0"

6.1.2 字符串的转义符

## Escape sequences versus raw strings
path = "hello\tworld\n"  # \t and \n are each interpreted as one character
print(path)
print(len(path))
path = r"hello\tworld\n"  # raw string: the backslashes are kept literally
print(path)
print(len(path))
# Output:
# hello<TAB>world   (followed by an empty line produced by the trailing \n)
# 12
# hello\tworld\n
# 14

## strip() family: remove leading/trailing whitespace such as \t and \n
word = "\thello woeld\n"
print("直接输出:", word)
print("strip()后输出:", word.strip())
print("lstrip()后输出:", word.lstrip())
print("rstrip()后输出:", word.rstrip())
# Output:
# 直接输出: <TAB>hello woeld
# (empty line from the trailing \n)
# strip()后输出: hello woeld
# lstrip()后输出: hello woeld
# (empty line: lstrip() keeps the trailing \n)
# rstrip()后输出: <TAB>hello woeld

6.1.3 字符串的合并

## Concatenate strings with "+"
str1 = "hello"
str2 = "word"
str3 = "hello"
str4 = "China"
result = str1 + str2 + str3
result += str4
print(result)
# Output:
# hellowordhelloChina

## Concatenate strings with join()
strs = ["hello", "world", "hello", "China"]
separator = ""
result = separator.join(strs)
print(result)
# Output:
# helloworldhelloChina

## Concatenate strings with reduce()
from functools import reduce
import operator
strs = ["hello", "world", "hello", "China"]
# operator.add is applied pairwise from the left, starting from "".
result = reduce(operator.add, strs, "")
print(result)
# Output:
# helloworldhelloChina

6.1.4 字符串的截取

## Pick a single character by index
word = "world"
print(word[4])
# Output:
# d

## Slices: [start:stop:step]
str1 = "hello world"
print(str1[0:3])   # the first three characters
print(str1[::2])   # every second character, starting at index 0
print(str1[1::2])  # every second character, starting at index 1
# Output:
# hel
# hlowrd
# el ol

## Obtain substrings with split()
sentence = "Bob said: 1, 2, 3, 4"
print("使用空格取子串:", sentence.split())
print("使用逗号取子串:", sentence.split(","))
print("使用两个逗号取子串:", sentence.split(",", 2))  # at most two splits
# Output:
# 使用空格取子串: ['Bob', 'said:', '1,', '2,', '3,', '4']
# 使用逗号取子串: ['Bob said: 1', ' 2', ' 3', ' 4']
# 使用两个逗号取子串: ['Bob said: 1', ' 2', ' 3, 4']

## Object identity before and after concatenation
str1 = "a"
print(id(str1))
print(id(str1 + "b"))
# Sample output (the actual numbers vary from run to run):
# 2624160403048
# 2624160579800
## Compare the beginning and the end of a string
word = "hello world"
# "hello world" ends in "ld"; the previous suffix "id" could never match,
# which made all three endswith() variants print False.
suffix = "ld"
print("hello" == word[0:5])
print(word.startswith("hello"))
# endswith(suffix, start, end) tests the slice word[start:end].
print(word.endswith(suffix, 6))             # word[6:]   == "world"
print(word.endswith(suffix, 6, 10))         # word[6:10] == "worl"
print(word.endswith(suffix, 6, len(word)))  # word[6:11] == "world"
# Output:
# True
# True
# True
# False
# True
## Reverse a string with an explicit loop
def reverse(s):
    """Return the characters of *s* in reverse order as a new string.

    The characters are walked from last to first, collected in a list and
    joined once at the end, which avoids rebuilding the result string on
    every iteration.
    """
    chars = []
    # list(s) keeps accepting any iterable of characters, not only str.
    for ch in reversed(list(s)):
        chars.append(ch)
    return "".join(chars)
print(reverse("hello"))
# Output:
# olleh
## Reverse a string with list.reverse()
def reverse(s):
    """Return *s* reversed, using the in-place list.reverse() method."""
    chars = list(s)
    chars.reverse()  # reverses in place and returns None
    return "".join(chars)
print(reverse("hello"))
# Output:
# olleh
def reverse(s):
    """Return *s* reversed by slicing it with a step of -1."""
    backwards = slice(None, None, -1)  # the slice object behind s[::-1]
    return s[backwards]
print(reverse("hello"))
# Output:
# olleh

6.1.7 字符串的查找和替换

## Search for a substring
sentence = "This is a apple"
print(sentence.find("a"))   # index of the first "a"
print(sentence.rfind("a"))  # index of the last "a"
# Output:
# 8
# 10

## Replace a substring
sentence = "hello world, hello China"
print(sentence.replace("hello", "hi"))
# When the old substring does not occur, the text is printed unchanged.
print(sentence.replace("abc", "hi"))
# Output:
# hi world, hi China
# hello world, hello China

6.1.8 字符串与日期的转换

import time
import datetime
## Time -> string
print(time.strftime("%Y-%m-%d %X", time.localtime()))
## String -> time
t = time.strptime("2008-08-08", "%Y-%m-%d")
# The first three fields of the struct_time are year, month and day.
y, m, d = t.tm_year, t.tm_mon, t.tm_mday
print(datetime.datetime(y, m, d))
# Sample output (the first line depends on the current local time):
# 2018-12-29 22:14:20
# 2008-08-08 00:00:00

6.2 正则表达式应用

6.2.1 正则表达式简介

6.2.2 使用re模块处理正则表达式

import re
# Anchors: ^ matches at the start of the string, $ at the end
s = "HELLO WORLD"
print(re.findall(r"^hello", s))                 # case-sensitive: no match
print(re.findall(r"^hello", s, re.IGNORECASE))
print(re.findall("WORLD$", s))
print(re.findall("wORLd$", s, re.IGNORECASE))
print(re.findall(r"\b\w+\b", s))                # every whole word
# Output:
# []
# ['HELLO']
# ['WORLD']
# ['WORLD']
# ['HELLO', 'WORLD']
import re
s = "hello world"
print(re.sub("hello", "hi", s))
last_four = s[-4:]  # "orld": contains no "hello", so nothing is replaced
print(re.sub("hello", "hi", last_four))
last_five = s[-5:]  # "world"
print(re.sub("world", "China", last_five))
# Output:
# hi world
# orld
# China
import re
# Special sequences (\w, \W, \s, \S, \d, \D and ".").
# re.subn() returns a (new_string, replacement_count) tuple. re.sub() returns
# only the string, so indexing its result with [1] would give the second
# character of the text instead of the number of replacements.
s = "你好 WORLD2"
cases = [
    ("匹配字母数字:", r"\w", "hi"),
    ("匹配非字母数字的字符:", r"\W", "hi"),
    ("匹配空白字符:", r"\s", "*"),
    ("匹配非空白字符:", r"\S", "hi"),
    ("匹配数字:", r"\d", "2.0"),
    ("匹配非数字:", r"\D", "hi"),
    ("匹配任意字符:", r".", "hi"),
]
for label, pattern, repl in cases:
    # Each count is taken from the same pattern that produced the text.
    text, count = re.subn(pattern, repl, s)
    print(label + text)
    print("替换次数:" + str(count))
# Output:
# 匹配字母数字:hihi hihihihihihi
# 替换次数:8
# 匹配非字母数字的字符:你好hiWORLD2
# 替换次数:1
# 匹配空白字符:你好*WORLD2
# 替换次数:1
# 匹配非空白字符:hihi hihihihihihi
# 替换次数:8
# 匹配数字:你好 WORLD2.0
# 替换次数:1
# 匹配非数字:hihihihihihihihi2
# 替换次数:8
# 匹配任意字符:hihihihihihihihihi
# 替换次数:9
import re
# Quantifiers: {n} repeats the preceding item exactly n times, ? makes it
# optional.
tel1 = "010-62083588"
print(re.findall(r"\d{3}-\d{8}|\d{4}-\d{7}", tel1))
tel2 = "010-62082589"
print(re.findall(r"\d{3}-\d{8}|\d{4}-\d{7}", tel2))
tel3 = "(010)62083436"
# The area code may be wrapped in parentheses or followed by a dash.
# No spaces are placed around "|": without re.VERBOSE they would be literal
# characters that the input must contain. The closing delimiter class is
# [\)-] so that it accepts ")" (the earlier [\]-] accepted "]" instead).
tel3_pattern = r"[\( ]?\d{3}[\)-]?\d{8}|[\( ]?\d{4}[\)-]?\d{7}"
print(re.findall(tel3_pattern, tel3))
# Output:
# ['010-62083588']
# ['010-62082589']
# ['(010)62083436']
import re
# Pre-compile a pattern with re.compile() and reuse the pattern object
s = "1abc23def45"
p = re.compile(r"\d+")  # one or more consecutive digits
print(p.findall(s))
print(p.pattern)        # the source text the pattern was compiled from
# Output:
# ['1', '23', '45']
# \d+
import re
# Groups and back-references
p = re.compile(r"(abc)\1")  # \1 must repeat whatever group 1 matched
m = p.match("abcabcabc")
print(m.group(0))  # the whole match
print(m.group(1))  # the first captured group
print(m.group())   # no argument is the same as group(0)
# Output:
# abcabc
# abc
# abcabc

# Named group (?P<one>...) with a named back-reference (?P=one)
P = re.compile(r"(?P<one>abc)(?P=one)")
m = P.search("abcabcabc")
print(m.group("one"))
named = m.groupdict()  # maps each group name to its matched text
print(named.keys())
print(named.values())
print(m.re.pattern)
# Output:
# abc
# dict_keys(['one'])
# dict_values(['abc'])
# (?P<one>abc)(?P=one)

6.4 习题

1、存在字符串"I,love,Python",取出love并输出。

import re
s = "I,love,Python"
# Take the second comma-separated field instead of hard-coding the character
# offsets 2..6, so the lookup does not depend on the length of each word.
love = s.split(",")[1]
print(love)
# Output:
# love

2、存在字符串"aabbccddee",将dd替换为ff。

import re
b = "aabbccddee"
old, new = "dd", "ff"
# str.replace() returns a new string; b itself is left unchanged.
print(b.replace(old, new))
# Output:
# aabbccffee

3、存在字符串"ab2b3n5n2n67mm4n2",编程实现下面要求。

### 1)使用re取出字符串中所有的数字,并组合成一个新的字符串输出。
import re
s = "ab2b3n5n2n67mm4n2"
p = re.compile(r"\d")  # a single digit per match
digits = p.findall(s)
print(digits)
# The exercise asks for the digits combined into one new string, so join
# the individual matches before printing.
combined = "".join(digits)
print(combined)
print(p.pattern)
# Output:
# ['2', '3', '5', '2', '6', '7', '4', '2']
# 23526742
# \d