本章知识点:
1、字符串的格式化;
2、字符串的截取、合并、过滤等操作;
3、字符串的查找;
4、正则表达式的语法;
5、Python的正则表达式模式;
6.1 常见的字符串操作
6.1.1 字符串的格式化
# Format strings with the % operator.
str1 = "version"
num = 1.0
# The result is named `formatted` (not `format`) so the builtin format()
# stays usable for the rest of the file.
formatted = "%s" % str1
print(formatted)
# %d converts the float 1.0 to the integer 1 when formatting.
formatted = "%s %d" % (str1, num)
print(formatted)
# Output:
# version
# version 1
# Float formatting with an explicit precision.
samples = (
    ("浮点类型数字:%f", 1.25),     # default float format: six decimals
    ("浮点类型数字:%.1f", 1.25),   # one digit after the decimal point
    ("浮点类型数字:%.2f", 1.254),  # two digits after the decimal point
)
for template, value in samples:
    print(template % value)
# Output:
# 浮点类型数字:1.250000
# 浮点类型数字:1.2
# 浮点类型数字:1.25
# Format a string from a dictionary: %(key)s looks each value up by key.
fields = {"version": "version", "num": 2}
print("%(version)s: %(num).1f" % fields)
# Output:
# version: 2.0
# String alignment inside a fixed-width field.
word = "version3.0"
width = 20
print(word.center(width))        # centered, padded with spaces on both sides
print(word.center(width, "*"))   # centered, padded with "*"
print(word.ljust(width))         # left-aligned, padded on the right
print(word.rjust(width))         # right-aligned, padded on the left
print("%30s" % word)             # right-aligned in a 30-character field
# Output (space padding is not visible here):
#      version3.0
# *****version3.0*****
# version3.0
#           version3.0
#                     version3.0
6.1.2 字符串的转义符
# Escape sequences versus raw strings.
# The first literal contains a real tab and newline (12 characters); the raw
# literal keeps the backslashes as ordinary characters (14 characters).
for path in ("hello\tworld\n", r"hello\tworld\n"):
    print(path)
    print(len(path))
# Output:
# hello	world
#
# 12
# hello\tworld\n
# 14
# strip()/lstrip()/rstrip() remove leading and/or trailing whitespace,
# including the tab and newline escape characters used here.
word = "\thello woeld\n"
print("直接输出:", word)
variants = (
    ("strip()后输出:", str.strip),     # both ends
    ("lstrip()后输出:", str.lstrip),   # left end only (trailing newline kept)
    ("rstrip()后输出:", str.rstrip),   # right end only (leading tab kept)
)
for label, strip_func in variants:
    print(label, strip_func(word))
6.1.3 字符串的合并
# Concatenate strings with "+" and "+=".
str1, str2, str3, str4 = "hello", "word", "hello", "China"
result = str1 + str2 + str3
result += str4
print(result)
# Output:
# hellowordhelloChina
# Concatenate strings with str.join().
strs = ["hello", "world", "hello", "China"]
# An empty separator glues the items together with nothing in between.
result = "".join(strs)
print(result)
# Output:
# helloworldhelloChina
# Concatenate strings with functools.reduce().
from functools import reduce
import operator

strs = ["hello", "world", "hello", "China"]
# reduce() folds operator.add over the list, starting from the empty string.
result = reduce(operator.add, strs, "")
print(result)
# Output:
# helloworldhelloChina
6.1.4 字符串的截取
# Pick a single character by index.
word = "world"
position = 4  # zero-based, so this is the fifth character
print(word[position])
# Output:
# d
# Take substrings with slices.
str1 = "hello world"
first_three = str1[0:3]   # characters 0, 1, 2
even_chars = str1[::2]    # every second character starting at index 0
odd_chars = str1[1::2]    # every second character starting at index 1
print(first_three)
print(even_chars)
print(odd_chars)
# Output:
# hel
# hlowrd
# el ol
# Get substrings with split().
sentence = "Bob said: 1, 2, 3, 4"
by_whitespace = sentence.split()         # split on runs of whitespace
by_comma = sentence.split(",")           # split on every comma
by_two_commas = sentence.split(",", 2)   # split on the first two commas only
print("使用空格取子串:", by_whitespace)
print("使用逗号取子串:", by_comma)
print("使用两个逗号取子串:", by_two_commas)
# Output:
# 使用空格取子串: ['Bob', 'said:', '1,', '2,', '3,', '4']
# 使用逗号取子串: ['Bob said: 1', ' 2', ' 3', ' 4']
# 使用两个逗号取子串: ['Bob said: 1', ' 2', ' 3, 4']
# Concatenation builds a new string object instead of modifying str1,
# so the two printed identities differ.
str1 = "a"
extended = str1 + "b"
print(id(str1))
print(id(extended))
# Example output (the actual numbers change from run to run):
# 2624160403048
# 2624160579800
# Compare the beginning and the end of a string.
word = "hello world"
# "hello world" ends with "ld"; the previous suffix "id" could never match.
suffix = "ld"
print("hello" == word[0:5])
print(word.startswith("hello"))
print(word.endswith(suffix, 6))             # looks at word[6:]   -> "world"
print(word.endswith(suffix, 6, 10))         # looks at word[6:10] -> "worl"
print(word.endswith(suffix, 6, len(word)))  # looks at word[6:11] -> "world"
# Output:
# True
# True
# True
# False
# True
# Reverse a string with an explicit loop.
def reverse(s):
    """Return the items of `s` joined in reverse order.

    `s` is usually a string; any iterable of strings is accepted because
    it is copied into a list before being walked backwards.
    """
    pieces = []
    # reversed() walks the copy from the last item to the first, replacing
    # the manual range(len(...), 0, -1) / [i - 1] index arithmetic.
    for item in reversed(list(s)):
        pieces.append(item)
    # Join once at the end instead of growing a string with "+=".
    return "".join(pieces)


print(reverse("hello"))
# Output:
# olleh
# Reverse a string with list.reverse().
def reverse(s):
    """Return the items of `s` joined in reverse order, using list.reverse()."""
    items = list(s)
    items.reverse()  # reverses the list in place
    return "".join(items)


print(reverse("hello"))
# Output:
# olleh
# Reverse a string with a slice.
def reverse(s):
    """Return a reversed copy of the sequence `s`."""
    # A step of -1 walks the sequence from its last item to its first.
    reversed_copy = s[::-1]
    return reversed_copy


print(reverse("hello"))
# Output:
# olleh
6.1.7 字符串的查找和替换
# Search inside a string.
sentence = "This is a apple"
first_a = sentence.find("a")    # index of the first "a"
last_a = sentence.rfind("a")    # index of the last "a"
print(first_a)
print(last_a)
# Output:
# 8
# 10
# Replace substrings.
sentence = "hello world, hello China"
for old in ("hello", "abc"):
    # When `old` does not occur, the string comes back unchanged.
    print(sentence.replace(old, "hi"))
# Output:
# hi world, hi China
# hello world, hello China
6.1.8 字符串与日期的转换
import datetime
import time

# Time -> string: format the current local time.
now_text = time.strftime("%Y-%m-%d %X", time.localtime())
print(now_text)

# String -> time: parse a date, then build a datetime from its fields.
t = time.strptime("2008-08-08", "%Y-%m-%d")
y, m, d = t.tm_year, t.tm_mon, t.tm_mday
print(datetime.datetime(y, m, d))
# Example output (the first line depends on when the script runs):
# 2018-12-29 22:14:20
# 2008-08-08 00:00:00
6.2 正则表达式应用
6.2.1 正则表达式简介
6.2.2 使用re模块处理正则表达式
import re

# Using the ^ and $ anchors.
s = "HELLO WORLD"
starts_cs = re.findall(r"^hello", s)          # case-sensitive: no match
starts_ci = re.findall(r"^hello", s, re.I)    # re.I ignores case
ends_cs = re.findall("WORLD$", s)
ends_ci = re.findall("wORLd$", s, re.I)
words = re.findall(r"\b\w+\b", s)             # whole words between boundaries
for found in (starts_cs, starts_ci, ends_cs, ends_ci, words):
    print(found)
# Output:
# []
# ['HELLO']
# ['WORLD']
# ['WORLD']
# ['HELLO', 'WORLD']
import re

# Replace text with re.sub().
s = "hello world"
tail4 = s[-4:]   # "orld": contains no "hello", so it comes back unchanged
tail5 = s[-5:]   # "world"
print(re.sub("hello", "hi", s))
print(re.sub("hello", "hi", tail4))
print(re.sub("world", "China", tail5))
# Output:
# hi world
# orld
# China
import re

# Using the special sequences \w \W \s \S \d \D and the "." wildcard.
s = "你好 WORLD2"
# (label, pattern, replacement) for each demonstration.
cases = [
    ("匹配字母数字:", r"\w", "hi"),
    ("匹配非字母数字的字符:", r"\W", "hi"),
    ("匹配空白字符:", r"\s", "*"),
    ("匹配非空白字符:", r"\S", "hi"),
    ("匹配数字:", r"\d", "2.0"),
    ("匹配非数字:", r"\D", "hi"),
    ("匹配任意字符:", r".", "hi"),
]
for label, pattern, replacement in cases:
    # re.subn() returns (new_string, number_of_replacements). Indexing the
    # result of re.sub() with [1] only yields the second character of the
    # new string, not a count.
    replaced, count = re.subn(pattern, replacement, s)
    print(label + replaced)
    print("替换次数:" + str(count))
# Output (in Python 3, \w also matches the two Chinese characters):
# 匹配字母数字:hihi hihihihihihi
# 替换次数:8
# 匹配非字母数字的字符:你好hiWORLD2
# 替换次数:1
# 匹配空白字符:你好*WORLD2
# 替换次数:1
# 匹配非空白字符:hihi hihihihihihi
# 替换次数:8
# 匹配数字:你好 WORLD2.0
# 替换次数:1
# 匹配非数字:hihihihihihihihi2
# 替换次数:8
# 匹配任意字符:hihihihihihihihihi
# 替换次数:9
import re

# Using quantifiers.
# Area code and number separated by "-": 3+8 digits or 4+7 digits.
dash_pattern = r"\d{3}-\d{8}|\d{4}-\d{7}"
# Same numbers, with an optional "(" before the area code and an optional
# ")" or "-" after it. The alternation must not contain literal spaces, and
# the closing character class is [\)-]; otherwise "(010)62083436" never
# matches.
paren_pattern = r"[\(]?\d{3}[\)-]?\d{8}|[\(]?\d{4}[\)-]?\d{7}"

tel1 = "010-62083588"
print(re.findall(dash_pattern, tel1))
tel2 = "010-62082589"
print(re.findall(dash_pattern, tel2))
tel3 = "(010)62083436"
print(re.findall(paren_pattern, tel3))
# Output:
# ['010-62083588']
# ['010-62082589']
# ['(010)62083436']
import re

# Precompile a pattern with re.compile() and reuse the compiled object.
s = "1abc23def45"
p = re.compile(r"\d+")
digit_runs = p.findall(s)   # every run of consecutive digits
print(digit_runs)
print(p.pattern)            # the source text of the compiled pattern
# Output:
# ['1', '23', '45']
# \d+
import re

# Groups.
# Numbered group: \1 refers back to whatever group 1 matched.
p = re.compile(r"(abc)\1")
m = p.match("abcabcabc")
for selector in ((0,), (1,), ()):
    # group(0) and group() both return the whole match.
    print(m.group(*selector))
# Output:
# abcabc
# abc
# abcabc

# Named group: (?P<one>...) defines it, (?P=one) refers back to it.
named_pattern = re.compile(r"(?P<one>abc)(?P=one)")
m = named_pattern.search("abcabcabc")
named_groups = m.groupdict()
print(m.group("one"))
print(named_groups.keys())
print(named_groups.values())
print(m.re.pattern)
# Output:
# abc
# dict_keys(['one'])
# dict_values(['abc'])
# (?P<one>abc)(?P=one)
6.4 习题
1、存在字符串"I,love,Python",取出love并输出。
import re

# Exercise 1: take "love" out of the string and print it.
s = "I,love,Python"
# "love" is the second comma-separated field, i.e. the same text as s[2:6].
second_field = s.split(",")[1]
print(second_field)
# Output:
# love
2、存在字符串"aabbccddee",将dd替换为ff。
import re

# Exercise 2: replace "dd" with "ff".
b = "aabbccddee"
replaced = b.replace("dd", "ff")
print(replaced)
# Output:
# aabbccffee
3、存在字符串"ab2b3n5n2n67mm4n2",编程实现下面要求。
# Exercise 3.1: use re to extract every digit from the string, combine the
# digits into a new string, and print it.
import re

s = "ab2b3n5n2n67mm4n2"
p = re.compile(r"\d")
# findall() returns the digits as a list; the exercise asks for a single
# string, so join them.
digits = "".join(p.findall(s))
print(digits)
print(p.pattern)
# Output:
# 23526742
# \d