使用 PyMuPDF 包读取 PDF 文档时,span 里的各个属性分别代表什么?
import fitz
# Print the attributes of every text span on the first page of example.pdf.
#
# The context manager closes the document once the dump is finished.
with fitz.open("example.pdf") as doc:
    # Load the first page (page numbers are 0-based).
    page = doc.load_page(0)
    # The "dict" output nests the page content as blocks -> lines -> spans.
    blocks = page.get_text("dict")["blocks"]

    # (label, key) pairs for the span keys this script prints. The earlier
    # version also read 'adv', 'charspace', 'wordspace' and 'fontsize'; those
    # are not span keys and raised KeyError ('size' already is the font size).
    span_fields = (
        ("Text", "text"),            # the span's text content
        ("BBox", "bbox"),            # rectangle enclosing the span
        ("Font", "font"),            # font name
        ("Size", "size"),            # font size
        ("Color", "color"),          # text color as an sRGB integer
        ("Flags", "flags"),          # bit field describing font properties
        ("Ascender", "ascender"),    # ascender value of the font
        ("Descender", "descender"),  # descender value of the font
        ("Origin", "origin"),        # origin point of the span's first character
    )

    # Walk every block on the page.
    for block in blocks:
        # Only text blocks (type 0) carry a "lines" key; image blocks
        # (type 1) do not, so skip them instead of failing on the lookup.
        if block["type"] != 0:
            continue
        # Walk every line of the text block.
        for line in block["lines"]:
            # Walk every span (run of text sharing one font setup) in the line.
            for span in line["spans"]:
                for label, key in span_fields:
                    print(f"{label}: {span[key]}")
                # Blank line between spans for readability.
                print()