使用 PyMuPDF 包读取 PDF 文档时，span 里的各个属性分别代表什么？

时间:2024-07-14 17:43:08
"""Print the attributes of every text span on the first page of a PDF.

Uses PyMuPDF's ``page.get_text("dict")`` output, which nests
blocks -> lines -> spans.
"""

# (label, key) pairs for the span fields that are printed, in output order.
# These are the keys PyMuPDF documents for a span in "dict" output.
# The earlier lookups of 'adv', 'charspace', 'wordspace' and 'fontsize' were
# removed: they are not span keys and raised KeyError on the first span.
# The font size is reported under 'size'.
SPAN_FIELDS = (
    ("Text", "text"),            # the span's text content
    ("BBox", "bbox"),            # rectangle enclosing the span
    ("Font", "font"),            # font name
    ("Size", "size"),            # font size
    ("Color", "color"),          # text colour as an sRGB integer
    ("Flags", "flags"),          # bit field of font characteristics
    ("Ascender", "ascender"),    # ascender of the font
    ("Descender", "descender"),  # descender of the font
    ("Origin", "origin"),        # origin point of the span's first character
)


def iter_spans(blocks):
    """Yield every span dictionary found in *blocks*.

    Args:
        blocks: The ``"blocks"`` list from ``page.get_text("dict")``.

    Yields:
        Each span dict, in block / line / span order.
    """
    for block in blocks:
        # Image blocks carry no "lines" key; only text blocks do, so a plain
        # block["lines"] lookup fails on pages that contain images.
        for line in block.get("lines", ()):
            yield from line["spans"]


def describe_span(span):
    """Return the printable ``"Label: value"`` lines for one span.

    Args:
        span: A span dict containing every key listed in ``SPAN_FIELDS``.

    Returns:
        A list of strings, one per entry of ``SPAN_FIELDS``.

    Raises:
        KeyError: If *span* lacks one of the expected keys.
    """
    return [f"{label}: {span[key]}" for label, key in SPAN_FIELDS]


def main(path="example.pdf"):
    """Open *path* and print the span attributes of its first page.

    Args:
        path: Path of the PDF file to inspect.
    """
    # Imported here so the helpers above stay importable (and testable)
    # without PyMuPDF installed.
    import fitz

    # The context manager closes the document even if extraction fails.
    with fitz.open(path) as doc:
        # Load the first page.
        page = doc.load_page(0)
        # Fetch all blocks on the page as nested dictionaries.
        blocks = page.get_text("dict")["blocks"]
        for span in iter_spans(blocks):
            print("\n".join(describe_span(span)))
            print()  # blank line between spans


if __name__ == "__main__":
    main()