Spring Boot 集成 PDFBox 解析 PDF 文件

时间:2024-10-16 17:43:46
public static Map<String, String> pdfStr(String invoiceInfo) { // 提取数据 Map<String, String> result = new HashMap<>(); invoiceInfo = invoiceInfo.replaceAll("(", "(").replaceAll(")", ")"); // 定义正则表达式模式 Map<String, String> patterns = new HashMap<>(); patterns.put("invoiceNumber", "发票号码:(\\d+)"); patterns.put("invoiceDate", "开票日期:(\\d{4}年\\d{1,2}月\\d{1,2}日)"); patterns.put("buyerName", "购 名称:(.+?) 销 名称:(.+?)\n"); patterns.put("itemDetails", "税 额\\s+(.*?)合 计"); patterns.put("total", "\\(小写\\)¥(\\d+(\\.\\d+)?)"); patterns.put("batchNumber", "批号:(.+?)/"); patterns.put("productionDate", "生产日期:(\\d{4}-\\d{1,2}-\\d{1,2})/"); patterns.put("expirationDate", "有效期至:(\\d{4}-\\d{1,2}-\\d{1,2})/"); patterns.put("taxIncludedPrice", "含税单价:(\\d+(\\.\\d+)?)"); patterns.put("manufacturer", "生产厂家:(.+?)/"); patterns.put("approvalNumber", "批准文号:(.+?)/"); patterns.put("issuer", "开票人:(.+)"); for (Map.Entry<String, String> entry : patterns.entrySet()) { Pattern pattern = Pattern.compile(entry.getValue(), Pattern.DOTALL); Matcher matcher = pattern.matcher(invoiceInfo); if (matcher.find()) { result.put(entry.getKey(), matcher.group(1).trim()); } } // 处理项目名称、规格型号、单位、数量、单价、金额、税率/征收率、税额 if (result.containsKey("itemDetails")) { String[] details = result.get("itemDetails").replace("\n", " ").split(" "); if (details.length >= 8) { result.put("productName", details[0].trim() + (details.length > 8 ? details[8].trim() : "")); result.put("specification", details[1].trim()); result.put("unit", details[2].trim()); result.put("quantity", details[3].trim()); result.put("unitPrice", details[4].trim()); result.put("amount", details[5].trim()); result.put("taxRate", details[6].trim()); result.put("taxAmount", details[7].trim()); } } return result; }