按字节截取字符串

时间:2023-01-12 11:10:01
 1 package motan.unittest.order;
 2 
import com.alibaba.fastjson.JSON;
import lombok.extern.slf4j.Slf4j;
import utils.Lang;

import java.io.UnsupportedEncodingException;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
10 
11 @Slf4j
12 public class ProductNameTest {
13 
14     public static void main(String[] args) {
15         String configValue = "[{keyword: '(', replace:'('}, {keyword: ')', replace:')'},{keyword: '#', replace:'#'},{keyword: '—', replace:''},{keyword: '-', replace:''},{keyword: '/', replace:''}]";
16         List<Map> nameFilterConfigs = JSON.parseArray(configValue, Map.class);
17         String productName = "索尼(SONY)()!@4324ghfg&**$#@#¥¥%……&……&*……&**((*()----专业数码录音笔ICDPX2404G黑色智能降噪可监听支持音频线转录适用商务学习采访取证频线转录适用商务学习采访取证频线转录适用商务学习采访取证";
18         String productName1 = "优必利 ///双电源桌面办公计算器 财务家用计算机 太阳能计算器 12位大屏幕语音计算机 4016双电源计算器大号-不带语音";
19         String productName2 = "优必利双电源桌面办公计算器 财务家用计算机太阳能计算器 12位大屏幕语音计算机 4016双电源计算器大号-不带语音";
20         String productName3 = "种类:4888型 (公4.5两 母3.5两) 8只 规格:1";
21         String productName4 = "种类:4888型 (公4.5两 母3.5两) 8只 规格";
22         System.out.println(filterInvoiceGGXH("我ABC", nameFilterConfigs, 4));
23         System.out.println(filterInvoiceGGXH("我ABC汉DEF", nameFilterConfigs, 6));
24         System.out.println(filterInvoiceGGXH("我ABC汉DEF", nameFilterConfigs, 100));
25         System.out.println(filterInvoiceGGXH(productName, nameFilterConfigs, 1000000000));
26         System.out.println(filterInvoiceGGXH(productName1, nameFilterConfigs, 60));
27         System.out.println(filterInvoiceGGXH(productName2, nameFilterConfigs, 36));
28         System.out.println(filterInvoiceGGXH(productName3, nameFilterConfigs, 1000000000));
29         //商品规格取36个字节,配置在数据库
30         System.out.println(filterInvoiceGGXH(productName3, nameFilterConfigs, 36));
31     }
32 
33     public static String filterInvoiceGGXH(String ggxh, List<Map> configs, Integer ggxhMaxLeng) {
34         String regx;
35         String replacement;
36         if (!Lang.isEmpty(configs)) {
37             for (Map m : configs) {
38                 if (!Lang.isEmpty(m)) {
39                     regx = (String) m.get("keyword");
40                     replacement = (String) m.get("replace");
41                     if (!Lang.isEmpty(regx) && replacement != null) {
42                         ggxh = ggxh.replaceAll(regx, replacement);
43                     }
44                 }
45             }
46         }
47         try {
48             log.info("商品规格型号截取字节数为:{},规格型号总字节数为:{}", ggxhMaxLeng, JSON.toJSONString(ggxh.getBytes("GBK").length));
49             if (!Lang.isEmpty(ggxh) && ggxh.getBytes("GBK").length > ggxhMaxLeng) {
50                 ggxh = subStringByByte(ggxh, ggxhMaxLeng);
51             }
52         } catch (Exception e) {
53             log.error("商品规格型号处理失败:{},错误信息:{}", ggxh, e);
54         }
55         return ggxh;
56     }
57 
58     /**
59      * 按字节截取字符串
60      *
61      * @param str
62      * @param splitByteNum
63      * @return
64      * @throws UnsupportedEncodingException
65      */
66     public static String subStringByByte(String str, int splitByteNum) throws UnsupportedEncodingException {
67         //输入无效判断
68         if (null == str || "".equals(str) || splitByteNum <= 0) {
69             return "";
70         }
71 
72         int tempSubStrLength = splitByteNum;
73         //截取长度为要截取字节数的子串。说明str.length()字符串中字符个数一定小于等于字节数。
74         String subStr = str.substring(0, tempSubStrLength > str.length() ? str.length() : tempSubStrLength);
75         //在GBK编码下,得到子串的字节长度
76         int subStrByteNum = subStr.getBytes("GBK").length;
77 
78         //如果子串的字节长度大于字符长度,说明一定有汉字
79         while (subStrByteNum > tempSubStrLength) {
80             /**
81              * 在子串末尾去掉一个字符,重新计算子串在GBK下的字节长度。
82              * 因为是去掉一个字符,而汉字占一个字符,所以不用考虑半个汉字的问题
83              */
84             int subStrLength = --splitByteNum;
85             log.info("处理字符串" + subStrLength + "->");
86             subStr = str.substring(0, subStrLength > str.length() ? str.length() : subStrLength);
87             subStrByteNum = subStr.getBytes("GBK").length;
88         }
89         return subStr;
90     }
91 }