字符串 汉字转拼音 pinyin4j

时间:2021-09-20 13:52:57

介绍

GitHub上的一个封装: https://github.com/belerweb/pinyin4j 

pinyin4j 是一个可以将汉字转换成拼音的库,非常实用。它提供的 PinyinHelper 静态工具类对外提供拼音转换服务,主要用到以下两个方法:
static public String[] toHanyuPinyinStringArray(char ch) //将char(必须为汉字单字)转化为拼音,如果ch为非汉字,返回null
static public String[] toHanyuPinyinStringArray(char ch,HanyuPinyinOutputFormat outputFormat) //可以设置输出的格式
2
2
 
1
static public String[] toHanyuPinyinStringArray(char ch) //将char(必须为汉字单字)转化为拼音,如果ch为非汉字,返回null
2
static public String[] toHanyuPinyinStringArray(char ch,HanyuPinyinOutputFormat outputFormat) //可以设置输出的格式

测试

将字符转为拼音数组

char c='乾';
System.out.println(Arrays.toString(PinyinHelper.toHanyuPinyinStringArray(c)));//[qian2, gan1]
System.out.println(Arrays.toString(PinyinHelper.toGwoyeuRomatzyhStringArray(c)));//[chyan, gan]
System.out.println(Arrays.toString(PinyinHelper.toMPS2PinyinStringArray(c)));//[chian2, gan1]
System.out.println(Arrays.toString(PinyinHelper.toTongyongPinyinStringArray(c)));//[cian2, gan1]
System.out.println(Arrays.toString(PinyinHelper.toWadeGilesPinyinStringArray(c)));//[ch`ien2, kan1]
System.out.println(Arrays.toString(PinyinHelper.toYalePinyinStringArray(c)));//[chyan2, gan1]
7
7
 
1
char c='乾';
2
System.out.println(Arrays.toString(PinyinHelper.toHanyuPinyinStringArray(c)));//[qian2, gan1]
3
System.out.println(Arrays.toString(PinyinHelper.toGwoyeuRomatzyhStringArray(c)));//[chyan, gan]
4
System.out.println(Arrays.toString(PinyinHelper.toMPS2PinyinStringArray(c)));//[chian2, gan1]
5
System.out.println(Arrays.toString(PinyinHelper.toTongyongPinyinStringArray(c)));//[cian2, gan1]
6
System.out.println(Arrays.toString(PinyinHelper.toWadeGilesPinyinStringArray(c)));//[ch`ien2, kan1]
7
System.out.println(Arrays.toString(PinyinHelper.toYalePinyinStringArray(c)));//[chyan2, gan1]
对于 乾 和 重 的拼音转换,以上方法分别得到的结果是:
汉语拼音:[qian2, gan1],[zhong4, chong2]
国语罗马字:[chyan, gan],[jonq, chorng]
注音符号拼音:[chian2, gan1],[jung4, chung2]
通用拼音:[cian2, gan1],[jhong4, chong2]
威妥玛拼音:[ch`ien2, kan1],[chung4, ch`ung2]
耶魯拼音:[chyan2, gan1],[jung4, chung2]
6
6
 
1
汉语拼音:[qian2, gan1],[zhong4, chong2]
2
国语罗马字:[chyan, gan],[jonq, chorng]
3
注音符号拼音:[chian2, gan1],[jung4, chung2]
4
通用拼音:[cian2, gan1],[jhong4, chong2]
5
威妥玛拼音:[ch`ien2, kan1],[chung4, ch`ung2]
6
耶魯拼音:[chyan2, gan1],[jung4, chung2]

拼音输出格式设置

HanyuPinyinOutputFormat format = new HanyuPinyinOutputFormat();
System.out.println(Arrays.toString(PinyinHelper.toHanyuPinyinStringArray(c, format)));
2
2
 
1
HanyuPinyinOutputFormat format = new HanyuPinyinOutputFormat();
2
System.out.println(Arrays.toString(PinyinHelper.toHanyuPinyinStringArray(c, format)));
设置大小写,默认 LOWERCASE
format.setCaseType(HanyuPinyinCaseType.UPPERCASE);//大小写,默认 LOWERCASE
x
1
format.setCaseType(HanyuPinyinCaseType.UPPERCASE);//大小写,默认 LOWERCASE
设置音调的样式:数字声调 WITH_TONE_NUMBER,声调符号 WITH_TONE_MARK,无声调 WITHOUT_TONE。默认 WITH_TONE_NUMBER
format.setToneType(HanyuPinyinToneType.WITHOUT_TONE);//音调的样式,默认 WITH_TONE_NUMBER
//[QIAN2, GAN1],[QIÁN, GĀN],[QIAN, GAN]
2
2
 
1
format.setToneType(HanyuPinyinToneType.WITHOUT_TONE);//音调的样式,默认 WITH_TONE_NUMBER
2
//[QIAN2, GAN1],[QIÁN, GĀN],[QIAN, GAN]
字符 ü(V)的输出格式:使用 U 加冒号 WITH_U_AND_COLON,使用上面带两点的 Ü WITH_U_UNICODE,使用字符 V WITH_V。默认 WITH_U_AND_COLON
format.setVCharType(HanyuPinyinVCharType.WITH_V);//符合 V 的输出格式,默认 WITH_U_AND_COLON
//[LU:, LU],[LÜ, LU],[LV, LU]
2
2
 
1
format.setVCharType(HanyuPinyinVCharType.WITH_V);//符合 V 的输出格式,默认 WITH_U_AND_COLON
2
//[LU:, LU],[LÜ, LU],[LV, LU]

字符串转拼音工具类

System.out.println(toPinYin("白乾涛"));//Bai2Qian2Tao1
System.out.println(getFirstPinYin("白乾涛"));//B
 
1
System.out.println(toPinYin("白乾涛"));//Bai2Qian2Tao1
2
System.out.println(getFirstPinYin("白乾涛"));//B
/**
 * Static helpers that convert Chinese text to Hanyu Pinyin using pinyin4j's
 * {@code PinyinHelper}.
 *
 * <p>For characters with several readings, only the first reading returned by
 * pinyin4j is used.
 */
public class PinYinUtils {
	
	/** Placeholder returned by {@link #getFirstPinYin(String)} when no initial can be determined. */
	private static final String NO_INITIAL = "#";
	
	/**
	 * Returns the upper-cased first letter of the pinyin of the first
	 * non-whitespace character of {@code content}.
	 *
	 * @param content the text to inspect; may be {@code null}
	 * @return a one-character upper-case string, or {@code "#"} when
	 *         {@code content} is {@code null}/blank or its first character
	 *         has no pinyin reading
	 */
	public static String getFirstPinYin(String content) {
		if (content == null) {
			return NO_INITIAL;
		}
		String trimmed = content.trim();
		if (trimmed.isEmpty()) {
			return NO_INITIAL;
		}
		String reading = firstReading(PinyinHelper.toHanyuPinyinStringArray(trimmed.charAt(0)));
		if (reading == null) {
			return NO_INITIAL;
		}
		// Character.toUpperCase is locale-independent, unlike String.toUpperCase().
		return String.valueOf(Character.toUpperCase(reading.charAt(0)));
	}
	
	/**
	 * Converts {@code content} to pinyin with no separator, the first letter of
	 * each syllable capitalized, and a default {@code HanyuPinyinOutputFormat}.
	 * For example, "白乾涛" becomes "Bai2Qian2Tao1".
	 *
	 * @param content the text to convert; may be {@code null}
	 * @return the converted text, or an empty string for {@code null}/blank input
	 */
	public static String toPinYin(String content) {
		return toPinYin(content, "", true, null);
	}
	
	/**
	 * Converts {@code content} to pinyin. Characters with code point {@code <= 128}
	 * and characters for which no pinyin is available are copied through unchanged.
	 *
	 * <p>The separator is appended after every converted character except when that
	 * character is the last one of {@code content}; it is not inserted around
	 * characters that are copied through. The final result is trimmed.
	 *
	 * <p>If {@code format} is an invalid combination, the stack trace is printed and
	 * the affected characters are copied through unchanged (best effort, no throw).
	 *
	 * @param content        the text to convert; may be {@code null}
	 * @param spera          separator appended after converted syllables; {@code null} means none
	 * @param firstUpperCase whether to capitalize the first letter of each syllable
	 * @param format         pinyin output format; {@code null} means a default-constructed format
	 * @return the converted text, or an empty string for {@code null}/blank input
	 */
	public static String toPinYin(String content, String spera, boolean firstUpperCase, HanyuPinyinOutputFormat format) {
		if (content == null || content.trim().isEmpty()) {
			return "";
		}
		String separator = spera == null ? "" : spera;
		HanyuPinyinOutputFormat outputFormat = format == null ? new HanyuPinyinOutputFormat() : format;
		
		StringBuilder pyBuilder = new StringBuilder();
		int lastIndex = content.length() - 1;
		for (int i = 0; i <= lastIndex; i++) {
			char c = content.charAt(i);
			// Fast path: low code points never have a pinyin reading, skip the lookup.
			String syllable = c <= 128 ? null : lookupFirstReading(c, outputFormat);
			if (syllable == null) {
				pyBuilder.append(c);
				continue;
			}
			if (firstUpperCase) {
				syllable = Character.toUpperCase(syllable.charAt(0)) + syllable.substring(1);
			}
			pyBuilder.append(syllable);
			if (i != lastIndex) {
				pyBuilder.append(separator);
			}
		}
		return pyBuilder.toString().trim();
	}
	
	/**
	 * Looks up the first pinyin reading of {@code c}; returns {@code null} when the
	 * character has no usable reading or the format combination is rejected.
	 */
	private static String lookupFirstReading(char c, HanyuPinyinOutputFormat format) {
		try {
			return firstReading(PinyinHelper.toHanyuPinyinStringArray(c, format));
		} catch (BadHanyuPinyinOutputFormatCombination e) {
			// Best effort: report the problem and let the caller keep the raw character.
			e.printStackTrace();
			return null;
		}
	}
	
	/** Returns the first non-empty element of {@code readings}' head, or {@code null} if there is none. */
	private static String firstReading(String[] readings) {
		if (readings == null || readings.length == 0) {
			return null;
		}
		String first = readings[0];
		return first == null || first.isEmpty() ? null : first;
	}
}
1
public class PinYinUtils {
2
 
3
 public static String getFirstPinYin(String content) {
4
  if (content == null || content.trim().length() == 0) {
5
   return "#";
6
  } else {
7
   char firstChar = content.trim().charAt(0);
8
   String[] array = PinyinHelper.toHanyuPinyinStringArray(firstChar);
9
   return array == null || array.length == 0 || array[0] == null || array[0].length() == 0 ? "#" : (array[0].charAt(0) + "").toUpperCase();
10
  }
11
 }
12
 
13
 public static String toPinYin(String content) {
14
  return toPinYin(content, "", true, null);
15
 }
16
 
17
 /**
18
  * 将content转换成拼音,如果不是汉字或者没有对应的拼音,则不作转换
19
  * 如: 明天 转换成 MINGTIAN
20
  *
21
  * @param content:要转化的汉字
22
  * @param spera:转化结果的分割符
23
  * @param firstUpperCase:是否首字母大写
24
  * @param format:格式封装类
25
  */
26
 public static String toPinYin(String content, String spera, boolean firstUpperCase, HanyuPinyinOutputFormat format) {
27
  if (content == null || content.trim().length() == 0) {
28
   return "";
29
  }
30
  if (spera == null) {
31
   spera = "";
32
  }
33
  if (format == null) {
34
   format = new HanyuPinyinOutputFormat();
35
  }
36
  
37
  StringBuilder pyBuilder = new StringBuilder();
38
  String temp;
39
  String[] pyArray = null;
40
  for (int i = 0; i < content.length(); i++) {
41
   char c = content.charAt(i);
42
   if ((int) c <= 128) {
43
    pyBuilder.append(c);
44
   } else {
45
    try {
46
     pyArray = PinyinHelper.toHanyuPinyinStringArray(c, format);
47
    } catch (BadHanyuPinyinOutputFormatCombination e) {
48
     e.printStackTrace();
49
    }
50
    if (pyArray == null) {
51
     pyBuilder.append(c);
52
    } else {
53
     temp = pyArray[0];
54
     if (firstUpperCase) {
55
      temp = pyArray[0].toUpperCase().charAt(0) + temp.substring(1);
56
     }
57
     pyBuilder.append(temp).append(i == content.length() - 1 ? "" : spera);
58
    }
59
   }
60
  }
61
  return pyBuilder.toString().trim();
62
 }
63
}
2018-7-2