ry

    * @return

pinyin4j的使用,pinyin4j使用

pinyin4j的使用  
pinyin4j是一个功能强大的汉语拼音工具包,主要用于把汉字转换成各种格式的拼音。下面看看如何使用pinyin4j。
 

import java.util.HashSet;
import java.util.Set;

import net.sourceforge.pinyin4j.PinyinHelper;
import net.sourceforge.pinyin4j.format.HanyuPinyinCaseType;
import net.sourceforge.pinyin4j.format.HanyuPinyinOutputFormat;
import net.sourceforge.pinyin4j.format.HanyuPinyinToneType;
import net.sourceforge.pinyin4j.format.HanyuPinyinVCharType;
import
net.sourceforge.pinyin4j.format.exception.BadHanyuPinyinOutputFormatCombination;

public class PingYingChange {

/**
* 获取汉字串拼音首字母,英文字符不变
*
* @param chinese 汉字串
* @return 汉语拼音首字母
*/
public static String cn2FirstSpell(String chinese) {
StringBuffer pybf = new StringBuffer();
char[] arr = chinese.toCharArray();
HanyuPinyinOutputFormat defaultFormat = new HanyuPinyinOutputFormat();
defaultFormat.setCaseType(HanyuPinyinCaseType.LOWERCASE);
defaultFormat.setToneType(HanyuPinyinToneType.WITHOUT_TONE);
for (int i = 0; i < arr.length; i++) {
if (arr[i] > 128) {
try {
String[] _t = PinyinHelper.toHanyuPinyinStringArray(arr[i],
defaultFormat);
if (_t != null) {
pybf.append(_t[0].charAt(0));
}
} catch (BadHanyuPinyinOutputFormatCombination e) {
e.printStackTrace();
}
} else {
pybf.append(arr[i]);
}
}
return pybf.toString().replaceAll(“\\W”, “”).trim();
}

/**
 * Replaces every non-ASCII character with its first pinyin reading;
 * characters with a code of 128 or below are copied through unchanged.
 *
 * <p>A character for which the pinyin lookup yields no reading (for example
 * punctuation above code 128) is kept as-is instead of causing a
 * {@link NullPointerException}.
 *
 * @param chinese the text to convert; must not be null
 * @return the text with convertible characters spelled out in lower-case pinyin
 */
public static String cn2Spell(String chinese) {
    StringBuilder spelled = new StringBuilder();
    HanyuPinyinOutputFormat defaultFormat = new HanyuPinyinOutputFormat();
    defaultFormat.setCaseType(HanyuPinyinCaseType.LOWERCASE);
    defaultFormat.setToneType(HanyuPinyinToneType.WITHOUT_TONE);
    for (char ch : chinese.toCharArray()) {
        if (ch <= 128) {
            spelled.append(ch);
            continue;
        }
        try {
            String[] readings = PinyinHelper.toHanyuPinyinStringArray(ch, defaultFormat);
            if (readings == null || readings.length == 0) {
                // No reading available: keep the original character.
                spelled.append(ch);
            } else {
                spelled.append(readings[0]);
            }
        } catch (BadHanyuPinyinOutputFormatCombination e) {
            e.printStackTrace();
        }
    }
    return spelled.toString();
}
public static String cnToSpell(String chines) {

String pinyinName = “”;
StringBuffer strbuf = new StringBuffer();
char[] nameChar = chines.toCharArray();
HanyuPinyinOutputFormat defaultFormat = new HanyuPinyinOutputFormat();
defaultFormat.setCaseType(HanyuPinyinCaseType.LOWERCASE);
defaultFormat.setToneType(HanyuPinyinToneType.WITHOUT_TONE);
for (int i = 0; i < nameChar.length; i++) {
char name = quanbianban(nameChar[i]);
nameChar[i] = name;
if (128 < nameChar[i]) {
try {
strbuf.append(PinyinHelper.toHanyuPinyinStringArray(nameChar[i],
defaultFormat)[0].charAt(0));
} catch (BadHanyuPinyinOutputFormatCombination e) {
e.printStackTrace();
}
} else {
strbuf.append(nameChar[i]);
}
}

pinyinName = strbuf.toString();

return pinyinName;
}

public static char quanbianban(char quan) {
switch (quan) {

case ‘0’:
return ‘0’;

case ‘1’:
return ‘1’;

case ‘2’:
return ‘2’;

case ‘3’:
return ‘3’;

case ‘4’:
return ‘4’;

case ‘5’:
return ‘5’;

case ‘6’:
return ‘6’;

case ‘7’:
return ‘7’;

case ‘8’:
return ‘8’;

case ‘9’:
return ‘9’;

default:
return quan;

}
}
/**
* 字符串集合转换字符串(逗号分隔)
* @author wyh
* @param stringSet
* @return
*/
public static String makeStringByStringSet(Set<String>
stringSet){
StringBuilder str = new StringBuilder();
int i=0;
for(String s : stringSet){
if(i == stringSet.size() – 1){
str.append(s);
}else{
str.append(s + “,”);
}
i++;
}
return str.toString().toLowerCase();
}

/**
* 获取拼音集合
* @author wyh
* @param src
* @return Set<String>
*/
public static Set<String> getPinyin(String src){
if(src!=null && !src.trim().equalsIgnoreCase(“”)){
char[] srcChar ;
srcChar=src.toCharArray();
//汉语拼音格式输出类
HanyuPinyinOutputFormat hanYuPinOutputFormat = new
HanyuPinyinOutputFormat();

//输出设置,大小写,音标方式等
hanYuPinOutputFormat.setCaseType(HanyuPinyinCaseType.LOWERCASE);
hanYuPinOutputFormat.setToneType(HanyuPinyinToneType.WITHOUT_TONE);
hanYuPinOutputFormat.setVCharType(HanyuPinyinVCharType.WITH_V);

String[][] temp = new String[src.length()][];
for(int i=0;i<srcChar.length;i++){
char c = srcChar[i];
//是中文或者a-z或者A-Z转换拼音(我的需求,是保留中文或者a-z或者A-Z)
if(String.valueOf(c).matches(“[\\u4E00-\\u9FA5]+”)){
try{
temp[i] = PinyinHelper.toHanyuPinyinStringArray(srcChar[i],
hanYuPinOutputFormat);
}catch(BadHanyuPinyinOutputFormatCombination e) {
e.printStackTrace();
}
}else if(((int)c>=65 && (int)c<=90) || ((int)c>=97 &&
(int)c<=122)){
temp[i] = new String[]{String.valueOf(srcChar[i])};
}else{
temp[i] = new String[]{“”};
}
}
String[] pingyinArray = Exchange(temp);
Set<String> pinyinSet = new HashSet<String>();
for(int i=0;i<pingyinArray.length;i++){
pinyinSet.add(pingyinArray[i]);
}
return pinyinSet;
}
return null;
}

/**
 * Returns every concatenation obtained by picking one string from each row of
 * a jagged array, in row order (a cartesian product).
 *
 * <p>The result size is the product of the row lengths. A {@code null} or
 * empty outer array yields an empty result instead of an
 * {@link ArrayIndexOutOfBoundsException}.
 *
 * @author wyh
 * @param strJaggedArray the candidate strings, one row per position
 * @return all combinations; for a single row, that row itself
 */
public static String[] Exchange(String[][] strJaggedArray) {
    if (strJaggedArray == null || strJaggedArray.length == 0) {
        return new String[0];
    }
    return DoExchange(strJaggedArray)[0];
}

/**
 * Folds the rows of a jagged array into a single row of combinations.
 *
 * @author wyh
 * @param strJaggedArray the rows to combine; must not be null
 * @return the input itself when it has fewer than two rows, otherwise a
 *         one-row array holding every combination
 */
private static String[][] DoExchange(String[][] strJaggedArray) {
    if (strJaggedArray.length < 2) {
        return strJaggedArray;
    }
    String[] combined = strJaggedArray[0];
    for (int row = 1; row < strJaggedArray.length; row++) {
        String[] suffixes = strJaggedArray[row];
        String[] merged = new String[combined.length * suffixes.length];
        int next = 0;
        for (String prefix : combined) {
            for (String suffix : suffixes) {
                merged[next++] = prefix + suffix;
            }
        }
        combined = merged;
    }
    return new String[][] { combined };
}

public static void main(String[] args) throws Exception {
HanyuPinyinOutputFormat format = new HanyuPinyinOutputFormat();

// UPPERCASE:大写 (ZHONG)
// LOWERCASE:小写 (zhong)
format.setCaseType(HanyuPinyinCaseType.LOWERCASE);

// WITHOUT_TONE:无音标 (zhong)
// WITH_TONE_NUMBER:1-4数字表示英标 (zhong4)
// WITH_TONE_MARK:直接用音标符(必须WITH_U_UNICODE否则异常)
(zhòng)
format.setToneType(HanyuPinyinToneType.WITH_TONE_MARK);

// WITH_V:用v表示ü (nv)
// WITH_U_AND_COLON:用”u:”表示ü (nu:)
// WITH_U_UNICODE:直接用ü (nü)
format.setVCharType(HanyuPinyinVCharType.WITH_U_UNICODE);
String[] pinyin = PinyinHelper.toHanyuPinyinStringArray(‘重’,
format);
System.out.println(PinyinHelper.toHanyuPinyinStringArray(‘重’,
format)[1]);
System.out.println(PingYingChange.cnToSpell(“镇江abc”));
System.out.println(PingYingChange.quanbianban(‘o’));
String x = “嘅囧誰說壞學生來勼髮視頻裆児”;
System.out.println(cn2FirstSpell(x));
System.out.println(cn2Spell(x));
String str = “单田芳”;
System.out.println(makeStringByStringSet(getPinyin(str)));
System.out.println(“”);
}
}

图片 1

 

pinyin4j的使用pinyin4j是一个功能强悍的汉语拼音工具包,主要是从汉语获取各种格式和需求的拼音,功能强悍,下…

        private static int byte2Int(byte b)
        {
            if (b < 0)
            {
                return 256 + b;
            }
            else
            {
                return b;
            }
        }

Process finished with exit code 0

                    }

        public static char getFirstPY(char ch)
        {
            if (ch >= 0 && ch <= 0x7F)
            {
                return ch;
            }
            int gb = 0;

        }

        String[] srcStr = new String[srcCount];

          汉字转换拼音部分代码如下:

        /**
         * 取出汉字的编码
         * cn 汉字
         */
        private static int gbValue(char ch) {
                String str = new String();
                str += ch;
                try {
                        byte[] bytes = str.getBytes(“GB2312”);
                        if (bytes.length < 2)
                                return 0;
                        return (bytes[0] << 8 & 0xff00) + (bytes[1] & 0xff);
                } catch (Exception e) {
                        return 0;
                }
        }

        return stringToPinyin(src, false, null);

        public static bool isSpliter(char c)
        {
            char[] spliter = { ‘,’, ‘,’, ‘;’, ‘;’ };
            foreach (char cc in spliter)
            {
                if (c == cc)
                {
                    return true;
                }
            }
            return false;
        }

        //i, u, v都不做声母, 跟随前面的字母

    * @param src

      这样基本上就很容易实现上面图例所示的功能了。

        static {
                for (int i = 0; i < 27; ++i) {
                        table[i] = gbValue(chartable[i]);
                }
        }

    * @return

        
        public static string getAllPY(char gb2312)
        {
            int ascii = getCnAscii(gb2312);
            if (ascii == 0)
            { // 取ascii时出错
                return new string(gb2312, 1);
            }
            else
            {
                string spell = getSpellByAscii(ascii);
                if (spell == null)
                {
                    return new string(gb2312, 1);
                }
                else
                {
                    return spell;
                } // end of if spell == null
            }
        }

 

        return sb.toString();

             具体代码可以参考另一篇博客:

        //————————private方法区————————
        /**
         * 输入字符,得到他的声母,英文字母返回对应的大写字母,其他非简体汉字返回 ‘0’
         *
         * @param ch 字符
         * @return 拼音首字母
         */
        private static char Char2Initial(char ch) {
                if (ch >= ‘a’ && ch <= ‘z’)
                        return (char) (ch – ‘a’ + ‘A’);
                if (ch >= ‘A’ && ch <= ‘Z’)
                        return ch;
                int gb = gbValue(ch);
                if (gb < table[0])
                        return ch;
                int i;
                for (i = 0; i < 26; ++i) {
                        if (match(i, gb))
                                break;
                }
                if (i >= 26)
                        return ch;
                else
                        return initialtable[i];
        }

            sb.append(str[i]);

        
通过异步方式即时获取检索内容(JAVASCRIPT脚本中实现缓存机制,保证性能)

此工具用来获取拼音首字母,原理是获取汉字编码的首字节,对比特殊汉字的首字节,并设定对应的拼音首字母关系来获取。

        }

      如图:

        /**
         * 根据一个包含汉字的字符串返回一个汉字拼音首字母的字符串
         *
         * @param SourceStr 源字符串
         * @return 拼音首字母的字符串
         */
        public static String cn2Pinyin(String
SourceStr) {
                String Result = “”;
                int StrLength = SourceStr.length();
                int i;
                try {
                        for (i = 0; i < StrLength; i++) {
                                Result +=
Char2Initial(SourceStr.charAt(i));
                        }
                } catch (Exception e) {
                        Result = “”;
                }
                return Result;
        }

 

        private static bool match(int i, int gb)
        {
            if (gb < FIRST_TABLE[i])
            {
                return false;
            }
            int j = i + 1;
            // 字母Z使用了两个标签
            while (j < 26 && (FIRST_TABLE[j] == FIRST_TABLE[i]))
            {
                ++j;
            }
            if (j == 26)
                return gb <= FIRST_TABLE[j];
            else
                return gb < FIRST_TABLE[j];
        }

/**
* 拼音首字母的工具
*
* @author leizhimin 2009-7-4 16:41:28
*/
public class PinyinToolkit {
        private PinyinToolkit() {
        }

        String[] headString = new String[chars.length];

 

 

    public static String charArrayToString(char[] ch) {

        public static string getFirstPY(string src)
        {
            StringBuilder sb = new StringBuilder();
            int len = src.Length;
            int i;
            for (i = 0; i < len; i++)
            {
                sb.Append(getFirstPY(src[i]));
            }
            return sb.ToString();
        }

        //初始化

            String separator) {

public class GBToPY
    {
        // Ascending boundary codes (27 entries) that match() compares a character's
        // two-byte code against. Repeated values make the following index share the
        // same boundary, so it never matches on its own.
        // NOTE(review): presumably entry i is the first gb2312 code whose pinyin starts
        // with the i-th letter of the alphabet - confirm against the code table.
        private static int[] FIRST_TABLE = { 45217, 45253, 45761, 46318, 46826,
                47010, 47297, 47614, 47614, 48119, 49062, 49324, 49896, 50371,
                50614, 50622, 50906, 51387, 51446, 52218, 52218, 52218, 52698,
                52980, 53689, 54481, 55289 };

        public static void main(String[] args)
throws Exception {
                System.out.println(cn2Pinyin(“熔岩”));
                System.out.println(cn2Pinyin(“”));
                System.out.println(cn2Pinyin(“熔岩abc123”));
        }
}

 

        public static string[] split(string src)
        {
            string text = src.Trim();
            StringBuilder sb = new StringBuilder();
            ArrayList al = new ArrayList();
            int i = 0;
            //跳过之前的分隔符
            for (i = 0; i < text.Length; i++)
            {
                if (!isSpliter(text[i]))
                {
                    break;
                }
            }
            for (; i < text.Length; i++)
            {
                if (isSpliter(text[i]))
                {
                    if (sb.Length > 0)
                    {
                        al.Add(sb.ToString());
                    }
                    sb = new StringBuilder();
                }
                else
                {
                    sb.Append(text[i]);
                }
            }
            if (sb.Length > 0)
            {
                al.Add(sb.ToString());
            }
            if (al.Count > 0)
            {
                string[] ret = new string[al.Count];
                for (i = 0; i < al.Count; i++)
                {
                    ret[i] = (string)al[i];
                }
                return ret;
            }
            else
            {
                return null;
            }
        }
    }

当然,如果要获取完整的拼音,则需要借助拼音库来实现了。

    * @param src

        private static string[] ALL_VALUE = { “zuo”, “zun”, “zui”, “zuan”, “zu”,
                “zou”, “zong”, “zi”, “zhuo”, “zhun”, “zhui”, “zhuang”, “zhuan”,
                “zhuai”, “zhua”, “zhu”, 图片 2..};

此算法的来自网上,我做了修改与封装,拿出来以方便使用。

    public static char[] getHeadByChar(char src) {

         需要检索的内容,数据库里保存汉字相应的拼音首字母和拼音。
(E8.Net中已经包含了将汉字转换为拼音及首拼的全部源码)

        private static char[] initialtable = {
                        ‘a’, ‘b’, ‘c’, ‘d’, ‘e’, ‘f’, ‘g’, ‘h’, ‘i’,
                        ‘j’, ‘k’, ‘l’, ‘m’, ‘n’, ‘o’, ‘p’, ‘q’, ‘r’,
                        ‘s’, ‘t’, ‘u’, ‘v’, ‘w’, ‘x’, ‘y’, ‘z’
        };

 

            

特别说明:本工具支持的中文文字有限,如果您有更好的算法,也请奉献出来,或与我交流。

    * @param hanzi

        private static string getSpellByAscii(int ascii)
        {
            if (ascii > 0 && ascii < 160)
            { // 单字符
                return new string((char)ascii, 1);
            }
            if (ascii < -20319 || ascii > -10247)
            { // 不知道的字符
                return null;
            }
            int first = 0;
            int sLast = ALL_CODE.Length – 1;
            int last = ALL_CODE.Length – 1;
            int mid;
            int temp;
            while (true)
            {
                mid = (first + last) >> 1;
                if (ascii == ALL_CODE[mid])
                {
                    return ALL_VALUE[mid];
                }
                else if (ascii > ALL_CODE[mid])
                {
                    temp = mid – 1;
                    if (temp >= 0)
                    {
                        if (ascii < ALL_CODE[temp])
                        {
                            return ALL_VALUE[mid];
                        }
                        else
                        {
                            last = mid;
                        }
                    }
                    else
                    {
                        return ALL_VALUE[0];
                    }
                }
                else
                {
                    if (mid + 1 <= sLast)
                    {
                        first = mid + 1;
                    }
                    else
                    {
                        return ALL_VALUE[sLast];
                    }
                }
            }
        }

                while (j < 26 && (table[j] == table[i]))
                        ++j;
                if (j == 26)
                        return gb <= table[j];
                else
                        return gb < table[j];

        for (char ch : chars) {

        private static int[] ALL_CODE = { -10254, -10256, -10260, -10262,
                -10270, -10274, -10281, -10296, -10307, -10309, -10315, -10322,
                -10328, -10329, -10331, -10519, -10533, -10544, -10587, -10764,
                -10780, -10790, -10800, -10815, -10832, -10838, -11014, -11018,
                -11019, -11020, -11024, -11038, -11041, -11045, -11052, -11055,
                -11067, -11077, -11097, -11303, -11324, -11339, -11340, -11358,
                -12607, -12802, -12812, -12829, -12831, -12838, -12849, -12852,
                -12858, -12860, -12871, -12875, -12888, -13060, -13063, -13068,
                -13076, -13091, -13095, -13096, -13107, -13120, -13138, -13147,
                -13318, -13326, -13329, -13340, -13343, -13356, -13359, -13367,
                -13383, -13387, -13391, -13395, -13398, -13400, -13404, -13406,
                -13601, -13611, -13658图片 3.};

        private static int[] table = new int[27];

        StringBuffer tempPinying = new StringBuffer();

    实现思路如下:

 

    * @return

    
输入拼音首字母或全拼均可以快速检索,这个功能对于很多软件系统中人名检索,知识库关键字等快速检索很有帮助。由于E8.Net工作流架构里本来就已经实现了类似google的快速检索的控件(有博客介绍),因此实现通过拼音检索就变得相对容易了。

        private static boolean match(int i, int gb) {
                if (gb < table[i])
                        return false;
                int j = i + 1;
                //字母Z使用了两个标签

    * @param isCapital

               

        //字母Z使用了两个标签,这里有27个值

        for (int i = 0; i < ch.length; i++) {

 

运行结果:

            }

            byte[] bytes = Encoding.GetEncoding(“gb2312”).GetBytes(new string(ch, 1));
            if (bytes.Length < 2)
            {
                gb = byte2Int(bytes[0]);
            }
            gb = (bytes[0] << 8 & 0xff00) + (bytes[1] & 0xff);
            if (gb < FIRST_TABLE[0])
                return ch;
            int i;
            for (i = 0; i < 26; ++i)
            {
                if (match(i, gb))
                    break;
            }
            if (i >= 26)
                return ch;
            else
                return (char)(65 + i);
        }

 

                        sb.append(separator);

        private static int getCnAscii(char cn)
        {
            byte[] bytes = null;
            bytes = Encoding.GetEncoding(“gb2312”).GetBytes(new string(cn, 1));
            if (bytes == null || bytes.Length > 2 || bytes.Length <= 0)
            {
                return 0;
            }
            if (bytes.Length == 1)
            {
                return bytes[0];
            }
            else
            {
                int hightByte = bytes[0];
                int lowByte = bytes[1];
                int ascii = (256 * hightByte + lowByte) – 256 * 256;
                return ascii;
            }
        }

ryABC123

import
net.sourceforge.pinyin4j.format.exception.BadHanyuPinyinOutputFormatCombination;

 前段时间难得有一周左右的空闲时间,在朋友的带动下玩了开心网,里面有一个小功能很有意思:选择人名的时候,可以根据拼音或拼音首字母快速输入。

这个功能如果用在我们的应用系统里,对软件的易用性非常有帮助,于是研究了一把,并把全部代码扩充到了E8.Net工作流架构里面。

        private static char[] chartable = {
                        ‘啊’, ‘芭’, ‘擦’, ‘搭’, ‘蛾’, ‘发’, ‘噶’, ‘哈’, ‘哈’,
                        ‘击’, ‘喀’, ‘垃’, ‘妈’, ‘拿’, ‘哦’, ‘啪’, ‘期’, ‘然’,
                        ‘撒’, ‘塌’, ‘塌’, ‘塌’, ‘挖’, ‘昔’, ‘压’, ‘匝’, ‘座’
        };
        //首字母表

    }

     
图片 4图片 5

 

 

            if (str.length != (i + 1)) {

 

    * @return

    */

 

        }

        String pinyingStr = “”;

        for (int i = 0; i < str.length; i++) {

                    }

       
System.out.println(Arrays.toString(stringToPinyin(s2,true,”,”)));

        return stringArrayToString(str, “”);

    public static String stringArrayToString(String[] str, String
separator) {

            // TODO Auto-generated catch block

    * 将字符数组转换成字符串

            return new char[] { src };

                String[] strs =
PinyinHelper.toHanyuPinyinStringArray(src,

import net.sourceforge.pinyin4j.format.HanyuPinyinOutputFormat;

    * 

 

    * 将字符串转换成拼音数组

        }

    * 取汉字的首字母(默认是大写)

    */

        // 创建返回对象

        // 如果是中文

    }

        if (“”.equals(src) || null == src) {

    * 

        return pinyingStr;

    /**

            tempPinying.append(src);

    * 

    * @return

    *            是否大写

    * @param separator

    *            各个字符串之间的分隔符

            headString[i] = sb.toString();

    /**

                    tempPinying.append(strs[0]);

    * 简单的将各个字符数组之间连接起来

    * 

        StringBuffer sb = new StringBuffer();

    * 

    public static String[] stringToPinyin(String src, String
separator) {

    * @param src

    * @return

                e.printStackTrace();

    * @param src

    * 

    /**

相关文章