package splitString;
/**
 * Splits a string into consecutive pieces whose encoded size does not exceed
 * a fixed number of bytes, never cutting a character in half.
 */
public class SplitString {
    /** Text to be split. */
    String SplitStr;
    /** Maximum encoded size, in bytes, of one piece. */
    int SplitByte;
    /** Charset used to measure how many bytes each character occupies. */
    private final java.nio.charset.Charset charset;

    /**
     * Creates a splitter that measures characters in the platform default
     * charset (the charset the no-argument {@code String.getBytes()} uses).
     *
     * @param str   text to split; must not be null
     * @param bytes maximum number of bytes per piece; must be positive
     * @throws IllegalArgumentException if an argument is invalid
     */
    public SplitString(String str, int bytes) {
        this(str, bytes, java.nio.charset.Charset.defaultCharset());
    }

    /**
     * Creates a splitter that measures characters in the given charset and
     * echoes the input to standard output.
     *
     * @param str     text to split; must not be null
     * @param bytes   maximum number of bytes per piece; must be positive
     * @param charset charset used to compute byte widths; must not be null
     * @throws IllegalArgumentException if an argument is invalid
     */
    public SplitString(String str, int bytes, java.nio.charset.Charset charset) {
        if (str == null || charset == null) {
            throw new IllegalArgumentException("str and charset must not be null");
        }
        if (bytes <= 0) {
            throw new IllegalArgumentException("bytes must be positive: " + bytes);
        }
        SplitStr = str;
        SplitByte = bytes;
        this.charset = charset;
        System.out.println("The String is:′" + SplitStr + "′;SplitBytes="
                + SplitByte);
    }

    /**
     * Computes the pieces without printing them.
     *
     * <p>Each piece holds as many whole characters as fit into
     * {@code SplitByte} bytes. A single character that is wider than the
     * limit is returned as a piece of its own, because it cannot be divided.
     *
     * @return the pieces in order; empty when the text is empty
     */
    public java.util.List<String> split() {
        java.util.List<String> pieces = new java.util.ArrayList<String>();
        int pieceStart = 0;
        int pieceBytes = 0;
        int i = 0;
        while (i < SplitStr.length()) {
            // Step by code point so a surrogate pair is measured as one unit.
            int step = Character.charCount(SplitStr.codePointAt(i));
            int width = SplitStr.substring(i, i + step).getBytes(charset).length;
            // Close the current piece before a character that would overflow
            // it; the (i > pieceStart) check guarantees forward progress.
            if (pieceBytes + width > SplitByte && i > pieceStart) {
                pieces.add(SplitStr.substring(pieceStart, i));
                pieceStart = i;
                pieceBytes = 0;
            }
            pieceBytes += width;
            i += step;
            if (pieceBytes >= SplitByte) {
                pieces.add(SplitStr.substring(pieceStart, i));
                pieceStart = i;
                pieceBytes = 0;
            }
        }
        // Whatever is left is shorter than the limit but still belongs to the result.
        if (pieceStart < SplitStr.length()) {
            pieces.add(SplitStr.substring(pieceStart));
        }
        return pieces;
    }

    /** Prints every piece produced by {@link #split()} on its own line. */
    public void SplitIt() {
        for (String piece : split()) {
            System.out.println(piece);
        }
    }

    public static void main(String[] args) {
        SplitString ss = new SplitString(
                "test中dd文dsaf中男大3443n中国43中国人0ewldfls=103", 4);
        ss.SplitIt();
    }
}
//输出结果为:
The String is:′test中dd文dsaf中男大3443n中国43中国人0ewldfls=103′;SplitBytes=4
test
中dd
文ds
af中
男大
3443
n中
国43
中国
人0e
wldf
103
7 个解决方案
#1
import java.util.*;
import java.util.regex.*;
/**
 * Cuts a string into pieces of at most {@code len} width units, counting a
 * CJK ideograph (U+4E00..U+9FA5) as 2 units and any other char as 1, and
 * never dividing a character between two pieces.
 */
public class Split {
    private String str;
    private int len;
    private List<String> list;

    /**
     * @param str text to split; must not be null
     * @param len maximum width of one piece; must be positive
     * @throws IllegalArgumentException if {@code str} is null or {@code len} is not positive
     */
    public Split(String str, int len) {
        if (str == null) {
            throw new IllegalArgumentException("str must not be null");
        }
        if (len <= 0) {
            throw new IllegalArgumentException("len must be positive: " + len);
        }
        this.str = str;
        this.len = len;
        list = new ArrayList<String>();
    }

    /** Recomputes the pieces; the result replaces that of any earlier call. */
    public void split() {
        list.clear();
        StringBuilder piece = new StringBuilder();
        int width = 0;
        for (int i = 0; i < str.length(); i++) {
            char c = str.charAt(i);
            int charWidth = isCHN(c) ? 2 : 1;
            // Close the current piece before a char that would overflow it.
            // The emptiness check lets a char wider than len stand alone
            // instead of being retried forever.
            if (width + charWidth > len && piece.length() > 0) {
                list.add(piece.toString());
                piece.setLength(0);
                width = 0;
            }
            piece.append(c);
            width += charWidth;
            if (width >= len) {
                list.add(piece.toString());
                piece.setLength(0);
                width = 0;
            }
        }
        if (piece.length() > 0) {
            list.add(piece.toString());
        }
    }

    /**
     * Returns a copy of the pieces computed by the last {@link #split()} call.
     *
     * @return the pieces in order; empty before the first call
     */
    public List<String> getParts() {
        return new ArrayList<String>(list);
    }

    /** Prints the pieces in {@code List.toString()} form. */
    public void print() {
        System.out.println(list);
    }

    /** Tells whether {@code c} lies in the range U+4E00..U+9FA5. */
    private boolean isCHN(char c) {
        return c >= '\u4E00' && c <= '\u9FA5';
    }

    public static void main(String args[]) {
        Split s = new Split("我A我BfffC", 4);
        s.split();
        s.print();
        s = new Split("我ABC汉DEF", 6);
        s.split();
        s.print();
    }
}
import java.util.regex.*;
/**
 * Cuts a string into pieces of at most {@code len} width units, counting a
 * CJK ideograph (U+4E00..U+9FA5) as 2 units and any other char as 1, and
 * never dividing a character between two pieces.
 */
public class Split {
    private String str;
    private int len;
    private List<String> list;

    /**
     * @param str text to split; must not be null
     * @param len maximum width of one piece; must be positive
     * @throws IllegalArgumentException if {@code str} is null or {@code len} is not positive
     */
    public Split(String str, int len) {
        if (str == null) {
            throw new IllegalArgumentException("str must not be null");
        }
        if (len <= 0) {
            throw new IllegalArgumentException("len must be positive: " + len);
        }
        this.str = str;
        this.len = len;
        list = new ArrayList<String>();
    }

    /** Recomputes the pieces; the result replaces that of any earlier call. */
    public void split() {
        list.clear();
        StringBuilder piece = new StringBuilder();
        int width = 0;
        for (int i = 0; i < str.length(); i++) {
            char c = str.charAt(i);
            int charWidth = isCHN(c) ? 2 : 1;
            // Close the current piece before a char that would overflow it.
            // The emptiness check lets a char wider than len stand alone
            // instead of being retried forever.
            if (width + charWidth > len && piece.length() > 0) {
                list.add(piece.toString());
                piece.setLength(0);
                width = 0;
            }
            piece.append(c);
            width += charWidth;
            if (width >= len) {
                list.add(piece.toString());
                piece.setLength(0);
                width = 0;
            }
        }
        if (piece.length() > 0) {
            list.add(piece.toString());
        }
    }

    /**
     * Returns a copy of the pieces computed by the last {@link #split()} call.
     *
     * @return the pieces in order; empty before the first call
     */
    public List<String> getParts() {
        return new ArrayList<String>(list);
    }

    /** Prints the pieces in {@code List.toString()} form. */
    public void print() {
        System.out.println(list);
    }

    /** Tells whether {@code c} lies in the range U+4E00..U+9FA5. */
    private boolean isCHN(char c) {
        return c >= '\u4E00' && c <= '\u9FA5';
    }

    public static void main(String args[]) {
        Split s = new Split("我A我BfffC", 4);
        s.split();
        s.print();
        s = new Split("我ABC汉DEF", 6);
        s.split();
        s.print();
    }
}
#2
// Prints the longest prefix of str whose encoded size fits into `count`
// bytes, never cutting a character in half. Byte widths come from the
// no-argument getBytes(), i.e. the platform default charset.
String str="我ABC汉DE";
int count=6; // remaining byte budget
int i=0;
for(i=0;i<str.length();i++)
{
char c=str.charAt(i);
int len=(c+"").getBytes().length;
// Stop BEFORE a character that no longer fits, so i always ends up as the
// exclusive end index of the prefix and needs no correction afterwards.
if(len>count)break;
count-=len;
}
System.out.println(str.substring(0,i));
// Prints the longest prefix of str whose encoded size fits into `count`
// bytes, never cutting a character in half. Relies on `str` being declared
// earlier. Byte widths come from the no-argument getBytes(), i.e. the
// platform default charset.
int count=6; // remaining byte budget
int i=0;
for(i=0;i<str.length();i++)
{
char c=str.charAt(i);
int len=(c+"").getBytes().length;
// Stop BEFORE a character that no longer fits, so i always ends up as the
// exclusive end index of the prefix and needs no correction afterwards.
if(len>count)break;
count-=len;
}
System.out.println(str.substring(0,i));
#3
楼上的测试过了吗
#4
import java.io.*;
/**
 * Truncates a string to a byte budget measured in GBK without cutting a
 * double-byte character in half.
 */
public class Teststr{
    public static void main(String args[]){
        String s="ksji你好ki大0家好";
        Teststr ts = new Teststr();
        try{
            ts.splitStr(s,8);
        }catch(Exception e){
            e.printStackTrace();
        }
    }

    /**
     * Prints the longest prefix of {@code s} that fits into {@code num} GBK bytes.
     *
     * @param s   text to truncate; must not be null
     * @param num byte budget; must not be negative
     * @throws UnsupportedEncodingException if this JVM has no GBK charset
     * @throws IllegalArgumentException     if an argument is invalid
     */
    public void splitStr(String s, int num) throws UnsupportedEncodingException {
        String p;
        try {
            p = truncate(s, num);
        } catch (java.nio.charset.UnsupportedCharsetException e) {
            // Keep the checked exception callers of this method already handle.
            UnsupportedEncodingException wrapped = new UnsupportedEncodingException("GBK");
            wrapped.initCause(e);
            throw wrapped;
        }
        System.out.println(p);
    }

    /**
     * Returns the longest prefix of {@code s} whose GBK encoding is at most
     * {@code num} bytes long.
     *
     * <p>The width of every character is measured directly, so the result does
     * not depend on spotting {@code '?'} substitution bytes, and a text shorter
     * than the budget is returned unchanged.
     *
     * @param s   text to truncate; must not be null
     * @param num byte budget; must not be negative
     * @return the truncated text, possibly empty
     * @throws IllegalArgumentException if an argument is invalid
     */
    public String truncate(String s, int num) {
        if (s == null) {
            throw new IllegalArgumentException("s must not be null");
        }
        if (num < 0) {
            throw new IllegalArgumentException("num must not be negative: " + num);
        }
        java.nio.charset.Charset gbk = java.nio.charset.Charset.forName("GBK");
        int used = 0;
        int end = 0;
        while (end < s.length()) {
            // Step by code point so a surrogate pair is measured as one unit.
            int step = Character.charCount(s.codePointAt(end));
            int width = s.substring(end, end + step).getBytes(gbk).length;
            if (used + width > num) {
                break;
            }
            used += width;
            end += step;
        }
        return s.substring(0, end);
    }

    /**
     * Tells whether {@code bt} contains an odd number of bytes equal to 63
     * ({@code '?'}). Kept for existing callers; {@link #splitStr} no longer
     * uses it.
     *
     * @param bt bytes to inspect
     * @return {@code true} when the count of 63-valued bytes is odd
     */
    public boolean NumOf63(byte[] bt){
        int count=0;
        for(int i=0;i<=bt.length-1;i++){
            if(bt[i]==63)
                count++;
        }
        return count%2!=0;
    }
}
/**
 * Truncates a string to a byte budget measured in GBK without cutting a
 * double-byte character in half.
 */
public class Teststr{
    public static void main(String args[]){
        String s="ksji你好ki大0家好";
        Teststr ts = new Teststr();
        try{
            ts.splitStr(s,8);
        }catch(Exception e){
            e.printStackTrace();
        }
    }

    /**
     * Prints the longest prefix of {@code s} that fits into {@code num} GBK bytes.
     *
     * @param s   text to truncate; must not be null
     * @param num byte budget; must not be negative
     * @throws UnsupportedEncodingException if this JVM has no GBK charset
     * @throws IllegalArgumentException     if an argument is invalid
     */
    public void splitStr(String s, int num) throws UnsupportedEncodingException {
        String p;
        try {
            p = truncate(s, num);
        } catch (java.nio.charset.UnsupportedCharsetException e) {
            // Keep the checked exception callers of this method already handle.
            UnsupportedEncodingException wrapped = new UnsupportedEncodingException("GBK");
            wrapped.initCause(e);
            throw wrapped;
        }
        System.out.println(p);
    }

    /**
     * Returns the longest prefix of {@code s} whose GBK encoding is at most
     * {@code num} bytes long.
     *
     * <p>The width of every character is measured directly, so the result does
     * not depend on spotting {@code '?'} substitution bytes, and a text shorter
     * than the budget is returned unchanged.
     *
     * @param s   text to truncate; must not be null
     * @param num byte budget; must not be negative
     * @return the truncated text, possibly empty
     * @throws IllegalArgumentException if an argument is invalid
     */
    public String truncate(String s, int num) {
        if (s == null) {
            throw new IllegalArgumentException("s must not be null");
        }
        if (num < 0) {
            throw new IllegalArgumentException("num must not be negative: " + num);
        }
        java.nio.charset.Charset gbk = java.nio.charset.Charset.forName("GBK");
        int used = 0;
        int end = 0;
        while (end < s.length()) {
            // Step by code point so a surrogate pair is measured as one unit.
            int step = Character.charCount(s.codePointAt(end));
            int width = s.substring(end, end + step).getBytes(gbk).length;
            if (used + width > num) {
                break;
            }
            used += width;
            end += step;
        }
        return s.substring(0, end);
    }

    /**
     * Tells whether {@code bt} contains an odd number of bytes equal to 63
     * ({@code '?'}). Kept for existing callers; {@link #splitStr} no longer
     * uses it.
     *
     * @param bt bytes to inspect
     * @return {@code true} when the count of 63-valued bytes is odd
     */
    public boolean NumOf63(byte[] bt){
        int count=0;
        for(int i=0;i<=bt.length-1;i++){
            if(bt[i]==63)
                count++;
        }
        return count%2!=0;
    }
}
#5
这个是经过测试的了!楼主赶紧给分吧!那个63我也不大明白它的来历,不过在网上看到有这么判断的。
#6
楼上所说的 63 是字符 '?' 的 ASCII 码:当某个字符无法用目标编码表示时(例如把汉字编码成 ISO8859_1,或像 #4 那样把高位的 ISO8859_1 字符再编码成 GBK),Java 会用 '?'(byte 值 63)来代替,#4 正是借此判断末尾是否截到了半个汉字
我给出两个方法,一个是不除乱码的,一个是除掉拆分后的乱码 都经过测试,这道题主要就是考对编码的了解:
import java.io.*;
import java.util.Iterator;
/**
 * Shows two ways of cutting a string into pieces of {@code mod} bytes:
 * {@link #split1} cuts the encoded bytes blindly (pieces may start or end
 * with half a character), while {@link #split2} only emits whole characters.
 */
public class GoodSplitString
{
    /**
     * Prints the original string and then the output of both split methods.
     *
     * @param s   text to split
     * @param mod piece size in bytes; must be positive
     * @throws IllegalArgumentException if {@code mod} is not positive
     */
    public GoodSplitString(String s,int mod)
    {
        System.out.println("原始字符串为: "+s);
        split1(s,mod);
        split2(s,mod);
    }

    /**
     * Prints {@code s} cut into consecutive runs of {@code mod} gb2312 bytes,
     * each decoded on its own; the last run holds whatever is left. A
     * double-byte character that straddles a boundary is deliberately left
     * broken.
     *
     * @param s   text to split
     * @param mod piece size in bytes; must be positive
     * @throws IllegalArgumentException if {@code mod} is not positive
     */
    public void split1(String s,int mod)
    {
        if (mod <= 0)
        {
            throw new IllegalArgumentException("mod must be positive: " + mod);
        }
        System.out.println("不除掉乱码:split1(String,int)");
        java.util.List<String> pieces = new java.util.ArrayList<String>();
        try
        {
            byte[] bytes = s.getBytes("gb2312");
            for (int from = 0; from < bytes.length; from += mod)
            {
                // The final run may be shorter than mod.
                int size = Math.min(mod, bytes.length - from);
                pieces.add(new String(bytes, from, size, "gb2312"));
            }
            for (String piece : pieces)
            {
                System.out.println(piece);
            }
        }
        catch (UnsupportedEncodingException ex)
        {
            System.out.println("no gb2312 ");
        }
    }

    /**
     * Prints {@code stringSrc} packed into pieces of exactly {@code mod} gbk
     * bytes (the last piece may be shorter). A double-byte character that
     * does not fit into the space left in the current piece is reported and
     * dropped; following single-byte characters then fill the piece.
     *
     * @param stringSrc text to split
     * @param mod       piece size in bytes; must be positive
     * @throws IllegalArgumentException if {@code mod} is not positive
     */
    public void split2(String stringSrc,int mod)
    {
        if (mod <= 0)
        {
            throw new IllegalArgumentException("mod must be positive: " + mod);
        }
        System.out.println("除掉乱码:splist2(String,int)");
        java.util.List<String> pieces = new java.util.ArrayList<String>();
        try
        {
            byte[] bytes = stringSrc.getBytes("gbk");
            byte[] chunk = new byte[mod];
            int filled = 0;  // bytes currently stored in chunk
            int dropped = 0; // double-byte characters discarded so far
            for (int i = 0; i < bytes.length; i++)
            {
                // A negative byte is the lead byte of a double-byte character;
                // the bounds check guards against a stray lead byte at the end.
                if (bytes[i] < 0 && i + 1 < bytes.length)
                {
                    if (filled + 2 <= mod)
                    {
                        chunk[filled++] = bytes[i];
                        chunk[filled++] = bytes[++i];
                    }
                    else
                    {
                        dropped++;
                        System.out.println("除掉的汉字:" + dropped + " " + new String(bytes, i, 2, "gbk"));
                        i++; // skip the trail byte as well
                    }
                }
                else
                {
                    chunk[filled++] = bytes[i];
                }
                if (filled == mod)
                {
                    pieces.add(new String(chunk, "gbk"));
                    filled = 0;
                }
            }
            // Emit the tail only when it holds data, so an input that ends on
            // a piece boundary does not produce an extra empty piece.
            if (filled > 0)
            {
                pieces.add(new String(chunk, 0, filled, "gbk"));
            }
        }
        catch (UnsupportedEncodingException ex)
        {
            System.out.println("no gbk");
        }
        for (String piece : pieces)
        {
            System.out.println(piece);
        }
    }

    public static void main(String[] args)
    {
        String s="a=我,人e们e为中以35经4产1人ie为joe经1发pl";
        new GoodSplitString(s,6);
    }
}
测试结果:
原始字符串为: a=我,人e们e为中以35经4产1人ie为joe经1发pl
不除掉乱码:split1(String,int)
a=我,?
?e们e?
?中以3
5经4产
1人ie?
?joe经
1发pl
除掉乱码:splist2(String,int)
除掉的汉字:1 人
除掉的汉字:2 中
除掉的汉字:3 以
除掉的汉字:4 为
除掉的汉字:5 发
a=我,e
们e为3
5经4产
1人iej
oe经1p
l
我给出两个方法,一个是不除乱码的,一个是除掉拆分后的乱码 都经过测试,这道题主要就是考对编码的了解:
import java.io.*;
import java.util.Iterator;
/**
 * Shows two ways of cutting a string into pieces of {@code mod} bytes:
 * {@link #split1} cuts the encoded bytes blindly (pieces may start or end
 * with half a character), while {@link #split2} only emits whole characters.
 */
public class GoodSplitString
{
    /**
     * Prints the original string and then the output of both split methods.
     *
     * @param s   text to split
     * @param mod piece size in bytes; must be positive
     * @throws IllegalArgumentException if {@code mod} is not positive
     */
    public GoodSplitString(String s,int mod)
    {
        System.out.println("原始字符串为: "+s);
        split1(s,mod);
        split2(s,mod);
    }

    /**
     * Prints {@code s} cut into consecutive runs of {@code mod} gb2312 bytes,
     * each decoded on its own; the last run holds whatever is left. A
     * double-byte character that straddles a boundary is deliberately left
     * broken.
     *
     * @param s   text to split
     * @param mod piece size in bytes; must be positive
     * @throws IllegalArgumentException if {@code mod} is not positive
     */
    public void split1(String s,int mod)
    {
        if (mod <= 0)
        {
            throw new IllegalArgumentException("mod must be positive: " + mod);
        }
        System.out.println("不除掉乱码:split1(String,int)");
        java.util.List<String> pieces = new java.util.ArrayList<String>();
        try
        {
            byte[] bytes = s.getBytes("gb2312");
            for (int from = 0; from < bytes.length; from += mod)
            {
                // The final run may be shorter than mod.
                int size = Math.min(mod, bytes.length - from);
                pieces.add(new String(bytes, from, size, "gb2312"));
            }
            for (String piece : pieces)
            {
                System.out.println(piece);
            }
        }
        catch (UnsupportedEncodingException ex)
        {
            System.out.println("no gb2312 ");
        }
    }

    /**
     * Prints {@code stringSrc} packed into pieces of exactly {@code mod} gbk
     * bytes (the last piece may be shorter). A double-byte character that
     * does not fit into the space left in the current piece is reported and
     * dropped; following single-byte characters then fill the piece.
     *
     * @param stringSrc text to split
     * @param mod       piece size in bytes; must be positive
     * @throws IllegalArgumentException if {@code mod} is not positive
     */
    public void split2(String stringSrc,int mod)
    {
        if (mod <= 0)
        {
            throw new IllegalArgumentException("mod must be positive: " + mod);
        }
        System.out.println("除掉乱码:splist2(String,int)");
        java.util.List<String> pieces = new java.util.ArrayList<String>();
        try
        {
            byte[] bytes = stringSrc.getBytes("gbk");
            byte[] chunk = new byte[mod];
            int filled = 0;  // bytes currently stored in chunk
            int dropped = 0; // double-byte characters discarded so far
            for (int i = 0; i < bytes.length; i++)
            {
                // A negative byte is the lead byte of a double-byte character;
                // the bounds check guards against a stray lead byte at the end.
                if (bytes[i] < 0 && i + 1 < bytes.length)
                {
                    if (filled + 2 <= mod)
                    {
                        chunk[filled++] = bytes[i];
                        chunk[filled++] = bytes[++i];
                    }
                    else
                    {
                        dropped++;
                        System.out.println("除掉的汉字:" + dropped + " " + new String(bytes, i, 2, "gbk"));
                        i++; // skip the trail byte as well
                    }
                }
                else
                {
                    chunk[filled++] = bytes[i];
                }
                if (filled == mod)
                {
                    pieces.add(new String(chunk, "gbk"));
                    filled = 0;
                }
            }
            // Emit the tail only when it holds data, so an input that ends on
            // a piece boundary does not produce an extra empty piece.
            if (filled > 0)
            {
                pieces.add(new String(chunk, 0, filled, "gbk"));
            }
        }
        catch (UnsupportedEncodingException ex)
        {
            System.out.println("no gbk");
        }
        for (String piece : pieces)
        {
            System.out.println(piece);
        }
    }

    public static void main(String[] args)
    {
        String s="a=我,人e们e为中以35经4产1人ie为joe经1发pl";
        new GoodSplitString(s,6);
    }
}
测试结果:
原始字符串为: a=我,人e们e为中以35经4产1人ie为joe经1发pl
不除掉乱码:split1(String,int)
a=我,?
?e们e?
?中以3
5经4产
1人ie?
?joe经
1发pl
除掉乱码:splist2(String,int)
除掉的汉字:1 人
除掉的汉字:2 中
除掉的汉字:3 以
除掉的汉字:4 为
除掉的汉字:5 发
a=我,e
们e为3
5经4产
1人iej
oe经1p
l
#7
少写了一行
import java.util.ArrayList;
import java.util.ArrayList;
#1
import java.util.*;
import java.util.regex.*;
/**
 * Cuts a string into pieces of at most {@code len} width units, counting a
 * CJK ideograph (U+4E00..U+9FA5) as 2 units and any other char as 1, and
 * never dividing a character between two pieces.
 */
public class Split {
    private String str;
    private int len;
    private List<String> list;

    /**
     * @param str text to split; must not be null
     * @param len maximum width of one piece; must be positive
     * @throws IllegalArgumentException if {@code str} is null or {@code len} is not positive
     */
    public Split(String str, int len) {
        if (str == null) {
            throw new IllegalArgumentException("str must not be null");
        }
        if (len <= 0) {
            throw new IllegalArgumentException("len must be positive: " + len);
        }
        this.str = str;
        this.len = len;
        list = new ArrayList<String>();
    }

    /** Recomputes the pieces; the result replaces that of any earlier call. */
    public void split() {
        list.clear();
        StringBuilder piece = new StringBuilder();
        int width = 0;
        for (int i = 0; i < str.length(); i++) {
            char c = str.charAt(i);
            int charWidth = isCHN(c) ? 2 : 1;
            // Close the current piece before a char that would overflow it.
            // The emptiness check lets a char wider than len stand alone
            // instead of being retried forever.
            if (width + charWidth > len && piece.length() > 0) {
                list.add(piece.toString());
                piece.setLength(0);
                width = 0;
            }
            piece.append(c);
            width += charWidth;
            if (width >= len) {
                list.add(piece.toString());
                piece.setLength(0);
                width = 0;
            }
        }
        if (piece.length() > 0) {
            list.add(piece.toString());
        }
    }

    /**
     * Returns a copy of the pieces computed by the last {@link #split()} call.
     *
     * @return the pieces in order; empty before the first call
     */
    public List<String> getParts() {
        return new ArrayList<String>(list);
    }

    /** Prints the pieces in {@code List.toString()} form. */
    public void print() {
        System.out.println(list);
    }

    /** Tells whether {@code c} lies in the range U+4E00..U+9FA5. */
    private boolean isCHN(char c) {
        return c >= '\u4E00' && c <= '\u9FA5';
    }

    public static void main(String args[]) {
        Split s = new Split("我A我BfffC", 4);
        s.split();
        s.print();
        s = new Split("我ABC汉DEF", 6);
        s.split();
        s.print();
    }
}
import java.util.regex.*;
/**
 * Cuts a string into pieces of at most {@code len} width units, counting a
 * CJK ideograph (U+4E00..U+9FA5) as 2 units and any other char as 1, and
 * never dividing a character between two pieces.
 */
public class Split {
    private String str;
    private int len;
    private List<String> list;

    /**
     * @param str text to split; must not be null
     * @param len maximum width of one piece; must be positive
     * @throws IllegalArgumentException if {@code str} is null or {@code len} is not positive
     */
    public Split(String str, int len) {
        if (str == null) {
            throw new IllegalArgumentException("str must not be null");
        }
        if (len <= 0) {
            throw new IllegalArgumentException("len must be positive: " + len);
        }
        this.str = str;
        this.len = len;
        list = new ArrayList<String>();
    }

    /** Recomputes the pieces; the result replaces that of any earlier call. */
    public void split() {
        list.clear();
        StringBuilder piece = new StringBuilder();
        int width = 0;
        for (int i = 0; i < str.length(); i++) {
            char c = str.charAt(i);
            int charWidth = isCHN(c) ? 2 : 1;
            // Close the current piece before a char that would overflow it.
            // The emptiness check lets a char wider than len stand alone
            // instead of being retried forever.
            if (width + charWidth > len && piece.length() > 0) {
                list.add(piece.toString());
                piece.setLength(0);
                width = 0;
            }
            piece.append(c);
            width += charWidth;
            if (width >= len) {
                list.add(piece.toString());
                piece.setLength(0);
                width = 0;
            }
        }
        if (piece.length() > 0) {
            list.add(piece.toString());
        }
    }

    /**
     * Returns a copy of the pieces computed by the last {@link #split()} call.
     *
     * @return the pieces in order; empty before the first call
     */
    public List<String> getParts() {
        return new ArrayList<String>(list);
    }

    /** Prints the pieces in {@code List.toString()} form. */
    public void print() {
        System.out.println(list);
    }

    /** Tells whether {@code c} lies in the range U+4E00..U+9FA5. */
    private boolean isCHN(char c) {
        return c >= '\u4E00' && c <= '\u9FA5';
    }

    public static void main(String args[]) {
        Split s = new Split("我A我BfffC", 4);
        s.split();
        s.print();
        s = new Split("我ABC汉DEF", 6);
        s.split();
        s.print();
    }
}
#2
// Prints the longest prefix of str whose encoded size fits into `count`
// bytes, never cutting a character in half. Byte widths come from the
// no-argument getBytes(), i.e. the platform default charset.
String str="我ABC汉DE";
int count=6; // remaining byte budget
int i=0;
for(i=0;i<str.length();i++)
{
char c=str.charAt(i);
int len=(c+"").getBytes().length;
// Stop BEFORE a character that no longer fits, so i always ends up as the
// exclusive end index of the prefix and needs no correction afterwards.
if(len>count)break;
count-=len;
}
System.out.println(str.substring(0,i));
// Prints the longest prefix of str whose encoded size fits into `count`
// bytes, never cutting a character in half. Relies on `str` being declared
// earlier. Byte widths come from the no-argument getBytes(), i.e. the
// platform default charset.
int count=6; // remaining byte budget
int i=0;
for(i=0;i<str.length();i++)
{
char c=str.charAt(i);
int len=(c+"").getBytes().length;
// Stop BEFORE a character that no longer fits, so i always ends up as the
// exclusive end index of the prefix and needs no correction afterwards.
if(len>count)break;
count-=len;
}
System.out.println(str.substring(0,i));
#3
楼上的测试过了吗
#4
import java.io.*;
/**
 * Truncates a string to a byte budget measured in GBK without cutting a
 * double-byte character in half.
 */
public class Teststr{
    public static void main(String args[]){
        String s="ksji你好ki大0家好";
        Teststr ts = new Teststr();
        try{
            ts.splitStr(s,8);
        }catch(Exception e){
            e.printStackTrace();
        }
    }

    /**
     * Prints the longest prefix of {@code s} that fits into {@code num} GBK bytes.
     *
     * @param s   text to truncate; must not be null
     * @param num byte budget; must not be negative
     * @throws UnsupportedEncodingException if this JVM has no GBK charset
     * @throws IllegalArgumentException     if an argument is invalid
     */
    public void splitStr(String s, int num) throws UnsupportedEncodingException {
        String p;
        try {
            p = truncate(s, num);
        } catch (java.nio.charset.UnsupportedCharsetException e) {
            // Keep the checked exception callers of this method already handle.
            UnsupportedEncodingException wrapped = new UnsupportedEncodingException("GBK");
            wrapped.initCause(e);
            throw wrapped;
        }
        System.out.println(p);
    }

    /**
     * Returns the longest prefix of {@code s} whose GBK encoding is at most
     * {@code num} bytes long.
     *
     * <p>The width of every character is measured directly, so the result does
     * not depend on spotting {@code '?'} substitution bytes, and a text shorter
     * than the budget is returned unchanged.
     *
     * @param s   text to truncate; must not be null
     * @param num byte budget; must not be negative
     * @return the truncated text, possibly empty
     * @throws IllegalArgumentException if an argument is invalid
     */
    public String truncate(String s, int num) {
        if (s == null) {
            throw new IllegalArgumentException("s must not be null");
        }
        if (num < 0) {
            throw new IllegalArgumentException("num must not be negative: " + num);
        }
        java.nio.charset.Charset gbk = java.nio.charset.Charset.forName("GBK");
        int used = 0;
        int end = 0;
        while (end < s.length()) {
            // Step by code point so a surrogate pair is measured as one unit.
            int step = Character.charCount(s.codePointAt(end));
            int width = s.substring(end, end + step).getBytes(gbk).length;
            if (used + width > num) {
                break;
            }
            used += width;
            end += step;
        }
        return s.substring(0, end);
    }

    /**
     * Tells whether {@code bt} contains an odd number of bytes equal to 63
     * ({@code '?'}). Kept for existing callers; {@link #splitStr} no longer
     * uses it.
     *
     * @param bt bytes to inspect
     * @return {@code true} when the count of 63-valued bytes is odd
     */
    public boolean NumOf63(byte[] bt){
        int count=0;
        for(int i=0;i<=bt.length-1;i++){
            if(bt[i]==63)
                count++;
        }
        return count%2!=0;
    }
}
/**
 * Truncates a string to a byte budget measured in GBK without cutting a
 * double-byte character in half.
 */
public class Teststr{
    public static void main(String args[]){
        String s="ksji你好ki大0家好";
        Teststr ts = new Teststr();
        try{
            ts.splitStr(s,8);
        }catch(Exception e){
            e.printStackTrace();
        }
    }

    /**
     * Prints the longest prefix of {@code s} that fits into {@code num} GBK bytes.
     *
     * @param s   text to truncate; must not be null
     * @param num byte budget; must not be negative
     * @throws UnsupportedEncodingException if this JVM has no GBK charset
     * @throws IllegalArgumentException     if an argument is invalid
     */
    public void splitStr(String s, int num) throws UnsupportedEncodingException {
        String p;
        try {
            p = truncate(s, num);
        } catch (java.nio.charset.UnsupportedCharsetException e) {
            // Keep the checked exception callers of this method already handle.
            UnsupportedEncodingException wrapped = new UnsupportedEncodingException("GBK");
            wrapped.initCause(e);
            throw wrapped;
        }
        System.out.println(p);
    }

    /**
     * Returns the longest prefix of {@code s} whose GBK encoding is at most
     * {@code num} bytes long.
     *
     * <p>The width of every character is measured directly, so the result does
     * not depend on spotting {@code '?'} substitution bytes, and a text shorter
     * than the budget is returned unchanged.
     *
     * @param s   text to truncate; must not be null
     * @param num byte budget; must not be negative
     * @return the truncated text, possibly empty
     * @throws IllegalArgumentException if an argument is invalid
     */
    public String truncate(String s, int num) {
        if (s == null) {
            throw new IllegalArgumentException("s must not be null");
        }
        if (num < 0) {
            throw new IllegalArgumentException("num must not be negative: " + num);
        }
        java.nio.charset.Charset gbk = java.nio.charset.Charset.forName("GBK");
        int used = 0;
        int end = 0;
        while (end < s.length()) {
            // Step by code point so a surrogate pair is measured as one unit.
            int step = Character.charCount(s.codePointAt(end));
            int width = s.substring(end, end + step).getBytes(gbk).length;
            if (used + width > num) {
                break;
            }
            used += width;
            end += step;
        }
        return s.substring(0, end);
    }

    /**
     * Tells whether {@code bt} contains an odd number of bytes equal to 63
     * ({@code '?'}). Kept for existing callers; {@link #splitStr} no longer
     * uses it.
     *
     * @param bt bytes to inspect
     * @return {@code true} when the count of 63-valued bytes is odd
     */
    public boolean NumOf63(byte[] bt){
        int count=0;
        for(int i=0;i<=bt.length-1;i++){
            if(bt[i]==63)
                count++;
        }
        return count%2!=0;
    }
}
#5
这个是经过测试的了!楼主赶紧给分吧!那个63我也不大明白它的来历,不过在网上看到有这么判断的。
#6
楼上所说的 63 是字符 '?' 的 ASCII 码:当某个字符无法用目标编码表示时(例如把汉字编码成 ISO8859_1,或像 #4 那样把高位的 ISO8859_1 字符再编码成 GBK),Java 会用 '?'(byte 值 63)来代替,#4 正是借此判断末尾是否截到了半个汉字
我给出两个方法,一个是不除乱码的,一个是除掉拆分后的乱码 都经过测试,这道题主要就是考对编码的了解:
import java.io.*;
import java.util.Iterator;
/**
 * Shows two ways of cutting a string into pieces of {@code mod} bytes:
 * {@link #split1} cuts the encoded bytes blindly (pieces may start or end
 * with half a character), while {@link #split2} only emits whole characters.
 */
public class GoodSplitString
{
    /**
     * Prints the original string and then the output of both split methods.
     *
     * @param s   text to split
     * @param mod piece size in bytes; must be positive
     * @throws IllegalArgumentException if {@code mod} is not positive
     */
    public GoodSplitString(String s,int mod)
    {
        System.out.println("原始字符串为: "+s);
        split1(s,mod);
        split2(s,mod);
    }

    /**
     * Prints {@code s} cut into consecutive runs of {@code mod} gb2312 bytes,
     * each decoded on its own; the last run holds whatever is left. A
     * double-byte character that straddles a boundary is deliberately left
     * broken.
     *
     * @param s   text to split
     * @param mod piece size in bytes; must be positive
     * @throws IllegalArgumentException if {@code mod} is not positive
     */
    public void split1(String s,int mod)
    {
        if (mod <= 0)
        {
            throw new IllegalArgumentException("mod must be positive: " + mod);
        }
        System.out.println("不除掉乱码:split1(String,int)");
        java.util.List<String> pieces = new java.util.ArrayList<String>();
        try
        {
            byte[] bytes = s.getBytes("gb2312");
            for (int from = 0; from < bytes.length; from += mod)
            {
                // The final run may be shorter than mod.
                int size = Math.min(mod, bytes.length - from);
                pieces.add(new String(bytes, from, size, "gb2312"));
            }
            for (String piece : pieces)
            {
                System.out.println(piece);
            }
        }
        catch (UnsupportedEncodingException ex)
        {
            System.out.println("no gb2312 ");
        }
    }

    /**
     * Prints {@code stringSrc} packed into pieces of exactly {@code mod} gbk
     * bytes (the last piece may be shorter). A double-byte character that
     * does not fit into the space left in the current piece is reported and
     * dropped; following single-byte characters then fill the piece.
     *
     * @param stringSrc text to split
     * @param mod       piece size in bytes; must be positive
     * @throws IllegalArgumentException if {@code mod} is not positive
     */
    public void split2(String stringSrc,int mod)
    {
        if (mod <= 0)
        {
            throw new IllegalArgumentException("mod must be positive: " + mod);
        }
        System.out.println("除掉乱码:splist2(String,int)");
        java.util.List<String> pieces = new java.util.ArrayList<String>();
        try
        {
            byte[] bytes = stringSrc.getBytes("gbk");
            byte[] chunk = new byte[mod];
            int filled = 0;  // bytes currently stored in chunk
            int dropped = 0; // double-byte characters discarded so far
            for (int i = 0; i < bytes.length; i++)
            {
                // A negative byte is the lead byte of a double-byte character;
                // the bounds check guards against a stray lead byte at the end.
                if (bytes[i] < 0 && i + 1 < bytes.length)
                {
                    if (filled + 2 <= mod)
                    {
                        chunk[filled++] = bytes[i];
                        chunk[filled++] = bytes[++i];
                    }
                    else
                    {
                        dropped++;
                        System.out.println("除掉的汉字:" + dropped + " " + new String(bytes, i, 2, "gbk"));
                        i++; // skip the trail byte as well
                    }
                }
                else
                {
                    chunk[filled++] = bytes[i];
                }
                if (filled == mod)
                {
                    pieces.add(new String(chunk, "gbk"));
                    filled = 0;
                }
            }
            // Emit the tail only when it holds data, so an input that ends on
            // a piece boundary does not produce an extra empty piece.
            if (filled > 0)
            {
                pieces.add(new String(chunk, 0, filled, "gbk"));
            }
        }
        catch (UnsupportedEncodingException ex)
        {
            System.out.println("no gbk");
        }
        for (String piece : pieces)
        {
            System.out.println(piece);
        }
    }

    public static void main(String[] args)
    {
        String s="a=我,人e们e为中以35经4产1人ie为joe经1发pl";
        new GoodSplitString(s,6);
    }
}
测试结果:
原始字符串为: a=我,人e们e为中以35经4产1人ie为joe经1发pl
不除掉乱码:split1(String,int)
a=我,?
?e们e?
?中以3
5经4产
1人ie?
?joe经
1发pl
除掉乱码:splist2(String,int)
除掉的汉字:1 人
除掉的汉字:2 中
除掉的汉字:3 以
除掉的汉字:4 为
除掉的汉字:5 发
a=我,e
们e为3
5经4产
1人iej
oe经1p
l
我给出两个方法,一个是不除乱码的,一个是除掉拆分后的乱码 都经过测试,这道题主要就是考对编码的了解:
import java.io.*;
import java.util.Iterator;
/**
 * Shows two ways of cutting a string into pieces of {@code mod} bytes:
 * {@link #split1} cuts the encoded bytes blindly (pieces may start or end
 * with half a character), while {@link #split2} only emits whole characters.
 */
public class GoodSplitString
{
    /**
     * Prints the original string and then the output of both split methods.
     *
     * @param s   text to split
     * @param mod piece size in bytes; must be positive
     * @throws IllegalArgumentException if {@code mod} is not positive
     */
    public GoodSplitString(String s,int mod)
    {
        System.out.println("原始字符串为: "+s);
        split1(s,mod);
        split2(s,mod);
    }

    /**
     * Prints {@code s} cut into consecutive runs of {@code mod} gb2312 bytes,
     * each decoded on its own; the last run holds whatever is left. A
     * double-byte character that straddles a boundary is deliberately left
     * broken.
     *
     * @param s   text to split
     * @param mod piece size in bytes; must be positive
     * @throws IllegalArgumentException if {@code mod} is not positive
     */
    public void split1(String s,int mod)
    {
        if (mod <= 0)
        {
            throw new IllegalArgumentException("mod must be positive: " + mod);
        }
        System.out.println("不除掉乱码:split1(String,int)");
        java.util.List<String> pieces = new java.util.ArrayList<String>();
        try
        {
            byte[] bytes = s.getBytes("gb2312");
            for (int from = 0; from < bytes.length; from += mod)
            {
                // The final run may be shorter than mod.
                int size = Math.min(mod, bytes.length - from);
                pieces.add(new String(bytes, from, size, "gb2312"));
            }
            for (String piece : pieces)
            {
                System.out.println(piece);
            }
        }
        catch (UnsupportedEncodingException ex)
        {
            System.out.println("no gb2312 ");
        }
    }

    /**
     * Prints {@code stringSrc} packed into pieces of exactly {@code mod} gbk
     * bytes (the last piece may be shorter). A double-byte character that
     * does not fit into the space left in the current piece is reported and
     * dropped; following single-byte characters then fill the piece.
     *
     * @param stringSrc text to split
     * @param mod       piece size in bytes; must be positive
     * @throws IllegalArgumentException if {@code mod} is not positive
     */
    public void split2(String stringSrc,int mod)
    {
        if (mod <= 0)
        {
            throw new IllegalArgumentException("mod must be positive: " + mod);
        }
        System.out.println("除掉乱码:splist2(String,int)");
        java.util.List<String> pieces = new java.util.ArrayList<String>();
        try
        {
            byte[] bytes = stringSrc.getBytes("gbk");
            byte[] chunk = new byte[mod];
            int filled = 0;  // bytes currently stored in chunk
            int dropped = 0; // double-byte characters discarded so far
            for (int i = 0; i < bytes.length; i++)
            {
                // A negative byte is the lead byte of a double-byte character;
                // the bounds check guards against a stray lead byte at the end.
                if (bytes[i] < 0 && i + 1 < bytes.length)
                {
                    if (filled + 2 <= mod)
                    {
                        chunk[filled++] = bytes[i];
                        chunk[filled++] = bytes[++i];
                    }
                    else
                    {
                        dropped++;
                        System.out.println("除掉的汉字:" + dropped + " " + new String(bytes, i, 2, "gbk"));
                        i++; // skip the trail byte as well
                    }
                }
                else
                {
                    chunk[filled++] = bytes[i];
                }
                if (filled == mod)
                {
                    pieces.add(new String(chunk, "gbk"));
                    filled = 0;
                }
            }
            // Emit the tail only when it holds data, so an input that ends on
            // a piece boundary does not produce an extra empty piece.
            if (filled > 0)
            {
                pieces.add(new String(chunk, 0, filled, "gbk"));
            }
        }
        catch (UnsupportedEncodingException ex)
        {
            System.out.println("no gbk");
        }
        for (String piece : pieces)
        {
            System.out.println(piece);
        }
    }

    public static void main(String[] args)
    {
        String s="a=我,人e们e为中以35经4产1人ie为joe经1发pl";
        new GoodSplitString(s,6);
    }
}
测试结果:
原始字符串为: a=我,人e们e为中以35经4产1人ie为joe经1发pl
不除掉乱码:split1(String,int)
a=我,?
?e们e?
?中以3
5经4产
1人ie?
?joe经
1发pl
除掉乱码:splist2(String,int)
除掉的汉字:1 人
除掉的汉字:2 中
除掉的汉字:3 以
除掉的汉字:4 为
除掉的汉字:5 发
a=我,e
们e为3
5经4产
1人iej
oe经1p
l
#7
少写了一行
import java.util.ArrayList;
import java.util.ArrayList;