本文代码有误,修正后的版本见:http://blog.csdn.net/lrq1988/article/details/17954715
因工作需要,要读取一个几十万行的文本,就写了这个程序,倒腾了两天,改来改去,并不一定是最终版,姑且先记录下来。
1、本地读取以后改为网络读取
2、timer是为了作定时刷新
3、容器启动时,首先加载MobileUtil.init()方法
4、在多核服务器上加载会更快:按服务器的CPU核数切分读取到的内容,由多个线程并行解析后再组装成map
5、基于线程安全考虑,HashMap可能改为ConcurrentHashMap
6、之所以没用NIO,是因为文本行数的计算在JDK6不支持,另外实现的代价又高。貌似JDK7已提供相应API。
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.zip.CRC32;
import org.apache.commons.codec.digest.DigestUtils;
//import org.apache.commons.codec.digest.DigestUtils;
/**
 * Looks up the province, city and operator of a mobile number by the first
 * seven digits of the number.
 *
 * <p>The lookup table is read from a tab-separated text file (one record per
 * line: {@code prefix \t province \t city \t operator}). After {@link #init()}
 * has been called, the file is re-read once a minute and the table is replaced
 * only when the file content has changed (detected with a CRC32 checksum).
 *
 * <p>Thread-safety: {@link #locationMap} is never mutated by this class after
 * it has been published; a reload builds a fresh map and swaps the volatile
 * reference. All lookup methods block until the first successful load.
 */
public class MobileUtil {

    /** Scheduler that runs the periodic refresh task. */
    private static final ScheduledExecutorService timer = Executors
            .newScheduledThreadPool(Runtime.getRuntime().availableProcessors());

    /** Location of the data file. */
    private static final String fileName = "/Users/leefelix/Downloads/all.csv";

    /** Charset used for the data file and for remote content. */
    private static final String CHARSET = "UTF-8";

    /** How long a blocked lookup sleeps between checks of {@link #started}. */
    private static final long POLL_MILLIS = 10L;

    /** An 11-digit number starting with 1; compiled once instead of per call. */
    private static final Pattern MOBILE_PATTERN = Pattern.compile("1\\d{10}");

    /** CRC32 of the content that produced the current map; guarded by the class lock. */
    private static long crc32;

    /** Whether the refresh task has already been scheduled; guarded by the class lock. */
    private static boolean scheduled;

    /** Current lookup table keyed by 7-digit prefix. Replaced wholesale on reload. */
    public static volatile HashMap<String, Location> locationMap = new HashMap<String, Location>();

    /** Becomes {@code true} after the first successful load. */
    public static volatile boolean started = false;

    private MobileUtil() {
    }

    /**
     * Schedules the refresh task: first run immediately, then one minute after
     * each run finishes.
     */
    private void initial() {
        timer.scheduleWithFixedDelay(new Runnable() {
            @Override
            public void run() {
                try {
                    reload(fileName);
                } catch (FileNotFoundException e) {
                    e.printStackTrace();
                    System.out.println("找不到文件" + fileName + "...");
                } catch (IOException e) {
                    e.printStackTrace();
                    System.out.println("与文件" + fileName + "通信异常...");
                } catch (InterruptedException e) {
                    // Preserve the interrupt so the pool can observe it.
                    Thread.currentThread().interrupt();
                    e.printStackTrace();
                } catch (ExecutionException e) {
                    e.printStackTrace();
                } catch (RuntimeException e) {
                    // An exception escaping a periodic task cancels every
                    // later run, so it must not leave this method.
                    e.printStackTrace();
                }
            }
        }, 0, 1, TimeUnit.MINUTES);
    }

    /**
     * Starts the periodic loading of the data file. Calling it more than once
     * has no further effect.
     */
    public static synchronized void init() {
        if (scheduled) {
            return;
        }
        scheduled = true;
        new MobileUtil().initial();
    }

    /**
     * Blocks until the lookup table has been loaded for the first time, then
     * returns a new instance.
     *
     * @return a new {@code MobileUtil}
     */
    public static MobileUtil create() {
        final MobileUtil mobileUtil = new MobileUtil();
        awaitStarted();
        return mobileUtil;
    }

    /**
     * Reads {@code path} and, if its content differs from what is currently
     * loaded, parses it and publishes a new lookup table.
     *
     * <p>Package-private so it can be exercised directly by tests.
     *
     * @param path file to read
     * @return {@code true} if a new table was published, {@code false} if the
     *         content was unchanged
     * @throws IOException          if the file cannot be read
     * @throws InterruptedException if interrupted while waiting for the parsers
     * @throws ExecutionException   if a parser task failed
     */
    static synchronized boolean reload(String path)
            throws IOException, InterruptedException, ExecutionException {
        List<String> lines = new ArrayList<String>();
        long checksum = readLines(path, lines);
        // "started" is part of the test so that a first load whose checksum
        // happens to equal the initial value 0 (e.g. an empty file) still runs.
        if (started && checksum == crc32) {
            return false;
        }
        HashMap<String, Location> fresh = parseInParallel(lines);
        locationMap = fresh;
        crc32 = checksum;
        started = true;
        return true;
    }

    /**
     * Appends every line of {@code path} to {@code sink} and returns the CRC32
     * of the decoded lines (each followed by a single {@code '\n'}).
     */
    private static long readLines(String path, List<String> sink) throws IOException {
        CRC32 checksum = new CRC32();
        FileInputStream in = new FileInputStream(path);
        try {
            BufferedReader reader = new BufferedReader(new InputStreamReader(in, CHARSET));
            String line;
            while ((line = reader.readLine()) != null) {
                checksum.update(line.getBytes(CHARSET));
                checksum.update('\n');
                sink.add(line);
            }
        } finally {
            in.close();
        }
        return checksum.getValue();
    }

    /**
     * Splits {@code lines} into one contiguous slice per available processor,
     * parses the slices concurrently and merges the results in file order, so
     * that a later duplicate prefix overrides an earlier one.
     */
    private static HashMap<String, Location> parseInParallel(List<String> lines)
            throws InterruptedException, ExecutionException {
        HashMap<String, Location> merged = new HashMap<String, Location>();
        if (lines.isEmpty()) {
            return merged;
        }
        int workers = Math.min(Runtime.getRuntime().availableProcessors(), lines.size());
        int chunk = (lines.size() + workers - 1) / workers; // ceiling division
        List<Callable<HashMap<String, Location>>> tasks =
                new ArrayList<Callable<HashMap<String, Location>>>();
        for (int from = 0; from < lines.size(); from += chunk) {
            final List<String> slice = lines.subList(from, Math.min(from + chunk, lines.size()));
            tasks.add(new Callable<HashMap<String, Location>>() {
                @Override
                public HashMap<String, Location> call() {
                    return parseLines(slice);
                }
            });
        }
        ExecutorService exec = Executors.newFixedThreadPool(tasks.size());
        try {
            // All tasks are submitted once, after the list is complete.
            for (Future<HashMap<String, Location>> future : exec.invokeAll(tasks)) {
                merged.putAll(future.get());
            }
        } finally {
            exec.shutdown();
        }
        return merged;
    }

    /** Parses each line and collects the valid records keyed by prefix. */
    private static HashMap<String, Location> parseLines(List<String> lines) {
        HashMap<String, Location> map = new HashMap<String, Location>();
        for (String line : lines) {
            Location lc = parseLine(line);
            if (lc != null) {
                map.put(lc.getNum(), lc);
            }
        }
        return map;
    }

    /**
     * Parses one {@code prefix \t province \t city \t operator} record.
     *
     * @return the record, or {@code null} if the line has fewer than four
     *         tab-separated fields or an empty prefix
     */
    private static Location parseLine(String line) {
        // Limit 4: everything after the third tab belongs to the operator field.
        String[] fields = line.split("\t", 4);
        if (fields.length < 4) {
            return null;
        }
        String num = fields[0].trim();
        if (num.length() == 0) {
            return null;
        }
        Location lc = new Location();
        lc.setNum(num);
        lc.setProvince(fields[1].trim());
        lc.setCity(fields[2].trim());
        lc.setOperator(fields[3].trim());
        return lc;
    }

    /**
     * Blocks until {@link #started} is {@code true}, sleeping between checks
     * instead of spinning. An interrupt received while waiting does not end
     * the wait; the thread's interrupt status is restored before returning.
     */
    private static void awaitStarted() {
        boolean interrupted = false;
        while (!started) {
            try {
                Thread.sleep(POLL_MILLIS);
            } catch (InterruptedException e) {
                interrupted = true;
            }
        }
        if (interrupted) {
            Thread.currentThread().interrupt();
        }
    }

    /**
     * Demo entry point: starts loading, prints the city and operator of a
     * sample number, then stops the scheduler so the JVM can exit.
     */
    public static void main(String args[]) throws IOException {
        init();
        try {
            System.out.println(getCity("13811014978"));
            System.out.println(getOperator("13811014978"));
        } finally {
            timer.shutdown();
        }
    }

    /** One record of the lookup table. */
    static class Location {
        private String num;
        private String province;
        private String city;
        private String operator;

        public Location() {
        }

        public String getNum() {
            return num;
        }

        public void setNum(String num) {
            this.num = num;
        }

        public String getProvince() {
            return province;
        }

        public void setProvince(String province) {
            this.province = province;
        }

        public String getCity() {
            return city;
        }

        public void setCity(String city) {
            this.city = city;
        }

        public String getOperator() {
            return operator;
        }

        public void setOperator(String operator) {
            this.operator = operator;
        }
    }

    /**
     * Returns the record for a mobile number, blocking until the table has
     * been loaded for the first time.
     *
     * @param mobile 11-digit mobile number starting with 1
     * @return the matching record, or {@code null} if the prefix is unknown
     * @throws IllegalArgumentException if {@code mobile} is not a valid number
     */
    public static Location getLocation(String mobile) {
        String prefix = getNum7(mobile);
        awaitStarted();
        return locationMap.get(prefix);
    }

    /**
     * Returns the city of a mobile number, or {@code null} if unknown.
     *
     * @throws IllegalArgumentException if {@code mobile} is not a valid number
     */
    public static String getCity(String mobile) {
        // Single lookup: the map reference may be swapped between two reads.
        Location lc = getLocation(mobile);
        return lc == null ? null : lc.getCity();
    }

    /**
     * Returns the province of a mobile number, or {@code null} if unknown.
     *
     * @throws IllegalArgumentException if {@code mobile} is not a valid number
     */
    public static String getProvince(String mobile) {
        Location lc = getLocation(mobile);
        return lc == null ? null : lc.getProvince();
    }

    /**
     * Returns the operator of a mobile number, or {@code null} if unknown.
     *
     * @throws IllegalArgumentException if {@code mobile} is not a valid number
     */
    public static String getOperator(String mobile) {
        Location lc = getLocation(mobile);
        return lc == null ? null : lc.getOperator();
    }

    /**
     * Validates a mobile number and returns its first seven digits.
     *
     * @throws IllegalArgumentException if {@code mobile} is {@code null} or is
     *         not 11 digits starting with 1 (surrounding whitespace is ignored)
     */
    private static String getNum7(String mobile) {
        String trimmed = mobile == null ? null : mobile.trim();
        if (trimmed == null || !MOBILE_PATTERN.matcher(trimmed).matches()) {
            throw new IllegalArgumentException("传入的手机号码" + trimmed
                    + "不正确,请使用正确的11位数字号码");
        }
        return trimmed.substring(0, 7);
    }

    /**
     * Fetches the body of {@code uri} with an HTTP POST and returns it with
     * every line terminated by {@code "\r\n"}.
     *
     * @return the response body, or {@code null} if the request failed
     */
    private static String getContentByUri(String uri) {
        HttpURLConnection httpConnection = null;
        BufferedReader br = null;
        try {
            httpConnection = (HttpURLConnection) new URL(uri).openConnection();
            httpConnection.setConnectTimeout(1000000);
            httpConnection.setReadTimeout(1000000);
            httpConnection.setRequestProperty("User-Agent", "new");
            httpConnection.setRequestMethod("POST");
            InputStream in = httpConnection.getInputStream();
            br = new BufferedReader(new InputStreamReader(in, CHARSET));
            StringBuilder body = new StringBuilder();
            String line;
            while ((line = br.readLine()) != null) {
                body.append(line).append("\r\n");
            }
            return body.toString();
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        } finally {
            if (br != null) {
                try {
                    br.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if closing fails.
                }
            }
            if (httpConnection != null) {
                httpConnection.disconnect();
            }
        }
    }

    /** Counts the non-overlapping matches of the regular expression {@code reg} in {@code des}. */
    private static int getNumber(String des, String reg) {
        Matcher m = Pattern.compile(reg).matcher(des);
        int count = 0;
        while (m.find()) {
            count++;
        }
        return count;
    }

    /**
     * Placeholder for fetching remote content; not implemented.
     *
     * @return always {@code null}
     */
    public static String getUrlContent(String url) {
        return null;
    }

    /** Returns the CRC32 checksum of the UTF-8 encoding of {@code str}. */
    private static long crc32(String str) {
        CRC32 checksum = new CRC32();
        try {
            checksum.update(str.getBytes(CHARSET));
        } catch (IOException e) {
            // UTF-8 support is mandatory on every Java platform.
            throw new IllegalStateException("UTF-8 is not supported", e);
        }
        return checksum.getValue();
    }
}