map map

时间:2023-03-09 06:30:53
map map
下面的代码无法运行。
/**
 * Parses one tab-separated input record of the form
 * {@code <class>\t<word>\t<count>} and writes an add-one (Laplace) smoothed
 * probability for the word through {@code mos}.
 *
 * <p>Records that do not split into exactly three fields are ignored.
 *
 * @param key     input key supplied by the framework (not used here)
 * @param value   one input line
 * @param context task context; the job configuration must define
 *                {@code TOTALWORDS} as an integer
 * @throws IOException          if writing the output fails
 * @throws InterruptedException if the task is interrupted while writing
 * @throws NumberFormatException if {@code TOTALWORDS} or the count field is
 *                               not a valid integer
 */
@Override
protected void map(LongWritable key, Text value,
        Mapper<LongWritable, Text, Text, DoubleWritable>.Context context)
        throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    int tot = Integer.parseInt(conf.get("TOTALWORDS"));
    System.out.println("total === " + total);
    System.out.println("tot = " + tot);

    // Expected input lines (tab-separated), for example:
    //   ALB  weekend  1
    //   ALB  weeks    3
    // baseMap: class name -> list of "word\tcount" strings.
    Map<String, List<String>> baseMap = new HashMap<String, List<String>>();
    String[] temp = value.toString().split("\t");
    if (temp.length == 3) {
        // temp[0] is the class (folder) name.
        String wordAndNumber = temp[1] + "\t" + temp[2];
        List<String> oneList = baseMap.get(temp[0]);
        if (oneList == null) {
            oneList = new ArrayList<String>();
            baseMap.put(temp[0], oneList);
        }
        oneList.add(wordAndNumber);
    }

    // NOTE(review): baseMap is local to this call and is filled from a single
    // record, so the per-class total below only covers that record. Confirm
    // whether aggregation across records was intended.
    for (Map.Entry<String, List<String>> classEntry : baseMap.entrySet()) {
        List<String> wordsInClass = classEntry.getValue();

        // First pass: total number of word occurrences in this class.
        int allWordsInClass = 0;
        for (String wordAndCount : wordsInClass) {
            allWordsInClass += Integer.parseInt(wordAndCount.split("\t")[1]);
        }
        System.out.println(allWordsInClass);

        // Second pass walks the list again from the start. Reusing the iterator
        // from the first pass would yield nothing because it is already exhausted.
        // The denominator uses tot, the TOTALWORDS value parsed above.
        double denominator = (double) allWordsInClass + tot;
        for (String wordAndCount : wordsInClass) {
            String[] temp1 = wordAndCount.split("\t");
            // 1.0 forces floating-point division; int / int would truncate to 0.
            double p = (Integer.parseInt(temp1[1]) + 1.0) / denominator;
            String classAndWord = classEntry.getKey() + "\t" + temp1[0];
            className.set(classAndWord);
            number.set(p);
            LOG.info("------>p = " + p);
            // NOTE(review): the first argument is used as the output name here;
            // verify it matches what the job setup registered for mos.
            mos.write(classEntry.getKey(), temp1[0], p);
        }
    }
}

/**
 * Parses one tab-separated input record of the form
 * {@code <class>\t<word>\t<count>} and emits
 * {@code ("<class>\t<word>", p)} where
 * {@code p = (count + 1) / (allWordsInClass + TOTALWORDS)}.
 *
 * <p>Records that do not split into exactly three fields produce no output.
 *
 * @param key     input key supplied by the framework (not used here)
 * @param value   one input line
 * @param context task context; the job configuration must define
 *                {@code TOTALWORDS} as an integer
 * @throws IOException          if writing the output fails
 * @throws InterruptedException if the task is interrupted while writing
 * @throws NumberFormatException if {@code TOTALWORDS} or the count field is
 *                               not a valid integer
 */
protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, DoubleWritable>.Context context)
        throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    int tot = Integer.parseInt(conf.get("TOTALWORDS"));
    System.out.println("total === " + total);
    System.out.println("tot = " + tot);

    // Expected input lines (tab-separated), for example:
    //   ALB  weekend  1
    //   ALB  weeks    3
    // baseMap: class name -> (word -> count).
    Map<String, Map<String, Integer>> baseMap = new HashMap<String, Map<String, Integer>>();
    String[] temp = value.toString().split("\t");
    if (temp.length == 3) {
        // temp[0] is the class (folder) name.
        Map<String, Integer> oneMap = baseMap.get(temp[0]);
        if (oneMap == null) {
            oneMap = new HashMap<String, Integer>();
            baseMap.put(temp[0], oneMap);
        }
        oneMap.put(temp[1], Integer.parseInt(temp[2]));
    }

    // NOTE(review): baseMap is local to this call and is filled from a single
    // record, so allWordsInClass below equals that record's count rather than
    // a class-wide total. A class-wide total would need aggregation across
    // records — confirm the intended design.
    for (Map.Entry<String, Map<String, Integer>> classEntry : baseMap.entrySet()) {
        Map<String, Integer> wordCounts = classEntry.getValue();

        // First pass: total number of word occurrences in this class.
        int allWordsInClass = 0;
        for (Integer count : wordCounts.values()) {
            allWordsInClass += count;
        }
        System.out.println(allWordsInClass);

        // Second pass: add-one smoothed probability of each word.
        double denominator = (double) allWordsInClass + tot;
        for (Map.Entry<String, Integer> entry : wordCounts.entrySet()) {
            double p = (entry.getValue() + 1.0) / denominator;
            className.set(classEntry.getKey() + "\t" + entry.getKey());
            number.set(p);
            LOG.info("------>p = " + p);
            context.write(className, number);
        }
    }
}