矩阵相乘的定义可以参考百度百科的解释:http://baike.baidu.com/view/2455255.htm?fr=aladdin
有a和b两个矩阵
a: 1 2 3
4 5 0
7 8 9
10 11 12
b: 10 15
0 2
11 9
c = a*b
1:将矩阵存到HDFS中:
矩阵a:
行 | 列 | 值 | hdfs存储 |
1 | 1 | 1 | 1,1,1 |
1 | 2 | 2 | 1,2,2 |
1 | 3 | 3 | 1,3,3 |
2 | 1 | 4 | 2,1,4 |
2 | 2 | 5 | 2,2,5 |
2 | 3 | 0 | 0不存储 |
3 | 1 | 7 | 3,1,7 |
3 | 2 | 8 | 3,2,8 |
3 | 3 | 9 | 3,3,9 |
4 | 1 | 10 | 4,1,10 |
4 | 2 | 11 | 4,2,11 |
4 | 3 | 12 | 4,3,12 |
矩阵b:
行 | 列 | 值 | hdfs存储 |
1 | 1 | 10 | 1,1,10 |
1 | 2 | 15 | 1,2,15 |
2 | 1 | 0 | 0不存储 |
2 | 2 | 2 | 2,2,2 |
3 | 1 | 11 | 3,1,11 |
3 | 2 | 9 | 3,2,9 |
2:a的map读取
读取的第一条记录是1,1,1,即矩阵a的第一行第一列的元素。它在计算c(1,1)和c(1,2)时都要用到(这里c只有2列;如果c有n列,那么它要在计算c(1,1),c(1,2),c(1,3)...c(1,n)的时候使用)。因此我们输出两条数据:key = 1,1 value = a,1,1 和 key = 1,2 value = a,1,1。其中key (1,1)、(1,2)是c(1,1)、c(1,2)的坐标,value的格式是"a,列号,值"。
b的map读取
读取的第一条记录是1,1,10,即矩阵b的第一行第一列的元素。它在计算c(1,1) c(2,1) c(3,1) c(4,1)时都要用到(这里c只有4行;如果c有m行,那么它要在计算c(1,1),c(2,1),c(3,1)...c(m,1)的时候使用)。因此我们输出四条数据:key = 1,1 value = b,1,10 , key = 2,1 value = b,1,10 , key = 3,1 value = b,1,10 , key = 4,1 value = b,1,10。其中key是c中对应元素的坐标,value的格式是"b,行号,值"。
3:reduce读取计算
通过mapA和mapB的输出,reduce可以得到 key = 1,1 对应的一组value:value = a,1,1 value = b,1,10 value = a,2,2 value = a,3,3 value = b,3,11,用它们来计算c(1,1)的值:把序号相同的a值和b值相乘后求和(没有存储的元素按0处理),即 1*10 + 2*0 + 3*11 = 43。
代码如下:
package MyMatrix;

import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.MultipleInputs;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * MapReduce job that multiplies two matrices, C = A * B.
 *
 * <p>Each input line is parsed as {@code row,col,value} with 1-based indices.
 * Every element of A is sent to all cells of C in its row, every element of B
 * to all cells of C in its column, and the reducer for a cell of C sums the
 * pairwise products. Elements that are missing from the input contribute 0.
 *
 * <p>The matrix dimensions are fixed by the constants below.
 */
public class MartrixMultiply {

    /** Number of columns of the result matrix. */
    public static final int COL_COUNT = 2;

    /** Number of rows of the result matrix. */
    public static final int ROW_COUNT = 4;

    /** Number of columns of matrix A, which equals the number of rows of matrix B. */
    public static final int BROW_ACOL = 3;

    /**
     * Mapper for matrix A. For an input line {@code row,col,value} it emits, for
     * every column {@code j} in {@code 1..COL_COUNT}, the pair
     * key {@code "row,j"} / value {@code "a,col,value"}.
     */
    public static class MartrixMaperA extends Mapper<LongWritable, Text, Text, Text> {

        /**
         * @param key     input key supplied by the input format (not used)
         * @param value   one line of matrix A in the form {@code row,col,value}
         * @param context sink for the emitted key/value pairs
         * @throws NumberFormatException          if a field is not an integer
         * @throws ArrayIndexOutOfBoundsException if the line has fewer than three fields
         */
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String line = value.toString().trim();
            if (line.isEmpty()) {
                // Blank lines carry no matrix element; skip them instead of
                // failing the task in Integer.parseInt("").
                return;
            }
            String[] items = line.split(",");
            int rowIndex = Integer.parseInt(items[0].trim());
            int colIndex = Integer.parseInt(items[1].trim());
            int valueInt = Integer.parseInt(items[2].trim());

            // The value does not depend on the target column, so build it once.
            Text outValue = new Text("a," + colIndex + "," + valueInt);
            for (int col = 1; col <= COL_COUNT; col++) {
                context.write(new Text(rowIndex + "," + col), outValue);
            }
        }
    }

    /**
     * Mapper for matrix B. For an input line {@code row,col,value} it emits, for
     * every row {@code i} in {@code 1..ROW_COUNT}, the pair
     * key {@code "i,col"} / value {@code "b,row,value"}.
     */
    public static class MartrixMaperB extends Mapper<LongWritable, Text, Text, Text> {

        /**
         * @param key     input key supplied by the input format (not used)
         * @param value   one line of matrix B in the form {@code row,col,value}
         * @param context sink for the emitted key/value pairs
         * @throws NumberFormatException          if a field is not an integer
         * @throws ArrayIndexOutOfBoundsException if the line has fewer than three fields
         */
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String line = value.toString().trim();
            if (line.isEmpty()) {
                // Blank lines carry no matrix element; skip them instead of
                // failing the task in Integer.parseInt("").
                return;
            }
            String[] items = line.split(",");
            int rowIndex = Integer.parseInt(items[0].trim());
            int colIndex = Integer.parseInt(items[1].trim());
            int valueInt = Integer.parseInt(items[2].trim());

            // The value does not depend on the target row, so build it once.
            Text outValue = new Text("b," + rowIndex + "," + valueInt);
            for (int row = 1; row <= ROW_COUNT; row++) {
                context.write(new Text(row + "," + colIndex), outValue);
            }
        }
    }

    /**
     * Reducer for one cell of C. The incoming values are {@code "a,k,value"} and
     * {@code "b,k,value"} records; the result is the sum over {@code k} of
     * {@code a[k] * b[k]}.
     */
    public static class MartrixReducer extends Reducer<Text, Text, Text, IntWritable> {

        /**
         * @param key     coordinate of the result cell, as emitted by the mappers
         * @param values  tagged records from both mappers for this cell
         * @param context sink for the computed cell value
         * @throws ArrayIndexOutOfBoundsException if an index {@code k} is outside
         *                                        {@code 1..BROW_ACOL}
         */
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            // Java zero-initialises int arrays, so elements that never arrive
            // (zeros are not stored in the input) count as 0 in the sum below.
            int[] valueA = new int[BROW_ACOL];
            int[] valueB = new int[BROW_ACOL];

            for (Text record : values) {
                String[] items = record.toString().split(",");
                int index = Integer.parseInt(items[1]) - 1; // 1-based -> 0-based
                int element = Integer.parseInt(items[2]);
                if (items[0].equals("a")) {
                    valueA[index] = element;
                } else if (items[0].equals("b")) {
                    valueB[index] = element;
                }
            }

            int result = 0;
            for (int i = 0; i < BROW_ACOL; i++) {
                result += valueA[i] * valueB[i];
            }
            context.write(key, new IntWritable(result));
        }
    }

    /**
     * Configures and runs the job, then exits with status 0 on success and 1 on
     * failure. Input and output locations are the fixed HDFS paths below.
     *
     * @param args command-line arguments (not used)
     */
    public static void main(String[] args)
            throws IOException, ClassNotFoundException, InterruptedException {
        Path pathA = new Path("hdfs://localhost:9000/Martrix/a.txt");
        Path pathB = new Path("hdfs://localhost:9000/Martrix/b.txt");
        Path pathOut = new Path("hdfs://localhost:9000/Martrix/out");

        Configuration conf = new Configuration();
        // Job.getInstance replaces the deprecated Job(Configuration, String) constructor.
        Job job = Job.getInstance(conf, "MartrixMultiply");
        job.setJarByClass(MartrixMultiply.class);

        // Each input file gets its own mapper; both feed the same reducer.
        MultipleInputs.addInputPath(job, pathA, TextInputFormat.class, MartrixMaperA.class);
        MultipleInputs.addInputPath(job, pathB, TextInputFormat.class, MartrixMaperB.class);

        job.setReducerClass(MartrixReducer.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileOutputFormat.setOutputPath(job, pathOut);

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}