Title: Map Reduce Programming
1Map Reduce Programming
- ??? ???
- Jazz_at_nchc.org.tw
- waue_at_nchc.org.tw
- ???????????(NCHC)
2Outline
- ??
- ?????????????
- ???
- Hadoop ? Hello World => Word Count
- ??
- ???
- ???
- ??? => Word Count 2
- ??
- ???
3Program Prototype (v 0.18)
???? ??
- Class MR
- Class Mapper
-
- Class Reducer
-
- main()
- JobConf conf = new JobConf(MR.class);
- conf.setMapperClass(Mapper.class);
- conf.setReducerClass(Reducer.class);
- FileInputFormat.setInputPaths(conf, new Path(args[0]));
- FileOutputFormat.setOutputPath(conf, new Path(args[1]));
- JobClient.runJob(conf);
Map ?
Map ???
Reduce ?
Reduce ???
???
??????????
4Class Mapper
???? ??
class MyMap extends MapReduceBase implements Mapper<  ,  ,  ,  > {
  // ?????
  public void map(  key,  value, OutputCollector<  ,  > output, Reporter reporter) throws IOException {
    // ??????????
    output.collect(NewKey, NewValue);
  }
}
1 2 3 4 5 6 7 8 9
INPUT KEY
OUTPUT VALUE
OUTPUT KEY
INPUT VALUE
INPUT VALUE
INPUT KEY
OUTPUT VALUE
OUTPUT KEY
5Class Reducer
???? ??
class MyRed extends MapReduceBase implements Reducer<  ,  ,  ,  > {
  // ?????
  public void reduce(  key, Iterator<  > values, OutputCollector<  ,  > output, Reporter reporter) throws IOException {
    // ??????????
    output.collect(NewKey, NewValue);
  }
}
1 2 3 4 5 6 7 8 9
INPUT KEY
OUTPUT VALUE
OUTPUT KEY
INPUT VALUE
INPUT VALUE
INPUT KEY
OUTPUT VALUE
OUTPUT KEY
6Class Combiner
???? ??
- ????combiner,???????????????,????????Mapper ?
Reducer?????? - ???????Hadoop??
- ????????,??Reducer
- ??
- JobConf.setCombinerClass(Class)
7Run Job
???? ??
- runJob(JobConf )
- ????,??????????
- submitJob(JobConf )
- ?????,??????????? RunningJob ?????,????????
- JobConf.setJobEndNotificationURI(String )
- ??????????,??????
8Word Count Sample (1)
?? ???
class MapClass extends MapReduceBase implements Mapper<LongWritable, Text, Text, IntWritable> {
  private final static IntWritable one = new IntWritable(1);
  private Text word = new Text();
  public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException {
    String line = ((Text) value).toString();
    StringTokenizer itr = new StringTokenizer(line);
    while (itr.hasMoreTokens()) {
      word.set(itr.nextToken());
      output.collect(word, one);
    }
  }
}
1 2 3 4 5 6 7 8 9
<word, one>
< no , 1 >
< news , 1 >
< is , 1 >
< a, 1 >
< good , 1 >
< news, 1 >
9Word Count Sample (2)
?? ???
class ReduceClass extends MapReduceBase implements Reducer<Text, IntWritable, Text, IntWritable> {
  IntWritable SumValue = new IntWritable();
  public void reduce(Text key, Iterator<IntWritable> values, OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException {
    int sum = 0;
    while (values.hasNext()) {
      sum += values.next().get();
    }
    SumValue.set(sum);
    output.collect(key, SumValue);
  }
}
1 2 3 4 5 6 7 8
<word, one>
news
< no , 1 >
<key, SumValue>
< news , 1 >
1
1
< news , 2 >
< is , 1 >
< a, 1 >
< good , 1 >
< news, 1 >
10Word Count Sample (3)
?? ???
Class WordCount {
  main() {
    JobConf conf = new JobConf(WordCount.class);
    conf.setJobName("wordcount");
    // set path
    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));
    // set map reduce
    conf.setMapperClass(MapClass.class);
    conf.setCombinerClass(ReduceClass.class);
    conf.setReducerClass(ReduceClass.class);
    // run
    JobClient.runJob(conf);
  }
}
11?????
???? ????
- ??
- javac ? -classpath ? hadoop-*-core.jar ? -d ? MyJava ? MyCode.java
- ??
- jar ? -cvf ? MyJar.jar ? -C ? MyJava ? .
- ??
- bin/hadoop ? jar ? MyJar.jar ? MyCode ? HDFS_Input/ ? HDFS_Output/
- ???????HDFS??input??
- ./input ./output hdfs????????
- ????????Hadoop_Home
- ./MyJava ????????
- MyJar.jar ???????
12WordCount1 ?? (I)
??? ???
- cd HADOOP_HOME
- bin/hadoop dfs -mkdir input
- echo "I like NCHC Cloud Course." > inputwc/input1
- echo "I like nchc Cloud Course, and we enjoy this crouse." > inputwc/input2
- bin/hadoop dfs -put inputwc inputwc
- bin/hadoop dfs -ls input
13WordCount1 ?? (II)
??? ???
- ??WordCount.java http://trac.nchc.org.tw/cloud/attachment/wiki/jazz/Hadoop_Lab6/WordCount.java?format=raw
- mkdir MyJava
- javac -classpath hadoop-*-core.jar -d MyJava WordCount.java
- jar -cvf wordcount.jar -C MyJava .
- bin/hadoop jar wordcount.jar WordCount input/ output/
- ????????Hadoop_Home(??hadoop-*-core.jar )
- javac?????classpath, ?hadoop jar???
- wordcount.jar ???????,???????class name
- Hadoop?????,?? input ????hdfs?,??hadoop???????(wordcount.jar)????,?????node??,???????java??
14WordCount1 ??(III)
??? ???
15WordCount1 ??(IV)
??? ???
16WordCount ???
??? ???
- WordCount2
- http://trac.nchc.org.tw/cloud/attachment/wiki/jazz/Hadoop_Lab6/WordCount2.java?format=raw
- ??
- ??????
- ?????
- ?? (?? WordCount ???)
- echo "\." > pattern.txt && echo "\," >> pattern.txt
- bin/hadoop dfs -put pattern.txt ./
- mkdir MyJava2
- javac -classpath hadoop-*-core.jar -d MyJava2 WordCount2.java
- jar -cvf wordcount2.jar -C MyJava2 .
17??????
??? ???
- ??
- bin/hadoop jar wordcount2.jar WordCount2 input output2 -skip pattern.txt
- bin/hadoop dfs -cat output2/part-00000
18?????
??? ???
- ??
- bin/hadoop jar wordcount2.jar WordCount2 -Dwordcount.case.sensitive=false input output3 -skip pattern.txt
19Tool
??? ????
- ??Hadoop???????
- -conf <configuration file>
- -D <property=value>
- -fs <local|namenode:port>
- -jt <local|jobtracker:port>
- ??????????
- ToolRunner.run(Tool, String[])
20DistributedCache
??? ????
- ???????????????????????????????????
- ?pattern.txt?
- DistributedCache.addCacheFile(URI,conf)
- URI = hdfs://host:port/FilePath
21Options without Java
?JAVA???
- ??Hadoop????Java??,?Map/Reduce?????????? Java??
- Hadoop Streaming
- ???????,?????????? (?PHP)???Hadoop?mapper?reducer
- Hadoop Pipes: C++ API