
Hadoop Big Data: A First Program -- WordCount

The Mapper class:

```java
package lyc.yushao.hadoop.mr.wc;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class WCMapper extends Mapper<LongWritable, Text, Text, LongWritable> {

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // first, receive a line of input
        String line = value.toString();
        // split it into words
        String[] words = line.split(" ");
        // loop over the words and send each one downstream
        for (String w : words) {
            context.write(new Text(w), new LongWritable(1));
        }
    }
}
```
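To see what the map phase actually emits, here is a plain-JDK sketch (no Hadoop needed; `MapSketch` and its sample input are illustrative, not part of the job) of the same split-and-emit logic, assuming space-separated words as above:

```java
import java.util.AbstractMap.SimpleEntry;
import java.util.ArrayList;
import java.util.List;
import java.util.Map.Entry;

public class MapSketch {
    // Mirrors WCMapper.map: split the line on spaces and emit a (word, 1) pair
    // for every occurrence -- duplicates are NOT combined at this stage.
    static List<Entry<String, Long>> map(String line) {
        List<Entry<String, Long>> out = new ArrayList<>();
        for (String w : line.split(" ")) {
            out.add(new SimpleEntry<>(w, 1L));
        }
        return out;
    }

    public static void main(String[] args) {
        // "hello world hello" produces three pairs: (hello,1) (world,1) (hello,1)
        for (Entry<String, Long> e : map("hello world hello")) {
            System.out.println(e.getKey() + "\t" + e.getValue());
        }
    }
}
```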

The Reducer class:

```java
package lyc.yushao.hadoop.mr.wc;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class WCReducer extends Reducer<Text, LongWritable, Text, LongWritable> {

    @Override
    protected void reduce(Text key, Iterable<LongWritable> values,
            Context context) throws IOException, InterruptedException {
        // define a counter
        long counter = 0;

        // receive the values for this key and loop over them, summing
        for (LongWritable i : values) {
            counter += i.get();
        }
        // send the word with its total count
        context.write(key, new LongWritable(counter));
    }
}
```
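Between the two classes, the framework shuffles: it groups every (word, 1) pair by word before calling `reduce` once per key. The grouping plus the reducer's summing loop can be sketched in plain JDK code like this (`ReduceSketch` is an illustration, not part of the job):

```java
import java.util.LinkedHashMap;
import java.util.Map;

public class ReduceSketch {
    // Mirrors the shuffle plus WCReducer.reduce: group (word, 1) pairs by word
    // and sum the counts for each key.
    static Map<String, Long> countWords(String[] words) {
        Map<String, Long> counts = new LinkedHashMap<>();
        for (String w : words) {
            // merge() adds 1 to the existing count, or starts the count at 1
            counts.merge(w, 1L, Long::sum);
        }
        return counts;
    }

    public static void main(String[] args) {
        Map<String, Long> counts =
                countWords(new String[] {"hello", "world", "hello"});
        System.out.println(counts); // {hello=2, world=1}
    }
}
```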

 

The WordCount driver class:

```java
package lyc.yushao.hadoop.mr.wc;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCount {

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // model the MapReduce computation as a Job
        Job job = Job.getInstance(conf);

        // notice: required so Hadoop can locate the jar containing this class
        job.setJarByClass(WordCount.class);

        // wire the custom classes together

        // set the mapper's properties
        job.setMapperClass(WCMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);

        // read the input data from HDFS
        FileInputFormat.setInputPaths(job, new Path("/words.txt"));

        // set the reducer's properties
        job.setReducerClass(WCReducer.class);
        // write the output to HDFS
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);
        FileOutputFormat.setOutputPath(job, new Path("/wcout111"));

        // submit the job; job.submit() returns immediately, which is not
        // what we want here, so block until the job finishes instead
        // job.submit();
        job.waitForCompletion(true);
    }
}
```

 

Right-click the project name:

Export

JAR File


Finish

On the command line:

hadoop jar /root/mrs.jar

(If the jar's manifest does not specify a main class, append the fully qualified class name, e.g. `hadoop jar /root/mrs.jar lyc.yushao.hadoop.mr.wc.WordCount`.)


SUCCESS!!
