使用MapReduce将HDFS数据导入到HBase（二）

首页 > 代码库 > 使用MapReduce将HDFS数据导入到HBase（二）

使用MapReduce将HDFS数据导入到HBase（二）

2024-11-07 21:17:39 206人阅读

package com.bank.service;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * Bulk-imports tab-separated text records from HDFS into an HBase table using MapReduce.
 *
 * <p>Rows are written through {@link TableOutputFormat}. Per the original author's note, that
 * output format hands each emitted {@link Put} to {@code table.put()} and calls
 * {@code flushCommits()} before the job ends so that data still sitting in the client write
 * buffer is persisted.
 *
 * <p>Usage: {@code CnyBatch [generic options] <dataInputDir> <tableName>}
 *
 * @author mengyao
 */
public class CnyBatch extends Configured implements Tool {

    /**
     * Identity mapper: forwards every input record unchanged. The key is whatever the input
     * format supplies (the line's byte offset when the default text input format is in use).
     */
    static class CnyBatchMapper extends Mapper<LongWritable, Text, LongWritable, Text> {
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws java.io.IOException, InterruptedException {
            context.write(key, value);
        }
    }

    /**
     * Turns each tab-separated line into one {@link Put}. The row key is the first four fields
     * joined with {@code '_'}; every field is stored as a column of family {@code info}.
     */
    static class CnyBatchReduce extends TableReducer<LongWritable, Text, NullWritable> {
        private final static String familyName = "info";
        private final static String[] qualifiers = {"gzh", "currency", "version", "valuta", "qfTime", "flag", "machineID"};
        /** Column family encoded once instead of once per cell. */
        private final static byte[] FAMILY_BYTES = Bytes.toBytes(familyName);

        /**
         * Writes one Put per well-formed line grouped under {@code key}.
         *
         * @param key     map output key; not used for the row key
         * @param value   all lines that share {@code key}
         * @param context task context used to emit the Puts
         */
        @Override
        protected void reduce(LongWritable key,
                java.lang.Iterable<Text> value, Context context)
                throws java.io.IOException, InterruptedException {
            // Iterate the grouped values: calling toString() on the Iterable itself does not
            // yield the record text, and several lines can share one key (same offset in
            // different input files), so each of them must be processed.
            for (Text line : value) {
                final String[] values = line.toString().split("\t");
                if (values.length != qualifiers.length) {
                    System.err.println(" ERROR: value length must equale qualifier length ");
                    continue;
                }
                final String row = values[0] + "_" + values[1] + "_" + values[2] + "_" + values[3];
                // One timestamp per row so that all cells of a record share the same version.
                final long timestamp = System.currentTimeMillis();
                final Put put = new Put(Bytes.toBytes(row));
                for (int i = 0; i < values.length; i++) {
                    put.add(FAMILY_BYTES, Bytes.toBytes(qualifiers[i]), timestamp, Bytes.toBytes(values[i]));
                }
                context.write(NullWritable.get(), put);
            }
        }
    }

    /**
     * Configures and runs the import job.
     *
     * @param arg0 remaining command-line arguments; {@code arg0[0]} is the HDFS input path. The
     *             target table is read from {@link TableOutputFormat#OUTPUT_TABLE} in the
     *             configuration, which {@link #main(String[])} sets.
     * @return 0 if the job succeeded, 1 if it failed, 2 if the input path is missing
     */
    @Override
    public int run(String[] arg0) throws Exception {
        if (arg0 == null || arg0.length < 1) {
            System.err.println(" ERROR: <dataInputDir> <tableName>");
            return 2;
        }

        Job job = Job.getInstance(getConf(), CnyBatch.class.getSimpleName());
        TableMapReduceUtil.addDependencyJars(job);
        job.setJarByClass(CnyBatch.class);

        FileInputFormat.setInputPaths(job, arg0[0]);
        job.setMapperClass(CnyBatchMapper.class);
        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(Text.class);

        job.setReducerClass(CnyBatchReduce.class);
        job.setOutputFormatClass(TableOutputFormat.class);

        return job.waitForCompletion(true) ? 0 : 1;
    }

    /**
     * Command-line entry point.
     *
     * @param args {@code [generic options] <dataInputDir> <tableName>}
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("hbase.zookeeper.quorum", "h5:2181,h6:2181,h7:2181");
        conf.set("hbase.zookeeper.property.clientPort", "2181");
        conf.set("dfs.socket.timeout", "100000");
        // Parse generic options (-D, -libjars, ...) into conf so that only the positional
        // arguments remain; indexing the raw args would pick up option tokens instead.
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length != 2) {
            System.err.println(" ERROR: <dataInputDir> <tableName>");
            System.exit(2);
        }
        conf.set(TableOutputFormat.OUTPUT_TABLE, otherArgs[1]);
        int status = ToolRunner.run(conf, new CnyBatch(), otherArgs);
        System.exit(status);
    }
}

使用MapReduce将HDFS数据导入到HBase（二）

声明：以上内容来自用户投稿及互联网公开渠道收集整理发布，本网站不拥有所有权，未作人工编辑处理，也不承担相关法律责任。若内容有误或涉及侵权，可进行投诉（投诉/举报）。工作人员会在5个工作日内联系你，一经查实，本站将立刻删除涉嫌侵权内容。

联系我们

首页 > 代码库 > 使用MapReduce将HDFS数据导入到HBase（二）

使用MapReduce将HDFS数据导入到HBase（二）

看完仍有疑问？有类似问题直接问程序猿