首页 > 代码库 > hadoop学习-wordcount程序c++重写执行

hadoop学习-wordcount程序c++重写执行

1、程序执行命令:

hadoop pipes -D hadoop.pipes.java.recordreader=true -D hadoop.pipes.java.recordwriter=true -input /input/wordcount/sample.txt -output /output/wordcount -program /bin/wordcount

2、具体代码:

#include <algorithm> 
#include <stdint.h>
#include <string>
#include <vector>
#include "Pipes.hh"  
#include "TemplateFactory.hh"  
#include "StringUtils.hh" 
#include <iostream>
using namespace std;

/**
 * Map side of the word-count job.
 *
 * For every input value it receives, map() cuts the text into words with
 * split() and emits one record per word.
 */
class WordcountMapper : public HadoopPipes::Mapper
{
public:
    /**
     * Creates the mapper for a task.
     *
     * Declared explicit so that a TaskContext can never be converted into a
     * mapper implicitly.
     *
     * @param context task context supplied by the Pipes framework
     */
    explicit WordcountMapper(HadoopPipes::TaskContext& context);

    /**
     * Cuts src into pieces at the characters contained in separator; a run of
     * consecutive separator characters acts as a single delimiter.
     *
     * Declared static because it works only on its arguments and needs no
     * mapper instance; existing calls of the form split(a, b) are unaffected.
     *
     * @param src        text to cut
     * @param separator  set of delimiter characters
     * @return the pieces in their original order
     */
    static vector<string> split(const string& src, const string& separator);

    /**
     * Processes the current input value and emits one record per word.
     *
     * @param context map context giving access to the input value and emit()
     */
    void map(HadoopPipes::MapContext& context);
};

/**
 * Reduce side of the word-count job.
 *
 * reduce() adds up all values delivered for one key and emits the total.
 */
class WordcountReducer : public HadoopPipes::Reducer
{
public:
    /**
     * Creates the reducer for a task.
     *
     * Declared explicit so that a TaskContext can never be converted into a
     * reducer implicitly.
     *
     * @param context task context supplied by the Pipes framework
     */
    explicit WordcountReducer(HadoopPipes::TaskContext& context);

    /**
     * Sums the values of the current key and emits (key, sum).
     *
     * @param context reduce context giving access to the key, its values and emit()
     */
    void reduce(HadoopPipes::ReduceContext& context);
};

#include "wordcount.h"

// The mapper has no data members, so there is nothing to initialise and the
// task context is deliberately left unused.
WordcountMapper::WordcountMapper(HadoopPipes::TaskContext& context) {}

/**
 * Emits one (word, "1") record for every word of the current input value.
 *
 * The value is cut at space characters by split(); no other character is
 * treated as a delimiter. Empty pieces are skipped, so blank lines and
 * leading spaces do not produce a record with an empty key.
 *
 * @param context map context providing the input value and receiving the output
 */
void WordcountMapper::map(HadoopPipes::MapContext& context)
{
    // Binding to a const reference avoids a copy of the line when
    // getInputValue() hands back a reference.
    const string& line = context.getInputValue();

    // Every occurrence counts once; render that constant a single time
    // instead of once per word.
    const string one = HadoopUtils::toString(1);

    const vector<string> words = split(line, " ");
    for (vector<string>::const_iterator word = words.begin(); word != words.end(); ++word)
    {
        // Defensive: an empty token is never a word and must not be counted.
        if (word->empty())
        {
            continue;
        }
        context.emit(*word, one);
    }
}

/**
 * Cuts src into tokens at the characters contained in separator.
 *
 * Runs of separator characters count as one delimiter, and separators at the
 * start or end of src are ignored, so the result never contains an empty
 * token. An empty src, or one made up solely of separator characters, yields
 * an empty vector.
 *
 * @param src        text to cut
 * @param separator  set of delimiter characters; if empty, a non-empty src is
 *                   returned as a single token
 * @return the tokens in their original order
 */
vector<string> WordcountMapper::split(const string& src, const string& separator)
{
    vector<string> dest;

    // Jump over leading separators; npos means there is no token at all.
    string::size_type start = src.find_first_not_of(separator);
    while (start != string::npos)
    {
        const string::size_type end = src.find_first_of(separator, start);
        if (end == string::npos)
        {
            // Last token: it runs to the end of the text.
            dest.push_back(src.substr(start));
            break;
        }
        dest.push_back(src.substr(start, end - start));
        // Skip the whole run of separators that follows this token.
        start = src.find_first_not_of(separator, end);
    }
    return dest;
}
 
// The reducer has no data members, so there is nothing to initialise and the
// task context is deliberately left unused.
WordcountReducer::WordcountReducer(HadoopPipes::TaskContext& context) {}
/**
 * Adds up every value delivered for the current key and emits (key, total).
 *
 * Each value is parsed with HadoopUtils::toInt and the total is written back
 * as text via HadoopUtils::toString.
 *
 * @param context reduce context providing the key, its values and emit()
 */
void WordcountReducer::reduce(HadoopPipes::ReduceContext& context)
{
    int total = 0;

    // nextValue() advances to the next value of this key; stop when it reports none left.
    while (context.nextValue())
    {
        const int occurrences = HadoopUtils::toInt(context.getInputValue());
        total += occurrences;
    }

    context.emit(context.getInputKey(), HadoopUtils::toString(total));
}


#include "wordcount.h"

int main(int argc, char *argv[]) 
{  
  return HadoopPipes::runTask(HadoopPipes::TemplateFactory<WordcountMapper, WordcountReducer>());  
}  

makefile程序:

.SUFFIXES:.h .c .cpp .o

CC=g++
CPPFLAGS = -m64
# Warnings, debug info and optimisation only matter while compiling, so they
# are applied in the compile rule (and repeated at link time for symmetry).
CXXFLAGS = -Wall -g -O2
# -f keeps "make clean" from failing when there is nothing to remove.
RM = rm -f
SRCS = wordcount.cpp main.cpp
PROGRAM = wordcount
OBJS=$(SRCS:.cpp=.o)

INC_PATH = -I$(HADOOP_DEV_HOME)/include
LIB_PATH = -L$(HADOOP_DEV_HOME)/lib/native
LIBS = -lhadooppipes -lcrypto -lhadooputils -lpthread

# $^ is the full list of prerequisites and $@ is the target. $? would expand
# only to the prerequisites newer than the target, so an incremental rebuild
# would try to link a subset of the object files.
$(PROGRAM):$(OBJS)
	$(CC) $(CPPFLAGS) $(CXXFLAGS) $^ $(LIB_PATH) $(LIBS) -o $@

# Build each object from its own source file ($<) and rebuild it when the
# shared header changes.
%.o:%.cpp wordcount.h
	$(CC) $(CPPFLAGS) $(CXXFLAGS) $(INC_PATH) -c $< -o $@

.PHONY:clean
clean:
	$(RM) $(PROGRAM) $(OBJS)
	
	


源数据:

Happiness is not about being immortal nor having food or rights in one's hand. It's about having each tiny wish come true, or having something to eat when you are hungry or having someone's love when you need love
Happiness is not about being immortal nor having food or rights in one's hand. It's about having each tiny wish come true, or having something to eat when you are hungry or having someone's love when you need love
Happiness is not about being immortal nor having food or rights in one's hand. It's about having each tiny wish come true, or having something to eat when you are hungry or having someone's love when you need love

执行结果:

Happiness 3
It's 3
about 6
are 3
being 3
come 3
each 3
eat 3
food 3
hand. 3
having 12
hungry 3
immortal 3
in 3
is 3
love 6
need 3
nor 3
not 3
one's 3
or 9
rights 3
someone's 3
something 3
tiny 3
to 3
true, 3
when 6
wish 3
you 6



hadoop学习-wordcount程序c++重写执行