首页 > 代码库 > hadoop学习-wordcount程序c++重写执行
hadoop学习-wordcount程序c++重写执行
1、程序执行命令:
hadoop pipes -D hadoop.pipes.java.recordreader=true -D hadoop.pipes.java.recordwriter=true -input /input/wordcount/sample.txt -output /output/wordcount -program /bin/wordcount
2、具体代码(依次为 wordcount.h、wordcount.cpp、main.cpp):
#include <algorithm> #include <stdint.h> #include <string> #include <vector> #include "Pipes.hh" #include "TemplateFactory.hh" #include "StringUtils.hh" #include <iostream> using namespace std; class WordcountMapper : public HadoopPipes::Mapper { public: WordcountMapper(HadoopPipes::TaskContext& context); vector<string> split(const string& src, const string& separator); void map(HadoopPipes::MapContext& context); }; class WordcountReducer : public HadoopPipes::Reducer { public: WordcountReducer(HadoopPipes::TaskContext& context); void reduce(HadoopPipes::ReduceContext& context); };
#include "wordcount.h"

// The mapper keeps no state, so the task context is not used.
WordcountMapper::WordcountMapper(HadoopPipes::TaskContext& context)
{
}

// Splits the current input value on single spaces and emits every word with
// the count 1.
void WordcountMapper::map(HadoopPipes::MapContext& context)
{
    // The emitted count is the same for every word, so serialize it once.
    const string one = HadoopUtils::toString(1);
    const vector<string> words = split(context.getInputValue(), " ");
    for (vector<string>::size_type i = 0; i < words.size(); ++i)
    {
        context.emit(words[i], one);
    }
}

// Returns the non-empty tokens of src, in order of appearance.
//
// src        text to tokenize
// separator  set of delimiter characters; any one of them ends a token and
//            a run of consecutive delimiters counts as a single break
//
// Leading and trailing delimiters produce no token, and an empty (or
// delimiter-only) src yields an empty vector, so blank input lines never
// turn into an empty-string word.
vector<string> WordcountMapper::split(const string& src, const string& separator)
{
    vector<string> dest;

    // Skip any leading delimiters so that the first token is never empty.
    string::size_type start = src.find_first_not_of(separator);
    while (start != string::npos)
    {
        const string::size_type end = src.find_first_of(separator, start);
        if (end == string::npos)
        {
            // Last token: it runs to the end of the string.
            dest.push_back(src.substr(start));
            break;
        }
        dest.push_back(src.substr(start, end - start));
        // Jump over the whole run of delimiters that follows the token.
        start = src.find_first_not_of(separator, end);
    }
    return dest;
}

// The reducer keeps no state, so the task context is not used.
WordcountReducer::WordcountReducer(HadoopPipes::TaskContext& context)
{
}

// Adds up every value delivered for the current key and emits the key with
// the total.
void WordcountReducer::reduce(HadoopPipes::ReduceContext& context)
{
    int wSum = 0;
    while (context.nextValue())
    {
        wSum += HadoopUtils::toInt(context.getInputValue());
    }
    context.emit(context.getInputKey(), HadoopUtils::toString(wSum));
}
#include "wordcount.h"

// Entry point of the pipes executable: hands a factory for the word-count
// mapper and reducer to the framework and lets it run the assigned task.
// The command-line arguments are not consulted.
int main(int argc, char *argv[])
{
    HadoopPipes::TemplateFactory<WordcountMapper, WordcountReducer> factory;

    // NOTE(review): the value returned by runTask is used directly as the
    // process exit status - confirm that this is the mapping the framework
    // expects.
    return HadoopPipes::runTask(factory);
}
makefile程序:
# Builds the wordcount pipes executable.
# HADOOP_DEV_HOME must point at a Hadoop installation that provides the
# include/ and lib/native/ directories referenced below.
.SUFFIXES:.h .c .cpp .o

CC = g++
CPPFLAGS = -m64
# -f keeps "make clean" from failing when there is nothing to remove.
RM = rm -f
SRCS = wordcount.cpp main.cpp
PROGRAM = wordcount
OBJS = $(SRCS:.cpp=.o)
INC_PATH = -I$(HADOOP_DEV_HOME)/include
LIB_PATH = -L$(HADOOP_DEV_HOME)/lib/native
LIBS = -lhadooppipes -lcrypto -lhadooputils -lpthread

# $^ is the full prerequisite list, $< the first prerequisite, $@ the target.
# The link step must use $^: $? would list only the objects newer than the
# executable and break incremental builds.
$(PROGRAM): $(OBJS)
	$(CC) $^ -Wall $(LIB_PATH) $(LIBS) -g -O2 -o $@

# One object per source file, rebuilt only when that source or the shared
# header changes.
%.o: %.cpp wordcount.h
	$(CC) $(CPPFLAGS) -c $< $(INC_PATH) -o $@

.PHONY: clean
clean:
	$(RM) $(PROGRAM) $(OBJS)
源数据(/input/wordcount/sample.txt):
Happiness is not about being immortal nor having food or rights in one's hand. It’s about having each tiny wish come true, or having something to eat when you are hungry or having someone's love when you need love
Happiness is not about being immortal nor having food or rights in one's hand. It’s about having each tiny wish come true, or having something to eat when you are hungry or having someone's love when you need love
Happiness is not about being immortal nor having food or rights in one's hand. It’s about having each tiny wish come true, or having something to eat when you are hungry or having someone's love when you need love
运行结果:
Happiness 3
It’s 3
about 6
are 3
being 3
come 3
each 3
eat 3
food 3
hand. 3
having 12
hungry 3
immortal 3
in 3
is 3
love 6
need 3
nor 3
not 3
one's 3
or 9
rights 3
someone's 3
something 3
tiny 3
to 3
true, 3
when 6
wish 3
you 6
hadoop学习-wordcount程序c++重写执行