首页 > 代码库 > 【甘道夫】Win7环境下Eclipse连接Hadoop2.2.0

【甘道夫】Win7环境下Eclipse连接Hadoop2.2.0

准备:
确保hadoop2.2.0集群正常运行

1.eclipse中建立java工程,导入hadoop2.2.0相关jar包

2.在src根目录下拷入log4j.properties,通过log4j查看详细日志
log4j.rootLogger=debug, stdout, R
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern=%5p - %m%n
log4j.appender.R=org.apache.log4j.RollingFileAppender
log4j.appender.R.File=firestorm.log
log4j.appender.R.MaxFileSize=100KB
log4j.appender.R.MaxBackupIndex=1
log4j.appender.R.layout=org.apache.log4j.PatternLayout
log4j.appender.R.layout.ConversionPattern=%p %t %c - %m%n
log4j.logger.com.codefutures=DEBUG

3.拷入一个可执行的hadoop程序,我用的是一个HdfsDAO,可以先保证HDFS操作能执行
import java.io.IOException;
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.mapred.JobConf;
public class HdfsDAO {
    private static final String HDFS = "hdfs://192.168.0.160:9000/";
    public HdfsDAO(Configuration conf) {
        this(HDFS, conf);
    }
    public HdfsDAO(String hdfs, Configuration conf) {
        this.hdfsPath = hdfs;
        this.conf = conf;
    }
    private String hdfsPath;
    private Configuration conf;
    public static void main(String[] args) throws IOException {
        JobConf conf = config();
        HdfsDAO hdfs = new HdfsDAO(conf);
//        hdfs.copyFile("datafile/item.csv", "/tmp/new");
//        hdfs.ls("/tmp/new");
        hdfs.ls("/");
    }        
    
    public static JobConf config(){
        JobConf conf = new JobConf(HdfsDAO.class);
        conf.setJobName("HdfsDAO");
        conf.addResource("classpath:/hadoop/core-site.xml");
        conf.addResource("classpath:/hadoop/hdfs-site.xml");
        conf.addResource("classpath:/hadoop/mapred-site.xml");
        return conf;
    }
    
    public void mkdirs(String folder) throws IOException {
        Path path = new Path(folder);
        FileSystem fs = FileSystem.get(URI.create(hdfsPath), conf);
        if (!fs.exists(path)) {
            fs.mkdirs(path);
            System.out.println("Create: " + folder);
        }
        fs.close();
    }
    public void rmr(String folder) throws IOException {
        Path path = new Path(folder);
        FileSystem fs = FileSystem.get(URI.create(hdfsPath), conf);
        fs.deleteOnExit(path);
        System.out.println("Delete: " + folder);
        fs.close();
    }
    public void ls(String folder) throws IOException {
        Path path = new Path(folder);
        FileSystem fs = FileSystem.get(URI.create(hdfsPath), conf);
        FileStatus[] list = fs.listStatus(path);
        System.out.println("ls: " + folder);
        System.out.println("==========================================================");
        for (FileStatus f : list) {
            System.out.printf("name: %s, folder: %s, size: %d\n", f.getPath(), f.isDir(), f.getLen());
        }
        System.out.println("==========================================================");
        fs.close();
    }
    public void createFile(String file, String content) throws IOException {
        FileSystem fs = FileSystem.get(URI.create(hdfsPath), conf);
        byte[] buff = content.getBytes();
        FSDataOutputStream os = null;
        try {
            os = fs.create(new Path(file));
            os.write(buff, 0, buff.length);
            System.out.println("Create: " + file);
        } finally {
            if (os != null)
                os.close();
        }
        fs.close();
    }
    public void copyFile(String local, String remote) throws IOException {
        FileSystem fs = FileSystem.get(URI.create(hdfsPath), conf);
        fs.copyFromLocalFile(new Path(local), new Path(remote));
        System.out.println("copy from: " + local + " to " + remote);
        fs.close();
    }
    public void download(String remote, String local) throws IOException {
        Path path = new Path(remote);
        FileSystem fs = FileSystem.get(URI.create(hdfsPath), conf);
        fs.copyToLocalFile(path, new Path(local));
        System.out.println("download: from" + remote + " to " + local);
        fs.close();
    }
    
    public void cat(String remoteFile) throws IOException {
        Path path = new Path(remoteFile);
        FileSystem fs = FileSystem.get(URI.create(hdfsPath), conf);
        FSDataInputStream fsdis = null;
        System.out.println("cat: " + remoteFile);
        try {  
            fsdis =fs.open(path);
            IOUtils.copyBytes(fsdis, System.out, 4096, false);  
          } finally {  
            IOUtils.closeStream(fsdis);
            fs.close();
          }
    }
    public void location() throws IOException {
        // String folder = hdfsPath + "create/";
        // String file = "t2.txt";
        // FileSystem fs = FileSystem.get(URI.create(hdfsPath), new
        // Configuration());
        // FileStatus f = fs.getFileStatus(new Path(folder + file));
        // BlockLocation[] list = fs.getFileBlockLocations(f, 0, f.getLen());
        //
        // System.out.println("File Location: " + folder + file);
        // for (BlockLocation bl : list) {
        // String[] hosts = bl.getHosts();
        // for (String host : hosts) {
        // System.out.println("host:" + host);
        // }
        // }
        // fs.close();
    }
}

4.运行HdfsDAO
报错:
java.io.IOExceptionHADOOP_HOME or hadoop.home.dir are not set.
    at org.apache.hadoop.util.Shell.checkHadoopHome(Shell.java:225)
    at org.apache.hadoop.util.Shell.<clinit>(Shell.java:250)
    at org.apache.hadoop.util.StringUtils.<clinit>(StringUtils.java:76)
    at org.apache.hadoop.conf.Configuration.getTrimmedStrings(Configuration.java:1546)
    at org.apache.hadoop.hdfs.DFSClient.<init>(DFSClient.java:519)
    at org.apache.hadoop.hdfs.DFSClient.<init>(DFSClient.java:453)
    at org.apache.hadoop.hdfs.DistributedFileSystem.initialize(DistributedFileSystem.java:136)
    at org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:2433)
    at org.apache.hadoop.fs.FileSystem.access$200(FileSystem.java:88)
    at org.apache.hadoop.fs.FileSystem$Cache.getInternal(FileSystem.java:2467)
    at org.apache.hadoop.fs.FileSystem$Cache.get(FileSystem.java:2449)
    at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:367)
    at HdfsDAO.copyFile(HdfsDAO.java:94)
    at HdfsDAO.main(HdfsDAO.java:34)
ERROR - Failed to locate the winutils binary in the hadoop binary path
java.io.IOExceptionCould not locate executable null\bin\winutils.exe in the Hadoop binaries.
    at org.apache.hadoop.util.Shell.getQualifiedBinPath(Shell.java:278)
    at org.apache.hadoop.util.Shell.getWinUtilsPath(Shell.java:300)
    at org.apache.hadoop.util.Shell.<clinit>(Shell.java:293)
    at org.apache.hadoop.util.StringUtils.<clinit>(StringUtils.java:76)
    at org.apache.hadoop.conf.Configuration.getTrimmedStrings(Configuration.java:1546)
    at org.apache.hadoop.hdfs.DFSClient.<init>(DFSClient.java:519)
    at org.apache.hadoop.hdfs.DFSClient.<init>(DFSClient.java:453)
    at org.apache.hadoop.hdfs.DistributedFileSystem.initialize(DistributedFileSystem.java:136)
    at org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:2433)
    at org.apache.hadoop.fs.FileSystem.access$200(FileSystem.java:88)
    at org.apache.hadoop.fs.FileSystem$Cache.getInternal(FileSystem.java:2467)
    at org.apache.hadoop.fs.FileSystem$Cache.get(FileSystem.java:2449)
    at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:367)
    at HdfsDAO.copyFile(HdfsDAO.java:94)
    at HdfsDAO.main(HdfsDAO.java:34)

解决:
首先,在win7中设置环境变量HADOOP_HOME,指向win7中的hadoop2.2.0根目录。
然后,到 https://github.com/srccodes/hadoop-common-2.2.0-bin 去下载hadoop2.2.0的bin,里面有winutils.exe
将其拷贝到 $HADOOP_HOME/bin 下。

5.重新运行,报错
Exception in thread "main" java.net.ConnectException: Call From WIN-CMM62V9I3VG/192.168.0.40 to singlehadoop:9000 failed on connection exception: java.net.ConnectException: Connection refused: no further information; For more details see:  http://wiki.apache.org/hadoop/ConnectionRefused
    at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
    at sun.reflect.NativeConstructorAccessorImpl.newInstance(Unknown Source)
    at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(Unknown Source)
    at java.lang.reflect.Constructor.newInstance(Unknown Source)
    at org.apache.hadoop.net.NetUtils.wrapWithMessage(NetUtils.java:783)
    at org.apache.hadoop.net.NetUtils.wrapException(NetUtils.java:730)
    at org.apache.hadoop.ipc.Client.call(Client.java:1351)
    at org.apache.hadoop.ipc.Client.call(Client.java:1300)
    at org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:206)
    at com.sun.proxy.$Proxy9.getListing(Unknown Source)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(Unknown Source)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(Unknown Source)
    at java.lang.reflect.Method.invoke(Unknown Source)
    at org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:186)
    at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:102)
    at com.sun.proxy.$Proxy9.getListing(Unknown Source)
    at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolTranslatorPB.getListing(ClientNamenodeProtocolTranslatorPB.java:482)
    at org.apache.hadoop.hdfs.DFSClient.listPaths(DFSClient.java:1660)
    at org.apache.hadoop.hdfs.DFSClient.listPaths(DFSClient.java:1643)
    at org.apache.hadoop.hdfs.DistributedFileSystem.listStatusInternal(DistributedFileSystem.java:640)
    at org.apache.hadoop.hdfs.DistributedFileSystem.access$600(DistributedFileSystem.java:92)
    at org.apache.hadoop.hdfs.DistributedFileSystem$14.doCall(DistributedFileSystem.java:702)
    at org.apache.hadoop.hdfs.DistributedFileSystem$14.doCall(DistributedFileSystem.java:698)
    at org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81)
    at org.apache.hadoop.hdfs.DistributedFileSystem.listStatus(DistributedFileSystem.java:698)
    at HdfsDAO.ls(HdfsDAO.java:69)
    at HdfsDAO.main(HdfsDAO.java:36)
Caused by: java.net.ConnectException: Connection refused: no further information
    at sun.nio.ch.SocketChannelImpl.checkConnect(Native Method)
    at sun.nio.ch.SocketChannelImpl.finishConnect(Unknown Source)
    at org.apache.hadoop.net.SocketIOWithTimeout.connect(SocketIOWithTimeout.java:206)
    at org.apache.hadoop.net.NetUtils.connect(NetUtils.java:529)
    at org.apache.hadoop.net.NetUtils.connect(NetUtils.java:493)
    at org.apache.hadoop.ipc.Client$Connection.setupConnection(Client.java:547)
    at org.apache.hadoop.ipc.Client$Connection.setupIOstreams(Client.java:642)
    at org.apache.hadoop.ipc.Client$Connection.access$2600(Client.java:314)
    at org.apache.hadoop.ipc.Client.getConnection(Client.java:1399)
    at org.apache.hadoop.ipc.Client.call(Client.java:1318)
    ... 21 more
DEBUG - Stopping client

解决:
发现core-site.xml中
<property>
<name>fs.default.name</name>
<value>hdfs://singlehadoop:8020</value>
</property>
端口是8020,不是9000,所以修改程序中以下语句中的端口为8020
private static final String HDFS = "hdfs://192.168.0.160:8020/";

6.重新启动,顺利执行
DEBUG - field org.apache.hadoop.metrics2.lib.MutableRate org.apache.hadoop.security.UserGroupInformation$UgiMetrics.loginSuccess with annotation @org.apache.hadoop.metrics2.annotation.Metric(valueName=Time, about=, value=http://www.mamicode.com/[Rate of successful kerberos logins and latency (milliseconds)], always=false, type=DEFAULT, sampleName=Ops)
DEBUG - field org.apache.hadoop.metrics2.lib.MutableRate org.apache.hadoop.security.UserGroupInformation$UgiMetrics.loginFailure with annotation @org.apache.hadoop.metrics2.annotation.Metric(valueName=Time, about=, value=http://www.mamicode.com/[Rate of failed kerberos logins and latency (milliseconds)], always=false, type=DEFAULT, sampleName=Ops)
DEBUG - UgiMetrics, User and group related metrics
DEBUG - Kerberos krb5 configuration not found, setting default realm to empty
DEBUG -  Creating new Groups object
DEBUG - Trying to load the custom-built native-hadoop library...
DEBUG - Failed to load native-hadoop with error: java.lang.UnsatisfiedLinkError: no hadoop in java.library.path
DEBUG - java.library.path=D:\Program Files\Java\jre7\bin;C:\Windows\Sun\Java\bin;C:\Windows\system32;C:\Windows;C:\Program Files (x86)\NVIDIA Corporation\PhysX\Common;C:\Program Files (x86)\Intel\iCLS Client\;C:\Program Files\Intel\iCLS Client\;C:\Windows\system32;C:\Windows;C:\Windows\System32\Wbem;C:\Windows\System32\WindowsPowerShell\v1.0\;C:\Program Files\Intel\Intel(R) Management Engine Components\DAL;C:\Program Files\Intel\Intel(R) Management Engine Components\IPT;C:\Program Files (x86)\Intel\Intel(R) Management Engine Components\DAL;C:\Program Files (x86)\Intel\Intel(R) Management Engine Components\IPT;C:\Program Files (x86)\Intel\OpenCL SDK\3.0\bin\x86;C:\Program Files (x86)\Intel\OpenCL SDK\3.0\bin\x64;D:\Program Files\Java\jdk1.7.0_40\bin;D:\Program Files\Java\jdk1.7.0_40\jre\bin;D:\Program Files\TortoiseSVN\bin;D:\Program Files (x86)\ant\bin;D:\Program Files\maven3\bin;.
 WARN - Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
DEBUG - Falling back to shell based
DEBUG - Group mapping impl=org.apache.hadoop.security.ShellBasedUnixGroupsMapping
DEBUG - Group mapping impl=org.apache.hadoop.security.JniBasedUnixGroupsMappingWithFallback; cacheTimeout=300000
DEBUG - hadoop login
DEBUG - hadoop login commit
DEBUG - using local user:NTUserPrincipal: Administrator
DEBUG - UGI loginUser:Administrator (auth:SIMPLE)
DEBUG - dfs.client.use.legacy.blockreader.local = false
DEBUG - dfs.client.read.shortcircuit = false
DEBUG - dfs.client.domain.socket.data.traffic = false
DEBUG - dfs.domain.socket.path = 
DEBUG - StartupProgress, NameNode startup progress
DEBUG - multipleLinearRandomRetry = null
DEBUG - rpcKind=RPC_PROTOCOL_BUFFER, rpcRequestWrapperClass=class org.apache.hadoop.ipc.ProtobufRpcEngine$RpcRequestWrapper, rpcInvoker=org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker@1afde4a3
DEBUG - Both short-circuit local reads and UNIX domain socket are disabled.
DEBUG - The ping interval is 60000 ms.
DEBUG - Connecting to /192.168.0.160:8020
DEBUG - IPC Client (60133785) connection to /192.168.0.160:8020 from Administrator: starting, having connections 1
DEBUG - IPC Client (60133785) connection to /192.168.0.160:8020 from Administrator sending #0
DEBUG - IPC Client (60133785) connection to /192.168.0.160:8020 from Administrator got value #0
DEBUG - Call: getListing took 136ms
ls: /
==========================================================
name: hdfs://192.168.0.160:8020/data, folder: true, size: 0
name: hdfs://192.168.0.160:8020/fulong, folder: true, size: 0
name: hdfs://192.168.0.160:8020/test, folder: true, size: 0
name: hdfs://192.168.0.160:8020/tmp, folder: true, size: 0
name: hdfs://192.168.0.160:8020/user, folder: true, size: 0
name: hdfs://192.168.0.160:8020/workspace, folder: true, size: 0
==========================================================
DEBUG - Stopping client
DEBUG - IPC Client (60133785) connection to /192.168.0.160:8020 from Administrator: closed
DEBUG - IPC Client (60133785) connection to /192.168.0.160:8020 from Administrator: stopped, remaining connections 0

下面尝试跑一个hadoop2.2.0自带的wordcount程序

1.首先还是将wordcount的源码拷入工程的src
运行报错:
ERROR - PriviledgedActionException as:Administrator (auth:SIMPLE) cause:java.io.IOException: Cannot initialize Cluster. Please check your configuration for mapreduce.framework.name and the correspond server addresses.
Exception in thread "main" java.io.IOException: Cannot initialize Cluster. Please check your configuration for mapreduce.framework.name and the correspond server addresses.
    at org.apache.hadoop.mapreduce.Cluster.initialize(Cluster.java:120)
    at org.apache.hadoop.mapreduce.Cluster.<init>(Cluster.java:82)
    at org.apache.hadoop.mapreduce.Cluster.<init>(Cluster.java:75)
    at org.apache.hadoop.mapreduce.Job$9.run(Job.java:1238)
    at org.apache.hadoop.mapreduce.Job$9.run(Job.java:1234)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Unknown Source)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1491)
    at org.apache.hadoop.mapreduce.Job.connect(Job.java:1233)
    at org.apache.hadoop.mapreduce.Job.submit(Job.java:1262)
    at org.apache.hadoop.mapreduce.Job.waitForCompletion(Job.java:1286)
    at org.apache.hadoop.examples.WordCount.main(WordCount.java:84)
DEBUG - Stopping client

解决:
拷入两个jar包
hadoop-mapreduce-client-common-2.2.0.jar
hadoop-mapreduce-client-jobclient-2.2.0.jar

2.再次运行,以上错误解决,出现新的报错
Exception in thread "main" DEBUG - Stopping client
DEBUG - IPC Client (152472387) connection to singlehadoop/192.168.0.160:8020 from Administrator: closed
DEBUG - IPC Client (152472387) connection to singlehadoop/192.168.0.160:8020 from Administrator: stopped, remaining connections 0
java.lang.NoClassDefFoundError: org/apache/hadoop/yarn/util/Apps
    at java.lang.ClassLoader.defineClass1(Native Method)
    at java.lang.ClassLoader.defineClass(Unknown Source)
    at java.security.SecureClassLoader.defineClass(Unknown Source)
    at java.net.URLClassLoader.defineClass(Unknown Source)
    at java.net.URLClassLoader.access$100(Unknown Source)
    at java.net.URLClassLoader$1.run(Unknown Source)
    at java.net.URLClassLoader$1.run(Unknown Source)
    at java.security.AccessController.doPrivileged(Native Method)
    at java.net.URLClassLoader.findClass(Unknown Source)
    at java.lang.ClassLoader.loadClass(Unknown Source)
    at sun.misc.Launcher$AppClassLoader.loadClass(Unknown Source)
    at java.lang.ClassLoader.loadClass(Unknown Source)
    at org.apache.hadoop.mapred.LocalDistributedCacheManager.setup(LocalDistributedCacheManager.java:93)
    at org.apache.hadoop.mapred.LocalJobRunner$Job.<init>(LocalJobRunner.java:157)
    at org.apache.hadoop.mapred.LocalJobRunner.submitJob(LocalJobRunner.java:636)
    at org.apache.hadoop.mapreduce.JobSubmitter.submitJobInternal(JobSubmitter.java:430)
    at org.apache.hadoop.mapreduce.Job$10.run(Job.java:1268)
    at org.apache.hadoop.mapreduce.Job$10.run(Job.java:1265)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Unknown Source)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1491)
    at org.apache.hadoop.mapreduce.Job.submit(Job.java:1265)
    at org.apache.hadoop.mapreduce.Job.waitForCompletion(Job.java:1286)
    at org.apache.hadoop.examples.WordCount.main(WordCount.java:84)
Caused by: java.lang.ClassNotFoundException: org.apache.hadoop.yarn.util.Apps
    at java.net.URLClassLoader$1.run(Unknown Source)
    at java.net.URLClassLoader$1.run(Unknown Source)
    at java.security.AccessController.doPrivileged(Native Method)
    at java.net.URLClassLoader.findClass(Unknown Source)
    at java.lang.ClassLoader.loadClass(Unknown Source)
    at sun.misc.Launcher$AppClassLoader.loadClass(Unknown Source)
    at java.lang.ClassLoader.loadClass(Unknown Source)
    ... 24 more

解决:
把share\hadoop\yarn下的jar包全引入工程

3.再次运行,以上错误解决,出现新的报错
Exception in thread "main" java.lang.UnsatisfiedLinkError: org.apache.hadoop.io.nativeio.NativeIO$Windows.access0(Ljava/lang/String;I)Z
    at org.apache.hadoop.io.nativeio.NativeIO$Windows.access0(Native Method)
    at org.apache.hadoop.io.nativeio.NativeIO$Windows.access(NativeIO.java:435)
    at org.apache.hadoop.fs.FileUtil.canRead(FileUtil.java:977)
    at org.apache.hadoop.util.DiskChecker.checkAccessByFileMethods(DiskChecker.java:177)
    at org.apache.hadoop.util.DiskChecker.checkDirAccess(DiskChecker.java:164)
    at org.apache.hadoop.util.DiskChecker.checkDir(DiskChecker.java:98)
    at org.apache.hadoop.fs.LocalDirAllocator$AllocatorPerContext.confChanged(LocalDirAllocator.java:285)
    at org.apache.hadoop.fs.LocalDirAllocator$AllocatorPerContext.getLocalPathForWrite(LocalDirAllocator.java:344)
    at org.apache.hadoop.fs.LocalDirAllocator.getLocalPathForWrite(LocalDirAllocator.java:150)
    at org.apache.hadoop.fs.LocalDirAllocator.getLocalPathForWrite(LocalDirAllocator.java:131)
    at org.apache.hadoop.fs.LocalDirAllocator.getLocalPathForWrite(LocalDirAllocator.java:115)
    at org.apache.hadoop.mapred.LocalDistributedCacheManager.setup(LocalDistributedCacheManager.java:131)
    at org.apache.hadoop.mapred.LocalJobRunner$Job.<init>(LocalJobRunner.java:157)
    at org.apache.hadoop.mapred.LocalJobRunner.submitJob(LocalJobRunner.java:636)
    at org.apache.hadoop.mapreduce.JobSubmitter.submitJobInternal(JobSubmitter.java:430)
    at org.apache.hadoop.mapreduce.Job$10.run(Job.java:1268)
    at org.apache.hadoop.mapreduce.Job$10.run(Job.java:1265)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Unknown Source)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1491)
    at org.apache.hadoop.mapreduce.Job.submit(Job.java:1265)
    at org.apache.hadoop.mapreduce.Job.waitForCompletion(Job.java:1286)
    at org.apache.hadoop.examples.WordCount.main(WordCount.java:84)

解决:
找到我们刚才下载过的hadoop-common-2.2.0-bin
这次直接来个彻底的,把整个下载的bin目录覆盖了$HADOOP_HOME\bin
并且在环境变量的PATH中加上%HADOOP_HOME%\bin

3.再次运行,以上错误解决,出现新的报错
Caused by: org.apache.hadoop.ipc.RemoteException(org.apache.hadoop.security.AccessControlException): Permission denied: user=Administrator, access=WRITE, inode="/workspace/wordcount":casliyang:supergroup:drwxr-xr-x

解决:
在hdfs-site.xml中加入
<property>     <name>dfs.permissions</name>    <value>false</value>
</property>

4.再次运行,程序顺利运行



总结:
1.将hadoop-2.2.0.tar.gz解压一份放到win7的程序目录下,注意hadoop版本一定要和集群的版本一致,然后拷贝集群中的以下几个配置文件覆盖到win7本地的对应目录:
core-site.xml
hdfs-site.xml
mapred-site.xml
yarn-site.xml

2.在eclipse中新建java工程后,最好直接引入所有hadoop2.2.0相关的jar包,包括以下几个目录下的jar包:
share\hadoop\common
share\hadoop\hdfs
share\hadoop\mapreduce
share\hadoop\yarn

注:如果使用hadoop的eclipse插件,就无需该步骤,但2.2.0的插件需自行编译,编译过程参见我的另一篇博客:
http://blog.csdn.net/fulongfbi/article/details/23850575

3.需要在win7中设置环境变量%HADOOP_HOME%,并把%HADOOP_HOME%\bin加入PATH环境变量中

4.需要下载https://github.com/srccodes/hadoop-common-2.2.0-bin,解压后把下载的bin目录覆盖%HADOOP_HOME%\bin

5.注意参考hadoop集群的配置,Eclipse中的程序配置“hadoop地址:端口”的代码需和hadoop集群的配置一致
<property>
    <name>fs.default.name</name>
    <value>hdfs://singlehadoop:8020</value>
</property>

6.在hadoop集群的hdfs-site.xml中加入如下属性,关闭权限校验。
<property>     
    <name>dfs.permissions</name>    
    <value>false</value>
</property>