首页 > 代码库 > [转载]hdfs c/c++ API

[转载]hdfs c/c++ API

原文链接:http://blog.csdn.net/sprintfwater/article/details/8996214

1.建立、关闭与HDFS的连接:hdfsConnect()、hdfsConnectAsUser()、hdfsDisconnect()。hdfsConnect()实际上是直接调用hdfsConnectAsUser()。

2.打开、关闭HDFS文件:hdfsOpenFile()、hdfsCloseFile()。当用hdfsOpenFile()创建文件时,可以指定replication和blocksize参数。写打开一个文件时,隐含O_TRUNC标志,文件会被截断,写入是从文件头开始的。

3.读HDFS文件:hdfsRead()、hdfsPread()。两个函数都有可能返回少于用户要求的字节数,此时可以再次调用这两个函数读入剩下的部分(类似APUE中的readn实现);只有在两个函数返回零时,我们才能断定到了文件末尾。

4.写HDFS文件:hdfsWrite()。HDFS不支持随机写,只能是从文件头顺序写入。

5.查询HDFS文件信息:hdfsGetPathInfo()

6.查询和设置HDFS文件读写偏移量:hdfsTell()用于查询当前偏移量,hdfsSeek()用于设置偏移量。注意hdfsSeek()只对以只读方式打开的文件有效。

7.查询数据块所在节点信息:hdfsGetHosts()。返回一个或多个数据块所在数据节点的信息,一个数据块可能存在多个数据节点上。

8.libhdfs中的函数是通过jni调用JAVA虚拟机,在虚拟机中构造对应的HDFS的JAVA类,然后反射调用该类的功能函数。总会发生JVM和程序之间内存拷贝的动作,性能方面值得注意。

9.HDFS不支持多个客户端同时写入的操作,无文件或是记录锁的概念。

10.建议只有超大文件才应该考虑放在HDFS上,而且最好对文件的访问是写一次,读多次。小文件不应该考虑放在HDFS上,得不偿失!

  1 /**  2  * Licensed to the Apache Software Foundation (ASF) under one  3  * or more contributor license agreements.  See the NOTICE file  4  * distributed with this work for additional information  5  * regarding copyright ownership.  The ASF licenses this file  6  * to you under the Apache License, Version 2.0 (the  7  * "License"); you may not use this file except in compliance  8  * with the License.  You may obtain a copy of the License at  9  * 10  *     http://www.apache.org/licenses/LICENSE-2.0 11  * 12  * Unless required by applicable law or agreed to in writing, software 13  * distributed under the License is distributed on an "AS IS" BASIS, 14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15  * See the License for the specific language governing permissions and 16  * limitations under the License. 17  */ 18  19 #ifndef LIBHDFS_HDFS_H 20 #define LIBHDFS_HDFS_H 21  22 #include <sys/types.h> 23 #include <sys/stat.h> 24  25 #include <fcntl.h> 26 #include <stdio.h> 27 #include <stdint.h> 28 #include <string.h> 29 #include <stdlib.h> 30 #include <time.h> 31 #include <errno.h> 32  33 #include <jni.h> 34  35 #ifndef O_RDONLY 36 #define O_RDONLY 1 37 #endif 38  39 #ifndef O_WRONLY  40 #define O_WRONLY 2 41 #endif 42  43 #ifndef EINTERNAL 44 #define EINTERNAL 255  45 #endif 46  47  48 /** All APIs set errno to meaningful values */ 49 #ifdef __cplusplus 50 extern  "C" { 51 #endif 52  53     /** 54      * Some utility decls used in libhdfs. 55      */ 56  57     typedef int32_t   tSize; /// size of data for read/write io ops  58     typedef time_t    tTime; /// time type 59     typedef int64_t   tOffset;/// offset within the file 60     typedef uint16_t  tPort; /// port 61     typedef enum tObjectKind { 62         kObjectKindFile = F, 63         kObjectKindDirectory = D, 64     } tObjectKind; 65  66  67     /** 68      * The C reflection of org.apache.org.hadoop.FileSystem . 
69      */ 70     typedef void* hdfsFS; 71  72      73     /** 74      * The C equivalent of org.apache.org.hadoop.FSData(Input|Output)Stream . 75      */ 76     enum hdfsStreamType 77     { 78         UNINITIALIZED = 0, 79         INPUT = 1, 80         OUTPUT = 2, 81     }; 82  83      84     /** 85      * The ‘file-handle‘ to a file in hdfs. 86      */ 87     struct hdfsFile_internal { 88         void* file; 89         enum hdfsStreamType type; 90     }; 91     typedef struct hdfsFile_internal* hdfsFile; 92        93  94     /**  95      * hdfsConnect - Connect to a hdfs file system. 96      * Connect to the hdfs. 97      * @param host A string containing either a host name, or an ip address 98      * of the namenode of a hdfs cluster. ‘host‘ should be passed as NULL if 99      * you want to connect to local filesystem. ‘host‘ should be passed as100      * ‘default‘ (and port as 0) to used the ‘configured‘ filesystem101      * (hadoop-site/hadoop-default.xml).102      * @param port The port on which the server is listening.103      * @return Returns a handle to the filesystem or NULL on error.104      */105     hdfsFS hdfsConnect(const char* host, tPort port);106 107 108     /** 109      * hdfsDisconnect - Disconnect from the hdfs file system.110      * Disconnect from hdfs.111      * @param fs The configured filesystem handle.112      * @return Returns 0 on success, -1 on error.  
113      */114     int hdfsDisconnect(hdfsFS fs);115         116 117     /** 118      * hdfsOpenFile - Open a hdfs file in given mode.119      * @param fs The configured filesystem handle.120      * @param path The full path to the file.121      * @param flags Either O_RDONLY or O_WRONLY, for read-only or write-only.122      * @param bufferSize Size of buffer for read/write - pass 0 if you want123      * to use the default configured values.124      * @param replication Block replication - pass 0 if you want to use125      * the default configured values.126      * @param blocksize Size of block - pass 0 if you want to use the127      * default configured values.128      * @return Returns the handle to the open file or NULL on error.129      */130     hdfsFile hdfsOpenFile(hdfsFS fs, const char* path, int flags,131                           int bufferSize, short replication, tSize blocksize);132 133 134     /** 135      * hdfsCloseFile - Close an open file. 136      * @param fs The configured filesystem handle.137      * @param file The file handle.138      * @return Returns 0 on success, -1 on error.  139      */140     int hdfsCloseFile(hdfsFS fs, hdfsFile file);141 142 143     /** 144      * hdfsExists - Checks if a given path exsits on the filesystem 145      * @param fs The configured filesystem handle.146      * @param path The path to look for147      * @return Returns 0 on success, -1 on error.  148      */149     int hdfsExists(hdfsFS fs, const char *path);150 151 152     /** 153      * hdfsSeek - Seek to given offset in file. 154      * This works only for files opened in read-only mode. 155      * @param fs The configured filesystem handle.156      * @param file The file handle.157      * @param desiredPos Offset into the file to seek into.158      * @return Returns 0 on success, -1 on error.  
159      */160     int hdfsSeek(hdfsFS fs, hdfsFile file, tOffset desiredPos); 161 162 163     /** 164      * hdfsTell - Get the current offset in the file, in bytes.165      * @param fs The configured filesystem handle.166      * @param file The file handle.167      * @return Current offset, -1 on error.168      */169     tOffset hdfsTell(hdfsFS fs, hdfsFile file);170 171 172     /** 173      * hdfsRead - Read data from an open file.174      * @param fs The configured filesystem handle.175      * @param file The file handle.176      * @param buffer The buffer to copy read bytes into.177      * @param length The length of the buffer.178      * @return Returns the number of bytes actually read, possibly less179      * than than length;-1 on error.180      */181     tSize hdfsRead(hdfsFS fs, hdfsFile file, void* buffer, tSize length);182 183 184     /** 185      * hdfsPread - Positional read of data from an open file.186      * @param fs The configured filesystem handle.187      * @param file The file handle.188      * @param position Position from which to read189      * @param buffer The buffer to copy read bytes into.190      * @param length The length of the buffer.191      * @return Returns the number of bytes actually read, possibly less than192      * than length;-1 on error.193      */194     tSize hdfsPread(hdfsFS fs, hdfsFile file, tOffset position,195                     void* buffer, tSize length);196 197 198     /** 199      * hdfsWrite - Write data into an open file.200      * @param fs The configured filesystem handle.201      * @param file The file handle.202      * @param buffer The data.203      * @param length The no. of bytes to write. 204      * @return Returns the number of bytes written, -1 on error.205      */206     tSize hdfsWrite(hdfsFS fs, hdfsFile file, const void* buffer,207                     tSize length);208 209 210     /** 211      * hdfsWrite - Flush the data. 
212      * @param fs The configured filesystem handle.213      * @param file The file handle.214      * @return Returns 0 on success, -1 on error. 215      */216     int hdfsFlush(hdfsFS fs, hdfsFile file);217 218 219     /**220      * hdfsAvailable - Number of bytes that can be read from this221      * input stream without blocking.222      * @param fs The configured filesystem handle.223      * @param file The file handle.224      * @return Returns available bytes; -1 on error. 225      */226     int hdfsAvailable(hdfsFS fs, hdfsFile file);227 228 229     /**230      * hdfsCopy - Copy file from one filesystem to another.231      * @param srcFS The handle to source filesystem.232      * @param src The path of source file. 233      * @param dstFS The handle to destination filesystem.234      * @param dst The path of destination file. 235      * @return Returns 0 on success, -1 on error. 236      */237     int hdfsCopy(hdfsFS srcFS, const char* src, hdfsFS dstFS, const char* dst);238 239 240     /**241      * hdfsMove - Move file from one filesystem to another.242      * @param srcFS The handle to source filesystem.243      * @param src The path of source file. 244      * @param dstFS The handle to destination filesystem.245      * @param dst The path of destination file. 246      * @return Returns 0 on success, -1 on error. 247      */248     int hdfsMove(hdfsFS srcFS, const char* src, hdfsFS dstFS, const char* dst);249 250 251     /**252      * hdfsDelete - Delete file. 253      * @param fs The configured filesystem handle.254      * @param path The path of the file. 255      * @return Returns 0 on success, -1 on error. 256      */257     int hdfsDelete(hdfsFS fs, const char* path);258 259 260     /**261      * hdfsRename - Rename file. 262      * @param fs The configured filesystem handle.263      * @param oldPath The path of the source file. 264      * @param newPath The path of the destination file. 265      * @return Returns 0 on success, -1 on error. 
266      */267     int hdfsRename(hdfsFS fs, const char* oldPath, const char* newPath);268 269 270     /** 271      * hdfsGetWorkingDirectory - Get the current working directory for272      * the given filesystem.273      * @param fs The configured filesystem handle.274      * @param buffer The user-buffer to copy path of cwd into. 275      * @param bufferSize The length of user-buffer.276      * @return Returns buffer, NULL on error.277      */278     char* hdfsGetWorkingDirectory(hdfsFS fs, char *buffer, size_t bufferSize);279 280 281     /** 282      * hdfsSetWorkingDirectory - Set the working directory. All relative283      * paths will be resolved relative to it.284      * @param fs The configured filesystem handle.285      * @param path The path of the new ‘cwd‘. 286      * @return Returns 0 on success, -1 on error. 287      */288     int hdfsSetWorkingDirectory(hdfsFS fs, const char* path);289 290 291     /** 292      * hdfsCreateDirectory - Make the given file and all non-existent293      * parents into directories.294      * @param fs The configured filesystem handle.295      * @param path The path of the directory. 296      * @return Returns 0 on success, -1 on error. 297      */298     int hdfsCreateDirectory(hdfsFS fs, const char* path);299 300 301     /** 302      * hdfsSetReplication - Set the replication of the specified303      * file to the supplied value304      * @param fs The configured filesystem handle.305      * @param path The path of the file. 306      * @return Returns 0 on success, -1 on error. 
307      */308     int hdfsSetReplication(hdfsFS fs, const char* path, int16_t replication);309 310 311     /** 312      * hdfsFileInfo - Information about a file/directory.313      */314     typedef struct  {315         tObjectKind mKind;   /* file or directory */316         char *mName;         /* the name of the file */317         tTime mLastMod;      /* the last modification time for the file*/318         tOffset mSize;       /* the size of the file in bytes */319         short mReplication;    /* the count of replicas */320         tOffset mBlockSize;  /* the block size for the file */321     } hdfsFileInfo;322 323 324     /** 325      * hdfsListDirectory - Get list of files/directories for a given326      * directory-path. hdfsFreeFileInfo should be called to deallocate memory. 327      * @param fs The configured filesystem handle.328      * @param path The path of the directory. 329      * @param numEntries Set to the number of files/directories in path.330      * @return Returns a dynamically-allocated array of hdfsFileInfo331      * objects; NULL on error.332      */333     hdfsFileInfo *hdfsListDirectory(hdfsFS fs, const char* path,334                                     int *numEntries);335 336 337     /** 338      * hdfsGetPathInfo - Get information about a path as a (dynamically339      * allocated) single hdfsFileInfo struct. hdfsFreeFileInfo should be340      * called when the pointer is no longer needed.341      * @param fs The configured filesystem handle.342      * @param path The path of the file. 
343      * @return Returns a dynamically-allocated hdfsFileInfo object;344      * NULL on error.345      */346     hdfsFileInfo *hdfsGetPathInfo(hdfsFS fs, const char* path);347 348 349     /** 350      * hdfsFreeFileInfo - Free up the hdfsFileInfo array (including fields) 351      * @param hdfsFileInfo The array of dynamically-allocated hdfsFileInfo352      * objects.353      * @param numEntries The size of the array.354      */355     void hdfsFreeFileInfo(hdfsFileInfo *hdfsFileInfo, int numEntries);356 357 358     /** 359      * hdfsGetHosts - Get hostnames where a particular block (determined by360      * pos & blocksize) of a file is stored. The last element in the array361      * is NULL. Due to replication, a single block could be present on362      * multiple hosts.363      * @param fs The configured filesystem handle.364      * @param path The path of the file. 365      * @param start The start of the block.366      * @param length The length of the block.367      * @return Returns a dynamically-allocated 2-d array of blocks-hosts;368      * NULL on error.369      */370     char*** hdfsGetHosts(hdfsFS fs, const char* path, 371             tOffset start, tOffset length);372 373 374     /** 375      * hdfsFreeHosts - Free up the structure returned by hdfsGetHosts376      * @param hdfsFileInfo The array of dynamically-allocated hdfsFileInfo377      * objects.378      * @param numEntries The size of the array.379      */380     void hdfsFreeHosts(char ***blockHosts);381 382 383     /** 384      * hdfsGetDefaultBlockSize - Get the optimum blocksize.385      * @param fs The configured filesystem handle.386      * @return Returns the blocksize; -1 on error. 387      */388     tOffset hdfsGetDefaultBlockSize(hdfsFS fs);389 390 391     /** 392      * hdfsGetCapacity - Return the raw capacity of the filesystem.  393      * @param fs The configured filesystem handle.394      * @return Returns the raw-capacity; -1 on error. 
395      */396     tOffset hdfsGetCapacity(hdfsFS fs);397 398 399     /** 400      * hdfsGetUsed - Return the total raw size of all files in the filesystem.401      * @param fs The configured filesystem handle.402      * @return Returns the total-size; -1 on error. 403      */404     tOffset hdfsGetUsed(hdfsFS fs);405     406 #ifdef __cplusplus407 }408 #endif409 410 #endif /*LIBHDFS_HDFS_H*/

 

[转载]hdfs c/c++ API