首页 > 代码库 > HDFS(GFS) Python实现
HDFS(GFS) Python实现
HDFS(GFS) Python实现
实现了HDFS的最基本的功能。具体的每一步的实现过程可以在这里看到https://github.com/DanielJyc/HDFS
。每一个commit都可以执行,算是记录了自己的实现过程。总体的设计框图如下:
总体设计
操作:
- 上传文件到HDFS:upload?filename
- 从HDFS下载文件:download?filename
- 删除HDFS的文件:delete?filename
- 列出HDFS文件:ls
- 退出HDFS:exits
实现代码
# -*- coding: UTF-8 -*-import osimport uuidimport mathimport timeclass Client(object): """docstring for Client""" def __init__(self, namenode): self.namenode = namenode def write(self, filename, data): chunks = [] #存放data分出来的num_chunks份数据 chunkloc = 1 num_chunks = self.get_num_chunks(data) for i in range(0, len(data), self.namenode.chunksize): chunks.append(data[i:i+self.namenode.chunksize]) chunk_uuids = self.namenode.alloc(filename, num_chunks) #为文件分配空间,更新元数据 for i in range(0, len(chunk_uuids)): chunkloc = i % self.namenode.num_datanodes + 1 self.namenode.datanodes[chunkloc].write(chunk_uuids[i], chunks[i]) #备份第二份 chunkloc = chunkloc % self.namenode.num_datanodes + 1 self.namenode.datanodes[chunkloc].write(chunk_uuids[i], chunks[i]) def read(self, filename): if True == self.namenode.exits(filename) : data = http://www.mamicode.com/‘‘"The file: \"" + filename + "\" is not exits." def delete(self, filename): #删除文件:物理删除和元数据删除 if True == self.namenode.exits(filename) : chunk_uuids = self.namenode.filetable[filename] for chunk_uuid in chunk_uuids : chunkloc = self.namenode.chunktable[chunk_uuid] self.namenode.datanodes[chunkloc].delete(chunk_uuid) #物理删除:第一份 self.namenode.datanodes[chunkloc%self.namenode.num_datanodes + 1].delete(chunk_uuid) #物理删除:第二份 self.namenode.delete(filename) #逻辑删除:在元数据删除信息 else : print "The file: \"" + filename + "\" is not exits." def list_files(self): print "Files:" for (k, v) in self.namenode.filetable.items(): print k def get_num_chunks(self, data): return int(math.ceil(len(data)*1.0 / self.namenode.chunksize))class Namenode(object): """docstring for Namenode""" def __init__(self): self.num_datanodes = 3 self.chunksize = 10 self.filetable = {} self.chunktable = {} self.datanodes = {} self.init_datanodes() #初始化:loc<-->server def init_datanodes(self): for i in range(1, self.num_datanodes+1): self.datanodes[i] = Datanode(i) def alloc(self, filename, num_chunks): #完成映射:filetable和chunktable chunkloc = 1 chunk_uuids = [] for i in range(0, num_chunks): chunk_uuid = uuid.uuid1(); chunk_uuids.append(chunk_uuid) self.chunktable[chunk_uuid] = chunkloc chunkloc = chunkloc % self.num_datanodes + 1 #!!注意:要+1,否则chunkloc值不会变坏 self.filetable[filename] = chunk_uuids print self.filetable return chunk_uuids def delete(self, filename): chunk_uuids = self.filetable[filename] for chunk_uuid in chunk_uuids: self.chunktable.pop(chunk_uuid) self.filetable.pop(filename) def exits(self, filename): #检测文件是否存在 if filename in self.filetable: return True else: return Falseclass Datanode(object): """docstring for Datanode""" def __init__(self, chunkloc): self.chunkloc = chunkloc self.local_fs_root = "D:/HDFSTemp/Datanode" + str(chunkloc) #用不同的目录来模仿不同的Datanode if not os.path.isdir(self.local_fs_root): os.makedirs(self.local_fs_root) def write(self, chunk_uuid, chunk):#写入到chunk try: with open(self.local_fs_root + "/" + str(chunk_uuid), "w") as fw: fw.write(chunk) except IOError : print "The HDFS is broken." def read(self, chunk_uuid): #从chunk读取 data = http://www.mamicode.com/None"/" + str(chunk_uuid), "r") as fr: data = http://www.mamicode.com/fr.read()"/" + str(chunk_uuid)) except WindowsError: print "Filename:" + self.local_fs_root + "/" + str(chunk_uuid) + ‘dose not exits.‘class Command(object): """docstring for Command""" def __init__(self, client): self.client = client def command_line(self): while True: cmd = raw_input(‘Input your command:\n‘) if(‘upload‘ == cmd): self.upload_cmd() elif(‘download‘ == cmd): self.download_cmd() elif(‘delete‘ == cmd): filename = raw_input(‘Input the filename which you want to delete in HDFS:\n‘) self.client.delete(filename) elif(‘ls‘ == cmd): self.client.list_files() elif(‘exits‘ == cmd): break else: print "Wrong command. \n" def upload_cmd(self): filename = raw_input(‘Input the filename which you want to upload in local:\n‘) try : with open(filename, "r") as fr: #读取本地文件 data = http://www.mamicode.com/fr.read()"No such file in local." def download_cmd(self): filename = raw_input(‘Input the filename which you want to download in HDFS:\n‘) data = http://www.mamicode.com/self.client.read(filename) #读取HDFS文件"w") as fw: fw.write(data) #写入本地def main(): nd = Namenode() client = Client(nd) command = Command(client) command.command_line()if __name__ == ‘__main__‘: main()
HDFS(GFS) Python实现
声明:以上内容来自用户投稿及互联网公开渠道收集整理发布,本网站不拥有所有权,未作人工编辑处理,也不承担相关法律责任,若内容有误或涉及侵权可进行投诉: 投诉/举报 工作人员会在5个工作日内联系你,一经查实,本站将立刻删除涉嫌侵权内容。