首页 > 代码库 > java:快速文件分割及合并
java:快速文件分割及合并
文件分割与合并是一个常见需求,比如:上传大文件时,可以先分割成小块,传到服务器后,再进行合并。很多高大上的分布式文件系统(比如:google的GFS、taobao的TFS)里,也是按block为单位,对文件进行分割或合并。
看下基本思路:
如果有一个大文件,指定分割大小后(比如:按1M切割)
step 1:
先根据原始文件大小、分割大小,算出最终分割的小文件数N
step 2:
在磁盘上创建这N个小文件
step 3:
开多个线程(线程数=分割文件数),每个线程里,利用RandomAccessFile的seek功能,将读取指针定位到原文件里每一段的段首位置,然后向后读取指定大小(即:分割块大小),最终写入对应的分割文件,因为多线程并行处理,各写各的小文件,速度相对还是比较快的。
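合并时,把上面的思路逆向处理即可:先按"分块大小 ×(分块数 - 1)+ 最后一块的实际大小"算出合并后文件的总长度并预先分配好目标文件,然后每个分块对应一个任务,各自利用RandomAccessFile的seek功能定位到"分块序号 × 分块大小"的偏移处,把该分块的内容写入目标文件。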
核心代码:
分割处理:
1 /** 2 * 拆分文件 3 * @param fileName 待拆分的完整文件名 4 * @param byteSize 按多少字节大小拆分 5 * @return 拆分后的文件名列表 6 * @throws IOException 7 */ 8 public List<String> splitBySize(String fileName, int byteSize) 9 throws IOException {10 List<String> parts = new ArrayList<String>();11 File file = new File(fileName);12 int count = (int) Math.ceil(file.length() / (double) byteSize);13 int countLen = (count + "").length();14 ThreadPoolExecutor threadPool = new ThreadPoolExecutor(count,15 count * 3, 1, TimeUnit.SECONDS,16 new ArrayBlockingQueue<Runnable>(count * 2));17 18 for (int i = 0; i < count; i++) {19 String partFileName = file.getName() + "."20 + leftPad((i + 1) + "", countLen, ‘0‘) + ".part";21 threadPool.execute(new SplitRunnable(byteSize, i * byteSize,22 partFileName, file));23 parts.add(partFileName);24 }25 return parts;26 }
1 private class SplitRunnable implements Runnable { 2 int byteSize; 3 String partFileName; 4 File originFile; 5 int startPos; 6 7 public SplitRunnable(int byteSize, int startPos, String partFileName, 8 File originFile) { 9 this.startPos = startPos;10 this.byteSize = byteSize;11 this.partFileName = partFileName;12 this.originFile = originFile;13 }14 15 public void run() {16 RandomAccessFile rFile;17 OutputStream os;18 try {19 rFile = new RandomAccessFile(originFile, "r");20 byte[] b = new byte[byteSize];21 rFile.seek(startPos);// 移动指针到每“段”开头22 int s = rFile.read(b);23 os = new FileOutputStream(partFileName);24 os.write(b, 0, s);25 os.flush();26 os.close();27 } catch (IOException e) {28 e.printStackTrace();29 }30 }31 }
合并处理:
1 /** 2 * 合并文件 3 * 4 * @param dirPath 拆分文件所在目录名 5 * @param partFileSuffix 拆分文件后缀名 6 * @param partFileSize 拆分文件的字节数大小 7 * @param mergeFileName 合并后的文件名 8 * @throws IOException 9 */10 public void mergePartFiles(String dirPath, String partFileSuffix,11 int partFileSize, String mergeFileName) throws IOException {12 ArrayList<File> partFiles = FileUtil.getDirFiles(dirPath,13 partFileSuffix);14 Collections.sort(partFiles, new FileComparator());15 16 RandomAccessFile randomAccessFile = new RandomAccessFile(mergeFileName,17 "rw");18 randomAccessFile.setLength(partFileSize * (partFiles.size() - 1)19 + partFiles.get(partFiles.size() - 1).length());20 randomAccessFile.close();21 22 ThreadPoolExecutor threadPool = new ThreadPoolExecutor(23 partFiles.size(), partFiles.size() * 3, 1, TimeUnit.SECONDS,24 new ArrayBlockingQueue<Runnable>(partFiles.size() * 2));25 26 for (int i = 0; i < partFiles.size(); i++) {27 threadPool.execute(new MergeRunnable(i * partFileSize,28 mergeFileName, partFiles.get(i)));29 }30 31 }
1 private class MergeRunnable implements Runnable { 2 long startPos; 3 String mergeFileName; 4 File partFile; 5 6 public MergeRunnable(long startPos, String mergeFileName, File partFile) { 7 this.startPos = startPos; 8 this.mergeFileName = mergeFileName; 9 this.partFile = partFile;10 }11 12 public void run() {13 RandomAccessFile rFile;14 try {15 rFile = new RandomAccessFile(mergeFileName, "rw");16 rFile.seek(startPos);17 FileInputStream fs = new FileInputStream(partFile);18 byte[] b = new byte[fs.available()];19 fs.read(b);20 fs.close();21 rFile.write(b);22 rFile.close();23 } catch (IOException e) {24 e.printStackTrace();25 }26 }27 }
为了方便文件操作,把关于文件读写的功能,全封装到FileUtil类:
package com.cnblogs.yjmyzz;

import java.io.*;
import java.util.*;
import java.util.concurrent.*;

/**
 * File helper utilities: padding, deleting, listing, reading, writing and
 * appending, plus multi-threaded splitting of a file into fixed-size parts
 * and merging such parts back into a single file.
 *
 * @author yjmyzz@126.com
 * @version 0.2
 * @since 2014-11-17
 */
public class FileUtil {

    /**
     * Path of the current working directory ({@code user.dir}) followed by
     * the platform's file separator.
     */
    public static String currentWorkDir = System.getProperty("user.dir")
            + File.separator;

    /** Size in bytes of the scratch buffer used when streaming content. */
    private static final int BUFFER_SIZE = 64 * 1024;

    /**
     * Left-pads a string with a fill character up to a minimum length.
     *
     * @param str string to pad
     * @param length minimum length of the result
     * @param ch fill character
     * @return {@code str} itself when it is already at least {@code length}
     *         characters long, otherwise the padded string
     */
    public static String leftPad(String str, int length, char ch) {
        if (str.length() >= length) {
            return str;
        }
        char[] chs = new char[length];
        Arrays.fill(chs, ch);
        char[] src = str.toCharArray();
        System.arraycopy(src, 0, chs, length - src.length, src.length);
        return new String(chs);
    }

    /**
     * Deletes a file.
     *
     * @param fileName full name of the file to delete
     * @return {@code true} if the file does not exist or was deleted,
     *         {@code false} if it exists and could not be deleted
     */
    public static boolean delete(String fileName) {
        File f = new File(fileName);
        return !f.exists() || f.delete();
    }

    /**
     * Recursively collects all regular files below a directory
     * (directories themselves are not included).
     *
     * @param dirPath directory to walk
     * @return the files found; empty when {@code dirPath} is not a directory
     *         or cannot be listed
     */
    public static ArrayList<File> getAllFiles(String dirPath) {
        ArrayList<File> files = new ArrayList<File>();
        // listFiles() is null for non-directories and on I/O errors.
        File[] children = new File(dirPath).listFiles();
        if (children == null) {
            return files;
        }
        for (File child : children) {
            if (child.isFile()) {
                files.add(child);
            } else {
                files.addAll(getAllFiles(child.getPath()));
            }
        }
        return files;
    }

    /**
     * Lists the regular files directly inside a directory (sub-directories
     * are not descended into).
     *
     * @param dirPath directory to list
     * @return the files found; empty when {@code dirPath} is not a directory
     *         or cannot be listed
     */
    public static ArrayList<File> getDirFiles(String dirPath) {
        ArrayList<File> files = new ArrayList<File>();
        File[] children = new File(dirPath).listFiles();
        if (children == null) {
            return files;
        }
        for (File child : children) {
            if (child.isFile()) {
                files.add(child);
            }
        }
        return files;
    }

    /**
     * Lists the regular files directly inside a directory whose names end
     * with the given suffix, compared case-insensitively (sub-directories are
     * not descended into).
     *
     * @param dirPath directory to list
     * @param suffix file name suffix to match
     * @return the matching files; empty when {@code dirPath} is not a
     *         directory or cannot be listed
     */
    public static ArrayList<File> getDirFiles(String dirPath,
            final String suffix) {
        // Locale.ROOT keeps the comparison independent of the default locale.
        final String lowerSuffix = suffix.toLowerCase(Locale.ROOT);
        File[] children = new File(dirPath).listFiles(new FilenameFilter() {
            @Override
            public boolean accept(File dir, String name) {
                return name.toLowerCase(Locale.ROOT).endsWith(lowerSuffix);
            }
        });
        ArrayList<File> files = new ArrayList<File>();
        if (children == null) {
            return files;
        }
        for (File child : children) {
            if (child.isFile()) {
                files.add(child);
            }
        }
        return files;
    }

    /**
     * Reads a whole file as text, decoded with the platform default charset.
     *
     * @param fileName full name of the file to read
     * @return the file content
     * @throws IOException if the file cannot be opened or read
     */
    public static String read(String fileName) throws IOException {
        ByteArrayOutputStream content = new ByteArrayOutputStream();
        try (InputStream in = new FileInputStream(new File(fileName))) {
            byte[] buffer = new byte[BUFFER_SIZE];
            int n;
            // Read until end of stream; available() is not the file size.
            while ((n = in.read(buffer)) != -1) {
                content.write(buffer, 0, n);
            }
        }
        return new String(content.toByteArray());
    }

    /**
     * Writes text to a file, replacing any existing content. The text is
     * encoded with the platform default charset.
     *
     * @param fileName target file name
     * @param fileContent text to write
     * @return {@code true} once the content has been written
     * @throws IOException if the file cannot be opened or written
     */
    public static boolean write(String fileName, String fileContent)
            throws IOException {
        try (OutputStream out = new FileOutputStream(new File(fileName))) {
            out.write(fileContent.getBytes());
            out.flush();
        }
        return true;
    }

    /**
     * Appends text to the end of an existing file. The text is encoded with
     * the platform default charset. A missing file is not created.
     *
     * @param fileName file to append to
     * @param fileContent text to append
     * @return {@code true} if the content was appended, {@code false} if the
     *         file does not exist and nothing was written
     * @throws IOException if the file cannot be opened or written
     */
    public static boolean append(String fileName, String fileContent)
            throws IOException {
        File f = new File(fileName);
        if (!f.exists()) {
            return false;
        }
        try (RandomAccessFile rFile = new RandomAccessFile(f, "rw")) {
            // Writing at the current end extends the file as needed.
            rFile.seek(rFile.length());
            rFile.write(fileContent.getBytes());
        }
        return true;
    }

    /**
     * Splits a file into consecutive part files of at most {@code byteSize}
     * bytes each and blocks until every part has been written.
     * <p>
     * Parts are named {@code <name>.<index>.part}, where {@code <name>} is
     * the source file's simple name and {@code <index>} is the 1-based part
     * number, left-padded with zeros to the width of the part count. The
     * names are relative, so the parts are created in the process working
     * directory, not necessarily next to the source file.
     *
     * @param fileName full name of the file to split
     * @param byteSize maximum size of each part in bytes; must be positive
     * @return the part file names, in part order; empty when the source is
     *         empty
     * @throws IllegalArgumentException if {@code byteSize} is not positive
     * @throws IOException if the source is not an existing file, a part
     *         cannot be written, or the calling thread is interrupted while
     *         waiting
     */
    public List<String> splitBySize(String fileName, int byteSize)
            throws IOException {
        if (byteSize <= 0) {
            throw new IllegalArgumentException("byteSize must be positive: "
                    + byteSize);
        }
        File file = new File(fileName);
        if (!file.isFile()) {
            throw new FileNotFoundException("Not an existing file: "
                    + fileName);
        }
        // Integer ceiling division in long arithmetic.
        long partCount = (file.length() + byteSize - 1) / byteSize;
        if (partCount > Integer.MAX_VALUE) {
            throw new IOException("byteSize " + byteSize
                    + " would produce too many parts: " + partCount);
        }
        int count = (int) partCount;
        int countLen = String.valueOf(count).length();

        List<String> parts = new ArrayList<String>(count);
        List<Runnable> tasks = new ArrayList<Runnable>(count);
        for (int i = 0; i < count; i++) {
            String partFileName = file.getName() + "."
                    + leftPad(String.valueOf(i + 1), countLen, '0') + ".part";
            // The offset is computed as long so it cannot overflow past 2 GiB.
            tasks.add(new SplitRunnable(byteSize, (long) i * byteSize,
                    partFileName, file));
            parts.add(partFileName);
        }
        runAll(tasks, "split " + fileName);
        return parts;
    }

    /**
     * Merges the part files found in a directory into one file and blocks
     * until the merge has finished.
     * <p>
     * Parts are ordered by file name (case-insensitively). Every part except
     * the last is assumed to be exactly {@code partFileSize} bytes long,
     * because part {@code i} is written at offset {@code i * partFileSize}.
     *
     * @param dirPath directory containing the part files
     * @param partFileSuffix file name suffix that identifies part files
     * @param partFileSize size in bytes of each part (except possibly the
     *        last)
     * @param mergeFileName name of the merged file to create or overwrite
     * @throws IOException if no part file is found, a part cannot be copied,
     *         or the calling thread is interrupted while waiting
     */
    public void mergePartFiles(String dirPath, String partFileSuffix,
            int partFileSize, String mergeFileName) throws IOException {
        ArrayList<File> partFiles = getDirFiles(dirPath, partFileSuffix);
        if (partFiles.isEmpty()) {
            throw new FileNotFoundException("No files ending with '"
                    + partFileSuffix + "' in " + dirPath);
        }
        Collections.sort(partFiles, new FileComparator());

        // Pre-size the target; long arithmetic so large results do not wrap.
        int lastIndex = partFiles.size() - 1;
        long mergedLength = (long) partFileSize * lastIndex
                + partFiles.get(lastIndex).length();
        try (RandomAccessFile merged = new RandomAccessFile(mergeFileName,
                "rw")) {
            merged.setLength(mergedLength);
        }

        List<Runnable> tasks = new ArrayList<Runnable>(partFiles.size());
        for (int i = 0; i < partFiles.size(); i++) {
            tasks.add(new MergeRunnable((long) i * partFileSize,
                    mergeFileName, partFiles.get(i)));
        }
        runAll(tasks, "merge into " + mergeFileName);
    }

    /**
     * Runs the tasks on a pool of at most one thread per available processor
     * and waits for all of them, surfacing the first failure.
     *
     * @param tasks work items to run; may be empty
     * @param action short description used in error messages
     * @throws IOException if a task fails or the wait is interrupted
     */
    private static void runAll(List<Runnable> tasks, String action)
            throws IOException {
        if (tasks.isEmpty()) {
            return;
        }
        ExecutorService pool = Executors.newFixedThreadPool(Math.min(
                tasks.size(), Runtime.getRuntime().availableProcessors()));
        try {
            List<Future<?>> pending = new ArrayList<Future<?>>(tasks.size());
            for (Runnable task : tasks) {
                pending.add(pool.submit(task));
            }
            for (Future<?> done : pending) {
                try {
                    done.get();
                } catch (ExecutionException e) {
                    Throwable cause = e.getCause();
                    if (cause instanceof UncheckedIOException) {
                        throw ((UncheckedIOException) cause).getCause();
                    }
                    throw new IOException("Failed to " + action, cause);
                } catch (InterruptedException e) {
                    Thread.currentThread().interrupt();
                    throw new InterruptedIOException(
                            "Interrupted while trying to " + action);
                }
            }
        } finally {
            // Releases the workers; cancels remaining work after a failure.
            pool.shutdownNow();
        }
    }

    /**
     * Orders files by name, ignoring case.
     *
     * @author yjmyzz@126.com
     */
    private static final class FileComparator implements Comparator<File> {
        @Override
        public int compare(File o1, File o2) {
            return o1.getName().compareToIgnoreCase(o2.getName());
        }
    }

    /**
     * Copies one segment of the source file into its own part file.
     *
     * @author yjmyzz@126.com
     */
    private static final class SplitRunnable implements Runnable {
        private final int byteSize;
        private final String partFileName;
        private final File originFile;
        private final long startPos;

        /**
         * @param byteSize maximum number of bytes to copy
         * @param startPos offset of the segment's first byte in the source
         * @param partFileName name of the part file to create
         * @param originFile file being split
         */
        SplitRunnable(int byteSize, long startPos, String partFileName,
                File originFile) {
            this.startPos = startPos;
            this.byteSize = byteSize;
            this.partFileName = partFileName;
            this.originFile = originFile;
        }

        /**
         * Seeks to the segment start and copies up to {@code byteSize}
         * bytes, stopping early at end of file.
         *
         * @throws UncheckedIOException if reading or writing fails
         */
        @Override
        public void run() {
            try (RandomAccessFile source = new RandomAccessFile(originFile,
                    "r");
                    OutputStream part = new FileOutputStream(partFileName)) {
                source.seek(startPos); // move to the start of this segment
                byte[] buffer = new byte[Math.min(byteSize, BUFFER_SIZE)];
                int remaining = byteSize;
                // read() may return fewer bytes than requested, so loop.
                while (remaining > 0) {
                    int n = source.read(buffer, 0,
                            Math.min(buffer.length, remaining));
                    if (n < 0) {
                        break; // end of file: the last part is shorter
                    }
                    part.write(buffer, 0, n);
                    remaining -= n;
                }
            } catch (IOException e) {
                throw new UncheckedIOException("Failed to write part file "
                        + partFileName, e);
            }
        }
    }

    /**
     * Copies one part file into the merged file at a fixed offset.
     *
     * @author yjmyzz@126.com
     */
    private static final class MergeRunnable implements Runnable {
        private final long startPos;
        private final String mergeFileName;
        private final File partFile;

        /**
         * @param startPos offset in the merged file where this part begins
         * @param mergeFileName name of the merged file
         * @param partFile part file to copy
         */
        MergeRunnable(long startPos, String mergeFileName, File partFile) {
            this.startPos = startPos;
            this.mergeFileName = mergeFileName;
            this.partFile = partFile;
        }

        /**
         * Streams the whole part file into the merged file, starting at
         * {@code startPos}.
         *
         * @throws UncheckedIOException if reading or writing fails
         */
        @Override
        public void run() {
            try (RandomAccessFile target = new RandomAccessFile(
                    mergeFileName, "rw");
                    InputStream part = new FileInputStream(partFile)) {
                target.seek(startPos);
                byte[] buffer = new byte[BUFFER_SIZE];
                int n;
                while ((n = part.read(buffer)) != -1) {
                    target.write(buffer, 0, n);
                }
            } catch (IOException e) {
                throw new UncheckedIOException("Failed to merge part file "
                        + partFile, e);
            }
        }
    }

}
单元测试:
1 package com.cnblogs.yjmyzz; 2 3 import java.io.IOException; 4 5 import org.junit.Test; 6 7 public class FileTest { 8 9 @Test10 public void writeFile() throws IOException, InterruptedException {11 12 System.out.println(FileUtil.currentWorkDir);13 14 StringBuilder sb = new StringBuilder();15 16 long originFileSize = 1024 * 1024 * 100;// 100M17 int blockFileSize = 1024 * 1024 * 15;// 15M18 19 // 生成一个大文件20 for (int i = 0; i < originFileSize; i++) {21 sb.append("A");22 }23 24 String fileName = FileUtil.currentWorkDir + "origin.myfile";25 System.out.println(fileName);26 System.out.println(FileUtil.write(fileName, sb.toString()));27 28 // 追加内容29 sb.setLength(0);30 sb.append("0123456789");31 FileUtil.append(fileName, sb.toString());32 33 FileUtil fileUtil = new FileUtil();34 35 // 将origin.myfile拆分36 fileUtil.splitBySize(fileName, blockFileSize);37 38 Thread.sleep(10000);// 稍等10秒,等前面的小文件全都写完39 40 // 合并成新文件41 fileUtil.mergePartFiles(FileUtil.currentWorkDir, ".part",42 blockFileSize, FileUtil.currentWorkDir + "new.myfile");43 44 }45 }
java:快速文件分割及合并
声明:以上内容来自用户投稿及互联网公开渠道收集整理发布,本网站不拥有所有权,未作人工编辑处理,也不承担相关法律责任,若内容有误或涉及侵权可进行投诉: 投诉/举报 工作人员会在5个工作日内联系你,一经查实,本站将立刻删除涉嫌侵权内容。