// 首页 > 代码库 > Tika入门--获得文档内容和元数据
// Tika入门--获得文档内容和元数据
package com.wangchao.tika.demo;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.Reader;
import org.apache.tika.Tika;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
/**
 * Demo that uses the Apache Tika facade to print a document's text content
 * followed by the metadata object that was passed to the parse call.
 */
public class MetadataDemo {

    /** Document parsed when no path is supplied on the command line. */
    private static final String DEFAULT_DOCUMENT = "/home/wangchao/文档/Java网络socket编程详解.doc";

    /**
     * Prints the document's text line by line, then prints the metadata object.
     *
     * @param args optional; when present, {@code args[0]} is the path of the
     *             document to parse, otherwise {@link #DEFAULT_DOCUMENT} is used
     * @throws FileNotFoundException if the document cannot be opened for reading
     * @throws IOException if reading the extracted content fails
     */
    public static void main(String[] args) throws FileNotFoundException, IOException {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_DOCUMENT;

        Tika tika = new Tika();
        Metadata metadata = new Metadata();

        // Both resources are declared here so the file stream is released even
        // if tika.parse(...) throws before a reader exists; on the normal path
        // the reader and the stream are closed in reverse declaration order.
        try (FileInputStream in = new FileInputStream(new File(path));
             BufferedReader content = new BufferedReader(tika.parse(in, metadata))) {
            // Document content
            String line;
            while ((line = content.readLine()) != null) {
                System.out.println(line);
            }
        }

        // All metadata (printed only after the content has been fully read,
        // same order as before)
        System.out.println(metadata);
    }
}