首页 > 代码库 > Java OCR tesseract 图像智能字符识别技术 Java实现

Java OCR tesseract 图像智能字符识别技术 Java实现

Java OCR tesseract 图像智能字符识别技术 Java代码实现

 

接着上一篇OCR所说的,上一篇给大家介绍了tesseract 在命令行的简单用法,当然了要集成到我们的程序中,还是需要代码实现的,下面给大家分享下Java实现的例子。

 

拿代码扫描上面的图片,然后输出结果。主要思想就是利用Java调用系统任务。

 

下面是核心代码:

/**
 * Runs the {@code tesseract} command-line tool on every regular file found in
 * {@link #path} and prints the recognised text of each file, followed by a
 * comma, to standard output.
 *
 * @author mjorcen
 * @email mjorcen@gmail.com
 * @dateTime Jun 19, 2014 3:42:16 PM
 * @version 1
 */
public class PB {

    /** Directory that holds the input images; tesseract also writes its result files here. */
    static String path = "E:/test/code";

    /**
     * Feeds every regular file in {@link #path} to {@link #pb2(String)}.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        File dir = new File(path);
        String[] names = dir.list();
        if (names == null) {
            // list() returns null when the path is missing or is not a directory
            System.err.println("Cannot list directory: " + dir);
            return;
        }
        for (String name : names) {
            // Skip sub-directories and the ".txt" results written by earlier runs,
            // otherwise a second run would hand its own output back to tesseract.
            if (new File(dir, name).isFile() && !name.endsWith(".txt")) {
                pb2(name);
            }
        }
    }

    /**
     * Runs {@code tesseract <filename> <filename>} inside {@link #path}, then
     * prints the content of the generated {@code <filename>.txt} followed by a
     * comma. Any failure is reported by printing the exception's
     * {@code toString()} to standard output.
     *
     * @param filename name of the image file, relative to {@link #path}
     */
    public static void pb2(String filename) {
        try {
            ProcessBuilder pb = new ProcessBuilder("tesseract", filename, filename);
            pb.redirectErrorStream(true);
            pb.directory(new File(path));
            Process p = pb.start();

            // Drain the merged stdout/stderr so the child can never block on a
            // full pipe; the text itself is not needed.
            InputStream processOutput = p.getInputStream();
            try {
                byte[] buffer = new byte[4096];
                while (processOutput.read(buffer) != -1) {
                    // discard
                }
            } finally {
                processOutput.close();
            }
            // Make sure the result file is completely written before reading it.
            p.waitFor();

            // Tesseract writes its text output as UTF-8; decoding it as GBK
            // garbles every non-ASCII character (e.g. typographic quotes).
            BufferedReader resultBr = new BufferedReader(new InputStreamReader(
                    new FileInputStream(new File(path, filename + ".txt")), "UTF-8"));
            try {
                String line;
                while ((line = resultBr.readLine()) != null) {
                    System.out.print(line);
                }
            } finally {
                resultBr.close();
            }
            System.out.print(",");
        } catch (InterruptedException e) {
            // Preserve the interrupt status for callers further up the stack.
            Thread.currentThread().interrupt();
            System.out.print(e.toString());
        } catch (Exception e) {
            System.out.print(e.toString());
        }
    }
}

结果如下:

uHx7,IXQO,\1ZYP,ZVBO,3237,5SYQ~,,87YF,\8KDN,CGPC,\c\IG\N,F\Z TA,J 9pc,Lpza,NBGC,N QW8,onwz,ox XJ,\P9FM,P PR鈥楿,QRG\I\,,RAZ v\,504i,VGPH,VPCI,\\I\M I,鈥楳J1,Y6H9\,Y OGP,

 

对比第一张图片,识别结果不是很完美~哈哈,当然了如果你只需要实现验证码的识别,那么上面就足够了。下面继续普及图像处理的知识。



-------------------------------------------------------------------我的分割线--------------------------------------------------------------------

 

当然了,有时候图片被扭曲或者模糊得很厉害,很不容易识别,所以下面我给大家介绍一个去噪的辅助类,能稍做优化,先看下效果图。

 

 

  

 

package cn.c.test3;import java.awt.Color;import java.awt.image.BufferedImage;import java.io.File;import java.io.IOException;import javax.imageio.ImageIO;public class ClearImageHelper {    public static void main(String[] args) throws IOException {        File testDataDir = new File("E:\\test\\code");        final String destDir = testDataDir.getAbsolutePath() + "/tmp";        for (File file : testDataDir.listFiles()) {            cleanImage(file, destDir);        }    }    /**     *      * @param sfile     *            需要去噪的图像     * @param destDir     *            去噪后的图像保存地址     * @throws IOException     */    public static void cleanImage(File sfile, String destDir)            throws IOException {        File destF = new File(destDir);        if (!destF.exists()) {            destF.mkdirs();        }        BufferedImage bufferedImage = ImageIO.read(sfile);        int h = bufferedImage.getHeight();        int w = bufferedImage.getWidth();        // 灰度化        int[][] gray = new int[w][h];        for (int x = 0; x < w; x++) {            for (int y = 0; y < h; y++) {                int argb = bufferedImage.getRGB(x, y);                // 图像加亮(调整亮度识别率非常高)                int r = (int) (((argb >> 16) & 0xFF) * 1.1 + 30);                int g = (int) (((argb >> 8) & 0xFF) * 1.1 + 30);                int b = (int) (((argb >> 0) & 0xFF) * 1.1 + 30);                if (r >= 255) {                    r = 255;                }                if (g >= 255) {                    g = 255;                }                if (b >= 255) {                    b = 255;                }                gray[x][y] = (int) Math                        .pow((Math.pow(r, 2.2) * 0.2973 + Math.pow(g, 2.2)                                * 0.6274 + Math.pow(b, 2.2) * 0.0753), 1 / 2.2);            }        }        // 二值化        int threshold = ostu(gray, w, h);        BufferedImage binaryBufferedImage = new BufferedImage(w, h,                BufferedImage.TYPE_BYTE_BINARY);        for (int x = 
0; x < w; x++) {            for (int y = 0; y < h; y++) {                if (gray[x][y] > threshold) {                    gray[x][y] |= 0x00FFFF;                } else {                    gray[x][y] &= 0xFF0000;                }                binaryBufferedImage.setRGB(x, y, gray[x][y]);            }        }        // 矩阵打印        for (int y = 0; y < h; y++) {            for (int x = 0; x < w; x++) {                if (isBlack(binaryBufferedImage.getRGB(x, y))) {                    System.out.print("*");                } else {                    System.out.print(" ");                }            }            System.out.println();        }        ImageIO.write(binaryBufferedImage, "jpg",                new File(destDir, sfile.getName()));    }    public static boolean isBlack(int colorInt) {        Color color = new Color(colorInt);        if (color.getRed() + color.getGreen() + color.getBlue() <= 300) {            return true;        }        return false;    }    public static boolean isWhite(int colorInt) {        Color color = new Color(colorInt);        if (color.getRed() + color.getGreen() + color.getBlue() > 300) {            return true;        }        return false;    }    public static int isBlackOrWhite(int colorInt) {        if (getColorBright(colorInt) < 30 || getColorBright(colorInt) > 730) {            return 1;        }        return 0;    }    public static int getColorBright(int colorInt) {        Color color = new Color(colorInt);        return color.getRed() + color.getGreen() + color.getBlue();    }    public static int ostu(int[][] gray, int w, int h) {        int[] histData = http://www.mamicode.com/new int[w * h];        // Calculate histogram        for (int x = 0; x < w; x++) {            for (int y = 0; y < h; y++) {                int red = 0xFF & gray[x][y];                histData[red]++;            }        }        // Total number of pixels        int total = w * h;        float sum = 0;        for (int t = 0; t < 256; t++)        
    sum += t * histData[t];        float sumB = 0;        int wB = 0;        int wF = 0;        float varMax = 0;        int threshold = 0;        for (int t = 0; t < 256; t++) {            wB += histData[t]; // Weight Background            if (wB == 0)                continue;            wF = total - wB; // Weight Foreground            if (wF == 0)                break;            sumB += (float) (t * histData[t]);            float mB = sumB / wB; // Mean Background            float mF = (sum - sumB) / wF; // Mean Foreground            // Calculate Between Class Variance            float varBetween = (float) wB * (float) wF * (mB - mF) * (mB - mF);            // Check if new maximum found            if (varBetween > varMax) {                varMax = varBetween;                threshold = t;            }        }        return threshold;    }}