首页 > 代码库 > 初级验证码识别

初级验证码识别

  本来想写个自动识别验证码的程序,验证码是一些辨识度比较高的字母和数字。写着写着才发现自己想简单了:同样的数字和字母居然还会旋转成不同的角度,觉得这是用 js 难以完成的任务。于是 coding 中止,在此记录下初步的成果。

  技术分享

  假设拿到一张如上所示的验证码图。首先是去噪,去掉和验证码数字色差很大的像素;然后是分割,把数字分成一块块的矩形;接着是匹配,把每一块和一些预处理好的二进制数组进行比对;最后是输出结果。不过因为及时发现在该网站的验证码上是在做无用功,我只做到了去噪这一步。用户脚本如下:

 // ==UserScript==
// @name        checkcode
// @namespace   http://www.cnblogs.com/bigbigsunrise/
// @description 自动输入验证码
// @include     https://www.xinhehui.com/Account/User/register
// @version     1.0
// @grant       none
// ==/UserScript==

// Off-screen canvas and its 2D context. They are created on first use inside
// timer() so that merely loading this file does not touch the DOM.
var canvas = null;
var c = null;

// Pixel size of the captcha image currently drawn on the canvas.
var imgWidth = 0;
var imgHeight = 0;

// Reference colour and cut-off used by the de-noising pass: every pixel whose
// RGB distance to white is below the threshold is repainted pure white.
var WHITE = [255, 255, 255];
var NOISE_THRESHOLD = 180;

/**
 * Euclidean distance between two colours in RGB space.
 *
 * @param {ArrayLike<number>} data1 - First colour; only indices 0..2 (R, G, B) are read.
 * @param {ArrayLike<number>} data2 - Second colour; only indices 0..2 (R, G, B) are read.
 * @returns {number} Distance between the two colours (0 when they are equal).
 */
function chromatic(data1, data2) {
    var offR = data1[0] - data2[0];
    var offG = data1[1] - data2[1];
    var offB = data1[2] - data2[2];
    return Math.sqrt(offR * offR + offG * offG + offB * offB);
}

/**
 * Repaints, in place, every pixel of a flat RGBA buffer that is closer to
 * white than `threshold`. The alpha channel is left untouched.
 *
 * @param {ArrayLike<number>} pixels - RGBA bytes, four entries per pixel.
 * @param {number} threshold - Pixels with a distance to white strictly below this are whitened.
 * @returns {ArrayLike<number>} The same `pixels` buffer, for convenience.
 */
function whitenNearWhite(pixels, threshold) {
    for (var i = 0; i + 3 < pixels.length; i += 4) {
        var rgb = [pixels[i], pixels[i + 1], pixels[i + 2]];
        if (chromatic(rgb, WHITE) < threshold) {
            pixels[i] = pixels[i + 1] = pixels[i + 2] = 255;
        }
    }
    return pixels;
}

/**
 * De-noises the image currently drawn on the canvas: the captcha is simple
 * enough that a single threshold against white is used as the filter.
 *
 * The whole canvas is read and written once instead of issuing one
 * getImageData/putImageData round-trip per pixel.
 */
function clearNoise() {
    // getImageData throws for a zero-sized rectangle, so there is nothing to do.
    if (imgWidth <= 0 || imgHeight <= 0) {
        return;
    }
    var imageData = c.getImageData(0, 0, imgWidth, imgHeight);
    whitenNearWhite(imageData.data, NOISE_THRESHOLD);
    c.putImageData(imageData, 0, 0);
}

/**
 * Polls once per second until the captcha <img> exists and has finished
 * loading, then copies it to the canvas, de-noises it and appends the canvas
 * to the page so the result can be inspected.
 */
function timer() {
    setTimeout(function () {
        var img = document.getElementById('VerifyCodeImg');
        // Keep polling while the element is missing or its pixels are not
        // available yet; drawing an unloaded image would paint nothing.
        if (!img || !img.complete) {
            timer();
            return;
        }
        if (!canvas) {
            canvas = document.createElement('canvas');
            c = canvas.getContext('2d');
        }
        canvas.width = imgWidth = img.width;
        canvas.height = imgHeight = img.height;
        c.drawImage(img, 0, 0, imgWidth, imgHeight);
        clearNoise();
        document.body.appendChild(canvas);
    }, 1000);
}

// Only start polling when a DOM is present; this keeps the helper functions
// loadable in a non-browser environment.
if (typeof document !== 'undefined') {
    timer();
}

  验证码去噪前后如下:

 技术分享 技术分享

  

  代码参照了以前看到过的一篇博文,该博文针对如下这种验证码给出了识别代码和分析。

  技术分享

技术分享
  // ==UserScript==
// @name checkcode
// @author gb_2312
// @match http://jxgl.hdu.edu.cn/*
// ==/UserScript==

// Reference bitmaps, indexed by the digit they stand for. Each template is a
// glyph flattened row by row into '0'/'1' characters, the same encoding that
// glyphBits() produces ('1' = non-white pixel). Only indices 0..8 exist, so
// the digit 9 can never be produced by the matcher.
var sample = [];
sample[0] = "0000000000000000000000000000000000000111000111110110011111000111100011110001111000111100011110001111000110111110001110000000000000000000000000000000000000";
sample[1] = "00000000001111111111111111111111110000000000";
sample[2] = "0000000000000000000000000000000000000111100111111110001111000110000011000011100011100011100001100001100001111111111111100000000000000000000000000000000000";
sample[3] = "0000000000000000000000000000000000001111001111110110011100001100001110000111000001110000011110001111000111111110001110000000000000000000000000000000000000";
sample[4] = "000000000000000000000000000000000011000011000111001111001111011011110011110011111111111111000011000011000000000000000000000000000000";
sample[5] = "0000000000000000000000000000000000001111110111111011000001100001111110011111111000110000011110001111100110111111001110000000000000000000000000000000000000";
sample[6] = "0000000000000000000000000000000000000111100111110110011111000001111100111111011001111100011110001111001110111110001111000000000000000000000000000000000000";
sample[7] = "000000000000000000000000000000111111111111000111000110001100001100011000011000011000110000110000110000000000000000000000000000000000";
sample[8] = "0000000000000000000000000000000000001111100111110110001111000111100111011111001111101100011110001111000111111111001111000000000000000000000000000000000000";

// Off-screen canvas and its 2D context; created on first use inside timer()
// so that loading this file does not touch the DOM.
var canvas = null;
var ctx = null;
// Text field that receives the recognised code; looked up lazily.
var input = null;
// Digits recognised so far for the image currently being processed.
var guessCode = '';
// Pixel size of the captcha image currently drawn on the canvas.
var imgWidth = 0;
var imgHeight = 0;

// De-noising: pixels closer to white than NOISE_THRESHOLD are repainted white.
var WHITE = [255, 255, 255];
var NOISE_THRESHOLD = 180;
// A column counts as part of a glyph once it holds this many non-white pixels.
var MIN_INK_PIXELS = 4;

/**
 * Euclidean distance between two colours in RGB space.
 *
 * @param {ArrayLike<number>} data1 - First colour; only indices 0..2 (R, G, B) are read.
 * @param {ArrayLike<number>} data2 - Second colour; only indices 0..2 (R, G, B) are read.
 * @returns {number} Distance between the two colours (0 when they are equal).
 */
function chromatic(data1, data2) {
    var offR = data1[0] - data2[0];
    var offG = data1[1] - data2[1];
    var offB = data1[2] - data2[2];
    return Math.sqrt(offR * offR + offG * offG + offB * offB);
}

/**
 * Repaints, in place, every pixel of a flat RGBA buffer that is closer to
 * white than `threshold`. The alpha channel is left untouched.
 *
 * @param {ArrayLike<number>} pixels - RGBA bytes, four entries per pixel.
 * @param {number} threshold - Pixels with a distance to white strictly below this are whitened.
 * @returns {ArrayLike<number>} The same `pixels` buffer.
 */
function whitenNearWhite(pixels, threshold) {
    for (var i = 0; i + 3 < pixels.length; i += 4) {
        var rgb = [pixels[i], pixels[i + 1], pixels[i + 2]];
        if (chromatic(rgb, WHITE) < threshold) {
            pixels[i] = pixels[i + 1] = pixels[i + 2] = 255;
        }
    }
    return pixels;
}

/**
 * De-noises the image on the canvas with a single threshold against white.
 * The canvas is read and written once rather than once per pixel.
 */
function clearNoise() {
    // getImageData throws for a zero-sized rectangle.
    if (imgWidth <= 0 || imgHeight <= 0) {
        return;
    }
    var imageData = ctx.getImageData(0, 0, imgWidth, imgHeight);
    whitenNearWhite(imageData.data, NOISE_THRESHOLD);
    ctx.putImageData(imageData, 0, 0);
}

/**
 * Tells whether the pixel starting at `offset` is not pure white, i.e. at
 * least one of its R, G, B channels is below 255.
 *
 * @param {ArrayLike<number>} pixels - Flat RGBA buffer.
 * @param {number} offset - Index of the pixel's red channel.
 * @returns {boolean}
 */
function isInk(pixels, offset) {
    return pixels[offset] < 255 || pixels[offset + 1] < 255 || pixels[offset + 2] < 255;
}

/**
 * Lists the x coordinates of the columns that contain at least
 * MIN_INK_PIXELS non-white pixels.
 *
 * @param {ArrayLike<number>} pixels - Flat RGBA buffer, row-major.
 * @param {number} width - Image width in pixels.
 * @param {number} height - Image height in pixels.
 * @returns {number[]} Matching column indices in ascending order.
 */
function findInkColumns(pixels, width, height) {
    var columns = [];
    for (var x = 0; x < width; x += 1) {
        var count = 0;
        for (var y = 0; y < height && count < MIN_INK_PIXELS; y += 1) {
            if (isInk(pixels, (y * width + x) * 4)) {
                count += 1;
            }
        }
        if (count >= MIN_INK_PIXELS) {
            columns.push(x);
        }
    }
    return columns;
}

/**
 * Groups an ascending list of column indices into runs of adjacent columns.
 * Every gap is honoured, including one right after the first column, and an
 * empty list yields no runs.
 *
 * @param {number[]} columns - Ascending column indices.
 * @returns {number[][]} `[first, last]` pairs, one per run.
 */
function groupRuns(columns) {
    var runs = [];
    for (var i = 0; i < columns.length; i += 1) {
        var current = runs[runs.length - 1];
        if (current && columns[i] - current[1] <= 1) {
            current[1] = columns[i];
        } else {
            runs.push([columns[i], columns[i]]);
        }
    }
    return runs;
}

/**
 * Flattens the columns `left..right` (inclusive) of the image, row by row,
 * into a string with '1' for non-white pixels and '0' for white ones.
 *
 * @param {ArrayLike<number>} pixels - Flat RGBA buffer, row-major.
 * @param {number} width - Image width in pixels.
 * @param {number} height - Image height in pixels.
 * @param {number} left - First column of the glyph.
 * @param {number} right - Last column of the glyph.
 * @returns {string} The glyph's bit string.
 */
function glyphBits(pixels, width, height, left, right) {
    var bits = '';
    for (var y = 0; y < height; y += 1) {
        for (var x = left; x <= right; x += 1) {
            bits += isInk(pixels, (y * width + x) * 4) ? '1' : '0';
        }
    }
    return bits;
}

/**
 * Cuts the image into glyphs and returns the bit string of each, left to right.
 *
 * @param {ArrayLike<number>} pixels - Flat RGBA buffer, row-major.
 * @param {number} width - Image width in pixels.
 * @param {number} height - Image height in pixels.
 * @returns {string[]} One bit string per detected glyph (possibly none).
 */
function sliceGlyphs(pixels, width, height) {
    var runs = groupRuns(findInkColumns(pixels, width, height));
    var glyphs = [];
    for (var i = 0; i < runs.length; i += 1) {
        glyphs.push(glyphBits(pixels, width, height, runs[i][0], runs[i][1]));
    }
    return glyphs;
}

/**
 * Similarity of two strings derived from their edit (Levenshtein) distance.
 *
 * The distance is normalised by the longer length plus one, which is the
 * normalisation the original implementation used, so the result is always
 * greater than 0 and equals 1 only for identical strings.
 *
 * @param {string} str1
 * @param {string} str2
 * @returns {number} Similarity in the range (0, 1].
 */
function LD(str1, str2) {
    var len1 = str1.length;
    var len2 = str2.length;
    // Only two rows of the DP table are needed at any time.
    var previous = [];
    var current = [];
    var i;
    var j;
    for (j = 0; j <= len2; j += 1) {
        previous[j] = j;
    }
    for (i = 1; i <= len1; i += 1) {
        current[0] = i;
        for (j = 1; j <= len2; j += 1) {
            var substitution = previous[j - 1] + (str1.charAt(i - 1) === str2.charAt(j - 1) ? 0 : 1);
            current[j] = Math.min(previous[j] + 1, current[j - 1] + 1, substitution);
        }
        var swap = previous;
        previous = current;
        current = swap;
    }
    return 1 - previous[len2] / (Math.max(len1, len2) + 1);
}

/**
 * Finds the template that is most similar to a glyph.
 *
 * @param {string} str - Bit string of the glyph.
 * @returns {number} Index into `sample` of the best match (the first one on
 *     ties), or -1 when `sample` is empty.
 */
function bestMatch(str) {
    var bestIndex = -1;
    var bestScore = 0;
    for (var i = 0; i < sample.length; i += 1) {
        var score = LD(sample[i], str);
        if (score > bestScore) {
            bestScore = score;
            bestIndex = i;
        }
    }
    return bestIndex;
}

/**
 * Recognises one glyph and appends the matched digit to `guessCode`.
 *
 * @param {string} str - Bit string of the glyph.
 */
function LDS(str) {
    var index = bestMatch(str);
    if (index >= 0) {
        guessCode += index;
    }
}

/**
 * Slices the de-noised canvas into glyphs, recognises each of them and writes
 * the resulting code into the captcha text field, if that field exists.
 */
function numSlice() {
    if (imgWidth > 0 && imgHeight > 0) {
        var pixels = ctx.getImageData(0, 0, imgWidth, imgHeight).data;
        var glyphs = sliceGlyphs(pixels, imgWidth, imgHeight);
        for (var i = 0; i < glyphs.length; i += 1) {
            LDS(glyphs[i]);
        }
    }
    if (!input) {
        input = document.getElementById('txtYz');
    }
    if (input) {
        input.value = guessCode;
    }
}

/**
 * Polls once per second until the captcha image exists and has finished
 * loading, then draws it on the canvas, de-noises it and runs recognition.
 */
function timer() {
    setTimeout(function () {
        var img = document.querySelector('.footbutton img') || document.images[0];
        // Keep polling while the image is missing or not loaded yet.
        if (!img || !img.complete) {
            timer();
            return;
        }
        if (!canvas) {
            canvas = document.createElement('canvas');
            canvas.id = "canvas";
            ctx = canvas.getContext('2d');
        }
        img.height = 22;
        guessCode = '';
        canvas.width = imgWidth = img.width;
        canvas.height = imgHeight = img.height;
        ctx.drawImage(img, 0, 0, imgWidth, imgHeight);
        clearNoise();
        numSlice();
    }, 1000);
}

// Only start when a DOM is present; this keeps the helper functions loadable
// in a non-browser environment.
if (typeof document !== 'undefined') {
    console.log('running');
    timer();
}
View Code

 

初级验证码识别