使用tesseract破解checkCode
tesseract是一个非常强大的图片识别工具,有较大的几率将图片中的字符抓取出来,在对付验证码上,有较好的效果。使用批处理命令结合这个工具,我们就可以在程序中破解得到我们想要的图片了。

import java.awt.image.BufferedImage;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.net.URL;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;

import javax.imageio.IIOImage;
import javax.imageio.ImageIO;
import javax.imageio.ImageReader;
import javax.imageio.ImageWriteParam;
import javax.imageio.ImageWriter;
import javax.imageio.metadata.IIOMetadata;
import javax.imageio.stream.ImageInputStream;
import javax.imageio.stream.ImageOutputStream;

import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import com.sun.media.imageio.plugins.tiff.TIFFImageWriteParam;

/**
 * Recognizes the text of an image (typically a verification code) by converting
 * the image to an uncompressed TIFF and running the external tesseract
 * executable on it.
 */
public class OCRUtil {

    private static final Log logger = LogFactory.getLog(OCRUtil.class);

    /** Command-line switch that precedes the language argument. */
    private static final String LANG_OPTION = "-l";

    /** Separator appended after every recognized line of text. */
    private static final String EOL = System.getProperty("line.separator");

    /** Format assumed for images handed to {@link #recognizeValidation(InputStream)}. */
    private static final String IMAGE_FORMAT = "jpg";

    /**
     * Recognizes the characters of a verification-code image read from a stream.
     *
     * <p>The stream is copied to a temporary file, recognized, and the temporary
     * file is removed afterwards. The stream itself is not closed; that remains
     * the caller's responsibility.
     *
     * @param in stream delivering the image bytes (treated as {@code jpg})
     * @return the recognized letters and digits, or {@code null} when nothing
     *         but whitespace was recognized
     * @throws Exception if the image cannot be stored, converted or recognized
     */
    public static String recognizeValidation(InputStream in) throws Exception {
        File tmpFile = File.createTempFile("img", "." + IMAGE_FORMAT);
        try {
            OutputStream out = new FileOutputStream(tmpFile);
            try {
                IOUtils.copy(in, out);
            } finally {
                IOUtils.closeQuietly(out);
            }
            return format(recognizeText(tmpFile, IMAGE_FORMAT));
        } finally {
            // The upload copy is only a working file; do not leave it behind.
            tmpFile.delete();
        }
    }

    /**
     * Strips everything except letters and digits from the recognized text.
     *
     * @param str raw recognition result
     * @return the letters and digits of {@code str} in their original order, or
     *         {@code null} if {@code str} is null, empty or whitespace only
     */
    private static String format(String str) {
        if (StringUtils.isBlank(str)) {
            return null;
        }
        StringBuilder sb = new StringBuilder(str.length());
        for (int i = 0; i < str.length(); i++) {
            char c = str.charAt(i);
            if (Character.isDigit(c) || Character.isLetter(c)) {
                sb.append(c);
            }
        }
        return sb.toString();
    }

    /**
     * Runs tesseract on an image file and returns the recognized text.
     *
     * <p>The image is first converted to a TIFF placed next to it; tesseract is
     * started in that directory and writes its result to a {@code .txt} file
     * there. Both working files are deleted before the method returns.
     *
     * @param imageFile   image to recognize
     * @param imageFormat informal format name of {@code imageFile}, e.g. {@code "jpg"}
     * @return the recognized text, one line separator after each line
     * @throws RuntimeException if the image cannot be converted or tesseract
     *                          exits with a non-zero code
     * @throws Exception        if the process cannot be started, the wait is
     *                          interrupted or the result cannot be read
     */
    public static String recognizeText(File imageFile, String imageFormat) throws Exception {
        File tempImage = createImage(imageFile, imageFormat);
        if (tempImage == null) {
            // createImage logs the cause and signals failure with null.
            throw new RuntimeException("Could not convert image to TIFF: " + imageFile);
        }

        // Derive the result name from the working image so that concurrent
        // calls in the same directory do not overwrite each other's output.
        String tiffName = tempImage.getName();
        int dot = tiffName.lastIndexOf('.');
        String outputBase = (dot > 0 ? tiffName.substring(0, dot) : tiffName) + "_ocr";
        File workDir = imageFile.getParentFile();
        File outputFile = new File(workDir, outputBase);
        // tesseract appends ".txt" to the output base name it is given.
        File resultFile = new File(outputFile.getAbsolutePath() + ".txt");

        try {
            List<String> cmd = new ArrayList<String>();
            // NOTE(review): assumes SystemUtil.getUserDir() ends with a path separator - confirm.
            cmd.add(SystemUtil.getUserDir() + "tesseract/tesseract.exe");
            cmd.add(tempImage.getName());
            cmd.add(outputFile.getName());
            cmd.add(LANG_OPTION);
            cmd.add("eng");

            ProcessBuilder pb = new ProcessBuilder(cmd);
            // Relative file names in the command are resolved against this directory.
            pb.directory(workDir);
            pb.redirectErrorStream(true);
            Process process = pb.start();

            // Drain the merged stdout/stderr before waiting: a child that fills
            // the pipe buffer would otherwise block and waitFor() would never return.
            byte[] consoleOutput;
            InputStream console = process.getInputStream();
            try {
                consoleOutput = IOUtils.toByteArray(console);
            } finally {
                IOUtils.closeQuietly(console);
            }

            int exitCode;
            try {
                exitCode = process.waitFor();
            } catch (InterruptedException e) {
                process.destroy();
                Thread.currentThread().interrupt();
                throw e;
            }

            if (exitCode != 0) {
                String msg;
                switch (exitCode) {
                case 1:
                    msg = "Errors accessing files. There may be spaces in your image's filename.";
                    break;
                case 29:
                    msg = "Cannot recognize the image or its selected region.";
                    break;
                case 31:
                    msg = "Unsupported image format.";
                    break;
                default:
                    msg = "Errors occurred.";
                }
                // Console output of a native process is in the platform encoding.
                logger.error("tesseract exited with code " + exitCode + ": " + new String(consoleOutput));
                throw new RuntimeException(msg);
            }

            StringBuilder text = new StringBuilder();
            BufferedReader reader = new BufferedReader(
                    new InputStreamReader(new FileInputStream(resultFile), "UTF-8"));
            try {
                String line;
                while ((line = reader.readLine()) != null) {
                    text.append(line).append(EOL);
                }
            } finally {
                IOUtils.closeQuietly(reader);
            }
            return text.toString();
        } finally {
            // Delete the working files on success and on failure alike.
            tempImage.delete();
            resultFile.delete();
        }
    }

    /**
     * Converts an image file to an uncompressed TIFF stored next to it.
     *
     * @param imageFile   image to convert
     * @param imageFormat informal format name used to look up an image reader
     * @return the TIFF file, or {@code null} if the conversion failed (the cause
     *         is logged and any partially written file is removed)
     */
    public static File createImage(File imageFile, String imageFormat) {
        File tempFile = null;
        ImageReader reader = null;
        ImageWriter writer = null;
        ImageInputStream iis = null;
        ImageOutputStream ios = null;
        boolean converted = false;
        try {
            Iterator<ImageReader> readers = ImageIO.getImageReadersByFormatName(imageFormat);
            if (!readers.hasNext()) {
                throw new IllegalArgumentException("No image reader for format: " + imageFormat);
            }
            reader = readers.next();

            iis = ImageIO.createImageInputStream(imageFile);
            if (iis == null) {
                throw new IllegalArgumentException("Cannot open image: " + imageFile);
            }
            reader.setInput(iis);
            // Read the stream metadata
            IIOMetadata streamMetadata = reader.getStreamMetadata();

            // Set up the writeParam: no compression
            TIFFImageWriteParam tiffWriteParam = new TIFFImageWriteParam(Locale.US);
            tiffWriteParam.setCompressionMode(ImageWriteParam.MODE_DISABLED);

            // Get tif writer and set output to file
            Iterator<ImageWriter> writers = ImageIO.getImageWritersByFormatName("tiff");
            if (!writers.hasNext()) {
                throw new IllegalStateException("No TIFF image writer is registered");
            }
            writer = writers.next();

            // NOTE: the original had an optional grey-scale pre-processing step
            // (ImageFilter.changeGrey) at this point; it was disabled.
            BufferedImage bi = reader.read(0);
            IIOImage image = new IIOImage(bi, null, reader.getImageMetadata(0));

            tempFile = tempImageFile(imageFile);
            ios = ImageIO.createImageOutputStream(tempFile);
            if (ios == null) {
                throw new IllegalStateException("Cannot create output stream for: " + tempFile);
            }
            writer.setOutput(ios);
            writer.write(streamMetadata, image, tiffWriteParam);

            // Close explicitly so that a failing flush is reported, not swallowed.
            ios.close();
            ios = null;
            converted = true;
        } catch (Exception exc) {
            logger.error("异常:", exc);
        } finally {
            closeQuietly(ios);
            closeQuietly(iis);
            if (writer != null) {
                writer.dispose();
            }
            if (reader != null) {
                reader.dispose();
            }
            // Streams are closed by now, so a partial file can be removed safely.
            if (!converted && tempFile != null) {
                tempFile.delete();
            }
        }
        return converted ? tempFile : null;
    }

    /**
     * Closes an image stream, ignoring a failure to close.
     *
     * @param stream stream to close; may be {@code null}
     */
    private static void closeQuietly(ImageInputStream stream) {
        if (stream == null) {
            return;
        }
        try {
            stream.close();
        } catch (IOException ignored) {
            // Best-effort cleanup; the primary error, if any, is reported elsewhere.
        }
    }

    /**
     * Builds the name of the working TIFF for an image: same directory, base
     * name followed by {@code 0}, extension {@code tif}.
     *
     * @param imageFile source image
     * @return the TIFF file to write; the file is not created here
     */
    private static File tempImageFile(File imageFile) {
        // Work on the file name only, so a dot in a directory name is never
        // mistaken for the extension separator.
        String name = imageFile.getName();
        int dot = name.lastIndexOf('.');
        String base = dot >= 0 ? name.substring(0, dot) : name;
        // Presumably the "0" suffix keeps the result distinct from a source
        // image that already has the .tif extension.
        return new File(imageFile.getParentFile(), base + "0.tif");
    }

    /**
     * Demo: recognizes a verification code fetched from a URL and a local image.
     *
     * @param args unused
     * @throws Exception if fetching or recognition fails
     */
    public static void main(String[] args) throws Exception {
        InputStream captcha = new URL(
                "http://passport.360buy.com/ImageVerifier.axd?uid=c360a45f-02b2-4255-8f2e-61191bfc3866")
                .openStream();
        String maybe;
        try {
            maybe = recognizeValidation(captcha);
        } finally {
            IOUtils.closeQuietly(captcha);
        }
        System.out.println(maybe);

        String maybe2 = recognizeText(new File("c:/1.jpg"), "jpg");
        System.out.println(maybe2);
    }
}

工具见附件。
页:
[1]