首页 > 代码库 > 解决java中对URL编码的问题

解决java中对URL编码的问题

首先查看 JavaScript 中 encodeURI 和 encodeURIComponent 两个方法的区别。

encodeURI:不会对 ASCII 字母和数字进行编码,也不会对这些 ASCII 标点符号进行编码: - _ . ! ~ * ' ( ) ;对以下在 URI 中具有特殊含义的 ASCII 标点符号,encodeURI() 函数同样不会进行转义: ; / ? : @ & = + $ , #

encodeURIComponent:不会对 ASCII 字母和数字进行编码,也不会对这些 ASCII 标点符号进行编码: - _ . ! ~ * ' ( ) ;但与 encodeURI 不同,它会对 ; / ? : @ & = + $ , # 这些在 URI 中具有特殊含义的标点符号进行编码。

 

而 Java 中,URLEncoder.encode(String s, String enc) 方法:

  不会对 ASCII 字母和数字进行编码,也不会对这些 ASCII 标点符号进行编码: - _ .  * 

参考代码如下:

        // Excerpt from the static initialiser of java.net.URLEncoder: it builds
        // the set of characters that encode() copies through without escaping.
        dontNeedEncoding = new BitSet(256);
        int i;
        // ASCII letters and digits are always safe.
        for (i = 'a'; i <= 'z'; i++) {
            dontNeedEncoding.set(i);
        }
        for (i = 'A'; i <= 'Z'; i++) {
            dontNeedEncoding.set(i);
        }
        for (i = '0'; i <= '9'; i++) {
            dontNeedEncoding.set(i);
        }
        dontNeedEncoding.set(' '); /* encoding a space to a + is done
                                    * in the encode() method */
        // The only punctuation left unescaped.
        dontNeedEncoding.set('-');
        dontNeedEncoding.set('_');
        dontNeedEncoding.set('.');
        dontNeedEncoding.set('*');

 

如果我想要在java中对一个url进行编码,但是不对URI 中具有特殊含义的 ASCII 标点符号进行编码,需要在dontNeedEncoding中添加相关字符,创建自己的编码类MyURIEncode:

  

package com.sitech.solr.util;

import java.io.CharArrayWriter;
import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import java.nio.charset.IllegalCharsetNameException;
import java.nio.charset.UnsupportedCharsetException;
import java.util.BitSet;

/**
 * Percent-encodes a complete URI string while leaving the URI structure intact.
 *
 * <p>The implementation is derived from {@code java.net.URLEncoder} with two
 * differences:
 * <ul>
 *   <li>the characters that have a special meaning inside a URI
 *       ({@code ; / ? : @ & = + $ , #}) are copied through unescaped;</li>
 *   <li>a space is escaped like any other unsafe character (for example
 *       {@code %20} in UTF-8) instead of being rewritten to {@code +}. Because
 *       a literal {@code +} is left unescaped by this class, rewriting spaces
 *       to {@code +} would make the two indistinguishable in the output.</li>
 * </ul>
 *
 * <p>ASCII letters, digits and {@code - _ . *} are also copied through. Every
 * other character, including {@code ! ~ ' ( )} and {@code %}, is converted to
 * bytes in the requested charset and written as {@code %XX} with upper-case hex
 * digits.
 */
public class MyURIEncoder {

    /** Characters that {@link #encode(String, String)} copies through unescaped. */
    static BitSet dontNeedEncoding;

    /** Distance between a lower-case ASCII letter and its upper-case form. */
    static final int caseDiff = ('a' - 'A');

    /**
     * Value of the {@code file.encoding} system property, or {@code null} when it
     * could not be read. {@link #encode(String, String)} does not use it.
     */
    static String dfltEncName = null;

    static {
        dontNeedEncoding = new BitSet(256);
        for (int ch = 'a'; ch <= 'z'; ch++) {
            dontNeedEncoding.set(ch);
        }
        for (int ch = 'A'; ch <= 'Z'; ch++) {
            dontNeedEncoding.set(ch);
        }
        for (int ch = '0'; ch <= '9'; ch++) {
            dontNeedEncoding.set(ch);
        }

        // Same punctuation that java.net.URLEncoder leaves alone.
        dontNeedEncoding.set('-');
        dontNeedEncoding.set('_');
        dontNeedEncoding.set('.');
        dontNeedEncoding.set('*');

        // Punctuation with a special meaning inside a URI must not be escaped,
        // otherwise the scheme, path, query and fragment separators are lost.
        dontNeedEncoding.set(';');
        dontNeedEncoding.set('/');
        dontNeedEncoding.set('?');
        dontNeedEncoding.set(':');
        dontNeedEncoding.set('@');
        dontNeedEncoding.set('&');
        dontNeedEncoding.set('=');
        dontNeedEncoding.set('+');
        dontNeedEncoding.set('$');
        dontNeedEncoding.set(',');
        dontNeedEncoding.set('#');

        // The space character is deliberately NOT in the set: see the class
        // comment for why it has to be percent-encoded here.

        dfltEncName = readDefaultEncodingName();
    }

    /**
     * You can't call the constructor.
     */
    private MyURIEncoder() { }

    /**
     * Encodes {@code s} so that it can be used as a URI.
     *
     * @param s   the URI text to encode
     * @param enc the name of the charset used to turn unsafe characters into bytes
     * @return the encoded text, or {@code s} itself when nothing had to be escaped
     * @throws UnsupportedEncodingException if {@code enc} is not a legal or
     *         supported charset name
     * @throws NullPointerException if {@code s} or {@code enc} is {@code null}
     */
    public static String encode(String s, String enc)
        throws UnsupportedEncodingException {
        if (enc == null) {
            throw new NullPointerException("charsetName");
        }
        if (s == null) {
            throw new NullPointerException("s");
        }
        Charset charset = lookupCharset(enc);

        boolean needToChange = false;
        int length = s.length();
        StringBuilder out = new StringBuilder(length);
        CharArrayWriter pending = new CharArrayWriter();

        int i = 0;
        while (i < length) {
            char c = s.charAt(i);
            if (dontNeedEncoding.get(c)) {
                out.append(c);
                i++;
                continue;
            }

            // Collect the whole run of unsafe characters first so that
            // multi-char sequences are converted to bytes together.
            do {
                pending.write(c);
                // Keep a surrogate pair together; a high surrogate that is not
                // followed by a low surrogate is written on its own.
                if (Character.isHighSurrogate(c) && i + 1 < length) {
                    char low = s.charAt(i + 1);
                    if (Character.isLowSurrogate(low)) {
                        pending.write(low);
                        i++;
                    }
                }
                i++;
            } while (i < length && !dontNeedEncoding.get(c = s.charAt(i)));

            byte[] bytes = new String(pending.toCharArray()).getBytes(charset);
            for (int j = 0; j < bytes.length; j++) {
                appendPercentEncoded(out, bytes[j]);
            }
            pending.reset();
            needToChange = true;
        }

        return needToChange ? out.toString() : s;
    }

    /** Resolves a charset name, reporting failures as UnsupportedEncodingException. */
    private static Charset lookupCharset(String enc)
        throws UnsupportedEncodingException {
        try {
            return Charset.forName(enc);
        } catch (IllegalCharsetNameException e) {
            throw unsupported(enc, e);
        } catch (UnsupportedCharsetException e) {
            throw unsupported(enc, e);
        }
    }

    /** Builds the exception for a bad charset name while preserving the cause. */
    private static UnsupportedEncodingException unsupported(String enc, Throwable cause) {
        UnsupportedEncodingException ex = new UnsupportedEncodingException(enc);
        ex.initCause(cause);
        return ex;
    }

    /** Appends one byte as a percent sign followed by two upper-case hex digits. */
    private static void appendPercentEncoded(StringBuilder out, byte b) {
        out.append('%');
        out.append(upperHexDigit((b >> 4) & 0xF));
        out.append(upperHexDigit(b & 0xF));
    }

    /** Converts a value in the range 0-15 to an upper-case hex digit. */
    private static char upperHexDigit(int nibble) {
        char ch = Character.forDigit(nibble, 16);
        if (Character.isLetter(ch)) {
            ch -= caseDiff;
        }
        return ch;
    }

    /** Reads the file.encoding system property, or returns null when access is denied. */
    private static String readDefaultEncodingName() {
        try {
            return System.getProperty("file.encoding");
        } catch (SecurityException ignored) {
            // Reading the property was not permitted; encode() never needs the
            // value, so class initialisation must not fail because of it.
            return null;
        }
    }
}

 

解决java中对URL编码的问题