首页 > 代码库 > 过滤3个字节以上的utf-8字符

过滤超过3个字节(即4字节及以上)的UTF-8字符

public static String filterOffUtf8Mb4_2(String text) throws UnsupportedEncodingException {        byte[] bytes = text.getBytes("utf-8");        ByteBuffer buffer = ByteBuffer.allocate(bytes.length);        int i = 0;        while (i < bytes.length) {            short b = bytes[i];            if (b > 0) {                buffer.put(bytes[i++]);                continue;            }            b += 256; // 去掉符号位            if (((b >> 5) ^ 0b110) == 0) {                buffer.put(bytes, i, 2);                i += 2;                System.out.println("2");            } else if (((b >> 4) ^ 0b1110) == 0) {                System.out.println("3");                buffer.put(bytes, i, 3);                i += 3;            } else if (((b >> 3) ^ 0b11110) == 0) {                i += 4;                System.out.println("4");            } else if (((b >> 2) ^ 0b111110) == 0) {                i += 5;                System.out.println("5");            } else if (((b >> 1) ^ 0b1111110) == 0) {                i += 6;                System.out.println("6");            } else {                buffer.put(bytes[i++]);            }        }        buffer.flip();        return new String(buffer.array(), "utf-8");    }

 

过滤3个字节以上的utf-8字符