首页 > 代码库 > Python url_escape
Python url_escape
escape.py
# -*- coding: utf8 -*-
"""URL escaping helpers that behave the same on Python 2 and Python 3.

Provides ``url_escape`` / ``url_unescape`` (``application/x-www-form-urlencoded``
style, i.e. spaces are written as ``+``), ``parse_qs_bytes`` and the string
coercion helpers ``utf8``, ``to_unicode`` and ``to_basestring``.

Run as a script::

    python escape.py encode "a b&c"
    python escape.py decode "a+b%26c"
"""

import sys

if str is not bytes:
    # Python 3: ``str`` is the text type and ``bytes`` is separate.
    def u(s):
        """Return *s* unchanged (literals are already unicode on Python 3)."""
        return s

    bytes_type = bytes
    unicode_type = str
    basestring_type = str
else:
    # Python 2: ``str`` is the byte string type.
    def u(s):
        """Decode the byte literal *s* using the ``unicode_escape`` codec."""
        return s.decode('unicode_escape')

    bytes_type = str
    unicode_type = unicode  # noqa: F821 (only evaluated on Python 2)
    basestring_type = basestring  # noqa: F821 (only evaluated on Python 2)

try:
    import urllib.parse as urllib_parse  # py3
    from urllib.parse import parse_qs as _parse_qs
except ImportError:
    import urllib as urllib_parse  # py2
    from urlparse import parse_qs as _parse_qs


_UTF8_TYPES = (bytes_type, type(None))


def utf8(value):
    """Convert a string argument to a byte string.

    If the argument is already a byte string or None, it is returned
    unchanged. Otherwise it must be a unicode string and is encoded as utf8.

    Raises:
        TypeError: if *value* is neither bytes, unicode nor None.
    """
    if isinstance(value, _UTF8_TYPES):
        return value
    # Raise instead of ``assert`` so the check survives ``python -O``.
    if not isinstance(value, unicode_type):
        raise TypeError(
            "Expected bytes, unicode, or None; got %r" % type(value))
    return value.encode("utf-8")


_TO_UNICODE_TYPES = (unicode_type, type(None))


def to_unicode(value):
    """Convert a string argument to a unicode string.

    If the argument is already a unicode string or None, it is returned
    unchanged. Otherwise it must be a byte string and is decoded as utf8.

    Raises:
        TypeError: if *value* is neither bytes, unicode nor None.
    """
    if isinstance(value, _TO_UNICODE_TYPES):
        return value
    if not isinstance(value, bytes_type):
        raise TypeError(
            "Expected bytes, unicode, or None; got %r" % type(value))
    return value.decode("utf-8")


_BASESTRING_TYPES = (basestring_type, type(None))


def to_basestring(value):
    """Convert a string argument to a subclass of ``basestring_type``.

    Instances of ``basestring_type`` and None are returned unchanged; byte
    strings that are not (i.e. ``bytes`` on Python 3) are decoded as utf8.

    Raises:
        TypeError: if *value* is neither bytes, unicode nor None.
    """
    if isinstance(value, _BASESTRING_TYPES):
        return value
    if not isinstance(value, bytes_type):
        raise TypeError(
            "Expected bytes, unicode, or None; got %r" % type(value))
    return value.decode("utf-8")


# ``native_str`` coerces to whatever the interpreter calls ``str``:
# unicode on Python 3, bytes on Python 2.
if str is unicode_type:
    native_str = to_unicode
else:
    native_str = utf8


def url_escape(value):
    """Return a URL-encoded version of the given value.

    Unicode input is encoded as utf8 before quoting; spaces become ``+``.
    """
    return urllib_parse.quote_plus(utf8(value))


# python 3 changed things around enough that we need two separate
# implementations of url_unescape. We also need our own implementation
# of parse_qs since python 3's version insists on decoding everything.
if sys.version_info[0] < 3:
    def url_unescape(value, encoding='utf-8'):
        """Decode the given value from a URL.

        The argument may be either a byte or unicode string.

        If encoding is None, the result will be a byte string. Otherwise,
        the result is a unicode string in the specified encoding.
        """
        unquoted = urllib_parse.unquote_plus(utf8(value))
        if encoding is None:
            return unquoted
        return unicode_type(unquoted, encoding)

    def parse_qs_bytes(qs, keep_blank_values=False, strict_parsing=False):
        """Parse a query string like urlparse.parse_qs.

        On Python 2 this simply delegates to ``urlparse.parse_qs``.
        """
        return _parse_qs(qs, keep_blank_values, strict_parsing)
else:
    def url_unescape(value, encoding='utf-8'):
        """Decode the given value from a URL.

        The argument may be either a byte or unicode string.

        If encoding is None, the result will be a byte string. Otherwise,
        the result is a unicode string in the specified encoding.
        """
        text = to_basestring(value)
        if encoding is None:
            # unquote_to_bytes has no "_plus" variant, so translate '+'
            # to a space by hand to stay consistent with the other branches.
            return urllib_parse.unquote_to_bytes(text.replace('+', ' '))
        return urllib_parse.unquote_plus(text, encoding=encoding)

    def parse_qs_bytes(qs, keep_blank_values=False, strict_parsing=False):
        """Parse a query string like urlparse.parse_qs, but return the
        values as byte strings.

        Keys still become type str (interpreted as latin1 in python3!)
        because it's too painful to keep them as byte strings in
        python3 and in practice they're nearly always ascii anyway.
        """
        # parse_qs returns bytes when handed bytes, which would break the
        # ``.encode`` call below; normalise to str first.
        if isinstance(qs, bytes_type):
            qs = qs.decode('latin1')
        # This is gross, but python3 doesn't give us another way.
        # Latin1 is the universal donor of character encodings.
        result = _parse_qs(qs, keep_blank_values, strict_parsing,
                           encoding='latin1', errors='strict')
        return {
            key: [item.encode('latin1') for item in values]
            for key, values in result.items()
        }


def main(argv=None):
    """Command-line entry point: ``encode TEXT`` or ``decode TEXT``.

    Args:
        argv: argument list without the program name; defaults to
            ``sys.argv[1:]``.

    Returns:
        Process exit status: 0 on success, 1 on a usage error.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) != 2 or args[0] not in ('encode', 'decode'):
        sys.stderr.write("usage: escape.py (encode|decode) TEXT\n")
        return 1
    command, text = args
    if command == 'encode':
        result = url_escape(text)
    else:
        result = url_unescape(text)
    # Single-argument print(...) is valid on both Python 2 and 3;
    # native_str avoids printing a b'...' repr on Python 3.
    print(native_str(result))
    return 0


if __name__ == '__main__':
    sys.exit(main())
声明:以上内容来自用户投稿及互联网公开渠道收集整理发布,本网站不拥有所有权,未作人工编辑处理,也不承担相关法律责任,若内容有误或涉及侵权可进行投诉: 投诉/举报 工作人员会在5个工作日内联系你,一经查实,本站将立刻删除涉嫌侵权内容。