/usr/share/cagefs-skeleton/usr/lib64/python3.9/encodings
# This module implements the RFCs 3490 (IDNA) and 3491 (Nameprep)

import stringprep, re, codecs
from unicodedata import ucd_3_2_0 as unicodedata

# IDNA section 3.1
dots = re.compile("[\u002E\u3002\uFF0E\uFF61]")

# IDNA section 5
ace_prefix = b"xn--"
sace_prefix = "xn--"


# This assumes query strings, so AllowUnassigned is true
def nameprep(label):
    """Return *label* (a str) after the Nameprep map/normalize/prohibit steps.

    Raises UnicodeError if the normalized label contains a character from
    one of the checked prohibition tables or violates the bidi requirements.
    """
    # Map
    newlabel = []
    for c in label:
        if stringprep.in_table_b1(c):
            # Map to nothing
            continue
        newlabel.append(stringprep.map_table_b2(c))
    label = "".join(newlabel)

    # Normalize
    label = unicodedata.normalize("NFKC", label)

    # Prohibit
    for c in label:
        if stringprep.in_table_c12(c) or \
           stringprep.in_table_c22(c) or \
           stringprep.in_table_c3(c) or \
           stringprep.in_table_c4(c) or \
           stringprep.in_table_c5(c) or \
           stringprep.in_table_c6(c) or \
           stringprep.in_table_c7(c) or \
           stringprep.in_table_c8(c) or \
           stringprep.in_table_c9(c):
            raise UnicodeError("Invalid character %r" % c)

    # Check bidi
    RandAL = [stringprep.in_table_d1(x) for x in label]
    if any(RandAL):
        # There is a RandAL char in the string. Must perform further
        # tests:
        # 1) The characters in section 5.8 MUST be prohibited.
        #    This is table C.8, which was already checked
        # 2) If a string contains any RandALCat character, the string
        #    MUST NOT contain any LCat character.
        if any(stringprep.in_table_d2(x) for x in label):
            raise UnicodeError("Violation of BIDI requirement 2")
        # 3) If a string contains any RandALCat character, a
        #    RandALCat character MUST be the first character of the
        #    string, and a RandALCat character MUST be the last
        #    character of the string.
        if not RandAL[0] or not RandAL[-1]:
            raise UnicodeError("Violation of BIDI requirement 3")

    return label


def ToASCII(label):
    """Convert one str label to its ASCII (possibly ACE) form as bytes.

    Raises UnicodeError if the resulting label is empty or 64 or more bytes
    long, if nameprep rejects it, or if a non-ASCII label already starts
    with the ACE prefix.
    """
    try:
        # Step 1: try ASCII
        label = label.encode("ascii")
    except UnicodeError:
        pass
    else:
        # Skip to step 3: UseSTD3ASCIIRules is false, so
        # Skip to step 8.
        if 0 < len(label) < 64:
            return label
        raise UnicodeError("label empty or too long")

    # Step 2: nameprep
    label = nameprep(label)

    # Step 3: UseSTD3ASCIIRules is false
    # Step 4: try ASCII
    try:
        label = label.encode("ascii")
    except UnicodeError:
        pass
    else:
        # Skip to step 8.
        if 0 < len(label) < 64:
            return label
        raise UnicodeError("label empty or too long")

    # Step 5: Check ACE prefix
    if label.startswith(sace_prefix):
        raise UnicodeError("Label starts with ACE prefix")

    # Step 6: Encode with PUNYCODE
    label = label.encode("punycode")

    # Step 7: Prepend ACE prefix
    label = ace_prefix + label

    # Step 8: Check size
    if 0 < len(label) < 64:
        return label
    raise UnicodeError("label empty or too long")


def ToUnicode(label):
    """Convert one label (bytes or str) to its Unicode form as a str.

    Labels without the ACE prefix are returned as their ASCII text.
    ACE labels are punycode-decoded and must re-encode (via ToASCII) to the
    same label, compared case-insensitively.

    Raises UnicodeError for an over-long label, a label that is not ASCII
    after nameprep, or a label that does not round-trip.
    """
    if len(label) > 1024:
        # Refuse absurdly long labels before doing any per-character work.
        # An ACE label this long could never pass the < 64 size check applied
        # by ToASCII in step 6 anyway.
        raise UnicodeError("label way too long")

    # Step 1: Check for ASCII
    if isinstance(label, bytes):
        pure_ascii = True
    else:
        try:
            label = label.encode("ascii")
            pure_ascii = True
        except UnicodeError:
            pure_ascii = False
    if not pure_ascii:
        # Step 2: Perform nameprep
        label = nameprep(label)
        # It doesn't say this, but apparently, it should be ASCII now
        try:
            label = label.encode("ascii")
        except UnicodeError:
            raise UnicodeError("Invalid character in IDN label")

    # Step 3: Check for ACE prefix
    if not label.startswith(ace_prefix):
        return str(label, "ascii")

    # Step 4: Remove ACE prefix
    label1 = label[len(ace_prefix):]

    # Step 5: Decode using PUNYCODE
    result = label1.decode("punycode")

    # Step 6: Apply ToASCII
    label2 = ToASCII(result)

    # Step 7: Compare the result of step 6 with the one of step 3
    # label2 will already be in lower case.
    if str(label, "ascii").lower() != str(label2, "ascii"):
        raise UnicodeError("IDNA does not round-trip", label, label2)

    # Step 8: return the result of step 5
    return result


### Codec APIs

class Codec(codecs.Codec):
    """Stateless IDNA codec operating on whole dotted names."""

    def encode(self, input, errors='strict'):
        """Encode a dotted str name to bytes; return (bytes, chars consumed).

        Only errors='strict' is accepted.  Raises UnicodeError for an
        unsupported error handler or an invalid label.
        """
        if errors != 'strict':
            # IDNA is quite clear that implementations must be strict
            raise UnicodeError("unsupported error handling "+errors)

        if not input:
            return b'', 0

        try:
            result = input.encode('ascii')
        except UnicodeEncodeError:
            pass
        else:
            # ASCII name: fast path
            labels = result.split(b'.')
            for label in labels[:-1]:
                if not (0 < len(label) < 64):
                    raise UnicodeError("label empty or too long")
            # The last label may be empty (trailing dot), so only the upper
            # bound is checked for it.
            if len(labels[-1]) >= 64:
                raise UnicodeError("label too long")
            return result, len(input)

        result = bytearray()
        labels = dots.split(input)
        if labels and not labels[-1]:
            trailing_dot = b'.'
            del labels[-1]
        else:
            trailing_dot = b''
        for label in labels:
            if result:
                # Join with U+002E
                result.extend(b'.')
            result.extend(ToASCII(label))
        return bytes(result+trailing_dot), len(input)

    def decode(self, input, errors='strict'):
        """Decode a dotted name to str; return (str, units consumed).

        *input* may be bytes, another bytes-like object, or a str.  Only
        errors='strict' is accepted.  Raises UnicodeError for an unsupported
        error handler or an invalid label.
        """
        if errors != 'strict':
            raise UnicodeError("Unsupported error handling "+errors)

        if not input:
            return "", 0

        # IDNA allows decoding to operate on Unicode strings, too.
        if isinstance(input, str):
            # bytes(str) without an encoding raises TypeError, so split the
            # text on the IDNA dot characters instead, as the incremental
            # decoder does; ToUnicode accepts str labels.
            labels = dots.split(input)
        else:
            if not isinstance(input, bytes):
                # Other bytes-like objects (bytearray, memoryview, ...)
                input = bytes(input)

            if ace_prefix not in input:
                # Fast path
                try:
                    return input.decode('ascii'), len(input)
                except UnicodeDecodeError:
                    pass

            labels = input.split(b".")

        if labels and len(labels[-1]) == 0:
            trailing_dot = '.'
            del labels[-1]
        else:
            trailing_dot = ''

        result = []
        for label in labels:
            result.append(ToUnicode(label))

        return ".".join(result)+trailing_dot, len(input)


class IncrementalEncoder(codecs.BufferedIncrementalEncoder):
    """Incremental IDNA encoder that only emits completed labels."""

    def _buffer_encode(self, input, errors, final):
        """Encode the complete labels in *input*; return (bytes, consumed).

        Unless *final* is true, the last (possibly unfinished) label is not
        consumed and is left for the next call.
        """
        if errors != 'strict':
            # IDNA is quite clear that implementations must be strict
            raise UnicodeError("unsupported error handling "+errors)

        if not input:
            return (b'', 0)

        labels = dots.split(input)
        trailing_dot = b''
        if labels:
            if not labels[-1]:
                trailing_dot = b'.'
                del labels[-1]
            elif not final:
                # Keep potentially unfinished label until the next call
                del labels[-1]
                if labels:
                    trailing_dot = b'.'

        result = bytearray()
        size = 0
        for label in labels:
            if size:
                # Join with U+002E
                result.extend(b'.')
                size += 1
            result.extend(ToASCII(label))
            # Count consumed input characters, not produced bytes.
            size += len(label)

        result += trailing_dot
        size += len(trailing_dot)
        return (bytes(result), size)


class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
    """Incremental IDNA decoder that only emits completed labels."""

    def _buffer_decode(self, input, errors, final):
        """Decode the complete labels in *input*; return (str, consumed).

        Unless *final* is true, the last (possibly unfinished) label is not
        consumed and is left for the next call.
        """
        if errors != 'strict':
            raise UnicodeError("Unsupported error handling "+errors)

        if not input:
            return ("", 0)

        # IDNA allows decoding to operate on Unicode strings, too.
        if isinstance(input, str):
            labels = dots.split(input)
        else:
            # Must be ASCII string
            input = str(input, "ascii")
            labels = input.split(".")

        trailing_dot = ''
        if labels:
            if not labels[-1]:
                trailing_dot = '.'
                del labels[-1]
            elif not final:
                # Keep potentially unfinished label until the next call
                del labels[-1]
                if labels:
                    trailing_dot = '.'

        result = []
        size = 0
        for label in labels:
            result.append(ToUnicode(label))
            if size:
                size += 1
            # Count consumed input units, not produced characters.
            size += len(label)

        result = ".".join(result) + trailing_dot
        size += len(trailing_dot)
        return (result, size)


class StreamWriter(Codec,codecs.StreamWriter):
    pass


class StreamReader(Codec,codecs.StreamReader):
    pass


### encodings module API

def getregentry():
    """Return the codecs.CodecInfo describing the 'idna' codec."""
    return codecs.CodecInfo(
        name='idna',
        encode=Codec().encode,
        decode=Codec().decode,
        incrementalencoder=IncrementalEncoder,
        incrementaldecoder=IncrementalDecoder,
        streamwriter=StreamWriter,
        streamreader=StreamReader,
    )
.
Edit
..
Edit
__init__.py
Edit
__pycache__
Edit
aliases.py
Edit
ascii.py
Edit
base64_codec.py
Edit
big5.py
Edit
big5hkscs.py
Edit
bz2_codec.py
Edit
charmap.py
Edit
cp037.pyc
Edit
cp1006.pyc
Edit
cp1026.pyc
Edit
cp1125.py
Edit
cp1140.pyc
Edit
cp1250.pyc
Edit
cp1251.pyc
Edit
cp1252.pyc
Edit
cp1253.pyc
Edit
cp1254.pyc
Edit
cp1255.pyc
Edit
cp1256.pyc
Edit
cp1257.pyc
Edit
cp1258.pyc
Edit
cp273.pyc
Edit
cp424.pyc
Edit
cp437.pyc
Edit
cp500.pyc
Edit
cp720.py
Edit
cp737.pyc
Edit
cp775.pyc
Edit
cp850.pyc
Edit
cp852.pyc
Edit
cp855.pyc
Edit
cp856.pyc
Edit
cp857.pyc
Edit
cp858.py
Edit
cp860.pyc
Edit
cp861.pyc
Edit
cp862.pyc
Edit
cp863.pyc
Edit
cp864.pyc
Edit
cp865.pyc
Edit
cp866.pyc
Edit
cp869.pyc
Edit
cp874.pyc
Edit
cp875.pyc
Edit
cp932.py
Edit
cp949.py
Edit
cp950.py
Edit
euc_jis_2004.py
Edit
euc_jisx0213.py
Edit
euc_jp.py
Edit
euc_kr.py
Edit
gb18030.py
Edit
gb2312.py
Edit
gbk.py
Edit
hex_codec.py
Edit
hp_roman8.pyc
Edit
hz.py
Edit
idna.py
Edit
iso2022_jp.py
Edit
iso2022_jp_1.py
Edit
iso2022_jp_2.py
Edit
iso2022_jp_2004.py
Edit
iso2022_jp_3.py
Edit
iso2022_jp_ext.py
Edit
iso2022_kr.py
Edit
iso8859_1.pyc
Edit
iso8859_10.pyc
Edit
iso8859_11.pyc
Edit
iso8859_13.pyc
Edit
iso8859_14.pyc
Edit
iso8859_15.pyc
Edit
iso8859_16.pyc
Edit
iso8859_2.pyc
Edit
iso8859_3.pyc
Edit
iso8859_4.pyc
Edit
iso8859_5.pyc
Edit
iso8859_6.pyc
Edit
iso8859_7.pyc
Edit
iso8859_8.pyc
Edit
iso8859_9.pyc
Edit
johab.py
Edit
koi8_r.pyc
Edit
koi8_t.py
Edit
koi8_u.pyc
Edit
kz1048.pyc
Edit
latin_1.py
Edit
mac_arabic.pyc
Edit
mac_croatian.pyc
Edit
mac_cyrillic.pyc
Edit
mac_farsi.pyc
Edit
mac_greek.pyc
Edit
mac_iceland.pyc
Edit
mac_latin2.pyc
Edit
mac_roman.pyc
Edit
mac_romanian.pyc
Edit
mac_turkish.pyc
Edit
mbcs.py
Edit
oem.py
Edit
palmos.py
Edit
ptcp154.pyc
Edit
punycode.py
Edit
quopri_codec.py
Edit
raw_unicode_escape.py
Edit
rot_13.py
Edit
shift_jis.py
Edit
shift_jis_2004.py
Edit
shift_jisx0213.py
Edit
tis_620.pyc
Edit
undefined.py
Edit
unicode_escape.py
Edit
utf_16.py
Edit
utf_16_be.py
Edit
utf_16_le.py
Edit
utf_32.py
Edit
utf_32_be.py
Edit
utf_32_le.py
Edit
utf_7.py
Edit
utf_8.py
Edit
utf_8_sig.py
Edit
uu_codec.py
Edit
zlib_codec.py
Edit