...

# This module implements the RFCs 3490 (IDNA) and 3491 (Nameprep)

import stringprep, re, codecs
from unicodedata import ucd_3_2_0 as unicodedata

# IDNA section 3.1
# Matches any one of the four characters treated as label separators:
# U+002E (full stop), U+3002 (ideographic full stop), U+FF0E (fullwidth
# full stop) and U+FF61 (halfwidth ideographic full stop).
dots = re.compile("[\u002E\u3002\uFF0E\uFF61]")

# IDNA section 5
# The ACE prefix in both forms: bytes for comparing against and prepending
# to encoded labels, str for testing labels that are still text.
ace_prefix = b"xn--"
sace_prefix = "xn--"

# This assumes query strings, so AllowUnassigned is true
def nameprep(label):
    """Prepare *label* according to the Nameprep profile (RFC 3491).

    The label is processed in four stages:

    1. Map: characters in stringprep table B.1 are dropped, all others go
       through the table B.2 mapping.
    2. Normalize: NFKC, using the Unicode 3.2.0 database.
    3. Prohibit: any character found in tables C.1.2, C.2.2 or C.3 - C.9
       is rejected.
    4. Check bidi: the bidirectional requirements are enforced when the
       label contains a RandALCat character.

    Unassigned code points are not checked, i.e. the label is treated as a
    query string (AllowUnassigned is true).

    Args:
        label: the label to prepare, as a str.

    Returns:
        The prepared label as a str.

    Raises:
        UnicodeError: if the label contains a prohibited character or
            violates bidi requirement 2 or 3.
    """
    # Map
    label = "".join(
        stringprep.map_table_b2(c)
        for c in label
        if not stringprep.in_table_b1(c)  # B.1: map to nothing
    )

    # Normalize
    label = unicodedata.normalize("NFKC", label)

    # Prohibit
    for c in label:
        if (stringprep.in_table_c12(c) or
                stringprep.in_table_c22(c) or
                stringprep.in_table_c3(c) or
                stringprep.in_table_c4(c) or
                stringprep.in_table_c5(c) or
                stringprep.in_table_c6(c) or
                stringprep.in_table_c7(c) or
                stringprep.in_table_c8(c) or
                stringprep.in_table_c9(c)):
            raise UnicodeError("Invalid character %r" % c)

    # Check bidi
    RandAL = [stringprep.in_table_d1(x) for x in label]
    if any(RandAL):
        # There is a RandAL char in the string. Must perform further
        # tests:
        # 1) The characters in section 5.8 MUST be prohibited.
        # This is table C.8, which was already checked
        # 2) If a string contains any RandALCat character, the string
        # MUST NOT contain any LCat character.
        if any(stringprep.in_table_d2(x) for x in label):
            raise UnicodeError("Violation of BIDI requirement 2")
        # 3) If a string contains any RandALCat character, a
        # RandALCat character MUST be the first character of the
        # string, and a RandALCat character MUST be the last
        # character of the string.
        if not RandAL[0] or not RandAL[-1]:
            raise UnicodeError("Violation of BIDI requirement 3")

    return label

def ToASCII(label):
    """Convert one label to its ASCII form (the ToASCII operation of IDNA).

    A label that is already ASCII is returned unchanged as bytes.  Any
    other label is run through nameprep(); if that leaves it ASCII it is
    returned as such, otherwise it is Punycode-encoded and given the ACE
    prefix.

    Args:
        label: a single label (no separators), as a str.

    Returns:
        The label as bytes.

    Raises:
        UnicodeError: if the resulting label is empty or 64 or more bytes
            long, if the prepared non-ASCII label already starts with the
            ACE prefix, or if nameprep() rejects the label.
    """
    # Step 1: try ASCII
    try:
        plain = label.encode("ascii")
    except UnicodeError:
        plain = None
    if plain is not None:
        # Step 3 does nothing (UseSTD3ASCIIRules is false), so the only
        # remaining work is the size check of step 8.
        if not 0 < len(plain) < 64:
            raise UnicodeError("label empty or too long")
        return plain

    # Step 2: nameprep
    prepared = nameprep(label)

    # Step 3: UseSTD3ASCIIRules is false
    # Step 4: try ASCII again, now on the prepared label
    try:
        plain = prepared.encode("ascii")
    except UnicodeError:
        plain = None
    if plain is not None:
        # Skip to step 8.
        if not 0 < len(plain) < 64:
            raise UnicodeError("label empty or too long")
        return plain

    # Step 5: Check ACE prefix
    if prepared.startswith(sace_prefix):
        raise UnicodeError("Label starts with ACE prefix")

    # Step 6: Encode with PUNYCODE
    punycoded = prepared.encode("punycode")

    # Step 7: Prepend ACE prefix
    ace_label = ace_prefix + punycoded

    # Step 8: Check size
    if not 0 < len(ace_label) < 64:
        raise UnicodeError("label empty or too long")
    return ace_label

def ToUnicode(label):
    """Convert one label to text (the ToUnicode operation of IDNA).

    Args:
        label: a single label, either bytes or str.  Bytes are taken to be
            ASCII without being checked at step 1.

    Returns:
        The label as a str.  A label without the ACE prefix is returned as
        its ASCII text; a label with the prefix is returned Punycode-decoded.

    Raises:
        UnicodeError: if a str label is still non-ASCII after nameprep(),
            if ToASCII() of the decoded text does not reproduce the
            (lower-cased) input label, or if nameprep(), ToASCII() or one
            of the decoding calls fails.
    """
    # Step 1: Check for ASCII
    needs_nameprep = False
    if not isinstance(label, bytes):
        try:
            label = label.encode("ascii")
        except UnicodeError:
            needs_nameprep = True

    if needs_nameprep:
        # Step 2: Perform nameprep
        label = nameprep(label)
        # It doesn't say this, but apparently, it should be ASCII now
        try:
            label = label.encode("ascii")
        except UnicodeError:
            raise UnicodeError("Invalid character in IDN label")

    # Step 3: Check for ACE prefix
    if not label.startswith(ace_prefix):
        return label.decode("ascii")

    # Step 4: Remove ACE prefix
    punycoded = label[len(ace_prefix):]

    # Step 5: Decode using PUNYCODE
    decoded = punycoded.decode("punycode")

    # Step 6: Apply ToASCII
    round_trip = ToASCII(decoded)

    # Step 7: Compare the result of step 6 with the one of step 3
    # round_trip will already be in lower case.
    given_text = label.decode("ascii").lower()
    round_trip_text = round_trip.decode("ascii")
    if given_text != round_trip_text:
        raise UnicodeError("IDNA does not round-trip", label, round_trip)

    # Step 8: return the result of step 5
    return decoded

### Codec APIs

class Codec(codecs.Codec):
    """Stateless IDNA codec working on complete domain names."""

    def encode(self, input, errors='strict'):
        """Encode the domain name *input* to its ASCII form.

        Args:
            input: the domain name as a str; labels may be separated by any
                of the characters matched by ``dots``.
            errors: must be 'strict'.

        Returns:
            A tuple ``(encoded_bytes, len(input))``.

        Raises:
            UnicodeError: if *errors* is not 'strict', or if a label is
                empty or too long.  Only the final label may be empty
                (a trailing separator).
        """
        if errors != 'strict':
            # IDNA is quite clear that implementations must be strict
            raise UnicodeError("unsupported error handling "+errors)

        if not input:
            return b'', 0

        try:
            result = input.encode('ascii')
        except UnicodeEncodeError:
            pass
        else:
            # ASCII name: fast path
            labels = result.split(b'.')
            for label in labels[:-1]:
                if not (0 < len(label) < 64):
                    raise UnicodeError("label empty or too long")
            if len(labels[-1]) >= 64:
                raise UnicodeError("label too long")
            return result, len(input)

        result = bytearray()
        labels = dots.split(input)
        if labels and not labels[-1]:
            # Remember the trailing separator and encode only real labels.
            trailing_dot = b'.'
            del labels[-1]
        else:
            trailing_dot = b''
        for label in labels:
            if result:
                # Join with U+002E
                result.extend(b'.')
            result.extend(ToASCII(label))
        return bytes(result+trailing_dot), len(input)

    def decode(self, input, errors='strict'):
        """Decode the ASCII domain name *input* to text.

        Args:
            input: the encoded domain name as bytes.  Any other object is
                first passed through ``bytes()``.
            errors: must be 'strict'.

        Returns:
            A tuple ``(decoded_str, len(input))`` where the length is that
            of the bytes form of *input*.

        Raises:
            UnicodeError: if *errors* is not 'strict' or a label cannot be
                converted by ToUnicode().
        """
        if errors != 'strict':
            raise UnicodeError("Unsupported error handling "+errors)

        if not input:
            return "", 0

        # IDNA allows decoding to operate on Unicode strings, too.
        if not isinstance(input, bytes):
            # XXX obviously wrong, see #3232
            # (bytes() rejects a str that comes without an encoding.)
            input = bytes(input)

        if ace_prefix not in input:
            # Fast path
            try:
                return input.decode('ascii'), len(input)
            except UnicodeDecodeError:
                pass

        labels = input.split(b".")

        if labels and len(labels[-1]) == 0:
            # Remember the trailing dot and decode only real labels.
            trailing_dot = '.'
            del labels[-1]
        else:
            trailing_dot = ''

        result = []
        for label in labels:
            result.append(ToUnicode(label))

        return ".".join(result)+trailing_dot, len(input)

class IncrementalEncoder(codecs.BufferedIncrementalEncoder):
    """Incremental IDNA encoder that emits only complete labels."""

    def _buffer_encode(self, input, errors, final):
        """Encode as many complete labels of *input* as possible.

        Args:
            input: the text to encode (buffered remainder plus new data).
            errors: must be 'strict'.
            final: true if no more input will follow; when false, the last
                label is held back because it may still grow.

        Returns:
            A tuple ``(encoded_bytes, consumed)`` where *consumed* is the
            number of characters of *input* that were encoded.

        Raises:
            UnicodeError: if *errors* is not 'strict' or ToASCII() rejects
                a label.
        """
        if errors != 'strict':
            # IDNA is quite clear that implementations must be strict
            raise UnicodeError("unsupported error handling "+errors)

        if not input:
            return (b'', 0)

        labels = dots.split(input)
        trailing_dot = b''
        if labels:
            if not labels[-1]:
                trailing_dot = b'.'
                del labels[-1]
            elif not final:
                # Keep potentially unfinished label until the next call
                del labels[-1]
                if labels:
                    trailing_dot = b'.'

        result = bytearray()
        size = 0
        for label in labels:
            # ToASCII() raises on an empty label, so size is non-zero for
            # every label after the first.
            if size:
                # Join with U+002E
                result.extend(b'.')
                size += 1
            result.extend(ToASCII(label))
            size += len(label)

        result += trailing_dot
        size += len(trailing_dot)
        return (bytes(result), size)

class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
    """Incremental IDNA decoder that emits only complete labels."""

    def _buffer_decode(self, input, errors, final):
        """Decode as many complete labels of *input* as possible.

        Args:
            input: the data to decode (buffered remainder plus new data);
                either a str or an ASCII bytes-like object.
            errors: must be 'strict'.
            final: true if no more input will follow; when false, the last
                label is held back because it may still grow.

        Returns:
            A tuple ``(decoded_str, consumed)`` where *consumed* is the
            number of input items covered by the decoded labels, the
            separators between them and the trailing separator.

        Raises:
            UnicodeError: if *errors* is not 'strict', the bytes are not
                ASCII, or ToUnicode() rejects a label.
        """
        if errors != 'strict':
            raise UnicodeError("Unsupported error handling "+errors)

        if not input:
            return ("", 0)

        # IDNA allows decoding to operate on Unicode strings, too.
        if isinstance(input, str):
            labels = dots.split(input)
        else:
            # Must be ASCII string
            input = str(input, "ascii")
            labels = input.split(".")

        trailing_dot = ''
        if labels:
            if not labels[-1]:
                trailing_dot = '.'
                del labels[-1]
            elif not final:
                # Keep potentially unfinished label until the next call
                del labels[-1]
                if labels:
                    trailing_dot = '.'

        result = []
        size = 0
        for index, label in enumerate(labels):
            result.append(ToUnicode(label))
            # Count the separator in front of every label but the first.
            # The label position is used rather than the running size
            # because ToUnicode() accepts empty labels: a leading empty
            # label would leave size at zero and its separator uncounted.
            if index:
                size += 1
            size += len(label)

        result = ".".join(result) + trailing_dot
        size += len(trailing_dot)
        return (result, size)

class StreamWriter(Codec, codecs.StreamWriter):
    """IDNA stream writer.

    Combines this module's Codec with codecs.StreamWriter and adds nothing
    of its own.
    """

class StreamReader(Codec, codecs.StreamReader):
    """IDNA stream reader.

    Combines this module's Codec with codecs.StreamReader and adds nothing
    of its own.
    """

### encodings module API

def getregentry():
    """Return the codecs.CodecInfo record for the 'idna' codec.

    The stateless encode and decode entry points are bound methods of two
    separate Codec instances; the incremental and stream entries are the
    classes defined in this module.
    """
    entry = {
        'name': 'idna',
        'encode': Codec().encode,
        'decode': Codec().decode,
        'incrementalencoder': IncrementalEncoder,
        'incrementaldecoder': IncrementalDecoder,
        'streamwriter': StreamWriter,
        'streamreader': StreamReader,
    }
    return codecs.CodecInfo(**entry)

.	Edit
..	Edit
__init__.py	Edit
__pycache__	Edit
aliases.py	Edit
ascii.py	Edit
base64_codec.py	Edit
big5.py	Edit
big5hkscs.py	Edit
bz2_codec.py	Edit
charmap.py	Edit
cp037.pyc	Edit
cp1006.pyc	Edit
cp1026.pyc	Edit
cp1125.py	Edit
cp1140.pyc	Edit
cp1250.pyc	Edit
cp1251.pyc	Edit
cp1252.pyc	Edit
cp1253.pyc	Edit
cp1254.pyc	Edit
cp1255.pyc	Edit
cp1256.pyc	Edit
cp1257.pyc	Edit
cp1258.pyc	Edit
cp273.pyc	Edit
cp424.pyc	Edit
cp437.pyc	Edit
cp500.pyc	Edit
cp720.py	Edit
cp737.pyc	Edit
cp775.pyc	Edit
cp850.pyc	Edit
cp852.pyc	Edit
cp855.pyc	Edit
cp856.pyc	Edit
cp857.pyc	Edit
cp858.py	Edit
cp860.pyc	Edit
cp861.pyc	Edit
cp862.pyc	Edit
cp863.pyc	Edit
cp864.pyc	Edit
cp865.pyc	Edit
cp866.pyc	Edit
cp869.pyc	Edit
cp874.pyc	Edit
cp875.pyc	Edit
cp932.py	Edit
cp949.py	Edit
cp950.py	Edit
euc_jis_2004.py	Edit
euc_jisx0213.py	Edit
euc_jp.py	Edit
euc_kr.py	Edit
gb18030.py	Edit
gb2312.py	Edit
gbk.py	Edit
hex_codec.py	Edit
hp_roman8.pyc	Edit
hz.py	Edit
idna.py	Edit
iso2022_jp.py	Edit
iso2022_jp_1.py	Edit
iso2022_jp_2.py	Edit
iso2022_jp_2004.py	Edit
iso2022_jp_3.py	Edit
iso2022_jp_ext.py	Edit
iso2022_kr.py	Edit
iso8859_1.pyc	Edit
iso8859_10.pyc	Edit
iso8859_11.pyc	Edit
iso8859_13.pyc	Edit
iso8859_14.pyc	Edit
iso8859_15.pyc	Edit
iso8859_16.pyc	Edit
iso8859_2.pyc	Edit
iso8859_3.pyc	Edit
iso8859_4.pyc	Edit
iso8859_5.pyc	Edit
iso8859_6.pyc	Edit
iso8859_7.pyc	Edit
iso8859_8.pyc	Edit
iso8859_9.pyc	Edit
johab.py	Edit
koi8_r.pyc	Edit
koi8_t.py	Edit
koi8_u.pyc	Edit
kz1048.pyc	Edit
latin_1.py	Edit
mac_arabic.pyc	Edit
mac_croatian.pyc	Edit
mac_cyrillic.pyc	Edit
mac_farsi.pyc	Edit
mac_greek.pyc	Edit
mac_iceland.pyc	Edit
mac_latin2.pyc	Edit
mac_roman.pyc	Edit
mac_romanian.pyc	Edit
mac_turkish.pyc	Edit
mbcs.py	Edit
oem.py	Edit
palmos.py	Edit
ptcp154.pyc	Edit
punycode.py	Edit
quopri_codec.py	Edit
raw_unicode_escape.py	Edit
rot_13.py	Edit
shift_jis.py	Edit
shift_jis_2004.py	Edit
shift_jisx0213.py	Edit
tis_620.pyc	Edit
undefined.py	Edit
unicode_escape.py	Edit
utf_16.py	Edit
utf_16_be.py	Edit
utf_16_le.py	Edit
utf_32.py	Edit
utf_32_be.py	Edit
utf_32_le.py	Edit
utf_7.py	Edit
utf_8.py	Edit
utf_8_sig.py	Edit
uu_codec.py	Edit
zlib_codec.py	Edit