Files
unshackle/unshackle/utils/base62.py
2025-07-18 00:46:05 +00:00

119 lines
2.9 KiB
Python

# -*- coding: utf-8 -*-
"""
base62
~~~~~~
Originated from http://blog.suminb.com/archives/558
"""
# Package metadata.
__title__ = "base62"
__author__ = "Sumin Byeon"
__email__ = "suminb@gmail.com"
__version__ = "1.0.0"
# Radix used by encode() and decode() when splitting/combining digits.
BASE = 62
# Digit alphabets: a character's position in the string is its digit value.
# Default ordering: decimal digits, then uppercase letters, then lowercase.
CHARSET_DEFAULT = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
# Inverted ordering: decimal digits, then lowercase letters, then uppercase.
CHARSET_INVERTED = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
def encode(n, charset=CHARSET_DEFAULT):
    """Encode the integer ``n`` as a base62 string.

    :param n: The integer to encode. Zero and negative values yield the
        literal ``"0"`` because the digit loop only runs while ``n > 0``.
    :type n: int
    :param charset: Digit alphabet; the character at index ``d`` represents
        the digit value ``d``.
    :rtype: str
    """
    digits = []
    while n > 0:
        n, remainder = divmod(n, BASE)
        # Collect least-significant digit first; appending is O(1) whereas
        # inserting at the front would shift the whole list on every digit.
        digits.append(charset[remainder])
    if not digits:
        return "0"
    # Digits were gathered in reverse order, so flip them once at the end.
    return "".join(reversed(digits))
def encodebytes(barray, charset=CHARSET_DEFAULT):
    """Encode a bytestring as a base62 string.

    Leading null bytes would vanish when the bytes are read as an integer,
    so they are written as a prefix of two-character groups: a ``"0"``
    followed by the digit whose value is the number of null bytes in that
    group.

    :param barray: The bytes to encode
    :type barray: bytes
    :param charset: Digit alphabet used for both the prefix and the value
    :rtype: str
    :raises TypeError: If ``barray`` is not a ``bytes`` object
    """
    _check_type(barray, bytes)

    total = len(barray)
    # The position of the first non-zero byte equals the number of leading
    # null bytes; when every byte is zero (or there are none) it is ``total``.
    null_count = next(
        (pos for pos, byte in enumerate(barray) if byte != 0),
        total,
    )

    # One group can express at most ``len(charset) - 1`` null bytes, so long
    # runs are split into several maximal groups plus an optional remainder.
    full_groups, leftover = divmod(null_count, len(charset) - 1)
    full_marker = f"0{charset[-1]}"
    prefix = full_marker * full_groups + (
        f"0{charset[leftover]}" if leftover else ""
    )

    # Empty input or nothing but null bytes: the prefix is the whole result.
    if null_count == total:
        return prefix

    return prefix + encode(int.from_bytes(barray, "big"), charset=charset)
def decode(encoded, charset=CHARSET_DEFAULT):
    """Decode the base62 string ``encoded`` into an integer.

    :param encoded: The base62 digits, most significant first
    :type encoded: str
    :param charset: Digit alphabet the string was encoded with
    :rtype: int
    :raises TypeError: If ``encoded`` is not a ``str``
    :raises ValueError: If a character is not part of ``charset``
    """
    _check_type(encoded, str)

    total = 0
    for digit in encoded:
        # Shift the running value up one base62 place, then add this digit.
        total = total * BASE + _value(digit, charset=charset)
    return total
def decodebytes(encoded, charset=CHARSET_DEFAULT):
    """Decode a string of base62 data into a bytes object.

    The string may start with any number of two-character groups, each a
    ``"0"`` followed by a digit whose value is a count of leading null bytes.
    Whatever follows those groups is decoded as a big-endian integer.

    :param encoded: A string to be decoded in base62
    :type encoded: str
    :param charset: Digit alphabet the string was encoded with
    :rtype: bytes
    :raises TypeError: If ``encoded`` is not a ``str``
    :raises ValueError: If a character is not part of ``charset``
    """
    # Same up-front validation as decode(), so a wrong argument type fails
    # with a clear TypeError instead of an error from a str method call.
    _check_type(encoded, str)

    # Walk the prefix groups with an index rather than re-slicing the string
    # and re-concatenating bytes on every iteration.
    null_count = 0
    pos = 0
    while len(encoded) - pos >= 2 and encoded[pos] == "0":
        null_count += _value(encoded[pos + 1], charset)
        pos += 2

    number = decode(encoded[pos:], charset=charset)
    # Minimal big-endian representation; a value of 0 has bit_length 0 and
    # therefore contributes no bytes at all.
    payload = number.to_bytes((number.bit_length() + 7) // 8, "big")
    return b"\x00" * null_count + payload
def _value(ch, charset):
    """Return the numeric value of one base62 digit.

    :param ch: The digit character to look up
    :param charset: Digit alphabet; a digit's value is its index in it
    :rtype: int
    :raises ValueError: If ``ch`` does not occur in ``charset``
    """
    try:
        return charset.index(ch)
    except ValueError:
        # ``from None`` hides the lookup's own "not found" error, which adds
        # nothing to the message below and only clutters the traceback.
        raise ValueError(f"base62: Invalid character ({ch})") from None
def _check_type(value, expected_type):
    """Raise ``TypeError`` unless ``value`` is an instance of ``expected_type``.

    :param value: The object to validate
    :param expected_type: The type passed to ``isinstance``
    :raises TypeError: If ``value`` is not an instance of ``expected_type``
    """
    if isinstance(value, expected_type):
        return
    actual_name = value.__class__.__name__
    raise TypeError(f"Expected {expected_type} object, not {actual_name}")