File: //usr/local/lib/python3.10/dist-packages/tiktoken/__pycache__/_educational.cpython-310.pyc
o
;��g% � @ sh d Z ddlmZ ddlZddlZddlZG dd� d�Z dd dd�Z dd!dd�Zd"dd�Z dd� Z
dS )#zJThis is an educational implementation of the byte pair encoding algorithm.� )�annotationsNc @ sZ e Zd Zd!dd�Zd"d#dd�Zd$dd�Zd%dd�Zd&dd�Zed'dd��Z edd� �Z
d S )(�SimpleBytePairEncoding�pat_str�str�mergeable_ranks�dict[bytes, int]�return�Nonec C s0 || _ || _dd� |�� D �| _t�|�| _dS )zCreates an Encoding object.c S s i | ]\}}||�qS � r
)�.0�token_bytes�tokenr
r
�@/usr/local/lib/python3.10/dist-packages/tiktoken/_educational.py�
<dictcomp> � z3SimpleBytePairEncoding.__init__.<locals>.<dictcomp>N)r r �items�_decoder�regex�compile�_pat)�selfr r r
r
r �__init__
s zSimpleBytePairEncoding.__init__�colour�text� visualise�
str | None� list[int]c C sB | j �|�}g }|D ]}|�d�}t| j||d�}|�|� q
|S )z`Encodes a string into tokens.
>>> enc.encode("hello world")
[388, 372]
�utf-8)r )r �findall�encode�
bpe_encoder �extend)r r r �words�tokens�word�
word_bytes�word_tokensr
r
r r s
zSimpleBytePairEncoding.encoder# �bytesc s d� � fdd�|D ��S )znDecodes a list of tokens into bytes.
>>> enc.decode_bytes([388, 372])
b'hello world'
� c 3 s � | ]}� j | V qd S �N�r �r r
�r r
r � <genexpr>- s � z6SimpleBytePairEncoding.decode_bytes.<locals>.<genexpr>)�join�r r# r
r, r �decode_bytes'