HEX
Server: Apache/2.4.52 (Ubuntu)
System: Linux spn-python 5.15.0-89-generic #99-Ubuntu SMP Mon Oct 30 20:42:41 UTC 2023 x86_64
User: arjun (1000)
PHP: 8.1.2-1ubuntu2.20
Disabled: NONE
Upload Files
File: //usr/local/lib/python3.10/dist-packages/charset_normalizer/__pycache__/md.cpython-310.pyc
o

;��gDN�@sRddlmZddlmZddlmZddlmZmZm	Z	ddl
mZmZm
Z
mZmZmZmZmZmZmZmZmZmZmZmZmZmZGdd�d�ZGd	d
�d
e�ZGdd�de�ZGd
d�de�ZGdd�de�Z Gdd�de�Z!Gdd�de�Z"Gdd�de�Z#Gdd�de�Z$Gdd�de�Z%edd�d/d"d#��Z&ed$d�	&d0d1d,d-��Z'd.S)2�)�annotations)�	lru_cache)�	getLogger�)�COMMON_SAFE_ASCII_CHARACTERS�TRACE�UNICODE_SECONDARY_RANGE_KEYWORD)�is_accentuated�	is_arabic�is_arabic_isolated_form�is_case_variable�is_cjk�is_emoticon�	is_hangul�is_hiragana�is_katakana�is_latin�is_punctuation�is_separator�	is_symbol�is_thai�is_unprintable�
remove_accent�
unicode_rangec@s<eZdZdZddd�Zdd	d
�Zddd�Zeddd��ZdS)�MessDetectorPluginzy
    Base abstract class used for mess detection plugins.
    All detectors MUST extend and implement given methods.
    �	character�str�return�boolcC�t�)z@
        Determine if given character should be fed in.
        ��NotImplementedError��selfr�r$�@/usr/local/lib/python3.10/dist-packages/charset_normalizer/md.py�eligible&�zMessDetectorPlugin.eligible�NonecCr)z�
        The main routine to be executed upon character.
        Insert the logic in witch the text would be considered chaotic.
        r r"r$r$r%�feed,szMessDetectorPlugin.feedcCr)zB
        Permit to reset the plugin to the initial state.
        r �r#r$r$r%�reset3r'zMessDetectorPlugin.reset�floatcCr)z�
        Compute the chaos ratio based on what your feed() has seen.
        Must NOT be lower than 0.; No restriction gt 0.
        r r*r$r$r%�ratio9szMessDetectorPlugin.ratioN�rrrr�rrrr(�rr(�rr,)	�__name__�
__module__�__qualname__�__doc__r&r)r+�propertyr-r$r$r$r%r s


rc@�BeZdZddd�Zddd	�Zdd
d�Zddd
�Zeddd��ZdS)� TooManySymbolOrPunctuationPluginrr(cCs"d|_d|_d|_d|_d|_dS)NrF)�_punctuation_count�
_symbol_count�_character_count�_last_printable_char�_frenzy_symbol_in_wordr*r$r$r%�__init__Cs

z)TooManySymbolOrPunctuationPlugin.__init__rrrcC�|��S�N��isprintabler"r$r$r%r&K�z)TooManySymbolOrPunctuationPlugin.eligiblecCsp|jd7_||jkr3|tvr3t|�r|jd7_n|��dur3t|�r3t|�dur3|jd7_||_dS)NrF�)	r;r<rrr9�isdigitrrr:r"r$r$r%r)Ns
�
z%TooManySymbolOrPunctuationPlugin.feedcCsd|_d|_d|_dS�Nr)r9r;r:r*r$r$r%r+`�
z&TooManySymbolOrPunctuationPlugin.resetr,cCs0|jdkrdS|j|j|j}|dkr|SdS)Nr��333333�?)r;r9r:)r#�ratio_of_punctuationr$r$r%r-es

�z&TooManySymbolOrPunctuationPlugin.ratioNr0r.r/r1�	r2r3r4r>r&r)r+r6r-r$r$r$r%r8Bs



r8c@r7)�TooManyAccentuatedPluginrr(cC�d|_d|_dSrF�r;�_accentuated_countr*r$r$r%r>r�
z!TooManyAccentuatedPlugin.__init__rrrcCr?r@)�isalphar"r$r$r%r&vrCz!TooManyAccentuatedPlugin.eligiblecC�,|jd7_t|�r|jd7_dSdS�Nr)r;r	rOr"r$r$r%r)y��zTooManyAccentuatedPlugin.feedcCrMrFrNr*r$r$r%r+rPzTooManyAccentuatedPlugin.resetr,cCs*|jdkrdS|j|j}|dkr|SdS)N�rHgffffff�?rN)r#�ratio_of_accentuationr$r$r%r-�s
zTooManyAccentuatedPlugin.ratioNr0r.r/r1rKr$r$r$r%rLqs



rLc@r7)�UnprintablePluginrr(cCrMrF)�_unprintable_countr;r*r$r$r%r>�rPzUnprintablePlugin.__init__rrrcC�dS�NTr$r"r$r$r%r&��zUnprintablePlugin.eligiblecCs(t|�r|jd7_|jd7_dSrS)rrXr;r"r$r$r%r)�szUnprintablePlugin.feedcCs
d|_dSrF)rXr*r$r$r%r+�s
zUnprintablePlugin.resetr,cC�|jdkrdS|jd|jS)NrrHrU)r;rXr*r$r$r%r-��
zUnprintablePlugin.ratioNr0r.r/r1rKr$r$r$r%rW�s



rWc@r7)�SuspiciousDuplicateAccentPluginrr(cC�d|_d|_d|_dSrF��_successive_countr;�_last_latin_characterr*r$r$r%r>�s
z(SuspiciousDuplicateAccentPlugin.__init__rrrcCs|��ot|�Sr@)rQrr"r$r$r%r&�sz(SuspiciousDuplicateAccentPlugin.eligiblecCst|jd7_|jdur5t|�r5t|j�r5|��r%|j��r%|jd7_t|�t|j�kr5|jd7_||_dSrS)r;rbr	�isupperrarr"r$r$r%r)�s
��
z$SuspiciousDuplicateAccentPlugin.feedcCr_rFr`r*r$r$r%r+�rGz%SuspiciousDuplicateAccentPlugin.resetr,cCr\)NrrHrD)r;rar*r$r$r%r-�r]z%SuspiciousDuplicateAccentPlugin.ratioNr0r.r/r1rKr$r$r$r%r^�s



r^c@r7)�SuspiciousRangerr(cCr_rF)�"_suspicious_successive_range_countr;�_last_printable_seenr*r$r$r%r>�rGzSuspiciousRange.__init__rrrcCr?r@rAr"r$r$r%r&�rCzSuspiciousRange.eligiblecCsx|jd7_|��st|�s|tvrd|_dS|jdur"||_dSt|j�}t|�}t||�r7|jd7_||_dSrS)r;�isspacerrrfr� is_suspiciously_successive_rangere)r#r�unicode_range_a�unicode_range_br$r$r%r)�s ��



zSuspiciousRange.feedcCr_rF)r;rerfr*r$r$r%r+�rGzSuspiciousRange.resetr,cCs"|jdkrdS|jd|j}|S)N�
rHrD)r;re)r#�ratio_of_suspicious_range_usager$r$r%r-�s
�zSuspiciousRange.ratioNr0r.r/r1rKr$r$r$r%rd�s



rdc@r7)�SuperWeirdWordPluginrr(cCs@d|_d|_d|_d|_d|_d|_d|_d|_d|_d|_	dS)NrF�)
�_word_count�_bad_word_count�_foreign_long_count�_is_current_word_bad�_foreign_long_watchr;�_bad_character_count�_buffer�_buffer_accent_count�_buffer_glyph_countr*r$r$r%r>�s
zSuperWeirdWordPlugin.__init__rrrcCrYrZr$r"r$r$r%r&
r[zSuperWeirdWordPlugin.eligiblecCs�|��rc|j|7_t|�r|jd7_|jdurFt|�dus%t|�rFt|�durFt|�durFt|�durFt	|�durFt
|�durFd|_t|�sZt|�sZt|�sZt	|�sZt
|�ra|jd7_dS|jshdS|��sut
|�sut|��r$|j�r$|jd7_t|j�}|j|7_|dkr�|j|dkr�d|_n4t|jd�r�|jd��r�tdd�|jD��dur�|jd7_d|_n|jdkr�d|_|jd7_|d	kr�|jr�d
d�t|jtd|��D�}d}|r�t|�|d
kr�d}|s�|jd7_d|_|j�r|jd7_|jt|j�7_d|_d|_d|_d|_d|_dS|dv�rA|��du�rCt|��rEd|_|j|7_dSdSdSdS)NrFT�g�?���css�|]}|��VqdSr@�rc)�.0�_r$r$r%�	<genexpr>7s�z,SuperWeirdWordPlugin.feed.<locals>.<genexpr>�cSsg|]
\}}|��r|�qSr$rz)r{�c�ir$r$r%�
<listcomp>?s��z-SuperWeirdWordPlugin.feed.<locals>.<listcomp>rrIrn>r|�-�<�=�>�|�~)rQrur	rvrsrr
rrrrrwrgrrro�lenr;rrrc�allrq�zip�rangerprtrEr)r#r�
buffer_length�camel_case_dst�probable_camel_casedr$r$r%r)
s�
���������
��
�

��zSuperWeirdWordPlugin.feedcCs4d|_d|_d|_d|_d|_d|_d|_d|_dS)NrnFr)rurrrsrpror;rtrqr*r$r$r%r+^s
zSuperWeirdWordPlugin.resetr,cCs$|jdkr|jdkrdS|j|jS)N�
rrH)rorqrtr;r*r$r$r%r-hszSuperWeirdWordPlugin.ratioNr0r.r/r1rKr$r$r$r%rm�s



Q
rmc@sFeZdZdZddd�Zdd	d
�Zddd�Zdd
d�Zeddd��Z	dS)�CjkInvalidStopPluginu�
    GB(Chinese) based encoding often render the stop incorrectly when the content does not fit and
    can be easily detected. Searching for the overuse of '丅' and '丄'.
    rr(cCrMrF��_wrong_stop_count�_cjk_character_countr*r$r$r%r>vrPzCjkInvalidStopPlugin.__init__rrrcCrYrZr$r"r$r$r%r&zr[zCjkInvalidStopPlugin.eligiblecCs8|dvr
|jd7_dSt|�r|jd7_dSdS)N>�丄�丅r)r�r
r�r"r$r$r%r)}s�zCjkInvalidStopPlugin.feedcCrMrFr�r*r$r$r%r+�rPzCjkInvalidStopPlugin.resetr,cCs|jdkrdS|j|jS)N�rH)r�r�r*r$r$r%r-�s
zCjkInvalidStopPlugin.ratioNr0r.r/r1)
r2r3r4r5r>r&r)r+r6r-r$r$r$r%r�ps



r�c@r7)�ArchaicUpperLowerPluginrr(cCs.d|_d|_d|_d|_d|_d|_d|_dS)NFrT)�_buf�_character_count_since_last_sep�_successive_upper_lower_count�#_successive_upper_lower_count_finalr;�_last_alpha_seen�_current_ascii_onlyr*r$r$r%r>�s
z ArchaicUpperLowerPlugin.__init__rrrcCrYrZr$r"r$r$r%r&�r[z ArchaicUpperLowerPlugin.eligiblecCs$|��ot|�}|du}|rC|jdkrC|jdkr+|��dur+|jdur+|j|j7_d|_d|_d|_d|_|j	d7_	d|_dS|jdurQ|�
�durQd|_|jdur|��r_|j��sh|��r||j��r||jdurx|jd7_d|_nd|_nd|_|j	d7_	|jd7_||_dS)NFr�@rTrD)
rQrr�rEr�r�r�r�r�r;�isasciirc�islower)r#r�is_concerned�	chunk_sepr$r$r%r)�s@

�
��

zArchaicUpperLowerPlugin.feedcCs.d|_d|_d|_d|_d|_d|_d|_dS)NrFT)r;r�r�r�r�r�r�r*r$r$r%r+�s
zArchaicUpperLowerPlugin.resetr,cCs|jdkrdS|j|jS)NrrH)r;r�r*r$r$r%r-�s
zArchaicUpperLowerPlugin.ratioNr0r.r/r1rKr$r$r$r%r��s



*	r�c@sBeZdZddd�Zddd�Zdd
d�Zddd
�Zeddd��ZdS)�ArabicIsolatedFormPluginrr(cCrMrF�r;�_isolated_form_countr*r$r$r%r>�rPz!ArabicIsolatedFormPlugin.__init__cCrMrFr�r*r$r$r%r+�rPzArabicIsolatedFormPlugin.resetrrrcCst|�Sr@)r
r"r$r$r%r&�rCz!ArabicIsolatedFormPlugin.eligiblecCrRrS)r;rr�r"r$r$r%r)�rTzArabicIsolatedFormPlugin.feedr,cCs|jdkrdS|j|j}|S)NrUrHr�)r#�isolated_form_usager$r$r%r-�s
zArabicIsolatedFormPlugin.ratioNr0r.r/r1)	r2r3r4r>r+r&r)r6r-r$r$r$r%r��s



r��)�maxsizeri�
str | NonerjrrcCsv|dus|dur
dS||krdSd|vrd|vrdSd|vs"d|vr$dSd|vs,d|vr6d|vs4d|vr6dS|�d�|�d�}}|D]}|tvrJqC||vrQdSqC|dv|dv}}|s_|rid	|vsgd	|vridS|ro|rodSd
|vswd
|vr�d	|vsd	|vr�dS|dks�|dkr�dSd	|vs�d	|vs�|dvr�|dvr�d
|vs�d
|vr�dSd|vs�d|vr�dS|dks�|dkr�dSdS)za
    Determine if two Unicode range seen next to each other can be considered as suspicious.
    NTF�Latin�	Emoticons�	Combining� )�Hiragana�Katakana�CJK�HangulzBasic Latin)r�r��Punctuation�Forms)�splitr)rirj�keywords_range_a�keywords_range_b�el�range_a_jp_chars�range_b_jp_charsr$r$r%rh�sZ����rhi皙�����?F�decoded_sequencer�maximum_thresholdr,�debugc	CsRdd�t��D�}t|�d}d}|dkrd}n	|dkrd}nd	}t|d
t|��D]2\}}|D]}	|	�|�r<|	�|�q0|dkrG||dksM||dkr\tdd
�|D��}||kr\nq*|r�td�}
|
�	t
d|�d|�d|���t|�dkr�|
�	t
d|dd����|
�	t
d|dd����|D]}|
�	t
|j�d|j���q�t
|d�S)zw
    Compute a mess ratio given a decoded bytes sequence. The maximum threshold does stop the computation earlier.
    cSsg|]}|��qSr$r$)r{�md_classr$r$r%r�Is�zmess_ratio.<locals>.<listcomp>rrHi� r�r���
rcss�|]}|jVqdSr@)r-)r{�dtr$r$r%r}`s�zmess_ratio.<locals>.<genexpr>�charset_normalizerzIMess-detector extended-analysis start. intermediary_mean_mess_ratio_calc=z mean_mess_ratio=z maximum_threshold=r�zStarting with: Nz
Ending with: i�z: �)r�__subclasses__r�r�r�r&r)�sumr�logr�	__class__r-�round)r�r�r��	detectors�length�mean_mess_ratio�!intermediary_mean_mess_ratio_calcr�index�detector�loggerr�r$r$r%�
mess_ratioAsN�

������
r�N)rir�rjr�rr)r�F)r�rr�r,r�rrr,)(�
__future__r�	functoolsr�loggingr�constantrrr�utilsr	r
rrr
rrrrrrrrrrrrrr8rLrWr^rdrmr�r�r�rhr�r$r$r$r%�<module>s(L"/%1vLI�