File manager - Edit - /opt/gsutil/third_party/charset_normalizer/tests/test_large_payload.py
Back
import pytest

from charset_normalizer import from_bytes
from charset_normalizer.constant import TOO_BIG_SEQUENCE


def test_large_payload_u8_sig_basic_entry():
    """Detect a payload of TOO_BIG_SEQUENCE '0' characters encoded as utf_8_sig.

    The best match must report encoding "utf_8" with the BOM flag set, expose
    the raw bytes at their original length, and already hold a decoded string.
    """
    data = ('0' * TOO_BIG_SEQUENCE).encode("utf_8_sig")

    matches = from_bytes(data)
    result = matches.best()

    assert result is not None, "Large U8 payload case detection completely failed"
    assert result.encoding == "utf_8", "Large U8 payload case detection wrongly detected!"
    assert result.bom is True, "SIG/BOM property should be True"
    assert len(result.raw) == len(data), "Large payload should remain untouched when accessed through .raw"
    # Inspects the private cache attribute directly on purpose.
    assert result._string is not None, "str should be decoded before direct access (sig available)"


def test_large_payload_ascii_basic_entry():
    """Detect a payload of TOO_BIG_SEQUENCE '0' characters encoded as utf_8.

    The best match must report encoding "ascii" without a BOM, expose the raw
    bytes at their original length, and not yet hold a decoded string.
    """
    data = ('0' * TOO_BIG_SEQUENCE).encode("utf_8")

    matches = from_bytes(data)
    result = matches.best()

    assert result is not None, "Large ASCII payload case detection completely failed"
    assert result.encoding == "ascii", "Large ASCII payload case detection wrongly detected!"
    assert result.bom is False, "SIG/BOM property should be False"
    assert len(result.raw) == len(data), "Large payload should remain untouched when accessed through .raw"
    # Inspects the private cache attribute directly on purpose.
    assert result._string is None, "str should not be decoded until direct access"


def test_misleading_large_sequence():
    """Detect utf_8 when a long ASCII run is followed by a short non-ASCII tail.

    The payload repeats "hello simple ascii " TOO_BIG_SEQUENCE times and then
    appends a short Chinese sentence; the best match must be 'utf_8'.
    """
    non_ascii_tail = '我没有埋怨,磋砣的只是一些时间。 磋砣的只是一些时间。'
    # Built in a single expression so the large intermediate string is not
    # kept alive by a local name while detection runs.
    content = (("hello simple ascii " * TOO_BIG_SEQUENCE) + non_ascii_tail).encode('utf_8')

    guesses = from_bytes(content)
    assert len(guesses) > 0

    top = guesses.best()
    assert top is not None
    assert top._string is not None, "str should be cached as only match"
    assert top.encoding == 'utf_8'
    assert str(top) is not None
| ver. 1.4 |
Github
|
.
| PHP 8.2.28 | Generation time: 0.02 |
proxy
|
phpinfo
|
Settings