#
# test_codecencodings_cn.py
#   Codec encoding tests for PRC encodings.
#
from test import multibytecodec_support
import unittest
  7. class Test_GB2312(multibytecodec_support.TestBase, unittest.TestCase):
  8. encoding = 'gb2312'
  9. tstring = multibytecodec_support.load_teststring('gb2312')
  10. codectests = (
  11. # invalid bytes
  12. (b"abc\x81\x81\xc1\xc4", "strict", None),
  13. (b"abc\xc8", "strict", None),
  14. (b"abc\x81\x81\xc1\xc4", "replace", "abc\ufffd\ufffd\u804a"),
  15. (b"abc\x81\x81\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\u804a\ufffd"),
  16. (b"abc\x81\x81\xc1\xc4", "ignore", "abc\u804a"),
  17. (b"\xc1\x64", "strict", None),
  18. )
  19. class Test_GBK(multibytecodec_support.TestBase, unittest.TestCase):
  20. encoding = 'gbk'
  21. tstring = multibytecodec_support.load_teststring('gbk')
  22. codectests = (
  23. # invalid bytes
  24. (b"abc\x80\x80\xc1\xc4", "strict", None),
  25. (b"abc\xc8", "strict", None),
  26. (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\u804a"),
  27. (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\u804a\ufffd"),
  28. (b"abc\x80\x80\xc1\xc4", "ignore", "abc\u804a"),
  29. (b"\x83\x34\x83\x31", "strict", None),
  30. ("\u30fb", "strict", None),
  31. )
  32. class Test_GB18030(multibytecodec_support.TestBase, unittest.TestCase):
  33. encoding = 'gb18030'
  34. tstring = multibytecodec_support.load_teststring('gb18030')
  35. codectests = (
  36. # invalid bytes
  37. (b"abc\x80\x80\xc1\xc4", "strict", None),
  38. (b"abc\xc8", "strict", None),
  39. (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\u804a"),
  40. (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\u804a\ufffd"),
  41. (b"abc\x80\x80\xc1\xc4", "ignore", "abc\u804a"),
  42. (b"abc\x84\x39\x84\x39\xc1\xc4", "replace", "abc\ufffd9\ufffd9\u804a"),
  43. ("\u30fb", "strict", b"\x819\xa79"),
  44. (b"abc\x84\x32\x80\x80def", "replace", 'abc\ufffd2\ufffd\ufffddef'),
  45. (b"abc\x81\x30\x81\x30def", "strict", 'abc\x80def'),
  46. (b"abc\x86\x30\x81\x30def", "replace", 'abc\ufffd0\ufffd0def'),
  47. # issue29990
  48. (b"\xff\x30\x81\x30", "strict", None),
  49. (b"\x81\x30\xff\x30", "strict", None),
  50. (b"abc\x81\x39\xff\x39\xc1\xc4", "replace", "abc\ufffd\x39\ufffd\x39\u804a"),
  51. (b"abc\xab\x36\xff\x30def", "replace", 'abc\ufffd\x36\ufffd\x30def'),
  52. (b"abc\xbf\x38\xff\x32\xc1\xc4", "ignore", "abc\x38\x32\u804a"),
  53. )
  54. has_iso10646 = True
  55. class Test_HZ(multibytecodec_support.TestBase, unittest.TestCase):
  56. encoding = 'hz'
  57. tstring = multibytecodec_support.load_teststring('hz')
  58. codectests = (
  59. # test '~\n' (3 lines)
  60. (b'This sentence is in ASCII.\n'
  61. b'The next sentence is in GB.~{<:Ky2;S{#,~}~\n'
  62. b'~{NpJ)l6HK!#~}Bye.\n',
  63. 'strict',
  64. 'This sentence is in ASCII.\n'
  65. 'The next sentence is in GB.'
  66. '\u5df1\u6240\u4e0d\u6b32\uff0c\u52ff\u65bd\u65bc\u4eba\u3002'
  67. 'Bye.\n'),
  68. # test '~\n' (4 lines)
  69. (b'This sentence is in ASCII.\n'
  70. b'The next sentence is in GB.~\n'
  71. b'~{<:Ky2;S{#,NpJ)l6HK!#~}~\n'
  72. b'Bye.\n',
  73. 'strict',
  74. 'This sentence is in ASCII.\n'
  75. 'The next sentence is in GB.'
  76. '\u5df1\u6240\u4e0d\u6b32\uff0c\u52ff\u65bd\u65bc\u4eba\u3002'
  77. 'Bye.\n'),
  78. # invalid bytes
  79. (b'ab~cd', 'replace', 'ab\uFFFDcd'),
  80. (b'ab\xffcd', 'replace', 'ab\uFFFDcd'),
  81. (b'ab~{\x81\x81\x41\x44~}cd', 'replace', 'ab\uFFFD\uFFFD\u804Acd'),
  82. (b'ab~{\x41\x44~}cd', 'replace', 'ab\u804Acd'),
  83. (b"ab~{\x79\x79\x41\x44~}cd", "replace", "ab\ufffd\ufffd\u804acd"),
  84. # issue 30003
  85. ('ab~cd', 'strict', b'ab~~cd'), # escape ~
  86. (b'~{Dc~~:C~}', 'strict', None), # ~~ only in ASCII mode
  87. (b'~{Dc~\n:C~}', 'strict', None), # ~\n only in ASCII mode
  88. )
  89. if __name__ == "__main__":
  90. unittest.main()