# test_codecencodings_kr.py 3.0 KB
#
# test_codecencodings_kr.py
#   Codec encoding tests for ROK encodings.
#
  5. from test import multibytecodec_support
  6. import unittest
  7. class Test_CP949(multibytecodec_support.TestBase, unittest.TestCase):
  8. encoding = 'cp949'
  9. tstring = multibytecodec_support.load_teststring('cp949')
  10. codectests = (
  11. # invalid bytes
  12. (b"abc\x80\x80\xc1\xc4", "strict", None),
  13. (b"abc\xc8", "strict", None),
  14. (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\uc894"),
  15. (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\uc894\ufffd"),
  16. (b"abc\x80\x80\xc1\xc4", "ignore", "abc\uc894"),
  17. )
  18. class Test_EUCKR(multibytecodec_support.TestBase, unittest.TestCase):
  19. encoding = 'euc_kr'
  20. tstring = multibytecodec_support.load_teststring('euc_kr')
  21. codectests = (
  22. # invalid bytes
  23. (b"abc\x80\x80\xc1\xc4", "strict", None),
  24. (b"abc\xc8", "strict", None),
  25. (b"abc\x80\x80\xc1\xc4", "replace", 'abc\ufffd\ufffd\uc894'),
  26. (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\uc894\ufffd"),
  27. (b"abc\x80\x80\xc1\xc4", "ignore", "abc\uc894"),
  28. # composed make-up sequence errors
  29. (b"\xa4\xd4", "strict", None),
  30. (b"\xa4\xd4\xa4", "strict", None),
  31. (b"\xa4\xd4\xa4\xb6", "strict", None),
  32. (b"\xa4\xd4\xa4\xb6\xa4", "strict", None),
  33. (b"\xa4\xd4\xa4\xb6\xa4\xd0", "strict", None),
  34. (b"\xa4\xd4\xa4\xb6\xa4\xd0\xa4", "strict", None),
  35. (b"\xa4\xd4\xa4\xb6\xa4\xd0\xa4\xd4", "strict", "\uc4d4"),
  36. (b"\xa4\xd4\xa4\xb6\xa4\xd0\xa4\xd4x", "strict", "\uc4d4x"),
  37. (b"a\xa4\xd4\xa4\xb6\xa4", "replace", 'a\ufffd'),
  38. (b"\xa4\xd4\xa3\xb6\xa4\xd0\xa4\xd4", "strict", None),
  39. (b"\xa4\xd4\xa4\xb6\xa3\xd0\xa4\xd4", "strict", None),
  40. (b"\xa4\xd4\xa4\xb6\xa4\xd0\xa3\xd4", "strict", None),
  41. (b"\xa4\xd4\xa4\xff\xa4\xd0\xa4\xd4", "replace", '\ufffd\u6e21\ufffd\u3160\ufffd'),
  42. (b"\xa4\xd4\xa4\xb6\xa4\xff\xa4\xd4", "replace", '\ufffd\u6e21\ub544\ufffd\ufffd'),
  43. (b"\xa4\xd4\xa4\xb6\xa4\xd0\xa4\xff", "replace", '\ufffd\u6e21\ub544\u572d\ufffd'),
  44. (b"\xa4\xd4\xff\xa4\xd4\xa4\xb6\xa4\xd0\xa4\xd4", "replace", '\ufffd\ufffd\ufffd\uc4d4'),
  45. (b"\xc1\xc4", "strict", "\uc894"),
  46. )
  47. class Test_JOHAB(multibytecodec_support.TestBase, unittest.TestCase):
  48. encoding = 'johab'
  49. tstring = multibytecodec_support.load_teststring('johab')
  50. codectests = (
  51. # invalid bytes
  52. (b"abc\x80\x80\xc1\xc4", "strict", None),
  53. (b"abc\xc8", "strict", None),
  54. (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\ucd27"),
  55. (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\ucd27\ufffd"),
  56. (b"abc\x80\x80\xc1\xc4", "ignore", "abc\ucd27"),
  57. (b"\xD8abc", "replace", "\uFFFDabc"),
  58. (b"\xD8\xFFabc", "replace", "\uFFFD\uFFFDabc"),
  59. (b"\x84bxy", "replace", "\uFFFDbxy"),
  60. (b"\x8CBxy", "replace", "\uFFFDBxy"),
  61. )
  62. if __name__ == "__main__":
  63. unittest.main()