test_binascii.py 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454
  1. """Test the binascii C module."""
  2. import unittest
  3. import binascii
  4. import array
  5. import re
  6. from test.support import bigmemtest, _1G, _4G, warnings_helper
  7. # Note: "*_hex" functions are aliases for "(un)hexlify"
  8. b2a_functions = ['b2a_base64', 'b2a_hex', 'b2a_qp', 'b2a_uu',
  9. 'hexlify']
  10. a2b_functions = ['a2b_base64', 'a2b_hex', 'a2b_qp', 'a2b_uu',
  11. 'unhexlify']
  12. all_functions = a2b_functions + b2a_functions + ['crc32', 'crc_hqx']
  13. class BinASCIITest(unittest.TestCase):
  14. type2test = bytes
  15. # Create binary test data
  16. rawdata = b"The quick brown fox jumps over the lazy dog.\r\n"
  17. # Be slow so we don't depend on other modules
  18. rawdata += bytes(range(256))
  19. rawdata += b"\r\nHello world.\n"
  20. def setUp(self):
  21. self.data = self.type2test(self.rawdata)
  22. def test_exceptions(self):
  23. # Check module exceptions
  24. self.assertTrue(issubclass(binascii.Error, Exception))
  25. self.assertTrue(issubclass(binascii.Incomplete, Exception))
  26. def test_functions(self):
  27. # Check presence of all functions
  28. for name in all_functions:
  29. self.assertTrue(hasattr(getattr(binascii, name), '__call__'))
  30. self.assertRaises(TypeError, getattr(binascii, name))
  31. def test_returned_value(self):
  32. # Limit to the minimum of all limits (b2a_uu)
  33. MAX_ALL = 45
  34. raw = self.rawdata[:MAX_ALL]
  35. for fa, fb in zip(a2b_functions, b2a_functions):
  36. a2b = getattr(binascii, fa)
  37. b2a = getattr(binascii, fb)
  38. try:
  39. a = b2a(self.type2test(raw))
  40. res = a2b(self.type2test(a))
  41. except Exception as err:
  42. self.fail("{}/{} conversion raises {!r}".format(fb, fa, err))
  43. self.assertEqual(res, raw, "{}/{} conversion: "
  44. "{!r} != {!r}".format(fb, fa, res, raw))
  45. self.assertIsInstance(res, bytes)
  46. self.assertIsInstance(a, bytes)
  47. self.assertLess(max(a), 128)
  48. self.assertIsInstance(binascii.crc_hqx(raw, 0), int)
  49. self.assertIsInstance(binascii.crc32(raw), int)
  50. def test_base64valid(self):
  51. # Test base64 with valid data
  52. MAX_BASE64 = 57
  53. lines = []
  54. for i in range(0, len(self.rawdata), MAX_BASE64):
  55. b = self.type2test(self.rawdata[i:i+MAX_BASE64])
  56. a = binascii.b2a_base64(b)
  57. lines.append(a)
  58. res = bytes()
  59. for line in lines:
  60. a = self.type2test(line)
  61. b = binascii.a2b_base64(a)
  62. res += b
  63. self.assertEqual(res, self.rawdata)
  64. def test_base64invalid(self):
  65. # Test base64 with random invalid characters sprinkled throughout
  66. # (This requires a new version of binascii.)
  67. MAX_BASE64 = 57
  68. lines = []
  69. for i in range(0, len(self.data), MAX_BASE64):
  70. b = self.type2test(self.rawdata[i:i+MAX_BASE64])
  71. a = binascii.b2a_base64(b)
  72. lines.append(a)
  73. fillers = bytearray()
  74. valid = b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789+/"
  75. for i in range(256):
  76. if i not in valid:
  77. fillers.append(i)
  78. def addnoise(line):
  79. noise = fillers
  80. ratio = len(line) // len(noise)
  81. res = bytearray()
  82. while line and noise:
  83. if len(line) // len(noise) > ratio:
  84. c, line = line[0], line[1:]
  85. else:
  86. c, noise = noise[0], noise[1:]
  87. res.append(c)
  88. return res + noise + line
  89. res = bytearray()
  90. for line in map(addnoise, lines):
  91. a = self.type2test(line)
  92. b = binascii.a2b_base64(a)
  93. res += b
  94. self.assertEqual(res, self.rawdata)
  95. # Test base64 with just invalid characters, which should return
  96. # empty strings. TBD: shouldn't it raise an exception instead ?
  97. self.assertEqual(binascii.a2b_base64(self.type2test(fillers)), b'')
  98. def test_base64_strict_mode(self):
  99. # Test base64 with strict mode on
  100. def _assertRegexTemplate(assert_regex: str, data: bytes, non_strict_mode_expected_result: bytes):
  101. with self.assertRaisesRegex(binascii.Error, assert_regex):
  102. binascii.a2b_base64(self.type2test(data), strict_mode=True)
  103. self.assertEqual(binascii.a2b_base64(self.type2test(data), strict_mode=False),
  104. non_strict_mode_expected_result)
  105. self.assertEqual(binascii.a2b_base64(self.type2test(data)),
  106. non_strict_mode_expected_result)
  107. def assertExcessData(data, non_strict_mode_expected_result: bytes):
  108. _assertRegexTemplate(r'(?i)Excess data', data, non_strict_mode_expected_result)
  109. def assertNonBase64Data(data, non_strict_mode_expected_result: bytes):
  110. _assertRegexTemplate(r'(?i)Only base64 data', data, non_strict_mode_expected_result)
  111. def assertLeadingPadding(data, non_strict_mode_expected_result: bytes):
  112. _assertRegexTemplate(r'(?i)Leading padding', data, non_strict_mode_expected_result)
  113. def assertDiscontinuousPadding(data, non_strict_mode_expected_result: bytes):
  114. _assertRegexTemplate(r'(?i)Discontinuous padding', data, non_strict_mode_expected_result)
  115. # Test excess data exceptions
  116. assertExcessData(b'ab==a', b'i')
  117. assertExcessData(b'ab===', b'i')
  118. assertExcessData(b'ab==:', b'i')
  119. assertExcessData(b'abc=a', b'i\xb7')
  120. assertExcessData(b'abc=:', b'i\xb7')
  121. assertExcessData(b'ab==\n', b'i')
  122. # Test non-base64 data exceptions
  123. assertNonBase64Data(b'\nab==', b'i')
  124. assertNonBase64Data(b'ab:(){:|:&};:==', b'i')
  125. assertNonBase64Data(b'a\nb==', b'i')
  126. assertNonBase64Data(b'a\x00b==', b'i')
  127. # Test malformed padding
  128. assertLeadingPadding(b'=', b'')
  129. assertLeadingPadding(b'==', b'')
  130. assertLeadingPadding(b'===', b'')
  131. assertDiscontinuousPadding(b'ab=c=', b'i\xb7')
  132. assertDiscontinuousPadding(b'ab=ab==', b'i\xb6\x9b')
  133. def test_base64errors(self):
  134. # Test base64 with invalid padding
  135. def assertIncorrectPadding(data):
  136. with self.assertRaisesRegex(binascii.Error, r'(?i)Incorrect padding'):
  137. binascii.a2b_base64(self.type2test(data))
  138. assertIncorrectPadding(b'ab')
  139. assertIncorrectPadding(b'ab=')
  140. assertIncorrectPadding(b'abc')
  141. assertIncorrectPadding(b'abcdef')
  142. assertIncorrectPadding(b'abcdef=')
  143. assertIncorrectPadding(b'abcdefg')
  144. assertIncorrectPadding(b'a=b=')
  145. assertIncorrectPadding(b'a\nb=')
  146. # Test base64 with invalid number of valid characters (1 mod 4)
  147. def assertInvalidLength(data):
  148. n_data_chars = len(re.sub(br'[^A-Za-z0-9/+]', br'', data))
  149. expected_errmsg_re = \
  150. r'(?i)Invalid.+number of data characters.+' + str(n_data_chars)
  151. with self.assertRaisesRegex(binascii.Error, expected_errmsg_re):
  152. binascii.a2b_base64(self.type2test(data))
  153. assertInvalidLength(b'a')
  154. assertInvalidLength(b'a=')
  155. assertInvalidLength(b'a==')
  156. assertInvalidLength(b'a===')
  157. assertInvalidLength(b'a' * 5)
  158. assertInvalidLength(b'a' * (4 * 87 + 1))
  159. assertInvalidLength(b'A\tB\nC ??DE') # only 5 valid characters
  160. def test_uu(self):
  161. MAX_UU = 45
  162. for backtick in (True, False):
  163. lines = []
  164. for i in range(0, len(self.data), MAX_UU):
  165. b = self.type2test(self.rawdata[i:i+MAX_UU])
  166. a = binascii.b2a_uu(b, backtick=backtick)
  167. lines.append(a)
  168. res = bytes()
  169. for line in lines:
  170. a = self.type2test(line)
  171. b = binascii.a2b_uu(a)
  172. res += b
  173. self.assertEqual(res, self.rawdata)
  174. self.assertEqual(binascii.a2b_uu(b"\x7f"), b"\x00"*31)
  175. self.assertEqual(binascii.a2b_uu(b"\x80"), b"\x00"*32)
  176. self.assertEqual(binascii.a2b_uu(b"\xff"), b"\x00"*31)
  177. self.assertRaises(binascii.Error, binascii.a2b_uu, b"\xff\x00")
  178. self.assertRaises(binascii.Error, binascii.a2b_uu, b"!!!!")
  179. self.assertRaises(binascii.Error, binascii.b2a_uu, 46*b"!")
  180. # Issue #7701 (crash on a pydebug build)
  181. self.assertEqual(binascii.b2a_uu(b'x'), b'!> \n')
  182. self.assertEqual(binascii.b2a_uu(b''), b' \n')
  183. self.assertEqual(binascii.b2a_uu(b'', backtick=True), b'`\n')
  184. self.assertEqual(binascii.a2b_uu(b' \n'), b'')
  185. self.assertEqual(binascii.a2b_uu(b'`\n'), b'')
  186. self.assertEqual(binascii.b2a_uu(b'\x00Cat'), b'$ $-A= \n')
  187. self.assertEqual(binascii.b2a_uu(b'\x00Cat', backtick=True),
  188. b'$`$-A=```\n')
  189. self.assertEqual(binascii.a2b_uu(b'$`$-A=```\n'),
  190. binascii.a2b_uu(b'$ $-A= \n'))
  191. with self.assertRaises(TypeError):
  192. binascii.b2a_uu(b"", True)
  193. def test_crc_hqx(self):
  194. crc = binascii.crc_hqx(self.type2test(b"Test the CRC-32 of"), 0)
  195. crc = binascii.crc_hqx(self.type2test(b" this string."), crc)
  196. self.assertEqual(crc, 14290)
  197. self.assertRaises(TypeError, binascii.crc_hqx)
  198. self.assertRaises(TypeError, binascii.crc_hqx, self.type2test(b''))
  199. for crc in 0, 1, 0x1234, 0x12345, 0x12345678, -1:
  200. self.assertEqual(binascii.crc_hqx(self.type2test(b''), crc),
  201. crc & 0xffff)
  202. def test_crc32(self):
  203. crc = binascii.crc32(self.type2test(b"Test the CRC-32 of"))
  204. crc = binascii.crc32(self.type2test(b" this string."), crc)
  205. self.assertEqual(crc, 1571220330)
  206. self.assertRaises(TypeError, binascii.crc32)
  207. def test_hex(self):
  208. # test hexlification
  209. s = b'{s\005\000\000\000worldi\002\000\000\000s\005\000\000\000helloi\001\000\000\0000'
  210. t = binascii.b2a_hex(self.type2test(s))
  211. u = binascii.a2b_hex(self.type2test(t))
  212. self.assertEqual(s, u)
  213. self.assertRaises(binascii.Error, binascii.a2b_hex, t[:-1])
  214. self.assertRaises(binascii.Error, binascii.a2b_hex, t[:-1] + b'q')
  215. self.assertRaises(binascii.Error, binascii.a2b_hex, bytes([255, 255]))
  216. self.assertRaises(binascii.Error, binascii.a2b_hex, b'0G')
  217. self.assertRaises(binascii.Error, binascii.a2b_hex, b'0g')
  218. self.assertRaises(binascii.Error, binascii.a2b_hex, b'G0')
  219. self.assertRaises(binascii.Error, binascii.a2b_hex, b'g0')
  220. # Confirm that b2a_hex == hexlify and a2b_hex == unhexlify
  221. self.assertEqual(binascii.hexlify(self.type2test(s)), t)
  222. self.assertEqual(binascii.unhexlify(self.type2test(t)), u)
  223. def test_hex_separator(self):
  224. """Test that hexlify and b2a_hex are binary versions of bytes.hex."""
  225. # Logic of separators is tested in test_bytes.py. This checks that
  226. # arg parsing works and exercises the direct to bytes object code
  227. # path within pystrhex.c.
  228. s = b'{s\005\000\000\000worldi\002\000\000\000s\005\000\000\000helloi\001\000\000\0000'
  229. self.assertEqual(binascii.hexlify(self.type2test(s)), s.hex().encode('ascii'))
  230. expected8 = s.hex('.', 8).encode('ascii')
  231. self.assertEqual(binascii.hexlify(self.type2test(s), '.', 8), expected8)
  232. expected1 = s.hex(':').encode('ascii')
  233. self.assertEqual(binascii.b2a_hex(self.type2test(s), ':'), expected1)
  234. def test_qp(self):
  235. type2test = self.type2test
  236. a2b_qp = binascii.a2b_qp
  237. b2a_qp = binascii.b2a_qp
  238. a2b_qp(data=b"", header=False) # Keyword arguments allowed
  239. # A test for SF bug 534347 (segfaults without the proper fix)
  240. try:
  241. a2b_qp(b"", **{1:1})
  242. except TypeError:
  243. pass
  244. else:
  245. self.fail("binascii.a2b_qp(**{1:1}) didn't raise TypeError")
  246. self.assertEqual(a2b_qp(type2test(b"=")), b"")
  247. self.assertEqual(a2b_qp(type2test(b"= ")), b"= ")
  248. self.assertEqual(a2b_qp(type2test(b"==")), b"=")
  249. self.assertEqual(a2b_qp(type2test(b"=\nAB")), b"AB")
  250. self.assertEqual(a2b_qp(type2test(b"=\r\nAB")), b"AB")
  251. self.assertEqual(a2b_qp(type2test(b"=\rAB")), b"") # ?
  252. self.assertEqual(a2b_qp(type2test(b"=\rAB\nCD")), b"CD") # ?
  253. self.assertEqual(a2b_qp(type2test(b"=AB")), b"\xab")
  254. self.assertEqual(a2b_qp(type2test(b"=ab")), b"\xab")
  255. self.assertEqual(a2b_qp(type2test(b"=AX")), b"=AX")
  256. self.assertEqual(a2b_qp(type2test(b"=XA")), b"=XA")
  257. self.assertEqual(a2b_qp(type2test(b"=AB")[:-1]), b"=A")
  258. self.assertEqual(a2b_qp(type2test(b'_')), b'_')
  259. self.assertEqual(a2b_qp(type2test(b'_'), header=True), b' ')
  260. self.assertRaises(TypeError, b2a_qp, foo="bar")
  261. self.assertEqual(a2b_qp(type2test(b"=00\r\n=00")), b"\x00\r\n\x00")
  262. self.assertEqual(b2a_qp(type2test(b"\xff\r\n\xff\n\xff")),
  263. b"=FF\r\n=FF\r\n=FF")
  264. self.assertEqual(b2a_qp(type2test(b"0"*75+b"\xff\r\n\xff\r\n\xff")),
  265. b"0"*75+b"=\r\n=FF\r\n=FF\r\n=FF")
  266. self.assertEqual(b2a_qp(type2test(b'\x7f')), b'=7F')
  267. self.assertEqual(b2a_qp(type2test(b'=')), b'=3D')
  268. self.assertEqual(b2a_qp(type2test(b'_')), b'_')
  269. self.assertEqual(b2a_qp(type2test(b'_'), header=True), b'=5F')
  270. self.assertEqual(b2a_qp(type2test(b'x y'), header=True), b'x_y')
  271. self.assertEqual(b2a_qp(type2test(b'x '), header=True), b'x=20')
  272. self.assertEqual(b2a_qp(type2test(b'x y'), header=True, quotetabs=True),
  273. b'x=20y')
  274. self.assertEqual(b2a_qp(type2test(b'x\ty'), header=True), b'x\ty')
  275. self.assertEqual(b2a_qp(type2test(b' ')), b'=20')
  276. self.assertEqual(b2a_qp(type2test(b'\t')), b'=09')
  277. self.assertEqual(b2a_qp(type2test(b' x')), b' x')
  278. self.assertEqual(b2a_qp(type2test(b'\tx')), b'\tx')
  279. self.assertEqual(b2a_qp(type2test(b' x')[:-1]), b'=20')
  280. self.assertEqual(b2a_qp(type2test(b'\tx')[:-1]), b'=09')
  281. self.assertEqual(b2a_qp(type2test(b'\0')), b'=00')
  282. self.assertEqual(b2a_qp(type2test(b'\0\n')), b'=00\n')
  283. self.assertEqual(b2a_qp(type2test(b'\0\n'), quotetabs=True), b'=00\n')
  284. self.assertEqual(b2a_qp(type2test(b'x y\tz')), b'x y\tz')
  285. self.assertEqual(b2a_qp(type2test(b'x y\tz'), quotetabs=True),
  286. b'x=20y=09z')
  287. self.assertEqual(b2a_qp(type2test(b'x y\tz'), istext=False),
  288. b'x y\tz')
  289. self.assertEqual(b2a_qp(type2test(b'x \ny\t\n')),
  290. b'x=20\ny=09\n')
  291. self.assertEqual(b2a_qp(type2test(b'x \ny\t\n'), quotetabs=True),
  292. b'x=20\ny=09\n')
  293. self.assertEqual(b2a_qp(type2test(b'x \ny\t\n'), istext=False),
  294. b'x =0Ay\t=0A')
  295. self.assertEqual(b2a_qp(type2test(b'x \ry\t\r')),
  296. b'x \ry\t\r')
  297. self.assertEqual(b2a_qp(type2test(b'x \ry\t\r'), quotetabs=True),
  298. b'x=20\ry=09\r')
  299. self.assertEqual(b2a_qp(type2test(b'x \ry\t\r'), istext=False),
  300. b'x =0Dy\t=0D')
  301. self.assertEqual(b2a_qp(type2test(b'x \r\ny\t\r\n')),
  302. b'x=20\r\ny=09\r\n')
  303. self.assertEqual(b2a_qp(type2test(b'x \r\ny\t\r\n'), quotetabs=True),
  304. b'x=20\r\ny=09\r\n')
  305. self.assertEqual(b2a_qp(type2test(b'x \r\ny\t\r\n'), istext=False),
  306. b'x =0D=0Ay\t=0D=0A')
  307. self.assertEqual(b2a_qp(type2test(b'x \r\n')[:-1]), b'x \r')
  308. self.assertEqual(b2a_qp(type2test(b'x\t\r\n')[:-1]), b'x\t\r')
  309. self.assertEqual(b2a_qp(type2test(b'x \r\n')[:-1], quotetabs=True),
  310. b'x=20\r')
  311. self.assertEqual(b2a_qp(type2test(b'x\t\r\n')[:-1], quotetabs=True),
  312. b'x=09\r')
  313. self.assertEqual(b2a_qp(type2test(b'x \r\n')[:-1], istext=False),
  314. b'x =0D')
  315. self.assertEqual(b2a_qp(type2test(b'x\t\r\n')[:-1], istext=False),
  316. b'x\t=0D')
  317. self.assertEqual(b2a_qp(type2test(b'.')), b'=2E')
  318. self.assertEqual(b2a_qp(type2test(b'.\n')), b'=2E\n')
  319. self.assertEqual(b2a_qp(type2test(b'.\r')), b'=2E\r')
  320. self.assertEqual(b2a_qp(type2test(b'.\0')), b'=2E=00')
  321. self.assertEqual(b2a_qp(type2test(b'a.\n')), b'a.\n')
  322. self.assertEqual(b2a_qp(type2test(b'.a')[:-1]), b'=2E')
  323. def test_empty_string(self):
  324. # A test for SF bug #1022953. Make sure SystemError is not raised.
  325. empty = self.type2test(b'')
  326. for func in all_functions:
  327. if func == 'crc_hqx':
  328. # crc_hqx needs 2 arguments
  329. binascii.crc_hqx(empty, 0)
  330. continue
  331. f = getattr(binascii, func)
  332. try:
  333. f(empty)
  334. except Exception as err:
  335. self.fail("{}({!r}) raises {!r}".format(func, empty, err))
  336. def test_unicode_b2a(self):
  337. # Unicode strings are not accepted by b2a_* functions.
  338. for func in set(all_functions) - set(a2b_functions):
  339. try:
  340. self.assertRaises(TypeError, getattr(binascii, func), "test")
  341. except Exception as err:
  342. self.fail('{}("test") raises {!r}'.format(func, err))
  343. # crc_hqx needs 2 arguments
  344. self.assertRaises(TypeError, binascii.crc_hqx, "test", 0)
  345. def test_unicode_a2b(self):
  346. # Unicode strings are accepted by a2b_* functions.
  347. MAX_ALL = 45
  348. raw = self.rawdata[:MAX_ALL]
  349. for fa, fb in zip(a2b_functions, b2a_functions):
  350. a2b = getattr(binascii, fa)
  351. b2a = getattr(binascii, fb)
  352. try:
  353. a = b2a(self.type2test(raw))
  354. binary_res = a2b(a)
  355. a = a.decode('ascii')
  356. res = a2b(a)
  357. except Exception as err:
  358. self.fail("{}/{} conversion raises {!r}".format(fb, fa, err))
  359. self.assertEqual(res, raw, "{}/{} conversion: "
  360. "{!r} != {!r}".format(fb, fa, res, raw))
  361. self.assertEqual(res, binary_res)
  362. self.assertIsInstance(res, bytes)
  363. # non-ASCII string
  364. self.assertRaises(ValueError, a2b, "\x80")
  365. def test_b2a_base64_newline(self):
  366. # Issue #25357: test newline parameter
  367. b = self.type2test(b'hello')
  368. self.assertEqual(binascii.b2a_base64(b),
  369. b'aGVsbG8=\n')
  370. self.assertEqual(binascii.b2a_base64(b, newline=True),
  371. b'aGVsbG8=\n')
  372. self.assertEqual(binascii.b2a_base64(b, newline=False),
  373. b'aGVsbG8=')
  374. class ArrayBinASCIITest(BinASCIITest):
  375. def type2test(self, s):
  376. return array.array('B', list(s))
class BytearrayBinASCIITest(BinASCIITest):
    """Re-run the inherited tests with inputs wrapped in ``bytearray``."""

    # Replaces the base class's input wrapper.
    type2test = bytearray
class MemoryviewBinASCIITest(BinASCIITest):
    """Re-run the inherited tests with inputs wrapped in ``memoryview``."""

    # Replaces the base class's input wrapper.
    type2test = memoryview
  381. class ChecksumBigBufferTestCase(unittest.TestCase):
  382. """bpo-38256 - check that inputs >=4 GiB are handled correctly."""
  383. @bigmemtest(size=_4G + 4, memuse=1, dry_run=False)
  384. def test_big_buffer(self, size):
  385. data = b"nyan" * (_1G + 1)
  386. self.assertEqual(binascii.crc32(data), 1044521549)
# Run the test suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()