# test_shlex.py -- unit tests for the standard-library shlex module
  1. import io
  2. import itertools
  3. import shlex
  4. import string
  5. import unittest
  6. from unittest import mock
# The original test data set was from shellwords, by Hartmut Goebel.
#
# Table of non-POSIX expectations.  Each line has the form
# "input|token|token|...|": the first field is the text to tokenize and the
# remaining fields are the tokens expected from a default shlex.shlex lexer
# (see ShlexTest.testCompat / ShlexTest.oldSplit).  ShlexTest.setUp splits
# every line on "|", drops the trailing empty field, and turns a literal
# backslash-n in the input field into a real newline.
data = r"""x|x|
foo bar|foo|bar|
foo bar|foo|bar|
foo bar |foo|bar|
foo bar bla fasel|foo|bar|bla|fasel|
x y z xxxx|x|y|z|xxxx|
\x bar|\|x|bar|
\ x bar|\|x|bar|
\ bar|\|bar|
foo \x bar|foo|\|x|bar|
foo \ x bar|foo|\|x|bar|
foo \ bar|foo|\|bar|
foo "bar" bla|foo|"bar"|bla|
"foo" "bar" "bla"|"foo"|"bar"|"bla"|
"foo" bar "bla"|"foo"|bar|"bla"|
"foo" bar bla|"foo"|bar|bla|
foo 'bar' bla|foo|'bar'|bla|
'foo' 'bar' 'bla'|'foo'|'bar'|'bla'|
'foo' bar 'bla'|'foo'|bar|'bla'|
'foo' bar bla|'foo'|bar|bla|
blurb foo"bar"bar"fasel" baz|blurb|foo"bar"bar"fasel"|baz|
blurb foo'bar'bar'fasel' baz|blurb|foo'bar'bar'fasel'|baz|
""|""|
''|''|
foo "" bar|foo|""|bar|
foo '' bar|foo|''|bar|
foo "" "" "" bar|foo|""|""|""|bar|
foo '' '' '' bar|foo|''|''|''|bar|
\""|\|""|
"\"|"\"|
"foo\ bar"|"foo\ bar"|
"foo\\ bar"|"foo\\ bar"|
"foo\\ bar\"|"foo\\ bar\"|
"foo\\" bar\""|"foo\\"|bar|\|""|
"foo\\ bar\" dfadf"|"foo\\ bar\"|dfadf"|
"foo\\\ bar\" dfadf"|"foo\\\ bar\"|dfadf"|
"foo\\\x bar\" dfadf"|"foo\\\x bar\"|dfadf"|
"foo\x bar\" dfadf"|"foo\x bar\"|dfadf"|
\''|\|''|
'foo\ bar'|'foo\ bar'|
'foo\\ bar'|'foo\\ bar'|
"foo\\\x bar\" df'a\ 'df'|"foo\\\x bar\"|df'a|\|'df'|
\"foo"|\|"foo"|
\"foo"\x|\|"foo"|\|x|
"foo\x"|"foo\x"|
"foo\ "|"foo\ "|
foo\ xx|foo|\|xx|
foo\ x\x|foo|\|x|\|x|
foo\ x\x\""|foo|\|x|\|x|\|""|
"foo\ x\x"|"foo\ x\x"|
"foo\ x\x\\"|"foo\ x\x\\"|
"foo\ x\x\\""foobar"|"foo\ x\x\\"|"foobar"|
"foo\ x\x\\"\''"foobar"|"foo\ x\x\\"|\|''|"foobar"|
"foo\ x\x\\"\'"fo'obar"|"foo\ x\x\\"|\|'"fo'|obar"|
"foo\ x\x\\"\'"fo'obar" 'don'\''t'|"foo\ x\x\\"|\|'"fo'|obar"|'don'|\|''|t'|
'foo\ bar'|'foo\ bar'|
'foo\\ bar'|'foo\\ bar'|
foo\ bar|foo|\|bar|
foo#bar\nbaz|foobaz|
:-) ;-)|:|-|)|;|-|)|
áéíóú|á|é|í|ó|ú|
"""
# Table of POSIX-mode expectations.  Each line has the form
# "input|token|token|...|": the first field is the text to split and the
# remaining fields are the tokens expected from
# shlex.split(input, comments=True) (see ShlexTest.testSplitPosix).
# ShlexTest.setUp splits every line on "|", drops the trailing empty field,
# and turns a literal backslash-n in the input field into a real newline.
posix_data = r"""x|x|
foo bar|foo|bar|
foo bar|foo|bar|
foo bar |foo|bar|
foo bar bla fasel|foo|bar|bla|fasel|
x y z xxxx|x|y|z|xxxx|
\x bar|x|bar|
\ x bar| x|bar|
\ bar| bar|
foo \x bar|foo|x|bar|
foo \ x bar|foo| x|bar|
foo \ bar|foo| bar|
foo "bar" bla|foo|bar|bla|
"foo" "bar" "bla"|foo|bar|bla|
"foo" bar "bla"|foo|bar|bla|
"foo" bar bla|foo|bar|bla|
foo 'bar' bla|foo|bar|bla|
'foo' 'bar' 'bla'|foo|bar|bla|
'foo' bar 'bla'|foo|bar|bla|
'foo' bar bla|foo|bar|bla|
blurb foo"bar"bar"fasel" baz|blurb|foobarbarfasel|baz|
blurb foo'bar'bar'fasel' baz|blurb|foobarbarfasel|baz|
""||
''||
foo "" bar|foo||bar|
foo '' bar|foo||bar|
foo "" "" "" bar|foo||||bar|
foo '' '' '' bar|foo||||bar|
\"|"|
"\""|"|
"foo\ bar"|foo\ bar|
"foo\\ bar"|foo\ bar|
"foo\\ bar\""|foo\ bar"|
"foo\\" bar\"|foo\|bar"|
"foo\\ bar\" dfadf"|foo\ bar" dfadf|
"foo\\\ bar\" dfadf"|foo\\ bar" dfadf|
"foo\\\x bar\" dfadf"|foo\\x bar" dfadf|
"foo\x bar\" dfadf"|foo\x bar" dfadf|
\'|'|
'foo\ bar'|foo\ bar|
'foo\\ bar'|foo\\ bar|
"foo\\\x bar\" df'a\ 'df"|foo\\x bar" df'a\ 'df|
\"foo|"foo|
\"foo\x|"foox|
"foo\x"|foo\x|
"foo\ "|foo\ |
foo\ xx|foo xx|
foo\ x\x|foo xx|
foo\ x\x\"|foo xx"|
"foo\ x\x"|foo\ x\x|
"foo\ x\x\\"|foo\ x\x\|
"foo\ x\x\\""foobar"|foo\ x\x\foobar|
"foo\ x\x\\"\'"foobar"|foo\ x\x\'foobar|
"foo\ x\x\\"\'"fo'obar"|foo\ x\x\'fo'obar|
"foo\ x\x\\"\'"fo'obar" 'don'\''t'|foo\ x\x\'fo'obar|don't|
"foo\ x\x\\"\'"fo'obar" 'don'\''t' \\|foo\ x\x\'fo'obar|don't|\|
'foo\ bar'|foo\ bar|
'foo\\ bar'|foo\\ bar|
foo\ bar|foo bar|
foo#bar\nbaz|foo|baz|
:-) ;-)|:-)|;-)|
áéíóú|áéíóú|
"""
  133. class ShlexTest(unittest.TestCase):
  134. def setUp(self):
  135. self.data = [x.split("|")[:-1]
  136. for x in data.splitlines()]
  137. self.posix_data = [x.split("|")[:-1]
  138. for x in posix_data.splitlines()]
  139. for item in self.data:
  140. item[0] = item[0].replace(r"\n", "\n")
  141. for item in self.posix_data:
  142. item[0] = item[0].replace(r"\n", "\n")
  143. def splitTest(self, data, comments):
  144. for i in range(len(data)):
  145. l = shlex.split(data[i][0], comments=comments)
  146. self.assertEqual(l, data[i][1:],
  147. "%s: %s != %s" %
  148. (data[i][0], l, data[i][1:]))
  149. def oldSplit(self, s):
  150. ret = []
  151. lex = shlex.shlex(io.StringIO(s))
  152. tok = lex.get_token()
  153. while tok:
  154. ret.append(tok)
  155. tok = lex.get_token()
  156. return ret
  157. @mock.patch('sys.stdin', io.StringIO())
  158. def testSplitNoneDeprecation(self):
  159. with self.assertWarns(DeprecationWarning):
  160. shlex.split(None)
  161. def testSplitPosix(self):
  162. """Test data splitting with posix parser"""
  163. self.splitTest(self.posix_data, comments=True)
  164. def testCompat(self):
  165. """Test compatibility interface"""
  166. for i in range(len(self.data)):
  167. l = self.oldSplit(self.data[i][0])
  168. self.assertEqual(l, self.data[i][1:],
  169. "%s: %s != %s" %
  170. (self.data[i][0], l, self.data[i][1:]))
  171. def testSyntaxSplitAmpersandAndPipe(self):
  172. """Test handling of syntax splitting of &, |"""
  173. # Could take these forms: &&, &, |&, ;&, ;;&
  174. # of course, the same applies to | and ||
  175. # these should all parse to the same output
  176. for delimiter in ('&&', '&', '|&', ';&', ';;&',
  177. '||', '|', '&|', ';|', ';;|'):
  178. src = ['echo hi %s echo bye' % delimiter,
  179. 'echo hi%secho bye' % delimiter]
  180. ref = ['echo', 'hi', delimiter, 'echo', 'bye']
  181. for ss, ws in itertools.product(src, (False, True)):
  182. s = shlex.shlex(ss, punctuation_chars=True)
  183. s.whitespace_split = ws
  184. result = list(s)
  185. self.assertEqual(ref, result,
  186. "While splitting '%s' [ws=%s]" % (ss, ws))
  187. def testSyntaxSplitSemicolon(self):
  188. """Test handling of syntax splitting of ;"""
  189. # Could take these forms: ;, ;;, ;&, ;;&
  190. # these should all parse to the same output
  191. for delimiter in (';', ';;', ';&', ';;&'):
  192. src = ['echo hi %s echo bye' % delimiter,
  193. 'echo hi%s echo bye' % delimiter,
  194. 'echo hi%secho bye' % delimiter]
  195. ref = ['echo', 'hi', delimiter, 'echo', 'bye']
  196. for ss, ws in itertools.product(src, (False, True)):
  197. s = shlex.shlex(ss, punctuation_chars=True)
  198. s.whitespace_split = ws
  199. result = list(s)
  200. self.assertEqual(ref, result,
  201. "While splitting '%s' [ws=%s]" % (ss, ws))
  202. def testSyntaxSplitRedirect(self):
  203. """Test handling of syntax splitting of >"""
  204. # of course, the same applies to <, |
  205. # these should all parse to the same output
  206. for delimiter in ('<', '|'):
  207. src = ['echo hi %s out' % delimiter,
  208. 'echo hi%s out' % delimiter,
  209. 'echo hi%sout' % delimiter]
  210. ref = ['echo', 'hi', delimiter, 'out']
  211. for ss, ws in itertools.product(src, (False, True)):
  212. s = shlex.shlex(ss, punctuation_chars=True)
  213. result = list(s)
  214. self.assertEqual(ref, result,
  215. "While splitting '%s' [ws=%s]" % (ss, ws))
  216. def testSyntaxSplitParen(self):
  217. """Test handling of syntax splitting of ()"""
  218. # these should all parse to the same output
  219. src = ['( echo hi )',
  220. '(echo hi)']
  221. ref = ['(', 'echo', 'hi', ')']
  222. for ss, ws in itertools.product(src, (False, True)):
  223. s = shlex.shlex(ss, punctuation_chars=True)
  224. s.whitespace_split = ws
  225. result = list(s)
  226. self.assertEqual(ref, result,
  227. "While splitting '%s' [ws=%s]" % (ss, ws))
  228. def testSyntaxSplitCustom(self):
  229. """Test handling of syntax splitting with custom chars"""
  230. ss = "~/a&&b-c --color=auto||d *.py?"
  231. ref = ['~/a', '&', '&', 'b-c', '--color=auto', '||', 'd', '*.py?']
  232. s = shlex.shlex(ss, punctuation_chars="|")
  233. result = list(s)
  234. self.assertEqual(ref, result, "While splitting '%s' [ws=False]" % ss)
  235. ref = ['~/a&&b-c', '--color=auto', '||', 'd', '*.py?']
  236. s = shlex.shlex(ss, punctuation_chars="|")
  237. s.whitespace_split = True
  238. result = list(s)
  239. self.assertEqual(ref, result, "While splitting '%s' [ws=True]" % ss)
  240. def testTokenTypes(self):
  241. """Test that tokens are split with types as expected."""
  242. for source, expected in (
  243. ('a && b || c',
  244. [('a', 'a'), ('&&', 'c'), ('b', 'a'),
  245. ('||', 'c'), ('c', 'a')]),
  246. ):
  247. s = shlex.shlex(source, punctuation_chars=True)
  248. observed = []
  249. while True:
  250. t = s.get_token()
  251. if t == s.eof:
  252. break
  253. if t[0] in s.punctuation_chars:
  254. tt = 'c'
  255. else:
  256. tt = 'a'
  257. observed.append((t, tt))
  258. self.assertEqual(observed, expected)
  259. def testPunctuationInWordChars(self):
  260. """Test that any punctuation chars are removed from wordchars"""
  261. s = shlex.shlex('a_b__c', punctuation_chars='_')
  262. self.assertNotIn('_', s.wordchars)
  263. self.assertEqual(list(s), ['a', '_', 'b', '__', 'c'])
  264. def testPunctuationWithWhitespaceSplit(self):
  265. """Test that with whitespace_split, behaviour is as expected"""
  266. s = shlex.shlex('a && b || c', punctuation_chars='&')
  267. # whitespace_split is False, so splitting will be based on
  268. # punctuation_chars
  269. self.assertEqual(list(s), ['a', '&&', 'b', '|', '|', 'c'])
  270. s = shlex.shlex('a && b || c', punctuation_chars='&')
  271. s.whitespace_split = True
  272. # whitespace_split is True, so splitting will be based on
  273. # white space
  274. self.assertEqual(list(s), ['a', '&&', 'b', '||', 'c'])
  275. def testPunctuationWithPosix(self):
  276. """Test that punctuation_chars and posix behave correctly together."""
  277. # see Issue #29132
  278. s = shlex.shlex('f >"abc"', posix=True, punctuation_chars=True)
  279. self.assertEqual(list(s), ['f', '>', 'abc'])
  280. s = shlex.shlex('f >\\"abc\\"', posix=True, punctuation_chars=True)
  281. self.assertEqual(list(s), ['f', '>', '"abc"'])
  282. def testEmptyStringHandling(self):
  283. """Test that parsing of empty strings is correctly handled."""
  284. # see Issue #21999
  285. expected = ['', ')', 'abc']
  286. for punct in (False, True):
  287. s = shlex.shlex("'')abc", posix=True, punctuation_chars=punct)
  288. slist = list(s)
  289. self.assertEqual(slist, expected)
  290. expected = ["''", ')', 'abc']
  291. s = shlex.shlex("'')abc", punctuation_chars=True)
  292. self.assertEqual(list(s), expected)
  293. def testUnicodeHandling(self):
  294. """Test punctuation_chars and whitespace_split handle unicode."""
  295. ss = "\u2119\u01b4\u2602\u210c\u00f8\u1f24"
  296. # Should be parsed as one complete token (whitespace_split=True).
  297. ref = ['\u2119\u01b4\u2602\u210c\u00f8\u1f24']
  298. s = shlex.shlex(ss, punctuation_chars=True)
  299. s.whitespace_split = True
  300. self.assertEqual(list(s), ref)
  301. # Without whitespace_split, uses wordchars and splits on all.
  302. ref = ['\u2119', '\u01b4', '\u2602', '\u210c', '\u00f8', '\u1f24']
  303. s = shlex.shlex(ss, punctuation_chars=True)
  304. self.assertEqual(list(s), ref)
  305. def testQuote(self):
  306. safeunquoted = string.ascii_letters + string.digits + '@%_-+=:,./'
  307. unicode_sample = '\xe9\xe0\xdf' # e + acute accent, a + grave, sharp s
  308. unsafe = '"`$\\!' + unicode_sample
  309. self.assertEqual(shlex.quote(''), "''")
  310. self.assertEqual(shlex.quote(safeunquoted), safeunquoted)
  311. self.assertEqual(shlex.quote('test file name'), "'test file name'")
  312. for u in unsafe:
  313. self.assertEqual(shlex.quote('test%sname' % u),
  314. "'test%sname'" % u)
  315. for u in unsafe:
  316. self.assertEqual(shlex.quote("test%s'name'" % u),
  317. "'test%s'\"'\"'name'\"'\"''" % u)
  318. def testJoin(self):
  319. for split_command, command in [
  320. (['a ', 'b'], "'a ' b"),
  321. (['a', ' b'], "a ' b'"),
  322. (['a', ' ', 'b'], "a ' ' b"),
  323. (['"a', 'b"'], '\'"a\' \'b"\''),
  324. ]:
  325. with self.subTest(command=command):
  326. joined = shlex.join(split_command)
  327. self.assertEqual(joined, command)
  328. def testJoinRoundtrip(self):
  329. all_data = self.data + self.posix_data
  330. for command, *split_command in all_data:
  331. with self.subTest(command=command):
  332. joined = shlex.join(split_command)
  333. resplit = shlex.split(joined)
  334. self.assertEqual(split_command, resplit)
  335. def testPunctuationCharsReadOnly(self):
  336. punctuation_chars = "/|$%^"
  337. shlex_instance = shlex.shlex(punctuation_chars=punctuation_chars)
  338. self.assertEqual(shlex_instance.punctuation_chars, punctuation_chars)
  339. with self.assertRaises(AttributeError):
  340. shlex_instance.punctuation_chars = False
  341. # Allow this test to be used with old shlex.py
  342. if not getattr(shlex, "split", None):
  343. for methname in dir(ShlexTest):
  344. if methname.startswith("test") and methname != "testCompat":
  345. delattr(ShlexTest, methname)
  346. if __name__ == "__main__":
  347. unittest.main()