# test_glob.py: unit tests for the glob module.
  1. import glob
  2. import os
  3. import shutil
  4. import sys
  5. import unittest
  6. from test.support.os_helper import (TESTFN, skip_unless_symlink,
  7. can_symlink, create_empty_file, change_cwd)
  8. class GlobTests(unittest.TestCase):
  9. dir_fd = None
  10. def norm(self, *parts):
  11. return os.path.normpath(os.path.join(self.tempdir, *parts))
  12. def joins(self, *tuples):
  13. return [os.path.join(self.tempdir, *parts) for parts in tuples]
  14. def mktemp(self, *parts):
  15. filename = self.norm(*parts)
  16. base, file = os.path.split(filename)
  17. if not os.path.exists(base):
  18. os.makedirs(base)
  19. create_empty_file(filename)
  20. def setUp(self):
  21. self.tempdir = TESTFN + "_dir"
  22. self.mktemp('a', 'D')
  23. self.mktemp('aab', 'F')
  24. self.mktemp('.aa', 'G')
  25. self.mktemp('.bb', 'H')
  26. self.mktemp('.bb', '.J')
  27. self.mktemp('aaa', 'zzzF')
  28. self.mktemp('ZZZ')
  29. self.mktemp('EF')
  30. self.mktemp('a', 'bcd', 'EF')
  31. self.mktemp('a', 'bcd', 'efg', 'ha')
  32. if can_symlink():
  33. os.symlink(self.norm('broken'), self.norm('sym1'))
  34. os.symlink('broken', self.norm('sym2'))
  35. os.symlink(os.path.join('a', 'bcd'), self.norm('sym3'))
  36. if {os.open, os.stat} <= os.supports_dir_fd and os.scandir in os.supports_fd:
  37. self.dir_fd = os.open(self.tempdir, os.O_RDONLY | os.O_DIRECTORY)
  38. else:
  39. self.dir_fd = None
  40. def tearDown(self):
  41. if self.dir_fd is not None:
  42. os.close(self.dir_fd)
  43. shutil.rmtree(self.tempdir)
  44. def glob(self, *parts, **kwargs):
  45. if len(parts) == 1:
  46. pattern = parts[0]
  47. else:
  48. pattern = os.path.join(*parts)
  49. p = os.path.join(self.tempdir, pattern)
  50. res = glob.glob(p, **kwargs)
  51. res2 = glob.iglob(p, **kwargs)
  52. self.assertCountEqual(glob.iglob(p, **kwargs), res)
  53. bres = [os.fsencode(x) for x in res]
  54. self.assertCountEqual(glob.glob(os.fsencode(p), **kwargs), bres)
  55. self.assertCountEqual(glob.iglob(os.fsencode(p), **kwargs), bres)
  56. with change_cwd(self.tempdir):
  57. res2 = glob.glob(pattern, **kwargs)
  58. for x in res2:
  59. self.assertFalse(os.path.isabs(x), x)
  60. if pattern == '**' or pattern == '**' + os.sep:
  61. expected = res[1:]
  62. else:
  63. expected = res
  64. self.assertCountEqual([os.path.join(self.tempdir, x) for x in res2],
  65. expected)
  66. self.assertCountEqual(glob.iglob(pattern, **kwargs), res2)
  67. bpattern = os.fsencode(pattern)
  68. bres2 = [os.fsencode(x) for x in res2]
  69. self.assertCountEqual(glob.glob(bpattern, **kwargs), bres2)
  70. self.assertCountEqual(glob.iglob(bpattern, **kwargs), bres2)
  71. self.assertCountEqual(glob.glob(pattern, root_dir=self.tempdir, **kwargs), res2)
  72. self.assertCountEqual(glob.iglob(pattern, root_dir=self.tempdir, **kwargs), res2)
  73. btempdir = os.fsencode(self.tempdir)
  74. self.assertCountEqual(
  75. glob.glob(bpattern, root_dir=btempdir, **kwargs), bres2)
  76. self.assertCountEqual(
  77. glob.iglob(bpattern, root_dir=btempdir, **kwargs), bres2)
  78. if self.dir_fd is not None:
  79. self.assertCountEqual(
  80. glob.glob(pattern, dir_fd=self.dir_fd, **kwargs), res2)
  81. self.assertCountEqual(
  82. glob.iglob(pattern, dir_fd=self.dir_fd, **kwargs), res2)
  83. self.assertCountEqual(
  84. glob.glob(bpattern, dir_fd=self.dir_fd, **kwargs), bres2)
  85. self.assertCountEqual(
  86. glob.iglob(bpattern, dir_fd=self.dir_fd, **kwargs), bres2)
  87. return res
  88. def assertSequencesEqual_noorder(self, l1, l2):
  89. l1 = list(l1)
  90. l2 = list(l2)
  91. self.assertEqual(set(l1), set(l2))
  92. self.assertEqual(sorted(l1), sorted(l2))
  93. def test_glob_literal(self):
  94. eq = self.assertSequencesEqual_noorder
  95. eq(self.glob('a'), [self.norm('a')])
  96. eq(self.glob('a', 'D'), [self.norm('a', 'D')])
  97. eq(self.glob('aab'), [self.norm('aab')])
  98. eq(self.glob('zymurgy'), [])
  99. res = glob.glob('*')
  100. self.assertEqual({type(r) for r in res}, {str})
  101. res = glob.glob(os.path.join(os.curdir, '*'))
  102. self.assertEqual({type(r) for r in res}, {str})
  103. res = glob.glob(b'*')
  104. self.assertEqual({type(r) for r in res}, {bytes})
  105. res = glob.glob(os.path.join(os.fsencode(os.curdir), b'*'))
  106. self.assertEqual({type(r) for r in res}, {bytes})
  107. def test_glob_empty_pattern(self):
  108. self.assertEqual(glob.glob(''), [])
  109. self.assertEqual(glob.glob(b''), [])
  110. self.assertEqual(glob.glob('', root_dir=self.tempdir), [])
  111. self.assertEqual(glob.glob(b'', root_dir=os.fsencode(self.tempdir)), [])
  112. self.assertEqual(glob.glob('', dir_fd=self.dir_fd), [])
  113. self.assertEqual(glob.glob(b'', dir_fd=self.dir_fd), [])
  114. def test_glob_one_directory(self):
  115. eq = self.assertSequencesEqual_noorder
  116. eq(self.glob('a*'), map(self.norm, ['a', 'aab', 'aaa']))
  117. eq(self.glob('*a'), map(self.norm, ['a', 'aaa']))
  118. eq(self.glob('.*'), map(self.norm, ['.aa', '.bb']))
  119. eq(self.glob('?aa'), map(self.norm, ['aaa']))
  120. eq(self.glob('aa?'), map(self.norm, ['aaa', 'aab']))
  121. eq(self.glob('aa[ab]'), map(self.norm, ['aaa', 'aab']))
  122. eq(self.glob('*q'), [])
  123. def test_glob_nested_directory(self):
  124. eq = self.assertSequencesEqual_noorder
  125. if os.path.normcase("abCD") == "abCD":
  126. # case-sensitive filesystem
  127. eq(self.glob('a', 'bcd', 'E*'), [self.norm('a', 'bcd', 'EF')])
  128. else:
  129. # case insensitive filesystem
  130. eq(self.glob('a', 'bcd', 'E*'), [self.norm('a', 'bcd', 'EF'),
  131. self.norm('a', 'bcd', 'efg')])
  132. eq(self.glob('a', 'bcd', '*g'), [self.norm('a', 'bcd', 'efg')])
  133. def test_glob_directory_names(self):
  134. eq = self.assertSequencesEqual_noorder
  135. eq(self.glob('*', 'D'), [self.norm('a', 'D')])
  136. eq(self.glob('*', '*a'), [])
  137. eq(self.glob('a', '*', '*', '*a'),
  138. [self.norm('a', 'bcd', 'efg', 'ha')])
  139. eq(self.glob('?a?', '*F'), [self.norm('aaa', 'zzzF'),
  140. self.norm('aab', 'F')])
  141. def test_glob_directory_with_trailing_slash(self):
  142. # Patterns ending with a slash shouldn't match non-dirs
  143. res = glob.glob(self.norm('Z*Z') + os.sep)
  144. self.assertEqual(res, [])
  145. res = glob.glob(self.norm('ZZZ') + os.sep)
  146. self.assertEqual(res, [])
  147. # When there is a wildcard pattern which ends with os.sep, glob()
  148. # doesn't blow up.
  149. res = glob.glob(self.norm('aa*') + os.sep)
  150. self.assertEqual(len(res), 2)
  151. # either of these results is reasonable
  152. self.assertIn(set(res), [
  153. {self.norm('aaa'), self.norm('aab')},
  154. {self.norm('aaa') + os.sep, self.norm('aab') + os.sep},
  155. ])
  156. def test_glob_bytes_directory_with_trailing_slash(self):
  157. # Same as test_glob_directory_with_trailing_slash, but with a
  158. # bytes argument.
  159. res = glob.glob(os.fsencode(self.norm('Z*Z') + os.sep))
  160. self.assertEqual(res, [])
  161. res = glob.glob(os.fsencode(self.norm('ZZZ') + os.sep))
  162. self.assertEqual(res, [])
  163. res = glob.glob(os.fsencode(self.norm('aa*') + os.sep))
  164. self.assertEqual(len(res), 2)
  165. # either of these results is reasonable
  166. self.assertIn(set(res), [
  167. {os.fsencode(self.norm('aaa')),
  168. os.fsencode(self.norm('aab'))},
  169. {os.fsencode(self.norm('aaa') + os.sep),
  170. os.fsencode(self.norm('aab') + os.sep)},
  171. ])
  172. @skip_unless_symlink
  173. def test_glob_symlinks(self):
  174. eq = self.assertSequencesEqual_noorder
  175. eq(self.glob('sym3'), [self.norm('sym3')])
  176. eq(self.glob('sym3', '*'), [self.norm('sym3', 'EF'),
  177. self.norm('sym3', 'efg')])
  178. self.assertIn(self.glob('sym3' + os.sep),
  179. [[self.norm('sym3')], [self.norm('sym3') + os.sep]])
  180. eq(self.glob('*', '*F'),
  181. [self.norm('aaa', 'zzzF'),
  182. self.norm('aab', 'F'), self.norm('sym3', 'EF')])
  183. @skip_unless_symlink
  184. def test_glob_broken_symlinks(self):
  185. eq = self.assertSequencesEqual_noorder
  186. eq(self.glob('sym*'), [self.norm('sym1'), self.norm('sym2'),
  187. self.norm('sym3')])
  188. eq(self.glob('sym1'), [self.norm('sym1')])
  189. eq(self.glob('sym2'), [self.norm('sym2')])
  190. @unittest.skipUnless(sys.platform == "win32", "Win32 specific test")
  191. def test_glob_magic_in_drive(self):
  192. eq = self.assertSequencesEqual_noorder
  193. eq(glob.glob('*:'), [])
  194. eq(glob.glob(b'*:'), [])
  195. eq(glob.glob('?:'), [])
  196. eq(glob.glob(b'?:'), [])
  197. eq(glob.glob('\\\\?\\c:\\'), ['\\\\?\\c:\\'])
  198. eq(glob.glob(b'\\\\?\\c:\\'), [b'\\\\?\\c:\\'])
  199. eq(glob.glob('\\\\*\\*\\'), [])
  200. eq(glob.glob(b'\\\\*\\*\\'), [])
  201. def check_escape(self, arg, expected):
  202. self.assertEqual(glob.escape(arg), expected)
  203. self.assertEqual(glob.escape(os.fsencode(arg)), os.fsencode(expected))
  204. def test_escape(self):
  205. check = self.check_escape
  206. check('abc', 'abc')
  207. check('[', '[[]')
  208. check('?', '[?]')
  209. check('*', '[*]')
  210. check('[[_/*?*/_]]', '[[][[]_/[*][?][*]/_]]')
  211. check('/[[_/*?*/_]]/', '/[[][[]_/[*][?][*]/_]]/')
  212. @unittest.skipUnless(sys.platform == "win32", "Win32 specific test")
  213. def test_escape_windows(self):
  214. check = self.check_escape
  215. check('?:?', '?:[?]')
  216. check('*:*', '*:[*]')
  217. check(r'\\?\c:\?', r'\\?\c:\[?]')
  218. check(r'\\*\*\*', r'\\*\*\[*]')
  219. check('//?/c:/?', '//?/c:/[?]')
  220. check('//*/*/*', '//*/*/[*]')
  221. def rglob(self, *parts, **kwargs):
  222. return self.glob(*parts, recursive=True, **kwargs)
  223. def hglob(self, *parts, **kwargs):
  224. return self.glob(*parts, include_hidden=True, **kwargs)
  225. def test_hidden_glob(self):
  226. eq = self.assertSequencesEqual_noorder
  227. l = [('aaa',), ('.aa',)]
  228. eq(self.hglob('?aa'), self.joins(*l))
  229. eq(self.hglob('*aa'), self.joins(*l))
  230. l2 = [('.aa','G',)]
  231. eq(self.hglob('**', 'G'), self.joins(*l2))
  232. def test_recursive_glob(self):
  233. eq = self.assertSequencesEqual_noorder
  234. full = [('EF',), ('ZZZ',),
  235. ('a',), ('a', 'D'),
  236. ('a', 'bcd'),
  237. ('a', 'bcd', 'EF'),
  238. ('a', 'bcd', 'efg'),
  239. ('a', 'bcd', 'efg', 'ha'),
  240. ('aaa',), ('aaa', 'zzzF'),
  241. ('aab',), ('aab', 'F'),
  242. ]
  243. if can_symlink():
  244. full += [('sym1',), ('sym2',),
  245. ('sym3',),
  246. ('sym3', 'EF'),
  247. ('sym3', 'efg'),
  248. ('sym3', 'efg', 'ha'),
  249. ]
  250. eq(self.rglob('**'), self.joins(('',), *full))
  251. eq(self.rglob(os.curdir, '**'),
  252. self.joins((os.curdir, ''), *((os.curdir,) + i for i in full)))
  253. dirs = [('a', ''), ('a', 'bcd', ''), ('a', 'bcd', 'efg', ''),
  254. ('aaa', ''), ('aab', '')]
  255. if can_symlink():
  256. dirs += [('sym3', ''), ('sym3', 'efg', '')]
  257. eq(self.rglob('**', ''), self.joins(('',), *dirs))
  258. eq(self.rglob('a', '**'), self.joins(
  259. ('a', ''), ('a', 'D'), ('a', 'bcd'), ('a', 'bcd', 'EF'),
  260. ('a', 'bcd', 'efg'), ('a', 'bcd', 'efg', 'ha')))
  261. eq(self.rglob('a**'), self.joins(('a',), ('aaa',), ('aab',)))
  262. expect = [('a', 'bcd', 'EF'), ('EF',)]
  263. if can_symlink():
  264. expect += [('sym3', 'EF')]
  265. eq(self.rglob('**', 'EF'), self.joins(*expect))
  266. expect = [('a', 'bcd', 'EF'), ('aaa', 'zzzF'), ('aab', 'F'), ('EF',)]
  267. if can_symlink():
  268. expect += [('sym3', 'EF')]
  269. eq(self.rglob('**', '*F'), self.joins(*expect))
  270. eq(self.rglob('**', '*F', ''), [])
  271. eq(self.rglob('**', 'bcd', '*'), self.joins(
  272. ('a', 'bcd', 'EF'), ('a', 'bcd', 'efg')))
  273. eq(self.rglob('a', '**', 'bcd'), self.joins(('a', 'bcd')))
  274. with change_cwd(self.tempdir):
  275. join = os.path.join
  276. eq(glob.glob('**', recursive=True), [join(*i) for i in full])
  277. eq(glob.glob(join('**', ''), recursive=True),
  278. [join(*i) for i in dirs])
  279. eq(glob.glob(join('**', '*'), recursive=True),
  280. [join(*i) for i in full])
  281. eq(glob.glob(join(os.curdir, '**'), recursive=True),
  282. [join(os.curdir, '')] + [join(os.curdir, *i) for i in full])
  283. eq(glob.glob(join(os.curdir, '**', ''), recursive=True),
  284. [join(os.curdir, '')] + [join(os.curdir, *i) for i in dirs])
  285. eq(glob.glob(join(os.curdir, '**', '*'), recursive=True),
  286. [join(os.curdir, *i) for i in full])
  287. eq(glob.glob(join('**','zz*F'), recursive=True),
  288. [join('aaa', 'zzzF')])
  289. eq(glob.glob('**zz*F', recursive=True), [])
  290. expect = [join('a', 'bcd', 'EF'), 'EF']
  291. if can_symlink():
  292. expect += [join('sym3', 'EF')]
  293. eq(glob.glob(join('**', 'EF'), recursive=True), expect)
  294. rec = [('.bb','H'), ('.bb','.J'), ('.aa','G'), ('.aa',), ('.bb',)]
  295. eq(glob.glob('**', recursive=True, include_hidden=True),
  296. [join(*i) for i in full+rec])
  297. def test_glob_many_open_files(self):
  298. depth = 30
  299. base = os.path.join(self.tempdir, 'deep')
  300. p = os.path.join(base, *(['d']*depth))
  301. os.makedirs(p)
  302. pattern = os.path.join(base, *(['*']*depth))
  303. iters = [glob.iglob(pattern, recursive=True) for j in range(100)]
  304. for it in iters:
  305. self.assertEqual(next(it), p)
  306. pattern = os.path.join(base, '**', 'd')
  307. iters = [glob.iglob(pattern, recursive=True) for j in range(100)]
  308. p = base
  309. for i in range(depth):
  310. p = os.path.join(p, 'd')
  311. for it in iters:
  312. self.assertEqual(next(it), p)
  313. @skip_unless_symlink
  314. class SymlinkLoopGlobTests(unittest.TestCase):
  315. def test_selflink(self):
  316. tempdir = TESTFN + "_dir"
  317. os.makedirs(tempdir)
  318. self.addCleanup(shutil.rmtree, tempdir)
  319. with change_cwd(tempdir):
  320. os.makedirs('dir')
  321. create_empty_file(os.path.join('dir', 'file'))
  322. os.symlink(os.curdir, os.path.join('dir', 'link'))
  323. results = glob.glob('**', recursive=True)
  324. self.assertEqual(len(results), len(set(results)))
  325. results = set(results)
  326. depth = 0
  327. while results:
  328. path = os.path.join(*(['dir'] + ['link'] * depth))
  329. self.assertIn(path, results)
  330. results.remove(path)
  331. if not results:
  332. break
  333. path = os.path.join(path, 'file')
  334. self.assertIn(path, results)
  335. results.remove(path)
  336. depth += 1
  337. results = glob.glob(os.path.join('**', 'file'), recursive=True)
  338. self.assertEqual(len(results), len(set(results)))
  339. results = set(results)
  340. depth = 0
  341. while results:
  342. path = os.path.join(*(['dir'] + ['link'] * depth + ['file']))
  343. self.assertIn(path, results)
  344. results.remove(path)
  345. depth += 1
  346. results = glob.glob(os.path.join('**', ''), recursive=True)
  347. self.assertEqual(len(results), len(set(results)))
  348. results = set(results)
  349. depth = 0
  350. while results:
  351. path = os.path.join(*(['dir'] + ['link'] * depth + ['']))
  352. self.assertIn(path, results)
  353. results.remove(path)
  354. depth += 1
  355. if __name__ == "__main__":
  356. unittest.main()