test_sax.py 53 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510
  1. # regression test for SAX 2.0
  2. # $Id$
  3. from xml.sax import make_parser, ContentHandler, \
  4. SAXException, SAXReaderNotAvailable, SAXParseException
  5. import unittest
  6. from unittest import mock
  7. try:
  8. make_parser()
  9. except SAXReaderNotAvailable:
  10. # don't try to test this module if we cannot create a parser
  11. raise unittest.SkipTest("no XML parsers available")
  12. from xml.sax.saxutils import XMLGenerator, escape, unescape, quoteattr, \
  13. XMLFilterBase, prepare_input_source
  14. from xml.sax.expatreader import create_parser
  15. from xml.sax.handler import (feature_namespaces, feature_external_ges,
  16. LexicalHandler)
  17. from xml.sax.xmlreader import InputSource, AttributesImpl, AttributesNSImpl
  18. from io import BytesIO, StringIO
  19. import codecs
  20. import os.path
  21. import shutil
  22. import sys
  23. from urllib.error import URLError
  24. import urllib.request
  25. from test.support import os_helper
  26. from test.support import findfile
  27. from test.support.os_helper import FakePath, TESTFN
  28. TEST_XMLFILE = findfile("test.xml", subdir="xmltestdata")
  29. TEST_XMLFILE_OUT = findfile("test.xml.out", subdir="xmltestdata")
  30. try:
  31. TEST_XMLFILE.encode("utf-8")
  32. TEST_XMLFILE_OUT.encode("utf-8")
  33. except UnicodeEncodeError:
  34. raise unittest.SkipTest("filename is not encodable to utf8")
  35. supports_nonascii_filenames = True
  36. if not os.path.supports_unicode_filenames:
  37. try:
  38. os_helper.TESTFN_UNICODE.encode(sys.getfilesystemencoding())
  39. except (UnicodeError, TypeError):
  40. # Either the file system encoding is None, or the file name
  41. # cannot be encoded in the file system encoding.
  42. supports_nonascii_filenames = False
  43. requires_nonascii_filenames = unittest.skipUnless(
  44. supports_nonascii_filenames,
  45. 'Requires non-ascii filenames support')
  46. ns_uri = "http://www.python.org/xml-ns/saxtest/"
  47. class XmlTestBase(unittest.TestCase):
  48. def verify_empty_attrs(self, attrs):
  49. self.assertRaises(KeyError, attrs.getValue, "attr")
  50. self.assertRaises(KeyError, attrs.getValueByQName, "attr")
  51. self.assertRaises(KeyError, attrs.getNameByQName, "attr")
  52. self.assertRaises(KeyError, attrs.getQNameByName, "attr")
  53. self.assertRaises(KeyError, attrs.__getitem__, "attr")
  54. self.assertEqual(attrs.getLength(), 0)
  55. self.assertEqual(attrs.getNames(), [])
  56. self.assertEqual(attrs.getQNames(), [])
  57. self.assertEqual(len(attrs), 0)
  58. self.assertNotIn("attr", attrs)
  59. self.assertEqual(list(attrs.keys()), [])
  60. self.assertEqual(attrs.get("attrs"), None)
  61. self.assertEqual(attrs.get("attrs", 25), 25)
  62. self.assertEqual(list(attrs.items()), [])
  63. self.assertEqual(list(attrs.values()), [])
  64. def verify_empty_nsattrs(self, attrs):
  65. self.assertRaises(KeyError, attrs.getValue, (ns_uri, "attr"))
  66. self.assertRaises(KeyError, attrs.getValueByQName, "ns:attr")
  67. self.assertRaises(KeyError, attrs.getNameByQName, "ns:attr")
  68. self.assertRaises(KeyError, attrs.getQNameByName, (ns_uri, "attr"))
  69. self.assertRaises(KeyError, attrs.__getitem__, (ns_uri, "attr"))
  70. self.assertEqual(attrs.getLength(), 0)
  71. self.assertEqual(attrs.getNames(), [])
  72. self.assertEqual(attrs.getQNames(), [])
  73. self.assertEqual(len(attrs), 0)
  74. self.assertNotIn((ns_uri, "attr"), attrs)
  75. self.assertEqual(list(attrs.keys()), [])
  76. self.assertEqual(attrs.get((ns_uri, "attr")), None)
  77. self.assertEqual(attrs.get((ns_uri, "attr"), 25), 25)
  78. self.assertEqual(list(attrs.items()), [])
  79. self.assertEqual(list(attrs.values()), [])
  80. def verify_attrs_wattr(self, attrs):
  81. self.assertEqual(attrs.getLength(), 1)
  82. self.assertEqual(attrs.getNames(), ["attr"])
  83. self.assertEqual(attrs.getQNames(), ["attr"])
  84. self.assertEqual(len(attrs), 1)
  85. self.assertIn("attr", attrs)
  86. self.assertEqual(list(attrs.keys()), ["attr"])
  87. self.assertEqual(attrs.get("attr"), "val")
  88. self.assertEqual(attrs.get("attr", 25), "val")
  89. self.assertEqual(list(attrs.items()), [("attr", "val")])
  90. self.assertEqual(list(attrs.values()), ["val"])
  91. self.assertEqual(attrs.getValue("attr"), "val")
  92. self.assertEqual(attrs.getValueByQName("attr"), "val")
  93. self.assertEqual(attrs.getNameByQName("attr"), "attr")
  94. self.assertEqual(attrs["attr"], "val")
  95. self.assertEqual(attrs.getQNameByName("attr"), "attr")
  96. def xml_str(doc, encoding=None):
  97. if encoding is None:
  98. return doc
  99. return '<?xml version="1.0" encoding="%s"?>\n%s' % (encoding, doc)
  100. def xml_bytes(doc, encoding, decl_encoding=...):
  101. if decl_encoding is ...:
  102. decl_encoding = encoding
  103. return xml_str(doc, decl_encoding).encode(encoding, 'xmlcharrefreplace')
  104. def make_xml_file(doc, encoding, decl_encoding=...):
  105. if decl_encoding is ...:
  106. decl_encoding = encoding
  107. with open(TESTFN, 'w', encoding=encoding, errors='xmlcharrefreplace') as f:
  108. f.write(xml_str(doc, decl_encoding))
  109. class ParseTest(unittest.TestCase):
  110. data = '<money value="$\xa3\u20ac\U0001017b">$\xa3\u20ac\U0001017b</money>'
  111. def tearDown(self):
  112. os_helper.unlink(TESTFN)
  113. def check_parse(self, f):
  114. from xml.sax import parse
  115. result = StringIO()
  116. parse(f, XMLGenerator(result, 'utf-8'))
  117. self.assertEqual(result.getvalue(), xml_str(self.data, 'utf-8'))
  118. def test_parse_text(self):
  119. encodings = ('us-ascii', 'iso-8859-1', 'utf-8',
  120. 'utf-16', 'utf-16le', 'utf-16be')
  121. for encoding in encodings:
  122. self.check_parse(StringIO(xml_str(self.data, encoding)))
  123. make_xml_file(self.data, encoding)
  124. with open(TESTFN, 'r', encoding=encoding) as f:
  125. self.check_parse(f)
  126. self.check_parse(StringIO(self.data))
  127. make_xml_file(self.data, encoding, None)
  128. with open(TESTFN, 'r', encoding=encoding) as f:
  129. self.check_parse(f)
  130. def test_parse_bytes(self):
  131. # UTF-8 is default encoding, US-ASCII is compatible with UTF-8,
  132. # UTF-16 is autodetected
  133. encodings = ('us-ascii', 'utf-8', 'utf-16', 'utf-16le', 'utf-16be')
  134. for encoding in encodings:
  135. self.check_parse(BytesIO(xml_bytes(self.data, encoding)))
  136. make_xml_file(self.data, encoding)
  137. self.check_parse(TESTFN)
  138. with open(TESTFN, 'rb') as f:
  139. self.check_parse(f)
  140. self.check_parse(BytesIO(xml_bytes(self.data, encoding, None)))
  141. make_xml_file(self.data, encoding, None)
  142. self.check_parse(TESTFN)
  143. with open(TESTFN, 'rb') as f:
  144. self.check_parse(f)
  145. # accept UTF-8 with BOM
  146. self.check_parse(BytesIO(xml_bytes(self.data, 'utf-8-sig', 'utf-8')))
  147. make_xml_file(self.data, 'utf-8-sig', 'utf-8')
  148. self.check_parse(TESTFN)
  149. with open(TESTFN, 'rb') as f:
  150. self.check_parse(f)
  151. self.check_parse(BytesIO(xml_bytes(self.data, 'utf-8-sig', None)))
  152. make_xml_file(self.data, 'utf-8-sig', None)
  153. self.check_parse(TESTFN)
  154. with open(TESTFN, 'rb') as f:
  155. self.check_parse(f)
  156. # accept data with declared encoding
  157. self.check_parse(BytesIO(xml_bytes(self.data, 'iso-8859-1')))
  158. make_xml_file(self.data, 'iso-8859-1')
  159. self.check_parse(TESTFN)
  160. with open(TESTFN, 'rb') as f:
  161. self.check_parse(f)
  162. # fail on non-UTF-8 incompatible data without declared encoding
  163. with self.assertRaises(SAXException):
  164. self.check_parse(BytesIO(xml_bytes(self.data, 'iso-8859-1', None)))
  165. make_xml_file(self.data, 'iso-8859-1', None)
  166. with self.assertRaises(SAXException):
  167. self.check_parse(TESTFN)
  168. with open(TESTFN, 'rb') as f:
  169. with self.assertRaises(SAXException):
  170. self.check_parse(f)
  171. def test_parse_path_object(self):
  172. make_xml_file(self.data, 'utf-8', None)
  173. self.check_parse(FakePath(TESTFN))
  174. def test_parse_InputSource(self):
  175. # accept data without declared but with explicitly specified encoding
  176. make_xml_file(self.data, 'iso-8859-1', None)
  177. with open(TESTFN, 'rb') as f:
  178. input = InputSource()
  179. input.setByteStream(f)
  180. input.setEncoding('iso-8859-1')
  181. self.check_parse(input)
  182. def test_parse_close_source(self):
  183. builtin_open = open
  184. fileobj = None
  185. def mock_open(*args):
  186. nonlocal fileobj
  187. fileobj = builtin_open(*args)
  188. return fileobj
  189. with mock.patch('xml.sax.saxutils.open', side_effect=mock_open):
  190. make_xml_file(self.data, 'iso-8859-1', None)
  191. with self.assertRaises(SAXException):
  192. self.check_parse(TESTFN)
  193. self.assertTrue(fileobj.closed)
  194. def check_parseString(self, s):
  195. from xml.sax import parseString
  196. result = StringIO()
  197. parseString(s, XMLGenerator(result, 'utf-8'))
  198. self.assertEqual(result.getvalue(), xml_str(self.data, 'utf-8'))
  199. def test_parseString_text(self):
  200. encodings = ('us-ascii', 'iso-8859-1', 'utf-8',
  201. 'utf-16', 'utf-16le', 'utf-16be')
  202. for encoding in encodings:
  203. self.check_parseString(xml_str(self.data, encoding))
  204. self.check_parseString(self.data)
  205. def test_parseString_bytes(self):
  206. # UTF-8 is default encoding, US-ASCII is compatible with UTF-8,
  207. # UTF-16 is autodetected
  208. encodings = ('us-ascii', 'utf-8', 'utf-16', 'utf-16le', 'utf-16be')
  209. for encoding in encodings:
  210. self.check_parseString(xml_bytes(self.data, encoding))
  211. self.check_parseString(xml_bytes(self.data, encoding, None))
  212. # accept UTF-8 with BOM
  213. self.check_parseString(xml_bytes(self.data, 'utf-8-sig', 'utf-8'))
  214. self.check_parseString(xml_bytes(self.data, 'utf-8-sig', None))
  215. # accept data with declared encoding
  216. self.check_parseString(xml_bytes(self.data, 'iso-8859-1'))
  217. # fail on non-UTF-8 incompatible data without declared encoding
  218. with self.assertRaises(SAXException):
  219. self.check_parseString(xml_bytes(self.data, 'iso-8859-1', None))
  220. class MakeParserTest(unittest.TestCase):
  221. def test_make_parser2(self):
  222. # Creating parsers several times in a row should succeed.
  223. # Testing this because there have been failures of this kind
  224. # before.
  225. from xml.sax import make_parser
  226. p = make_parser()
  227. from xml.sax import make_parser
  228. p = make_parser()
  229. from xml.sax import make_parser
  230. p = make_parser()
  231. from xml.sax import make_parser
  232. p = make_parser()
  233. from xml.sax import make_parser
  234. p = make_parser()
  235. from xml.sax import make_parser
  236. p = make_parser()
  237. def test_make_parser3(self):
  238. # Testing that make_parser can handle different types of
  239. # iterables.
  240. make_parser(['module'])
  241. make_parser(('module', ))
  242. make_parser({'module'})
  243. make_parser(frozenset({'module'}))
  244. make_parser({'module': None})
  245. make_parser(iter(['module']))
  246. def test_make_parser4(self):
  247. # Testing that make_parser can handle empty iterables.
  248. make_parser([])
  249. make_parser(tuple())
  250. make_parser(set())
  251. make_parser(frozenset())
  252. make_parser({})
  253. make_parser(iter([]))
  254. def test_make_parser5(self):
  255. # Testing that make_parser can handle iterables with more than
  256. # one item.
  257. make_parser(['module1', 'module2'])
  258. make_parser(('module1', 'module2'))
  259. make_parser({'module1', 'module2'})
  260. make_parser(frozenset({'module1', 'module2'}))
  261. make_parser({'module1': None, 'module2': None})
  262. make_parser(iter(['module1', 'module2']))
  263. # ===========================================================================
  264. #
  265. # saxutils tests
  266. #
  267. # ===========================================================================
  268. class SaxutilsTest(unittest.TestCase):
  269. # ===== escape
  270. def test_escape_basic(self):
  271. self.assertEqual(escape("Donald Duck & Co"), "Donald Duck &amp; Co")
  272. def test_escape_all(self):
  273. self.assertEqual(escape("<Donald Duck & Co>"),
  274. "&lt;Donald Duck &amp; Co&gt;")
  275. def test_escape_extra(self):
  276. self.assertEqual(escape("Hei på deg", {"å" : "&aring;"}),
  277. "Hei p&aring; deg")
  278. # ===== unescape
  279. def test_unescape_basic(self):
  280. self.assertEqual(unescape("Donald Duck &amp; Co"), "Donald Duck & Co")
  281. def test_unescape_all(self):
  282. self.assertEqual(unescape("&lt;Donald Duck &amp; Co&gt;"),
  283. "<Donald Duck & Co>")
  284. def test_unescape_extra(self):
  285. self.assertEqual(unescape("Hei på deg", {"å" : "&aring;"}),
  286. "Hei p&aring; deg")
  287. def test_unescape_amp_extra(self):
  288. self.assertEqual(unescape("&amp;foo;", {"&foo;": "splat"}), "&foo;")
  289. # ===== quoteattr
  290. def test_quoteattr_basic(self):
  291. self.assertEqual(quoteattr("Donald Duck & Co"),
  292. '"Donald Duck &amp; Co"')
  293. def test_single_quoteattr(self):
  294. self.assertEqual(quoteattr('Includes "double" quotes'),
  295. '\'Includes "double" quotes\'')
  296. def test_double_quoteattr(self):
  297. self.assertEqual(quoteattr("Includes 'single' quotes"),
  298. "\"Includes 'single' quotes\"")
  299. def test_single_double_quoteattr(self):
  300. self.assertEqual(quoteattr("Includes 'single' and \"double\" quotes"),
  301. "\"Includes 'single' and &quot;double&quot; quotes\"")
  302. # ===== make_parser
  303. def test_make_parser(self):
  304. # Creating a parser should succeed - it should fall back
  305. # to the expatreader
  306. p = make_parser(['xml.parsers.no_such_parser'])
  307. class PrepareInputSourceTest(unittest.TestCase):
  308. def setUp(self):
  309. self.file = os_helper.TESTFN
  310. with open(self.file, "w") as tmp:
  311. tmp.write("This was read from a file.")
  312. def tearDown(self):
  313. os_helper.unlink(self.file)
  314. def make_byte_stream(self):
  315. return BytesIO(b"This is a byte stream.")
  316. def make_character_stream(self):
  317. return StringIO("This is a character stream.")
  318. def checkContent(self, stream, content):
  319. self.assertIsNotNone(stream)
  320. self.assertEqual(stream.read(), content)
  321. stream.close()
  322. def test_character_stream(self):
  323. # If the source is an InputSource with a character stream, use it.
  324. src = InputSource(self.file)
  325. src.setCharacterStream(self.make_character_stream())
  326. prep = prepare_input_source(src)
  327. self.assertIsNone(prep.getByteStream())
  328. self.checkContent(prep.getCharacterStream(),
  329. "This is a character stream.")
  330. def test_byte_stream(self):
  331. # If the source is an InputSource that does not have a character
  332. # stream but does have a byte stream, use the byte stream.
  333. src = InputSource(self.file)
  334. src.setByteStream(self.make_byte_stream())
  335. prep = prepare_input_source(src)
  336. self.assertIsNone(prep.getCharacterStream())
  337. self.checkContent(prep.getByteStream(),
  338. b"This is a byte stream.")
  339. def test_system_id(self):
  340. # If the source is an InputSource that has neither a character
  341. # stream nor a byte stream, open the system ID.
  342. src = InputSource(self.file)
  343. prep = prepare_input_source(src)
  344. self.assertIsNone(prep.getCharacterStream())
  345. self.checkContent(prep.getByteStream(),
  346. b"This was read from a file.")
  347. def test_string(self):
  348. # If the source is a string, use it as a system ID and open it.
  349. prep = prepare_input_source(self.file)
  350. self.assertIsNone(prep.getCharacterStream())
  351. self.checkContent(prep.getByteStream(),
  352. b"This was read from a file.")
  353. def test_path_objects(self):
  354. # If the source is a Path object, use it as a system ID and open it.
  355. prep = prepare_input_source(FakePath(self.file))
  356. self.assertIsNone(prep.getCharacterStream())
  357. self.checkContent(prep.getByteStream(),
  358. b"This was read from a file.")
  359. def test_binary_file(self):
  360. # If the source is a binary file-like object, use it as a byte
  361. # stream.
  362. prep = prepare_input_source(self.make_byte_stream())
  363. self.assertIsNone(prep.getCharacterStream())
  364. self.checkContent(prep.getByteStream(),
  365. b"This is a byte stream.")
  366. def test_text_file(self):
  367. # If the source is a text file-like object, use it as a character
  368. # stream.
  369. prep = prepare_input_source(self.make_character_stream())
  370. self.assertIsNone(prep.getByteStream())
  371. self.checkContent(prep.getCharacterStream(),
  372. "This is a character stream.")
  373. # ===== XMLGenerator
  374. class XmlgenTest:
  375. def test_xmlgen_basic(self):
  376. result = self.ioclass()
  377. gen = XMLGenerator(result)
  378. gen.startDocument()
  379. gen.startElement("doc", {})
  380. gen.endElement("doc")
  381. gen.endDocument()
  382. self.assertEqual(result.getvalue(), self.xml("<doc></doc>"))
  383. def test_xmlgen_basic_empty(self):
  384. result = self.ioclass()
  385. gen = XMLGenerator(result, short_empty_elements=True)
  386. gen.startDocument()
  387. gen.startElement("doc", {})
  388. gen.endElement("doc")
  389. gen.endDocument()
  390. self.assertEqual(result.getvalue(), self.xml("<doc/>"))
  391. def test_xmlgen_content(self):
  392. result = self.ioclass()
  393. gen = XMLGenerator(result)
  394. gen.startDocument()
  395. gen.startElement("doc", {})
  396. gen.characters("huhei")
  397. gen.endElement("doc")
  398. gen.endDocument()
  399. self.assertEqual(result.getvalue(), self.xml("<doc>huhei</doc>"))
  400. def test_xmlgen_content_empty(self):
  401. result = self.ioclass()
  402. gen = XMLGenerator(result, short_empty_elements=True)
  403. gen.startDocument()
  404. gen.startElement("doc", {})
  405. gen.characters("huhei")
  406. gen.endElement("doc")
  407. gen.endDocument()
  408. self.assertEqual(result.getvalue(), self.xml("<doc>huhei</doc>"))
  409. def test_xmlgen_pi(self):
  410. result = self.ioclass()
  411. gen = XMLGenerator(result)
  412. gen.startDocument()
  413. gen.processingInstruction("test", "data")
  414. gen.startElement("doc", {})
  415. gen.endElement("doc")
  416. gen.endDocument()
  417. self.assertEqual(result.getvalue(),
  418. self.xml("<?test data?><doc></doc>"))
  419. def test_xmlgen_content_escape(self):
  420. result = self.ioclass()
  421. gen = XMLGenerator(result)
  422. gen.startDocument()
  423. gen.startElement("doc", {})
  424. gen.characters("<huhei&")
  425. gen.endElement("doc")
  426. gen.endDocument()
  427. self.assertEqual(result.getvalue(),
  428. self.xml("<doc>&lt;huhei&amp;</doc>"))
  429. def test_xmlgen_attr_escape(self):
  430. result = self.ioclass()
  431. gen = XMLGenerator(result)
  432. gen.startDocument()
  433. gen.startElement("doc", {"a": '"'})
  434. gen.startElement("e", {"a": "'"})
  435. gen.endElement("e")
  436. gen.startElement("e", {"a": "'\""})
  437. gen.endElement("e")
  438. gen.startElement("e", {"a": "\n\r\t"})
  439. gen.endElement("e")
  440. gen.endElement("doc")
  441. gen.endDocument()
  442. self.assertEqual(result.getvalue(), self.xml(
  443. "<doc a='\"'><e a=\"'\"></e>"
  444. "<e a=\"'&quot;\"></e>"
  445. "<e a=\"&#10;&#13;&#9;\"></e></doc>"))
  446. def test_xmlgen_encoding(self):
  447. encodings = ('iso-8859-15', 'utf-8', 'utf-8-sig',
  448. 'utf-16', 'utf-16be', 'utf-16le',
  449. 'utf-32', 'utf-32be', 'utf-32le')
  450. for encoding in encodings:
  451. result = self.ioclass()
  452. gen = XMLGenerator(result, encoding=encoding)
  453. gen.startDocument()
  454. gen.startElement("doc", {"a": '\u20ac'})
  455. gen.characters("\u20ac")
  456. gen.endElement("doc")
  457. gen.endDocument()
  458. self.assertEqual(result.getvalue(),
  459. self.xml('<doc a="\u20ac">\u20ac</doc>', encoding=encoding))
  460. def test_xmlgen_unencodable(self):
  461. result = self.ioclass()
  462. gen = XMLGenerator(result, encoding='ascii')
  463. gen.startDocument()
  464. gen.startElement("doc", {"a": '\u20ac'})
  465. gen.characters("\u20ac")
  466. gen.endElement("doc")
  467. gen.endDocument()
  468. self.assertEqual(result.getvalue(),
  469. self.xml('<doc a="&#8364;">&#8364;</doc>', encoding='ascii'))
  470. def test_xmlgen_ignorable(self):
  471. result = self.ioclass()
  472. gen = XMLGenerator(result)
  473. gen.startDocument()
  474. gen.startElement("doc", {})
  475. gen.ignorableWhitespace(" ")
  476. gen.endElement("doc")
  477. gen.endDocument()
  478. self.assertEqual(result.getvalue(), self.xml("<doc> </doc>"))
  479. def test_xmlgen_ignorable_empty(self):
  480. result = self.ioclass()
  481. gen = XMLGenerator(result, short_empty_elements=True)
  482. gen.startDocument()
  483. gen.startElement("doc", {})
  484. gen.ignorableWhitespace(" ")
  485. gen.endElement("doc")
  486. gen.endDocument()
  487. self.assertEqual(result.getvalue(), self.xml("<doc> </doc>"))
  488. def test_xmlgen_encoding_bytes(self):
  489. encodings = ('iso-8859-15', 'utf-8', 'utf-8-sig',
  490. 'utf-16', 'utf-16be', 'utf-16le',
  491. 'utf-32', 'utf-32be', 'utf-32le')
  492. for encoding in encodings:
  493. result = self.ioclass()
  494. gen = XMLGenerator(result, encoding=encoding)
  495. gen.startDocument()
  496. gen.startElement("doc", {"a": '\u20ac'})
  497. gen.characters("\u20ac".encode(encoding))
  498. gen.ignorableWhitespace(" ".encode(encoding))
  499. gen.endElement("doc")
  500. gen.endDocument()
  501. self.assertEqual(result.getvalue(),
  502. self.xml('<doc a="\u20ac">\u20ac </doc>', encoding=encoding))
  503. def test_xmlgen_ns(self):
  504. result = self.ioclass()
  505. gen = XMLGenerator(result)
  506. gen.startDocument()
  507. gen.startPrefixMapping("ns1", ns_uri)
  508. gen.startElementNS((ns_uri, "doc"), "ns1:doc", {})
  509. # add an unqualified name
  510. gen.startElementNS((None, "udoc"), None, {})
  511. gen.endElementNS((None, "udoc"), None)
  512. gen.endElementNS((ns_uri, "doc"), "ns1:doc")
  513. gen.endPrefixMapping("ns1")
  514. gen.endDocument()
  515. self.assertEqual(result.getvalue(), self.xml(
  516. '<ns1:doc xmlns:ns1="%s"><udoc></udoc></ns1:doc>' %
  517. ns_uri))
  518. def test_xmlgen_ns_empty(self):
  519. result = self.ioclass()
  520. gen = XMLGenerator(result, short_empty_elements=True)
  521. gen.startDocument()
  522. gen.startPrefixMapping("ns1", ns_uri)
  523. gen.startElementNS((ns_uri, "doc"), "ns1:doc", {})
  524. # add an unqualified name
  525. gen.startElementNS((None, "udoc"), None, {})
  526. gen.endElementNS((None, "udoc"), None)
  527. gen.endElementNS((ns_uri, "doc"), "ns1:doc")
  528. gen.endPrefixMapping("ns1")
  529. gen.endDocument()
  530. self.assertEqual(result.getvalue(), self.xml(
  531. '<ns1:doc xmlns:ns1="%s"><udoc/></ns1:doc>' %
  532. ns_uri))
  533. def test_1463026_1(self):
  534. result = self.ioclass()
  535. gen = XMLGenerator(result)
  536. gen.startDocument()
  537. gen.startElementNS((None, 'a'), 'a', {(None, 'b'):'c'})
  538. gen.endElementNS((None, 'a'), 'a')
  539. gen.endDocument()
  540. self.assertEqual(result.getvalue(), self.xml('<a b="c"></a>'))
  541. def test_1463026_1_empty(self):
  542. result = self.ioclass()
  543. gen = XMLGenerator(result, short_empty_elements=True)
  544. gen.startDocument()
  545. gen.startElementNS((None, 'a'), 'a', {(None, 'b'):'c'})
  546. gen.endElementNS((None, 'a'), 'a')
  547. gen.endDocument()
  548. self.assertEqual(result.getvalue(), self.xml('<a b="c"/>'))
  549. def test_1463026_2(self):
  550. result = self.ioclass()
  551. gen = XMLGenerator(result)
  552. gen.startDocument()
  553. gen.startPrefixMapping(None, 'qux')
  554. gen.startElementNS(('qux', 'a'), 'a', {})
  555. gen.endElementNS(('qux', 'a'), 'a')
  556. gen.endPrefixMapping(None)
  557. gen.endDocument()
  558. self.assertEqual(result.getvalue(), self.xml('<a xmlns="qux"></a>'))
  559. def test_1463026_2_empty(self):
  560. result = self.ioclass()
  561. gen = XMLGenerator(result, short_empty_elements=True)
  562. gen.startDocument()
  563. gen.startPrefixMapping(None, 'qux')
  564. gen.startElementNS(('qux', 'a'), 'a', {})
  565. gen.endElementNS(('qux', 'a'), 'a')
  566. gen.endPrefixMapping(None)
  567. gen.endDocument()
  568. self.assertEqual(result.getvalue(), self.xml('<a xmlns="qux"/>'))
  569. def test_1463026_3(self):
  570. result = self.ioclass()
  571. gen = XMLGenerator(result)
  572. gen.startDocument()
  573. gen.startPrefixMapping('my', 'qux')
  574. gen.startElementNS(('qux', 'a'), 'a', {(None, 'b'):'c'})
  575. gen.endElementNS(('qux', 'a'), 'a')
  576. gen.endPrefixMapping('my')
  577. gen.endDocument()
  578. self.assertEqual(result.getvalue(),
  579. self.xml('<my:a xmlns:my="qux" b="c"></my:a>'))
  580. def test_1463026_3_empty(self):
  581. result = self.ioclass()
  582. gen = XMLGenerator(result, short_empty_elements=True)
  583. gen.startDocument()
  584. gen.startPrefixMapping('my', 'qux')
  585. gen.startElementNS(('qux', 'a'), 'a', {(None, 'b'):'c'})
  586. gen.endElementNS(('qux', 'a'), 'a')
  587. gen.endPrefixMapping('my')
  588. gen.endDocument()
  589. self.assertEqual(result.getvalue(),
  590. self.xml('<my:a xmlns:my="qux" b="c"/>'))
  591. def test_5027_1(self):
  592. # The xml prefix (as in xml:lang below) is reserved and bound by
  593. # definition to http://www.w3.org/XML/1998/namespace. XMLGenerator had
  594. # a bug whereby a KeyError is raised because this namespace is missing
  595. # from a dictionary.
  596. #
  597. # This test demonstrates the bug by parsing a document.
  598. test_xml = StringIO(
  599. '<?xml version="1.0"?>'
  600. '<a:g1 xmlns:a="http://example.com/ns">'
  601. '<a:g2 xml:lang="en">Hello</a:g2>'
  602. '</a:g1>')
  603. parser = make_parser()
  604. parser.setFeature(feature_namespaces, True)
  605. result = self.ioclass()
  606. gen = XMLGenerator(result)
  607. parser.setContentHandler(gen)
  608. parser.parse(test_xml)
  609. self.assertEqual(result.getvalue(),
  610. self.xml(
  611. '<a:g1 xmlns:a="http://example.com/ns">'
  612. '<a:g2 xml:lang="en">Hello</a:g2>'
  613. '</a:g1>'))
  614. def test_5027_2(self):
  615. # The xml prefix (as in xml:lang below) is reserved and bound by
  616. # definition to http://www.w3.org/XML/1998/namespace. XMLGenerator had
  617. # a bug whereby a KeyError is raised because this namespace is missing
  618. # from a dictionary.
  619. #
  620. # This test demonstrates the bug by direct manipulation of the
  621. # XMLGenerator.
  622. result = self.ioclass()
  623. gen = XMLGenerator(result)
  624. gen.startDocument()
  625. gen.startPrefixMapping('a', 'http://example.com/ns')
  626. gen.startElementNS(('http://example.com/ns', 'g1'), 'g1', {})
  627. lang_attr = {('http://www.w3.org/XML/1998/namespace', 'lang'): 'en'}
  628. gen.startElementNS(('http://example.com/ns', 'g2'), 'g2', lang_attr)
  629. gen.characters('Hello')
  630. gen.endElementNS(('http://example.com/ns', 'g2'), 'g2')
  631. gen.endElementNS(('http://example.com/ns', 'g1'), 'g1')
  632. gen.endPrefixMapping('a')
  633. gen.endDocument()
  634. self.assertEqual(result.getvalue(),
  635. self.xml(
  636. '<a:g1 xmlns:a="http://example.com/ns">'
  637. '<a:g2 xml:lang="en">Hello</a:g2>'
  638. '</a:g1>'))
  639. def test_no_close_file(self):
  640. result = self.ioclass()
  641. def func(out):
  642. gen = XMLGenerator(out)
  643. gen.startDocument()
  644. gen.startElement("doc", {})
  645. func(result)
  646. self.assertFalse(result.closed)
  647. def test_xmlgen_fragment(self):
  648. result = self.ioclass()
  649. gen = XMLGenerator(result)
  650. # Don't call gen.startDocument()
  651. gen.startElement("foo", {"a": "1.0"})
  652. gen.characters("Hello")
  653. gen.endElement("foo")
  654. gen.startElement("bar", {"b": "2.0"})
  655. gen.endElement("bar")
  656. # Don't call gen.endDocument()
  657. self.assertEqual(result.getvalue(),
  658. self.xml('<foo a="1.0">Hello</foo><bar b="2.0"></bar>')[len(self.xml('')):])
  659. class StringXmlgenTest(XmlgenTest, unittest.TestCase):
  660. ioclass = StringIO
  661. def xml(self, doc, encoding='iso-8859-1'):
  662. return '<?xml version="1.0" encoding="%s"?>\n%s' % (encoding, doc)
  663. test_xmlgen_unencodable = None
  664. class BytesXmlgenTest(XmlgenTest, unittest.TestCase):
  665. ioclass = BytesIO
  666. def xml(self, doc, encoding='iso-8859-1'):
  667. return ('<?xml version="1.0" encoding="%s"?>\n%s' %
  668. (encoding, doc)).encode(encoding, 'xmlcharrefreplace')
  669. class WriterXmlgenTest(BytesXmlgenTest):
  670. class ioclass(list):
  671. write = list.append
  672. closed = False
  673. def seekable(self):
  674. return True
  675. def tell(self):
  676. # return 0 at start and not 0 after start
  677. return len(self)
  678. def getvalue(self):
  679. return b''.join(self)
  680. class StreamWriterXmlgenTest(XmlgenTest, unittest.TestCase):
  681. def ioclass(self):
  682. raw = BytesIO()
  683. writer = codecs.getwriter('ascii')(raw, 'xmlcharrefreplace')
  684. writer.getvalue = raw.getvalue
  685. return writer
  686. def xml(self, doc, encoding='iso-8859-1'):
  687. return ('<?xml version="1.0" encoding="%s"?>\n%s' %
  688. (encoding, doc)).encode('ascii', 'xmlcharrefreplace')
  689. class StreamReaderWriterXmlgenTest(XmlgenTest, unittest.TestCase):
  690. fname = os_helper.TESTFN + '-codecs'
  691. def ioclass(self):
  692. writer = codecs.open(self.fname, 'w', encoding='ascii',
  693. errors='xmlcharrefreplace', buffering=0)
  694. def cleanup():
  695. writer.close()
  696. os_helper.unlink(self.fname)
  697. self.addCleanup(cleanup)
  698. def getvalue():
  699. # Windows will not let use reopen without first closing
  700. writer.close()
  701. with open(writer.name, 'rb') as f:
  702. return f.read()
  703. writer.getvalue = getvalue
  704. return writer
  705. def xml(self, doc, encoding='iso-8859-1'):
  706. return ('<?xml version="1.0" encoding="%s"?>\n%s' %
  707. (encoding, doc)).encode('ascii', 'xmlcharrefreplace')
  708. start = b'<?xml version="1.0" encoding="iso-8859-1"?>\n'
  709. class XMLFilterBaseTest(unittest.TestCase):
  710. def test_filter_basic(self):
  711. result = BytesIO()
  712. gen = XMLGenerator(result)
  713. filter = XMLFilterBase()
  714. filter.setContentHandler(gen)
  715. filter.startDocument()
  716. filter.startElement("doc", {})
  717. filter.characters("content")
  718. filter.ignorableWhitespace(" ")
  719. filter.endElement("doc")
  720. filter.endDocument()
  721. self.assertEqual(result.getvalue(), start + b"<doc>content </doc>")
  722. # ===========================================================================
  723. #
  724. # expatreader tests
  725. #
  726. # ===========================================================================
  727. with open(TEST_XMLFILE_OUT, 'rb') as f:
  728. xml_test_out = f.read()
  729. class ExpatReaderTest(XmlTestBase):
  730. # ===== XMLReader support
  731. def test_expat_binary_file(self):
  732. parser = create_parser()
  733. result = BytesIO()
  734. xmlgen = XMLGenerator(result)
  735. parser.setContentHandler(xmlgen)
  736. with open(TEST_XMLFILE, 'rb') as f:
  737. parser.parse(f)
  738. self.assertEqual(result.getvalue(), xml_test_out)
  739. def test_expat_text_file(self):
  740. parser = create_parser()
  741. result = BytesIO()
  742. xmlgen = XMLGenerator(result)
  743. parser.setContentHandler(xmlgen)
  744. with open(TEST_XMLFILE, 'rt', encoding='iso-8859-1') as f:
  745. parser.parse(f)
  746. self.assertEqual(result.getvalue(), xml_test_out)
  747. @requires_nonascii_filenames
  748. def test_expat_binary_file_nonascii(self):
  749. fname = os_helper.TESTFN_UNICODE
  750. shutil.copyfile(TEST_XMLFILE, fname)
  751. self.addCleanup(os_helper.unlink, fname)
  752. parser = create_parser()
  753. result = BytesIO()
  754. xmlgen = XMLGenerator(result)
  755. parser.setContentHandler(xmlgen)
  756. parser.parse(open(fname, 'rb'))
  757. self.assertEqual(result.getvalue(), xml_test_out)
  758. def test_expat_binary_file_bytes_name(self):
  759. fname = os.fsencode(TEST_XMLFILE)
  760. parser = create_parser()
  761. result = BytesIO()
  762. xmlgen = XMLGenerator(result)
  763. parser.setContentHandler(xmlgen)
  764. with open(fname, 'rb') as f:
  765. parser.parse(f)
  766. self.assertEqual(result.getvalue(), xml_test_out)
  767. def test_expat_binary_file_int_name(self):
  768. parser = create_parser()
  769. result = BytesIO()
  770. xmlgen = XMLGenerator(result)
  771. parser.setContentHandler(xmlgen)
  772. with open(TEST_XMLFILE, 'rb') as f:
  773. with open(f.fileno(), 'rb', closefd=False) as f2:
  774. parser.parse(f2)
  775. self.assertEqual(result.getvalue(), xml_test_out)
  776. # ===== DTDHandler support
  777. class TestDTDHandler:
  778. def __init__(self):
  779. self._notations = []
  780. self._entities = []
  781. def notationDecl(self, name, publicId, systemId):
  782. self._notations.append((name, publicId, systemId))
  783. def unparsedEntityDecl(self, name, publicId, systemId, ndata):
  784. self._entities.append((name, publicId, systemId, ndata))
  785. class TestEntityRecorder:
  786. def __init__(self):
  787. self.entities = []
  788. def resolveEntity(self, publicId, systemId):
  789. self.entities.append((publicId, systemId))
  790. source = InputSource()
  791. source.setPublicId(publicId)
  792. source.setSystemId(systemId)
  793. return source
  794. def test_expat_dtdhandler(self):
  795. parser = create_parser()
  796. handler = self.TestDTDHandler()
  797. parser.setDTDHandler(handler)
  798. parser.feed('<!DOCTYPE doc [\n')
  799. parser.feed(' <!ENTITY img SYSTEM "expat.gif" NDATA GIF>\n')
  800. parser.feed(' <!NOTATION GIF PUBLIC "-//CompuServe//NOTATION Graphics Interchange Format 89a//EN">\n')
  801. parser.feed(']>\n')
  802. parser.feed('<doc></doc>')
  803. parser.close()
  804. self.assertEqual(handler._notations,
  805. [("GIF", "-//CompuServe//NOTATION Graphics Interchange Format 89a//EN", None)])
  806. self.assertEqual(handler._entities, [("img", None, "expat.gif", "GIF")])
  807. def test_expat_external_dtd_enabled(self):
  808. # clear _opener global variable
  809. self.addCleanup(urllib.request.urlcleanup)
  810. parser = create_parser()
  811. parser.setFeature(feature_external_ges, True)
  812. resolver = self.TestEntityRecorder()
  813. parser.setEntityResolver(resolver)
  814. with self.assertRaises(URLError):
  815. parser.feed(
  816. '<!DOCTYPE external SYSTEM "unsupported://non-existing">\n'
  817. )
  818. self.assertEqual(
  819. resolver.entities, [(None, 'unsupported://non-existing')]
  820. )
  821. def test_expat_external_dtd_default(self):
  822. parser = create_parser()
  823. resolver = self.TestEntityRecorder()
  824. parser.setEntityResolver(resolver)
  825. parser.feed(
  826. '<!DOCTYPE external SYSTEM "unsupported://non-existing">\n'
  827. )
  828. parser.feed('<doc />')
  829. parser.close()
  830. self.assertEqual(resolver.entities, [])
  831. # ===== EntityResolver support
  832. class TestEntityResolver:
  833. def resolveEntity(self, publicId, systemId):
  834. inpsrc = InputSource()
  835. inpsrc.setByteStream(BytesIO(b"<entity/>"))
  836. return inpsrc
  837. def test_expat_entityresolver_enabled(self):
  838. parser = create_parser()
  839. parser.setFeature(feature_external_ges, True)
  840. parser.setEntityResolver(self.TestEntityResolver())
  841. result = BytesIO()
  842. parser.setContentHandler(XMLGenerator(result))
  843. parser.feed('<!DOCTYPE doc [\n')
  844. parser.feed(' <!ENTITY test SYSTEM "whatever">\n')
  845. parser.feed(']>\n')
  846. parser.feed('<doc>&test;</doc>')
  847. parser.close()
  848. self.assertEqual(result.getvalue(), start +
  849. b"<doc><entity></entity></doc>")
  850. def test_expat_entityresolver_default(self):
  851. parser = create_parser()
  852. self.assertEqual(parser.getFeature(feature_external_ges), False)
  853. parser.setEntityResolver(self.TestEntityResolver())
  854. result = BytesIO()
  855. parser.setContentHandler(XMLGenerator(result))
  856. parser.feed('<!DOCTYPE doc [\n')
  857. parser.feed(' <!ENTITY test SYSTEM "whatever">\n')
  858. parser.feed(']>\n')
  859. parser.feed('<doc>&test;</doc>')
  860. parser.close()
  861. self.assertEqual(result.getvalue(), start +
  862. b"<doc></doc>")
  863. # ===== Attributes support
  864. class AttrGatherer(ContentHandler):
  865. def startElement(self, name, attrs):
  866. self._attrs = attrs
  867. def startElementNS(self, name, qname, attrs):
  868. self._attrs = attrs
  869. def test_expat_attrs_empty(self):
  870. parser = create_parser()
  871. gather = self.AttrGatherer()
  872. parser.setContentHandler(gather)
  873. parser.feed("<doc/>")
  874. parser.close()
  875. self.verify_empty_attrs(gather._attrs)
  876. def test_expat_attrs_wattr(self):
  877. parser = create_parser()
  878. gather = self.AttrGatherer()
  879. parser.setContentHandler(gather)
  880. parser.feed("<doc attr='val'/>")
  881. parser.close()
  882. self.verify_attrs_wattr(gather._attrs)
  883. def test_expat_nsattrs_empty(self):
  884. parser = create_parser(1)
  885. gather = self.AttrGatherer()
  886. parser.setContentHandler(gather)
  887. parser.feed("<doc/>")
  888. parser.close()
  889. self.verify_empty_nsattrs(gather._attrs)
  890. def test_expat_nsattrs_wattr(self):
  891. parser = create_parser(1)
  892. gather = self.AttrGatherer()
  893. parser.setContentHandler(gather)
  894. parser.feed("<doc xmlns:ns='%s' ns:attr='val'/>" % ns_uri)
  895. parser.close()
  896. attrs = gather._attrs
  897. self.assertEqual(attrs.getLength(), 1)
  898. self.assertEqual(attrs.getNames(), [(ns_uri, "attr")])
  899. self.assertTrue((attrs.getQNames() == [] or
  900. attrs.getQNames() == ["ns:attr"]))
  901. self.assertEqual(len(attrs), 1)
  902. self.assertIn((ns_uri, "attr"), attrs)
  903. self.assertEqual(attrs.get((ns_uri, "attr")), "val")
  904. self.assertEqual(attrs.get((ns_uri, "attr"), 25), "val")
  905. self.assertEqual(list(attrs.items()), [((ns_uri, "attr"), "val")])
  906. self.assertEqual(list(attrs.values()), ["val"])
  907. self.assertEqual(attrs.getValue((ns_uri, "attr")), "val")
  908. self.assertEqual(attrs[(ns_uri, "attr")], "val")
  909. # ===== InputSource support
  910. def test_expat_inpsource_filename(self):
  911. parser = create_parser()
  912. result = BytesIO()
  913. xmlgen = XMLGenerator(result)
  914. parser.setContentHandler(xmlgen)
  915. parser.parse(TEST_XMLFILE)
  916. self.assertEqual(result.getvalue(), xml_test_out)
  917. def test_expat_inpsource_sysid(self):
  918. parser = create_parser()
  919. result = BytesIO()
  920. xmlgen = XMLGenerator(result)
  921. parser.setContentHandler(xmlgen)
  922. parser.parse(InputSource(TEST_XMLFILE))
  923. self.assertEqual(result.getvalue(), xml_test_out)
  924. @requires_nonascii_filenames
  925. def test_expat_inpsource_sysid_nonascii(self):
  926. fname = os_helper.TESTFN_UNICODE
  927. shutil.copyfile(TEST_XMLFILE, fname)
  928. self.addCleanup(os_helper.unlink, fname)
  929. parser = create_parser()
  930. result = BytesIO()
  931. xmlgen = XMLGenerator(result)
  932. parser.setContentHandler(xmlgen)
  933. parser.parse(InputSource(fname))
  934. self.assertEqual(result.getvalue(), xml_test_out)
  935. def test_expat_inpsource_byte_stream(self):
  936. parser = create_parser()
  937. result = BytesIO()
  938. xmlgen = XMLGenerator(result)
  939. parser.setContentHandler(xmlgen)
  940. inpsrc = InputSource()
  941. with open(TEST_XMLFILE, 'rb') as f:
  942. inpsrc.setByteStream(f)
  943. parser.parse(inpsrc)
  944. self.assertEqual(result.getvalue(), xml_test_out)
  945. def test_expat_inpsource_character_stream(self):
  946. parser = create_parser()
  947. result = BytesIO()
  948. xmlgen = XMLGenerator(result)
  949. parser.setContentHandler(xmlgen)
  950. inpsrc = InputSource()
  951. with open(TEST_XMLFILE, 'rt', encoding='iso-8859-1') as f:
  952. inpsrc.setCharacterStream(f)
  953. parser.parse(inpsrc)
  954. self.assertEqual(result.getvalue(), xml_test_out)
  955. # ===== IncrementalParser support
  956. def test_expat_incremental(self):
  957. result = BytesIO()
  958. xmlgen = XMLGenerator(result)
  959. parser = create_parser()
  960. parser.setContentHandler(xmlgen)
  961. parser.feed("<doc>")
  962. parser.feed("</doc>")
  963. parser.close()
  964. self.assertEqual(result.getvalue(), start + b"<doc></doc>")
  965. def test_expat_incremental_reset(self):
  966. result = BytesIO()
  967. xmlgen = XMLGenerator(result)
  968. parser = create_parser()
  969. parser.setContentHandler(xmlgen)
  970. parser.feed("<doc>")
  971. parser.feed("text")
  972. result = BytesIO()
  973. xmlgen = XMLGenerator(result)
  974. parser.setContentHandler(xmlgen)
  975. parser.reset()
  976. parser.feed("<doc>")
  977. parser.feed("text")
  978. parser.feed("</doc>")
  979. parser.close()
  980. self.assertEqual(result.getvalue(), start + b"<doc>text</doc>")
  981. # ===== Locator support
  982. def test_expat_locator_noinfo(self):
  983. result = BytesIO()
  984. xmlgen = XMLGenerator(result)
  985. parser = create_parser()
  986. parser.setContentHandler(xmlgen)
  987. parser.feed("<doc>")
  988. parser.feed("</doc>")
  989. parser.close()
  990. self.assertEqual(parser.getSystemId(), None)
  991. self.assertEqual(parser.getPublicId(), None)
  992. self.assertEqual(parser.getLineNumber(), 1)
  993. def test_expat_locator_withinfo(self):
  994. result = BytesIO()
  995. xmlgen = XMLGenerator(result)
  996. parser = create_parser()
  997. parser.setContentHandler(xmlgen)
  998. parser.parse(TEST_XMLFILE)
  999. self.assertEqual(parser.getSystemId(), TEST_XMLFILE)
  1000. self.assertEqual(parser.getPublicId(), None)
  1001. @requires_nonascii_filenames
  1002. def test_expat_locator_withinfo_nonascii(self):
  1003. fname = os_helper.TESTFN_UNICODE
  1004. shutil.copyfile(TEST_XMLFILE, fname)
  1005. self.addCleanup(os_helper.unlink, fname)
  1006. result = BytesIO()
  1007. xmlgen = XMLGenerator(result)
  1008. parser = create_parser()
  1009. parser.setContentHandler(xmlgen)
  1010. parser.parse(fname)
  1011. self.assertEqual(parser.getSystemId(), fname)
  1012. self.assertEqual(parser.getPublicId(), None)
  1013. # ===========================================================================
  1014. #
  1015. # error reporting
  1016. #
  1017. # ===========================================================================
  1018. class ErrorReportingTest(unittest.TestCase):
  1019. def test_expat_inpsource_location(self):
  1020. parser = create_parser()
  1021. parser.setContentHandler(ContentHandler()) # do nothing
  1022. source = InputSource()
  1023. source.setByteStream(BytesIO(b"<foo bar foobar>")) #ill-formed
  1024. name = "a file name"
  1025. source.setSystemId(name)
  1026. try:
  1027. parser.parse(source)
  1028. self.fail()
  1029. except SAXException as e:
  1030. self.assertEqual(e.getSystemId(), name)
  1031. def test_expat_incomplete(self):
  1032. parser = create_parser()
  1033. parser.setContentHandler(ContentHandler()) # do nothing
  1034. self.assertRaises(SAXParseException, parser.parse, StringIO("<foo>"))
  1035. self.assertEqual(parser.getColumnNumber(), 5)
  1036. self.assertEqual(parser.getLineNumber(), 1)
  1037. def test_sax_parse_exception_str(self):
  1038. # pass various values from a locator to the SAXParseException to
  1039. # make sure that the __str__() doesn't fall apart when None is
  1040. # passed instead of an integer line and column number
  1041. #
  1042. # use "normal" values for the locator:
  1043. str(SAXParseException("message", None,
  1044. self.DummyLocator(1, 1)))
  1045. # use None for the line number:
  1046. str(SAXParseException("message", None,
  1047. self.DummyLocator(None, 1)))
  1048. # use None for the column number:
  1049. str(SAXParseException("message", None,
  1050. self.DummyLocator(1, None)))
  1051. # use None for both:
  1052. str(SAXParseException("message", None,
  1053. self.DummyLocator(None, None)))
  1054. class DummyLocator:
  1055. def __init__(self, lineno, colno):
  1056. self._lineno = lineno
  1057. self._colno = colno
  1058. def getPublicId(self):
  1059. return "pubid"
  1060. def getSystemId(self):
  1061. return "sysid"
  1062. def getLineNumber(self):
  1063. return self._lineno
  1064. def getColumnNumber(self):
  1065. return self._colno
  1066. # ===========================================================================
  1067. #
  1068. # xmlreader tests
  1069. #
  1070. # ===========================================================================
  1071. class XmlReaderTest(XmlTestBase):
  1072. # ===== AttributesImpl
  1073. def test_attrs_empty(self):
  1074. self.verify_empty_attrs(AttributesImpl({}))
  1075. def test_attrs_wattr(self):
  1076. self.verify_attrs_wattr(AttributesImpl({"attr" : "val"}))
  1077. def test_nsattrs_empty(self):
  1078. self.verify_empty_nsattrs(AttributesNSImpl({}, {}))
  1079. def test_nsattrs_wattr(self):
  1080. attrs = AttributesNSImpl({(ns_uri, "attr") : "val"},
  1081. {(ns_uri, "attr") : "ns:attr"})
  1082. self.assertEqual(attrs.getLength(), 1)
  1083. self.assertEqual(attrs.getNames(), [(ns_uri, "attr")])
  1084. self.assertEqual(attrs.getQNames(), ["ns:attr"])
  1085. self.assertEqual(len(attrs), 1)
  1086. self.assertIn((ns_uri, "attr"), attrs)
  1087. self.assertEqual(list(attrs.keys()), [(ns_uri, "attr")])
  1088. self.assertEqual(attrs.get((ns_uri, "attr")), "val")
  1089. self.assertEqual(attrs.get((ns_uri, "attr"), 25), "val")
  1090. self.assertEqual(list(attrs.items()), [((ns_uri, "attr"), "val")])
  1091. self.assertEqual(list(attrs.values()), ["val"])
  1092. self.assertEqual(attrs.getValue((ns_uri, "attr")), "val")
  1093. self.assertEqual(attrs.getValueByQName("ns:attr"), "val")
  1094. self.assertEqual(attrs.getNameByQName("ns:attr"), (ns_uri, "attr"))
  1095. self.assertEqual(attrs[(ns_uri, "attr")], "val")
  1096. self.assertEqual(attrs.getQNameByName((ns_uri, "attr")), "ns:attr")
  1097. class LexicalHandlerTest(unittest.TestCase):
  1098. def setUp(self):
  1099. self.parser = None
  1100. self.specified_version = '1.0'
  1101. self.specified_encoding = 'UTF-8'
  1102. self.specified_doctype = 'wish'
  1103. self.specified_entity_names = ('nbsp', 'source', 'target')
  1104. self.specified_comment = ('Comment in a DTD',
  1105. 'Really! You think so?')
  1106. self.test_data = StringIO()
  1107. self.test_data.write('<?xml version="{}" encoding="{}"?>\n'.
  1108. format(self.specified_version,
  1109. self.specified_encoding))
  1110. self.test_data.write('<!DOCTYPE {} [\n'.
  1111. format(self.specified_doctype))
  1112. self.test_data.write('<!-- {} -->\n'.
  1113. format(self.specified_comment[0]))
  1114. self.test_data.write('<!ELEMENT {} (to,from,heading,body,footer)>\n'.
  1115. format(self.specified_doctype))
  1116. self.test_data.write('<!ELEMENT to (#PCDATA)>\n')
  1117. self.test_data.write('<!ELEMENT from (#PCDATA)>\n')
  1118. self.test_data.write('<!ELEMENT heading (#PCDATA)>\n')
  1119. self.test_data.write('<!ELEMENT body (#PCDATA)>\n')
  1120. self.test_data.write('<!ELEMENT footer (#PCDATA)>\n')
  1121. self.test_data.write('<!ENTITY {} "&#xA0;">\n'.
  1122. format(self.specified_entity_names[0]))
  1123. self.test_data.write('<!ENTITY {} "Written by: Alexander.">\n'.
  1124. format(self.specified_entity_names[1]))
  1125. self.test_data.write('<!ENTITY {} "Hope it gets to: Aristotle.">\n'.
  1126. format(self.specified_entity_names[2]))
  1127. self.test_data.write(']>\n')
  1128. self.test_data.write('<{}>'.format(self.specified_doctype))
  1129. self.test_data.write('<to>Aristotle</to>\n')
  1130. self.test_data.write('<from>Alexander</from>\n')
  1131. self.test_data.write('<heading>Supplication</heading>\n')
  1132. self.test_data.write('<body>Teach me patience!</body>\n')
  1133. self.test_data.write('<footer>&{};&{};&{};</footer>\n'.
  1134. format(self.specified_entity_names[1],
  1135. self.specified_entity_names[0],
  1136. self.specified_entity_names[2]))
  1137. self.test_data.write('<!-- {} -->\n'.format(self.specified_comment[1]))
  1138. self.test_data.write('</{}>\n'.format(self.specified_doctype))
  1139. self.test_data.seek(0)
  1140. # Data received from handlers - to be validated
  1141. self.version = None
  1142. self.encoding = None
  1143. self.standalone = None
  1144. self.doctype = None
  1145. self.publicID = None
  1146. self.systemID = None
  1147. self.end_of_dtd = False
  1148. self.comments = []
  1149. def test_handlers(self):
  1150. class TestLexicalHandler(LexicalHandler):
  1151. def __init__(self, test_harness, *args, **kwargs):
  1152. super().__init__(*args, **kwargs)
  1153. self.test_harness = test_harness
  1154. def startDTD(self, doctype, publicID, systemID):
  1155. self.test_harness.doctype = doctype
  1156. self.test_harness.publicID = publicID
  1157. self.test_harness.systemID = systemID
  1158. def endDTD(self):
  1159. self.test_harness.end_of_dtd = True
  1160. def comment(self, text):
  1161. self.test_harness.comments.append(text)
  1162. self.parser = create_parser()
  1163. self.parser.setContentHandler(ContentHandler())
  1164. self.parser.setProperty(
  1165. 'http://xml.org/sax/properties/lexical-handler',
  1166. TestLexicalHandler(self))
  1167. source = InputSource()
  1168. source.setCharacterStream(self.test_data)
  1169. self.parser.parse(source)
  1170. self.assertEqual(self.doctype, self.specified_doctype)
  1171. self.assertIsNone(self.publicID)
  1172. self.assertIsNone(self.systemID)
  1173. self.assertTrue(self.end_of_dtd)
  1174. self.assertEqual(len(self.comments),
  1175. len(self.specified_comment))
  1176. self.assertEqual(f' {self.specified_comment[0]} ', self.comments[0])
  1177. class CDATAHandlerTest(unittest.TestCase):
  1178. def setUp(self):
  1179. self.parser = None
  1180. self.specified_chars = []
  1181. self.specified_chars.append(('Parseable character data', False))
  1182. self.specified_chars.append(('<> &% - assorted other XML junk.', True))
  1183. self.char_index = 0 # Used to index specified results within handlers
  1184. self.test_data = StringIO()
  1185. self.test_data.write('<root_doc>\n')
  1186. self.test_data.write('<some_pcdata>\n')
  1187. self.test_data.write(f'{self.specified_chars[0][0]}\n')
  1188. self.test_data.write('</some_pcdata>\n')
  1189. self.test_data.write('<some_cdata>\n')
  1190. self.test_data.write(f'<![CDATA[{self.specified_chars[1][0]}]]>\n')
  1191. self.test_data.write('</some_cdata>\n')
  1192. self.test_data.write('</root_doc>\n')
  1193. self.test_data.seek(0)
  1194. # Data received from handlers - to be validated
  1195. self.chardata = []
  1196. self.in_cdata = False
  1197. def test_handlers(self):
  1198. class TestLexicalHandler(LexicalHandler):
  1199. def __init__(self, test_harness, *args, **kwargs):
  1200. super().__init__(*args, **kwargs)
  1201. self.test_harness = test_harness
  1202. def startCDATA(self):
  1203. self.test_harness.in_cdata = True
  1204. def endCDATA(self):
  1205. self.test_harness.in_cdata = False
  1206. class TestCharHandler(ContentHandler):
  1207. def __init__(self, test_harness, *args, **kwargs):
  1208. super().__init__(*args, **kwargs)
  1209. self.test_harness = test_harness
  1210. def characters(self, content):
  1211. if content != '\n':
  1212. h = self.test_harness
  1213. t = h.specified_chars[h.char_index]
  1214. h.assertEqual(t[0], content)
  1215. h.assertEqual(t[1], h.in_cdata)
  1216. h.char_index += 1
  1217. self.parser = create_parser()
  1218. self.parser.setContentHandler(TestCharHandler(self))
  1219. self.parser.setProperty(
  1220. 'http://xml.org/sax/properties/lexical-handler',
  1221. TestLexicalHandler(self))
  1222. source = InputSource()
  1223. source.setCharacterStream(self.test_data)
  1224. self.parser.parse(source)
  1225. self.assertFalse(self.in_cdata)
  1226. self.assertEqual(self.char_index, 2)
  1227. if __name__ == "__main__":
  1228. unittest.main()