test_pulldom.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349
  1. import io
  2. import unittest
  3. import xml.sax
  4. from xml.sax.xmlreader import AttributesImpl
  5. from xml.sax.handler import feature_external_ges
  6. from xml.dom import pulldom
  7. from test.support import findfile
# Path of the "test.xml" fixture, looked up by findfile() in the
# "xmltestdata" subdirectory:
tstfile = findfile("test.xml", subdir="xmltestdata")
# A handy XML snippet, containing attributes, a namespace prefix, a comment,
# and a self-closing tag:
SMALL_SAMPLE = """<?xml version="1.0"?>
<html xmlns="http://www.w3.org/1999/xhtml" xmlns:xdc="http://www.xml.com/books">
<!-- A comment -->
<title>Introduction to XSL</title>
<hr/>
<p><xdc:author xdc:attrib="prefixed attribute" attrib="other attrib">A. Namespace</xdc:author></p>
</html>"""
  18. class PullDOMTestCase(unittest.TestCase):
  19. def test_parse(self):
  20. """Minimal test of DOMEventStream.parse()"""
  21. # This just tests that parsing from a stream works. Actual parser
  22. # semantics are tested using parseString with a more focused XML
  23. # fragment.
  24. # Test with a filename:
  25. handler = pulldom.parse(tstfile)
  26. self.addCleanup(handler.stream.close)
  27. list(handler)
  28. # Test with a file object:
  29. with open(tstfile, "rb") as fin:
  30. list(pulldom.parse(fin))
  31. def test_parse_semantics(self):
  32. """Test DOMEventStream parsing semantics."""
  33. items = pulldom.parseString(SMALL_SAMPLE)
  34. evt, node = next(items)
  35. # Just check the node is a Document:
  36. self.assertTrue(hasattr(node, "createElement"))
  37. self.assertEqual(pulldom.START_DOCUMENT, evt)
  38. evt, node = next(items)
  39. self.assertEqual(pulldom.START_ELEMENT, evt)
  40. self.assertEqual("html", node.tagName)
  41. self.assertEqual(2, len(node.attributes))
  42. self.assertEqual(node.attributes.getNamedItem("xmlns:xdc").value,
  43. "http://www.xml.com/books")
  44. evt, node = next(items)
  45. self.assertEqual(pulldom.CHARACTERS, evt) # Line break
  46. evt, node = next(items)
  47. # XXX - A comment should be reported here!
  48. # self.assertEqual(pulldom.COMMENT, evt)
  49. # Line break after swallowed comment:
  50. self.assertEqual(pulldom.CHARACTERS, evt)
  51. evt, node = next(items)
  52. self.assertEqual("title", node.tagName)
  53. title_node = node
  54. evt, node = next(items)
  55. self.assertEqual(pulldom.CHARACTERS, evt)
  56. self.assertEqual("Introduction to XSL", node.data)
  57. evt, node = next(items)
  58. self.assertEqual(pulldom.END_ELEMENT, evt)
  59. self.assertEqual("title", node.tagName)
  60. self.assertTrue(title_node is node)
  61. evt, node = next(items)
  62. self.assertEqual(pulldom.CHARACTERS, evt)
  63. evt, node = next(items)
  64. self.assertEqual(pulldom.START_ELEMENT, evt)
  65. self.assertEqual("hr", node.tagName)
  66. evt, node = next(items)
  67. self.assertEqual(pulldom.END_ELEMENT, evt)
  68. self.assertEqual("hr", node.tagName)
  69. evt, node = next(items)
  70. self.assertEqual(pulldom.CHARACTERS, evt)
  71. evt, node = next(items)
  72. self.assertEqual(pulldom.START_ELEMENT, evt)
  73. self.assertEqual("p", node.tagName)
  74. evt, node = next(items)
  75. self.assertEqual(pulldom.START_ELEMENT, evt)
  76. self.assertEqual("xdc:author", node.tagName)
  77. evt, node = next(items)
  78. self.assertEqual(pulldom.CHARACTERS, evt)
  79. evt, node = next(items)
  80. self.assertEqual(pulldom.END_ELEMENT, evt)
  81. self.assertEqual("xdc:author", node.tagName)
  82. evt, node = next(items)
  83. self.assertEqual(pulldom.END_ELEMENT, evt)
  84. evt, node = next(items)
  85. self.assertEqual(pulldom.CHARACTERS, evt)
  86. evt, node = next(items)
  87. self.assertEqual(pulldom.END_ELEMENT, evt)
  88. # XXX No END_DOCUMENT item is ever obtained:
  89. #evt, node = next(items)
  90. #self.assertEqual(pulldom.END_DOCUMENT, evt)
  91. def test_expandItem(self):
  92. """Ensure expandItem works as expected."""
  93. items = pulldom.parseString(SMALL_SAMPLE)
  94. # Loop through the nodes until we get to a "title" start tag:
  95. for evt, item in items:
  96. if evt == pulldom.START_ELEMENT and item.tagName == "title":
  97. items.expandNode(item)
  98. self.assertEqual(1, len(item.childNodes))
  99. break
  100. else:
  101. self.fail("No \"title\" element detected in SMALL_SAMPLE!")
  102. # Loop until we get to the next start-element:
  103. for evt, node in items:
  104. if evt == pulldom.START_ELEMENT:
  105. break
  106. self.assertEqual("hr", node.tagName,
  107. "expandNode did not leave DOMEventStream in the correct state.")
  108. # Attempt to expand a standalone element:
  109. items.expandNode(node)
  110. self.assertEqual(next(items)[0], pulldom.CHARACTERS)
  111. evt, node = next(items)
  112. self.assertEqual(node.tagName, "p")
  113. items.expandNode(node)
  114. next(items) # Skip character data
  115. evt, node = next(items)
  116. self.assertEqual(node.tagName, "html")
  117. with self.assertRaises(StopIteration):
  118. next(items)
  119. items.clear()
  120. self.assertIsNone(items.parser)
  121. self.assertIsNone(items.stream)
  122. @unittest.expectedFailure
  123. def test_comment(self):
  124. """PullDOM does not receive "comment" events."""
  125. items = pulldom.parseString(SMALL_SAMPLE)
  126. for evt, _ in items:
  127. if evt == pulldom.COMMENT:
  128. break
  129. else:
  130. self.fail("No comment was encountered")
  131. @unittest.expectedFailure
  132. def test_end_document(self):
  133. """PullDOM does not receive "end-document" events."""
  134. items = pulldom.parseString(SMALL_SAMPLE)
  135. # Read all of the nodes up to and including </html>:
  136. for evt, node in items:
  137. if evt == pulldom.END_ELEMENT and node.tagName == "html":
  138. break
  139. try:
  140. # Assert that the next node is END_DOCUMENT:
  141. evt, node = next(items)
  142. self.assertEqual(pulldom.END_DOCUMENT, evt)
  143. except StopIteration:
  144. self.fail(
  145. "Ran out of events, but should have received END_DOCUMENT")
  146. def test_external_ges_default(self):
  147. parser = pulldom.parseString(SMALL_SAMPLE)
  148. saxparser = parser.parser
  149. ges = saxparser.getFeature(feature_external_ges)
  150. self.assertEqual(ges, False)
  151. class ThoroughTestCase(unittest.TestCase):
  152. """Test the hard-to-reach parts of pulldom."""
  153. def test_thorough_parse(self):
  154. """Test some of the hard-to-reach parts of PullDOM."""
  155. self._test_thorough(pulldom.parse(None, parser=SAXExerciser()))
  156. @unittest.expectedFailure
  157. def test_sax2dom_fail(self):
  158. """SAX2DOM can"t handle a PI before the root element."""
  159. pd = SAX2DOMTestHelper(None, SAXExerciser(), 12)
  160. self._test_thorough(pd)
  161. def test_thorough_sax2dom(self):
  162. """Test some of the hard-to-reach parts of SAX2DOM."""
  163. pd = SAX2DOMTestHelper(None, SAX2DOMExerciser(), 12)
  164. self._test_thorough(pd, False)
  165. def _test_thorough(self, pd, before_root=True):
  166. """Test some of the hard-to-reach parts of the parser, using a mock
  167. parser."""
  168. evt, node = next(pd)
  169. self.assertEqual(pulldom.START_DOCUMENT, evt)
  170. # Just check the node is a Document:
  171. self.assertTrue(hasattr(node, "createElement"))
  172. if before_root:
  173. evt, node = next(pd)
  174. self.assertEqual(pulldom.COMMENT, evt)
  175. self.assertEqual("a comment", node.data)
  176. evt, node = next(pd)
  177. self.assertEqual(pulldom.PROCESSING_INSTRUCTION, evt)
  178. self.assertEqual("target", node.target)
  179. self.assertEqual("data", node.data)
  180. evt, node = next(pd)
  181. self.assertEqual(pulldom.START_ELEMENT, evt)
  182. self.assertEqual("html", node.tagName)
  183. evt, node = next(pd)
  184. self.assertEqual(pulldom.COMMENT, evt)
  185. self.assertEqual("a comment", node.data)
  186. evt, node = next(pd)
  187. self.assertEqual(pulldom.PROCESSING_INSTRUCTION, evt)
  188. self.assertEqual("target", node.target)
  189. self.assertEqual("data", node.data)
  190. evt, node = next(pd)
  191. self.assertEqual(pulldom.START_ELEMENT, evt)
  192. self.assertEqual("p", node.tagName)
  193. evt, node = next(pd)
  194. self.assertEqual(pulldom.CHARACTERS, evt)
  195. self.assertEqual("text", node.data)
  196. evt, node = next(pd)
  197. self.assertEqual(pulldom.END_ELEMENT, evt)
  198. self.assertEqual("p", node.tagName)
  199. evt, node = next(pd)
  200. self.assertEqual(pulldom.END_ELEMENT, evt)
  201. self.assertEqual("html", node.tagName)
  202. evt, node = next(pd)
  203. self.assertEqual(pulldom.END_DOCUMENT, evt)
  204. class SAXExerciser(object):
  205. """A fake sax parser that calls some of the harder-to-reach sax methods to
  206. ensure it emits the correct events"""
  207. def setContentHandler(self, handler):
  208. self._handler = handler
  209. def parse(self, _):
  210. h = self._handler
  211. h.startDocument()
  212. # The next two items ensure that items preceding the first
  213. # start_element are properly stored and emitted:
  214. h.comment("a comment")
  215. h.processingInstruction("target", "data")
  216. h.startElement("html", AttributesImpl({}))
  217. h.comment("a comment")
  218. h.processingInstruction("target", "data")
  219. h.startElement("p", AttributesImpl({"class": "paraclass"}))
  220. h.characters("text")
  221. h.endElement("p")
  222. h.endElement("html")
  223. h.endDocument()
  224. def stub(self, *args, **kwargs):
  225. """Stub method. Does nothing."""
  226. pass
  227. setProperty = stub
  228. setFeature = stub
  229. class SAX2DOMExerciser(SAXExerciser):
  230. """The same as SAXExerciser, but without the processing instruction and
  231. comment before the root element, because S2D can"t handle it"""
  232. def parse(self, _):
  233. h = self._handler
  234. h.startDocument()
  235. h.startElement("html", AttributesImpl({}))
  236. h.comment("a comment")
  237. h.processingInstruction("target", "data")
  238. h.startElement("p", AttributesImpl({"class": "paraclass"}))
  239. h.characters("text")
  240. h.endElement("p")
  241. h.endElement("html")
  242. h.endDocument()
  243. class SAX2DOMTestHelper(pulldom.DOMEventStream):
  244. """Allows us to drive SAX2DOM from a DOMEventStream."""
  245. def reset(self):
  246. self.pulldom = pulldom.SAX2DOM()
  247. # This content handler relies on namespace support
  248. self.parser.setFeature(xml.sax.handler.feature_namespaces, 1)
  249. self.parser.setContentHandler(self.pulldom)
  250. class SAX2DOMTestCase(unittest.TestCase):
  251. def confirm(self, test, testname="Test"):
  252. self.assertTrue(test, testname)
  253. def test_basic(self):
  254. """Ensure SAX2DOM can parse from a stream."""
  255. with io.StringIO(SMALL_SAMPLE) as fin:
  256. sd = SAX2DOMTestHelper(fin, xml.sax.make_parser(),
  257. len(SMALL_SAMPLE))
  258. for evt, node in sd:
  259. if evt == pulldom.START_ELEMENT and node.tagName == "html":
  260. break
  261. # Because the buffer is the same length as the XML, all the
  262. # nodes should have been parsed and added:
  263. self.assertGreater(len(node.childNodes), 0)
  264. def testSAX2DOM(self):
  265. """Ensure SAX2DOM expands nodes as expected."""
  266. sax2dom = pulldom.SAX2DOM()
  267. sax2dom.startDocument()
  268. sax2dom.startElement("doc", {})
  269. sax2dom.characters("text")
  270. sax2dom.startElement("subelm", {})
  271. sax2dom.characters("text")
  272. sax2dom.endElement("subelm")
  273. sax2dom.characters("text")
  274. sax2dom.endElement("doc")
  275. sax2dom.endDocument()
  276. doc = sax2dom.document
  277. root = doc.documentElement
  278. (text1, elm1, text2) = root.childNodes
  279. text3 = elm1.childNodes[0]
  280. self.assertIsNone(text1.previousSibling)
  281. self.assertIs(text1.nextSibling, elm1)
  282. self.assertIs(elm1.previousSibling, text1)
  283. self.assertIs(elm1.nextSibling, text2)
  284. self.assertIs(text2.previousSibling, elm1)
  285. self.assertIsNone(text2.nextSibling)
  286. self.assertIsNone(text3.previousSibling)
  287. self.assertIsNone(text3.nextSibling)
  288. self.assertIs(root.parentNode, doc)
  289. self.assertIs(text1.parentNode, root)
  290. self.assertIs(elm1.parentNode, root)
  291. self.assertIs(text2.parentNode, root)
  292. self.assertIs(text3.parentNode, elm1)
  293. doc.unlink()
# Run the test suite when this file is executed as a script:
if __name__ == "__main__":
    unittest.main()