| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349 |
- import io
- import unittest
- import xml.sax
- from xml.sax.xmlreader import AttributesImpl
- from xml.sax.handler import feature_external_ges
- from xml.dom import pulldom
- from test.support import findfile
# Location of the shared "test.xml" fixture, looked up in the "xmltestdata"
# subdirectory; test_parse() hands this path to pulldom.parse().
tstfile = findfile("test.xml", subdir="xmltestdata")
# Compact XML fixture shared by the tests below.  It deliberately contains a
# default namespace plus a prefixed one, a comment, a self-closing tag, and
# both prefixed and unprefixed attributes.  The line breaks between elements
# matter: each one is reported by the parser as a CHARACTERS event.
SMALL_SAMPLE = "\n".join((
    '<?xml version="1.0"?>',
    '<html xmlns="http://www.w3.org/1999/xhtml" xmlns:xdc="http://www.xml.com/books">',
    '<!-- A comment -->',
    '<title>Introduction to XSL</title>',
    '<hr/>',
    '<p><xdc:author xdc:attrib="prefixed attribute" attrib="other attrib">A. Namespace</xdc:author></p>',
    '</html>',
))
class PullDOMTestCase(unittest.TestCase):
    """Tests of pulldom.parse()/parseString() against real XML input."""

    def test_parse(self):
        """Minimal test of DOMEventStream.parse()"""
        # This just tests that parsing from a stream works. Actual parser
        # semantics are tested using parseString with a more focused XML
        # fragment.

        # Test with a filename:
        handler = pulldom.parse(tstfile)
        # pulldom opened the file itself, so the test must close it.
        self.addCleanup(handler.stream.close)
        list(handler)

        # Test with a file object:
        with open(tstfile, "rb") as fin:
            list(pulldom.parse(fin))

    def test_parse_semantics(self):
        """Test DOMEventStream parsing semantics."""
        items = pulldom.parseString(SMALL_SAMPLE)

        evt, node = next(items)
        # Just check the node is a Document:
        self.assertTrue(hasattr(node, "createElement"))
        self.assertEqual(pulldom.START_DOCUMENT, evt)

        evt, node = next(items)
        self.assertEqual(pulldom.START_ELEMENT, evt)
        self.assertEqual("html", node.tagName)
        self.assertEqual(2, len(node.attributes))
        self.assertEqual(node.attributes.getNamedItem("xmlns:xdc").value,
                         "http://www.xml.com/books")

        evt, node = next(items)
        self.assertEqual(pulldom.CHARACTERS, evt)  # Line break

        evt, node = next(items)
        # XXX - A comment should be reported here (see test_comment);
        # instead we get the line break after the swallowed comment:
        self.assertEqual(pulldom.CHARACTERS, evt)

        evt, node = next(items)
        # Check the event type as well as the tag name, like every other
        # step of this walk does.
        self.assertEqual(pulldom.START_ELEMENT, evt)
        self.assertEqual("title", node.tagName)
        title_node = node

        evt, node = next(items)
        self.assertEqual(pulldom.CHARACTERS, evt)
        self.assertEqual("Introduction to XSL", node.data)

        evt, node = next(items)
        self.assertEqual(pulldom.END_ELEMENT, evt)
        self.assertEqual("title", node.tagName)
        # The end event must hand back the very same node object that the
        # start event produced.
        self.assertIs(title_node, node)

        evt, node = next(items)
        self.assertEqual(pulldom.CHARACTERS, evt)

        evt, node = next(items)
        self.assertEqual(pulldom.START_ELEMENT, evt)
        self.assertEqual("hr", node.tagName)

        evt, node = next(items)
        self.assertEqual(pulldom.END_ELEMENT, evt)
        self.assertEqual("hr", node.tagName)

        evt, node = next(items)
        self.assertEqual(pulldom.CHARACTERS, evt)

        evt, node = next(items)
        self.assertEqual(pulldom.START_ELEMENT, evt)
        self.assertEqual("p", node.tagName)

        evt, node = next(items)
        self.assertEqual(pulldom.START_ELEMENT, evt)
        self.assertEqual("xdc:author", node.tagName)

        evt, node = next(items)
        self.assertEqual(pulldom.CHARACTERS, evt)

        evt, node = next(items)
        self.assertEqual(pulldom.END_ELEMENT, evt)
        self.assertEqual("xdc:author", node.tagName)

        evt, node = next(items)
        self.assertEqual(pulldom.END_ELEMENT, evt)
        self.assertEqual("p", node.tagName)

        evt, node = next(items)
        self.assertEqual(pulldom.CHARACTERS, evt)

        evt, node = next(items)
        self.assertEqual(pulldom.END_ELEMENT, evt)
        self.assertEqual("html", node.tagName)
        # XXX No END_DOCUMENT item is ever obtained (see test_end_document).

    def test_expandItem(self):
        """Ensure expandItem works as expected."""
        items = pulldom.parseString(SMALL_SAMPLE)
        # Loop through the nodes until we get to a "title" start tag:
        for evt, item in items:
            if evt == pulldom.START_ELEMENT and item.tagName == "title":
                items.expandNode(item)
                self.assertEqual(1, len(item.childNodes))
                break
        else:
            self.fail("No \"title\" element detected in SMALL_SAMPLE!")

        # Loop until we get to the next start-element:
        for evt, node in items:
            if evt == pulldom.START_ELEMENT:
                break
        self.assertEqual("hr", node.tagName,
            "expandNode did not leave DOMEventStream in the correct state.")

        # Attempt to expand a standalone element:
        items.expandNode(node)
        self.assertEqual(next(items)[0], pulldom.CHARACTERS)
        evt, node = next(items)
        self.assertEqual(node.tagName, "p")
        items.expandNode(node)
        next(items)  # Skip character data
        evt, node = next(items)
        self.assertEqual(node.tagName, "html")
        with self.assertRaises(StopIteration):
            next(items)

        # clear() must drop the references to the parser and the stream.
        items.clear()
        self.assertIsNone(items.parser)
        self.assertIsNone(items.stream)

    @unittest.expectedFailure
    def test_comment(self):
        """PullDOM does not receive "comment" events."""
        items = pulldom.parseString(SMALL_SAMPLE)
        for evt, _ in items:
            if evt == pulldom.COMMENT:
                break
        else:
            self.fail("No comment was encountered")

    @unittest.expectedFailure
    def test_end_document(self):
        """PullDOM does not receive "end-document" events."""
        items = pulldom.parseString(SMALL_SAMPLE)
        # Read all of the nodes up to and including </html>:
        for evt, node in items:
            if evt == pulldom.END_ELEMENT and node.tagName == "html":
                break
        # Only the next() call can raise StopIteration, so keep the try
        # body down to that single statement.
        try:
            evt, node = next(items)
        except StopIteration:
            self.fail(
                "Ran out of events, but should have received END_DOCUMENT")
        # Assert that the next node is END_DOCUMENT:
        self.assertEqual(pulldom.END_DOCUMENT, evt)

    def test_external_ges_default(self):
        """The SAX parser behind parseString() reports external general
        entities as disabled."""
        parser = pulldom.parseString(SMALL_SAMPLE)
        saxparser = parser.parser
        ges = saxparser.getFeature(feature_external_ges)
        self.assertEqual(ges, False)
class ThoroughTestCase(unittest.TestCase):
    """Test the hard-to-reach parts of pulldom."""

    def test_thorough_parse(self):
        """Test some of the hard-to-reach parts of PullDOM."""
        stream = pulldom.parse(None, parser=SAXExerciser())
        self._test_thorough(stream)

    @unittest.expectedFailure
    def test_sax2dom_fail(self):
        """SAX2DOM can't handle a PI before the root element."""
        stream = SAX2DOMTestHelper(None, SAXExerciser(), 12)
        self._test_thorough(stream)

    def test_thorough_sax2dom(self):
        """Test some of the hard-to-reach parts of SAX2DOM."""
        stream = SAX2DOMTestHelper(None, SAX2DOMExerciser(), 12)
        self._test_thorough(stream, False)

    def _test_thorough(self, pd, before_root=True):
        """Walk the event stream *pd* produced by a mock parser and check
        every event in turn.

        When *before_root* is true, a comment and a processing instruction
        are expected ahead of the root element as well as inside it.
        """

        def pull(expected_kind):
            # Fetch the next event, verify its type, and hand back the node.
            kind, node = next(pd)
            self.assertEqual(expected_kind, kind)
            return node

        def check_comment_then_pi():
            # The scripted parsers always emit this pair together.
            self.assertEqual("a comment", pull(pulldom.COMMENT).data)
            instruction = pull(pulldom.PROCESSING_INSTRUCTION)
            self.assertEqual("target", instruction.target)
            self.assertEqual("data", instruction.data)

        document = pull(pulldom.START_DOCUMENT)
        # Just check the node is a Document:
        self.assertTrue(hasattr(document, "createElement"))

        if before_root:
            check_comment_then_pi()

        self.assertEqual("html", pull(pulldom.START_ELEMENT).tagName)
        check_comment_then_pi()

        self.assertEqual("p", pull(pulldom.START_ELEMENT).tagName)
        self.assertEqual("text", pull(pulldom.CHARACTERS).data)
        self.assertEqual("p", pull(pulldom.END_ELEMENT).tagName)

        self.assertEqual("html", pull(pulldom.END_ELEMENT).tagName)
        pull(pulldom.END_DOCUMENT)
class SAXExerciser:
    """A fake sax parser that calls some of the harder-to-reach sax methods to
    ensure it emits the correct events"""

    def setContentHandler(self, handler):
        """Remember *handler*; parse() replays its scripted events into it."""
        self._handler = handler

    def parse(self, _):
        """Ignore the input source and fire a fixed sequence of SAX calls."""
        sink = self._handler
        no_attrs = AttributesImpl({})
        para_attrs = AttributesImpl({"class": "paraclass"})

        sink.startDocument()

        # A comment and a PI ahead of the first start_element: these check
        # that items preceding the root are properly stored and emitted.
        sink.comment("a comment")
        sink.processingInstruction("target", "data")

        sink.startElement("html", no_attrs)

        # The same pair again, this time inside the root element.
        sink.comment("a comment")
        sink.processingInstruction("target", "data")

        sink.startElement("p", para_attrs)
        sink.characters("text")
        sink.endElement("p")

        sink.endElement("html")
        sink.endDocument()

    def stub(self, *args, **kwargs):
        """Stub method. Does nothing."""

    # Configuration calls are accepted and ignored.
    setProperty = setFeature = stub
class SAX2DOMExerciser(SAXExerciser):
    """The same as SAXExerciser, but without the processing instruction and
    comment before the root element, because S2D can't handle it"""

    def parse(self, _):
        """Ignore the input source and fire the scripted SAX calls, starting
        directly with the root element."""
        sink = self._handler
        no_attrs = AttributesImpl({})
        para_attrs = AttributesImpl({"class": "paraclass"})

        sink.startDocument()
        sink.startElement("html", no_attrs)

        # Comment and PI appear only inside the root element here.
        sink.comment("a comment")
        sink.processingInstruction("target", "data")

        sink.startElement("p", para_attrs)
        sink.characters("text")
        sink.endElement("p")

        sink.endElement("html")
        sink.endDocument()
class SAX2DOMTestHelper(pulldom.DOMEventStream):
    """Allows us to drive SAX2DOM from a DOMEventStream."""

    def reset(self):
        """Install a fresh SAX2DOM instance as the parser's content handler."""
        self.pulldom = builder = pulldom.SAX2DOM()
        sax_parser = self.parser
        # This content handler relies on namespace support, so switch the
        # feature on before attaching the handler.
        namespaces_feature = xml.sax.handler.feature_namespaces
        sax_parser.setFeature(namespaces_feature, 1)
        sax_parser.setContentHandler(builder)
class SAX2DOMTestCase(unittest.TestCase):
    """Tests of the SAX2DOM content handler."""

    def confirm(self, test, testname="Test"):
        """Assert that *test* is true, reporting *testname* on failure."""
        self.assertTrue(test, testname)

    def test_basic(self):
        """Ensure SAX2DOM can parse from a stream."""
        sample_size = len(SMALL_SAMPLE)
        with io.StringIO(SMALL_SAMPLE) as source:
            events = SAX2DOMTestHelper(source, xml.sax.make_parser(),
                                       sample_size)
            # Advance to the start of the root element.
            for kind, node in events:
                if kind != pulldom.START_ELEMENT:
                    continue
                if node.tagName == "html":
                    break
            # Because the buffer is the same length as the XML, all the
            # nodes should have been parsed and added:
            child_count = len(node.childNodes)
            self.assertGreater(child_count, 0)

    def testSAX2DOM(self):
        """Ensure SAX2DOM expands nodes as expected."""
        builder = pulldom.SAX2DOM()

        # Build <doc>text<subelm>text</subelm>text</doc> by hand.
        builder.startDocument()
        builder.startElement("doc", {})
        builder.characters("text")
        builder.startElement("subelm", {})
        builder.characters("text")
        builder.endElement("subelm")
        builder.characters("text")
        builder.endElement("doc")
        builder.endDocument()

        document = builder.document
        root = document.documentElement
        leading_text, sub_elem, trailing_text = root.childNodes
        inner_text = sub_elem.childNodes[0]

        # Sibling links among the root's three children:
        self.assertIsNone(leading_text.previousSibling)
        self.assertIs(leading_text.nextSibling, sub_elem)
        self.assertIs(sub_elem.previousSibling, leading_text)
        self.assertIs(sub_elem.nextSibling, trailing_text)
        self.assertIs(trailing_text.previousSibling, sub_elem)
        self.assertIsNone(trailing_text.nextSibling)

        # The nested text node is an only child:
        self.assertIsNone(inner_text.previousSibling)
        self.assertIsNone(inner_text.nextSibling)

        # Parent links:
        self.assertIs(root.parentNode, document)
        self.assertIs(leading_text.parentNode, root)
        self.assertIs(sub_elem.parentNode, root)
        self.assertIs(trailing_text.parentNode, root)
        self.assertIs(inner_text.parentNode, sub_elem)

        document.unlink()
# Run the tests in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()
|