test_xml_etree.py 160 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
7427842794280428142824283428442854286428742884289429042914292429342944295
  1. # IMPORTANT: the same tests are run from "test_xml_etree_c" in order
  2. # to ensure consistency between the C implementation and the Python
  3. # implementation.
  4. #
  5. # For this purpose, the module-level "ET" symbol is temporarily
  6. # monkey-patched when running the "test_xml_etree_c" test suite.
  7. import copy
  8. import functools
  9. import html
  10. import io
  11. import itertools
  12. import operator
  13. import os
  14. import pickle
  15. import sys
  16. import textwrap
  17. import types
  18. import unittest
  19. import warnings
  20. import weakref
  21. from functools import partial
  22. from itertools import product, islice
  23. from test import support
  24. from test.support import os_helper
  25. from test.support import warnings_helper
  26. from test.support import findfile, gc_collect, swap_attr, swap_item
  27. from test.support.import_helper import import_fresh_module
  28. from test.support.os_helper import TESTFN
  29. # pyET is the pure-Python implementation.
  30. #
  31. # ET is pyET in test_xml_etree and is the C accelerated version in
  32. # test_xml_etree_c.
# Both module handles start out as None placeholders; the header comment of
# this file says the module-level "ET" symbol is monkey-patched when the
# "test_xml_etree_c" suite runs.
pyET = None
ET = None

# Sample documents are looked up in the "xmltestdata" subdirectory.
SIMPLE_XMLFILE = findfile("simple.xml", subdir="xmltestdata")
try:
    # Probe only: the encoded result is discarded.
    SIMPLE_XMLFILE.encode("utf-8")
except UnicodeEncodeError:
    # Raised at import time, so the whole module is skipped when the path
    # cannot be expressed in UTF-8.
    raise unittest.SkipTest("filename is not encodable to utf8")
SIMPLE_NS_XMLFILE = findfile("simple-ns.xml", subdir="xmltestdata")
UTF8_BUG_XMLFILE = findfile("expat224_utf8_bug.xml", subdir="xmltestdata")
# In-memory XML documents shared by the tests in this module.
# A trailing backslash after the opening quotes suppresses the leading
# newline; fixtures without it begin with "\n".
# NOTE(review): the fixture bodies carry no nested indentation here --
# confirm against the upstream file that no whitespace was lost.

# Un-namespaced <body> with three <tag> elements, one nested in <section>.
SAMPLE_XML = """\
<body>
<tag class='a'>text</tag>
<tag class='b' />
<section>
<tag class='b' id='inner'>subtext</tag>
</section>
</body>
"""
# Stand-alone <section> with a nested <nextsection>.
SAMPLE_SECTION = """\
<section>
<tag class='b' id='inner'>subtext</tag>
<nexttag />
<nextsection>
<tag />
</nextsection>
</section>
"""
# <body> document in the default namespace "http://effbot.org/ns"
# (begins with a newline).
SAMPLE_XML_NS = """
<body xmlns="http://effbot.org/ns">
<tag>text</tag>
<tag />
<section>
<tag>subtext</tag>
</section>
</body>
"""
# Two sibling tables using the prefixed namespaces h="hello" and f="foo"
# (begins with a newline).
SAMPLE_XML_NS_ELEMS = """
<root>
<h:table xmlns:h="hello">
<h:tr>
<h:td>Apples</h:td>
<h:td>Bananas</h:td>
</h:tr>
</h:table>
<f:table xmlns:f="foo">
<f:name>African Coffee Table</f:name>
<f:width>80</f:width>
<f:length>120</f:length>
</f:table>
</root>
"""
# References &entity;, which is not declared in the internal subset; the
# DOCTYPE only pulls in the external parameter entity 'user-entities.xml'.
ENTITY_XML = """\
<!DOCTYPE points [
<!ENTITY % user-entities SYSTEM 'user-entities.xml'>
%user-entities;
]>
<document>&entity;</document>
"""
# Declares "entity" as an external (SYSTEM) entity whose URI is
# file:///non-existing-file.xml and references it in the body.
EXTERNAL_ENTITY_XML = """\
<!DOCTYPE points [
<!ENTITY entity SYSTEM "file:///non-existing-file.xml">
]>
<document>&entity;</document>
"""
# Internal DTD with an ATTLIST default (xml:lang="eng" on <bar>) and the
# internal entity qux -> "quux".
ATTLIST_XML = """\
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE Foo [
<!ELEMENT foo (bar*)>
<!ELEMENT bar (#PCDATA)*>
<!ATTLIST bar xml:lang CDATA "eng">
<!ENTITY qux "quux">
]>
<foo>
<bar>&qux;</bar>
</foo>
"""
  109. def checkwarnings(*filters, quiet=False):
  110. def decorator(test):
  111. def newtest(*args, **kwargs):
  112. with warnings_helper.check_warnings(*filters, quiet=quiet):
  113. test(*args, **kwargs)
  114. functools.update_wrapper(newtest, test)
  115. return newtest
  116. return decorator
  117. def convlinesep(data):
  118. return data.replace(b'\n', os.linesep.encode())
  119. class ModuleTest(unittest.TestCase):
  120. def test_sanity(self):
  121. # Import sanity.
  122. from xml.etree import ElementTree
  123. from xml.etree import ElementInclude
  124. from xml.etree import ElementPath
  125. def test_all(self):
  126. names = ("xml.etree.ElementTree", "_elementtree")
  127. support.check__all__(self, ET, names, not_exported=("HTML_EMPTY",))
  128. def serialize(elem, to_string=True, encoding='unicode', **options):
  129. if encoding != 'unicode':
  130. file = io.BytesIO()
  131. else:
  132. file = io.StringIO()
  133. tree = ET.ElementTree(elem)
  134. tree.write(file, encoding=encoding, **options)
  135. if to_string:
  136. return file.getvalue()
  137. else:
  138. file.seek(0)
  139. return file
  140. def summarize_list(seq):
  141. return [elem.tag for elem in seq]
  142. class ElementTestCase:
  143. @classmethod
  144. def setUpClass(cls):
  145. cls.modules = {pyET, ET}
  146. def pickleRoundTrip(self, obj, name, dumper, loader, proto):
  147. try:
  148. with swap_item(sys.modules, name, dumper):
  149. temp = pickle.dumps(obj, proto)
  150. with swap_item(sys.modules, name, loader):
  151. result = pickle.loads(temp)
  152. except pickle.PicklingError as pe:
  153. # pyET must be second, because pyET may be (equal to) ET.
  154. human = dict([(ET, "cET"), (pyET, "pyET")])
  155. raise support.TestFailed("Failed to round-trip %r from %r to %r"
  156. % (obj,
  157. human.get(dumper, dumper),
  158. human.get(loader, loader))) from pe
  159. return result
  160. def assertEqualElements(self, alice, bob):
  161. self.assertIsInstance(alice, (ET.Element, pyET.Element))
  162. self.assertIsInstance(bob, (ET.Element, pyET.Element))
  163. self.assertEqual(len(list(alice)), len(list(bob)))
  164. for x, y in zip(alice, bob):
  165. self.assertEqualElements(x, y)
  166. properties = operator.attrgetter('tag', 'tail', 'text', 'attrib')
  167. self.assertEqual(properties(alice), properties(bob))
  168. # --------------------------------------------------------------------
  169. # element tree tests
  170. class ElementTreeTest(unittest.TestCase):
  171. def serialize_check(self, elem, expected):
  172. self.assertEqual(serialize(elem), expected)
  173. def test_interface(self):
  174. # Test element tree interface.
  175. def check_string(string):
  176. len(string)
  177. for char in string:
  178. self.assertEqual(len(char), 1,
  179. msg="expected one-character string, got %r" % char)
  180. new_string = string + ""
  181. new_string = string + " "
  182. string[:0]
  183. def check_mapping(mapping):
  184. len(mapping)
  185. keys = mapping.keys()
  186. items = mapping.items()
  187. for key in keys:
  188. item = mapping[key]
  189. mapping["key"] = "value"
  190. self.assertEqual(mapping["key"], "value",
  191. msg="expected value string, got %r" % mapping["key"])
  192. def check_element(element):
  193. self.assertTrue(ET.iselement(element), msg="not an element")
  194. direlem = dir(element)
  195. for attr in 'tag', 'attrib', 'text', 'tail':
  196. self.assertTrue(hasattr(element, attr),
  197. msg='no %s member' % attr)
  198. self.assertIn(attr, direlem,
  199. msg='no %s visible by dir' % attr)
  200. check_string(element.tag)
  201. check_mapping(element.attrib)
  202. if element.text is not None:
  203. check_string(element.text)
  204. if element.tail is not None:
  205. check_string(element.tail)
  206. for elem in element:
  207. check_element(elem)
  208. element = ET.Element("tag")
  209. check_element(element)
  210. tree = ET.ElementTree(element)
  211. check_element(tree.getroot())
  212. element = ET.Element("t\xe4g", key="value")
  213. tree = ET.ElementTree(element)
  214. self.assertRegex(repr(element), r"^<Element 't\xe4g' at 0x.*>$")
  215. element = ET.Element("tag", key="value")
  216. # Make sure all standard element methods exist.
  217. def check_method(method):
  218. self.assertTrue(hasattr(method, '__call__'),
  219. msg="%s not callable" % method)
  220. check_method(element.append)
  221. check_method(element.extend)
  222. check_method(element.insert)
  223. check_method(element.remove)
  224. check_method(element.find)
  225. check_method(element.iterfind)
  226. check_method(element.findall)
  227. check_method(element.findtext)
  228. check_method(element.clear)
  229. check_method(element.get)
  230. check_method(element.set)
  231. check_method(element.keys)
  232. check_method(element.items)
  233. check_method(element.iter)
  234. check_method(element.itertext)
  235. # These methods return an iterable. See bug 6472.
  236. def check_iter(it):
  237. check_method(it.__next__)
  238. check_iter(element.iterfind("tag"))
  239. check_iter(element.iterfind("*"))
  240. check_iter(tree.iterfind("tag"))
  241. check_iter(tree.iterfind("*"))
  242. # These aliases are provided:
  243. self.assertEqual(ET.XML, ET.fromstring)
  244. self.assertEqual(ET.PI, ET.ProcessingInstruction)
  245. def test_set_attribute(self):
  246. element = ET.Element('tag')
  247. self.assertEqual(element.tag, 'tag')
  248. element.tag = 'Tag'
  249. self.assertEqual(element.tag, 'Tag')
  250. element.tag = 'TAG'
  251. self.assertEqual(element.tag, 'TAG')
  252. self.assertIsNone(element.text)
  253. element.text = 'Text'
  254. self.assertEqual(element.text, 'Text')
  255. element.text = 'TEXT'
  256. self.assertEqual(element.text, 'TEXT')
  257. self.assertIsNone(element.tail)
  258. element.tail = 'Tail'
  259. self.assertEqual(element.tail, 'Tail')
  260. element.tail = 'TAIL'
  261. self.assertEqual(element.tail, 'TAIL')
  262. self.assertEqual(element.attrib, {})
  263. element.attrib = {'a': 'b', 'c': 'd'}
  264. self.assertEqual(element.attrib, {'a': 'b', 'c': 'd'})
  265. element.attrib = {'A': 'B', 'C': 'D'}
  266. self.assertEqual(element.attrib, {'A': 'B', 'C': 'D'})
  267. def test_simpleops(self):
  268. # Basic method sanity checks.
  269. elem = ET.XML("<body><tag/></body>")
  270. self.serialize_check(elem, '<body><tag /></body>')
  271. e = ET.Element("tag2")
  272. elem.append(e)
  273. self.serialize_check(elem, '<body><tag /><tag2 /></body>')
  274. elem.remove(e)
  275. self.serialize_check(elem, '<body><tag /></body>')
  276. elem.insert(0, e)
  277. self.serialize_check(elem, '<body><tag2 /><tag /></body>')
  278. elem.remove(e)
  279. elem.extend([e])
  280. self.serialize_check(elem, '<body><tag /><tag2 /></body>')
  281. elem.remove(e)
  282. elem.extend(iter([e]))
  283. self.serialize_check(elem, '<body><tag /><tag2 /></body>')
  284. elem.remove(e)
  285. element = ET.Element("tag", key="value")
  286. self.serialize_check(element, '<tag key="value" />') # 1
  287. subelement = ET.Element("subtag")
  288. element.append(subelement)
  289. self.serialize_check(element, '<tag key="value"><subtag /></tag>') # 2
  290. element.insert(0, subelement)
  291. self.serialize_check(element,
  292. '<tag key="value"><subtag /><subtag /></tag>') # 3
  293. element.remove(subelement)
  294. self.serialize_check(element, '<tag key="value"><subtag /></tag>') # 4
  295. element.remove(subelement)
  296. self.serialize_check(element, '<tag key="value" />') # 5
  297. with self.assertRaises(ValueError) as cm:
  298. element.remove(subelement)
  299. self.assertEqual(str(cm.exception), 'list.remove(x): x not in list')
  300. self.serialize_check(element, '<tag key="value" />') # 6
  301. element[0:0] = [subelement, subelement, subelement]
  302. self.serialize_check(element[1], '<subtag />')
  303. self.assertEqual(element[1:9], [element[1], element[2]])
  304. self.assertEqual(element[:9:2], [element[0], element[2]])
  305. del element[1:2]
  306. self.serialize_check(element,
  307. '<tag key="value"><subtag /><subtag /></tag>')
  308. def test_cdata(self):
  309. # Test CDATA handling (etc).
  310. self.serialize_check(ET.XML("<tag>hello</tag>"),
  311. '<tag>hello</tag>')
  312. self.serialize_check(ET.XML("<tag>&#104;&#101;&#108;&#108;&#111;</tag>"),
  313. '<tag>hello</tag>')
  314. self.serialize_check(ET.XML("<tag><![CDATA[hello]]></tag>"),
  315. '<tag>hello</tag>')
  316. def test_file_init(self):
  317. stringfile = io.BytesIO(SAMPLE_XML.encode("utf-8"))
  318. tree = ET.ElementTree(file=stringfile)
  319. self.assertEqual(tree.find("tag").tag, 'tag')
  320. self.assertEqual(tree.find("section/tag").tag, 'tag')
  321. tree = ET.ElementTree(file=SIMPLE_XMLFILE)
  322. self.assertEqual(tree.find("element").tag, 'element')
  323. self.assertEqual(tree.find("element/../empty-element").tag,
  324. 'empty-element')
  325. def test_path_cache(self):
  326. # Check that the path cache behaves sanely.
  327. from xml.etree import ElementPath
  328. elem = ET.XML(SAMPLE_XML)
  329. for i in range(10): ET.ElementTree(elem).find('./'+str(i))
  330. cache_len_10 = len(ElementPath._cache)
  331. for i in range(10): ET.ElementTree(elem).find('./'+str(i))
  332. self.assertEqual(len(ElementPath._cache), cache_len_10)
  333. for i in range(20): ET.ElementTree(elem).find('./'+str(i))
  334. self.assertGreater(len(ElementPath._cache), cache_len_10)
  335. for i in range(600): ET.ElementTree(elem).find('./'+str(i))
  336. self.assertLess(len(ElementPath._cache), 500)
  337. def test_copy(self):
  338. # Test copy handling (etc).
  339. import copy
  340. e1 = ET.XML("<tag>hello<foo/></tag>")
  341. e2 = copy.copy(e1)
  342. e3 = copy.deepcopy(e1)
  343. e1.find("foo").tag = "bar"
  344. self.serialize_check(e1, '<tag>hello<bar /></tag>')
  345. self.serialize_check(e2, '<tag>hello<bar /></tag>')
  346. self.serialize_check(e3, '<tag>hello<foo /></tag>')
  347. def test_attrib(self):
  348. # Test attribute handling.
  349. elem = ET.Element("tag")
  350. elem.get("key") # 1.1
  351. self.assertEqual(elem.get("key", "default"), 'default') # 1.2
  352. elem.set("key", "value")
  353. self.assertEqual(elem.get("key"), 'value') # 1.3
  354. elem = ET.Element("tag", key="value")
  355. self.assertEqual(elem.get("key"), 'value') # 2.1
  356. self.assertEqual(elem.attrib, {'key': 'value'}) # 2.2
  357. attrib = {"key": "value"}
  358. elem = ET.Element("tag", attrib)
  359. attrib.clear() # check for aliasing issues
  360. self.assertEqual(elem.get("key"), 'value') # 3.1
  361. self.assertEqual(elem.attrib, {'key': 'value'}) # 3.2
  362. attrib = {"key": "value"}
  363. elem = ET.Element("tag", **attrib)
  364. attrib.clear() # check for aliasing issues
  365. self.assertEqual(elem.get("key"), 'value') # 4.1
  366. self.assertEqual(elem.attrib, {'key': 'value'}) # 4.2
  367. elem = ET.Element("tag", {"key": "other"}, key="value")
  368. self.assertEqual(elem.get("key"), 'value') # 5.1
  369. self.assertEqual(elem.attrib, {'key': 'value'}) # 5.2
  370. elem = ET.Element('test')
  371. elem.text = "aa"
  372. elem.set('testa', 'testval')
  373. elem.set('testb', 'test2')
  374. self.assertEqual(ET.tostring(elem),
  375. b'<test testa="testval" testb="test2">aa</test>')
  376. self.assertEqual(sorted(elem.keys()), ['testa', 'testb'])
  377. self.assertEqual(sorted(elem.items()),
  378. [('testa', 'testval'), ('testb', 'test2')])
  379. self.assertEqual(elem.attrib['testb'], 'test2')
  380. elem.attrib['testb'] = 'test1'
  381. elem.attrib['testc'] = 'test2'
  382. self.assertEqual(ET.tostring(elem),
  383. b'<test testa="testval" testb="test1" testc="test2">aa</test>')
  384. # Test preserving white space chars in attributes
  385. elem = ET.Element('test')
  386. elem.set('a', '\r')
  387. elem.set('b', '\r\n')
  388. elem.set('c', '\t\n\r ')
  389. elem.set('d', '\n\n\r\r\t\t ')
  390. self.assertEqual(ET.tostring(elem),
  391. b'<test a="&#13;" b="&#13;&#10;" c="&#09;&#10;&#13; " d="&#10;&#10;&#13;&#13;&#09;&#09; " />')
  def test_makeelement(self):
      # makeelement() must not alias the attribute dict it is given, and
      # the new element must survive append/clear/extend/slice assignment.
      single = '<tag><subtag key="value" /></tag>'

      parent = ET.Element("tag")
      attrs = {"key": "value"}
      child = parent.makeelement("subtag", attrs)
      self.assertIsNot(child.attrib, attrs, msg="attrib aliasing")
      parent.append(child)
      self.serialize_check(parent, single)

      parent.clear()
      self.serialize_check(parent, '<tag />')
      parent.append(child)
      self.serialize_check(parent, single)
      parent.extend([child, child])
      self.serialize_check(
          parent,
          '<tag><subtag key="value" /><subtag key="value" />'
          '<subtag key="value" /></tag>')

      # Slice assignment is tried with a list and then with a tuple.
      parent[:] = [child]
      self.serialize_check(parent, single)
      parent[:] = (child,)
      self.serialize_check(parent, single)
  def test_parsefile(self):
      # Parse the sample documents from disk and through hand-driven
      # XMLParser instances, then compare the re-serialized text.
      # NOTE(review): the single-space indentation inside the expected
      # literals below may be an artifact of whitespace collapsing in this
      # copy of the file -- confirm against the fixture files.
      plain_expected = (
          '<root>\n'
          ' <element key="value">text</element>\n'
          ' <element>text</element>tail\n'
          ' <empty-element />\n'
          '</root>')

      def dump(tree):
          # Serialize an ElementTree to text through an in-memory stream.
          out = io.StringIO()
          tree.write(out, encoding='unicode')
          return out.getvalue()

      self.assertEqual(dump(ET.parse(SIMPLE_XMLFILE)), plain_expected)
      self.assertEqual(
          dump(ET.parse(SIMPLE_NS_XMLFILE)),
          '<ns0:root xmlns:ns0="namespace">\n'
          ' <ns0:element key="value">text</ns0:element>\n'
          ' <ns0:element>text</ns0:element>tail\n'
          ' <ns0:empty-element />\n'
          '</ns0:root>')

      with open(SIMPLE_XMLFILE) as f:
          data = f.read()

      # Parser with the default target.
      parser = ET.XMLParser()
      self.assertRegex(parser.version, r'^Expat ')
      parser.feed(data)
      self.serialize_check(parser.close(), plain_expected)

      # Parser with an explicit TreeBuilder target.
      builder = ET.TreeBuilder()
      parser = ET.XMLParser(target=builder)
      parser.feed(data)
      self.serialize_check(parser.close(), plain_expected)
  def test_parseliteral(self):
      # Parse in-memory markup with XML(), fromstring(), fromstringlist()
      # and XMLID(), and serialize the result back.
      markup = "<html><body>text</body></html>"
      markup_bytes = b'<html><body>text</body></html>'

      for parse in (ET.XML, ET.fromstring):
          root = parse(markup)
          self.assertEqual(ET.tostring(root, encoding='unicode'),
                           '<html><body>text</body></html>')

      # The document is split at arbitrary points, even inside a tag.
      pieces = ["<html><body>", "text</bo", "dy></html>"]
      root = ET.fromstringlist(pieces)
      self.assertEqual(ET.tostring(root), markup_bytes)
      self.assertEqual(b"".join(ET.tostringlist(root)), markup_bytes)
      self.assertEqual(
          ET.tostring(root, "ascii"),
          b"<?xml version='1.0' encoding='ascii'?>\n"
          b"<html><body>text</body></html>")

      _, ids = ET.XMLID(markup)
      self.assertEqual(len(ids), 0)
      _, ids = ET.XMLID("<html><body id='body'>text</body></html>")
      self.assertEqual(len(ids), 1)
      self.assertEqual(ids["body"].tag, 'body')
  def test_iterparse(self):
      # Exercise ET.iterparse(): default and explicit event selections,
      # namespace events, invalid event names, malformed input, and
      # release of the underlying file.
      iterparse = ET.iterparse

      def tagged(pairs):
          # Reduce (action, element) pairs to (action, tag) pairs.
          return [(action, elem.tag) for action, elem in pairs]

      context = iterparse(SIMPLE_XMLFILE)
      action, elem = next(context)
      self.assertEqual((action, elem.tag), ('end', 'element'))
      self.assertEqual(tagged(context), [
          ('end', 'element'),
          ('end', 'empty-element'),
          ('end', 'root'),
      ])
      self.assertEqual(context.root.tag, 'root')

      context = iterparse(SIMPLE_NS_XMLFILE)
      self.assertEqual(tagged(context), [
          ('end', '{namespace}element'),
          ('end', '{namespace}element'),
          ('end', '{namespace}empty-element'),
          ('end', '{namespace}root'),
      ])

      # An empty event selection, passed positionally and by keyword.
      events = ()
      context = iterparse(SIMPLE_XMLFILE, events)
      self.assertEqual(tagged(context), [])

      events = ()
      context = iterparse(SIMPLE_XMLFILE, events=events)
      self.assertEqual(tagged(context), [])

      events = ("start", "end")
      context = iterparse(SIMPLE_XMLFILE, events)
      self.assertEqual(tagged(context), [
          ('start', 'root'),
          ('start', 'element'),
          ('end', 'element'),
          ('start', 'element'),
          ('end', 'element'),
          ('start', 'empty-element'),
          ('end', 'empty-element'),
          ('end', 'root'),
      ])

      # For namespace events the payload is compared as-is instead of
      # being reduced to a tag.
      events = ("start", "end", "start-ns", "end-ns")
      context = iterparse(SIMPLE_NS_XMLFILE, events)
      self.assertEqual(
          [(action, elem.tag) if action in ("start", "end")
           else (action, elem)
           for action, elem in context],
          [
              ('start-ns', ('', 'namespace')),
              ('start', '{namespace}root'),
              ('start', '{namespace}element'),
              ('end', '{namespace}element'),
              ('start', '{namespace}element'),
              ('end', '{namespace}element'),
              ('start', '{namespace}empty-element'),
              ('end', '{namespace}empty-element'),
              ('end', '{namespace}root'),
              ('end-ns', None),
          ])

      events = ('start-ns', 'end-ns')
      context = iterparse(io.StringIO(r"<root xmlns=''/>"), events)
      res = [action for action, elem in context]
      self.assertEqual(res, ['start-ns', 'end-ns'])

      # An unknown event name is rejected; a caller-supplied file object
      # must be left open.
      events = ("start", "end", "bogus")
      with open(SIMPLE_XMLFILE, "rb") as f:
          with self.assertRaises(ValueError) as cm:
              iterparse(f, events)
          self.assertFalse(f.closed)
      self.assertEqual(str(cm.exception), "unknown event 'bogus'")

      # Same failure with a file name: no ResourceWarning may be emitted.
      with warnings_helper.check_no_resource_warning(self):
          with self.assertRaises(ValueError) as cm:
              iterparse(SIMPLE_XMLFILE, events)
          self.assertEqual(str(cm.exception), "unknown event 'bogus'")
          del cm

      source = io.BytesIO(
          b"<?xml version='1.0' encoding='iso-8859-1'?>\n"
          b"<body xmlns='http://&#233;ffbot.org/ns'\n"
          b" xmlns:cl\xe9='http://effbot.org/ns'>text</body>\n")
      events = ("start-ns",)
      context = iterparse(source, events)
      self.assertEqual([(action, elem) for action, elem in context], [
          ('start-ns', ('', 'http://\xe9ffbot.org/ns')),
          ('start-ns', ('cl\xe9', 'http://effbot.org/ns')),
      ])

      # Trailing junk is reported only after the root element's event.
      source = io.StringIO("<document />junk")
      it = iterparse(source)
      action, elem = next(it)
      self.assertEqual((action, elem.tag), ('end', 'document'))
      with self.assertRaises(ET.ParseError) as cm:
          next(it)
      self.assertEqual(str(cm.exception),
                       'junk after document element: line 1, column 12')

      self.addCleanup(os_helper.unlink, TESTFN)
      with open(TESTFN, "wb") as f:
          f.write(b"<document />junk")

      it = iterparse(TESTFN)
      action, elem = next(it)
      self.assertEqual((action, elem.tag), ('end', 'document'))
      with warnings_helper.check_no_resource_warning(self):
          with self.assertRaises(ET.ParseError) as cm:
              next(it)
          self.assertEqual(str(cm.exception),
                           'junk after document element: line 1, column 12')
          del cm, it

      # Not exhausting the iterator still closes the resource (bpo-43292)
      with warnings_helper.check_no_resource_warning(self):
          it = iterparse(TESTFN)
          del it

      with self.assertRaises(FileNotFoundError):
          iterparse("nonexistent")
  def test_writefile(self):
      # Serialize an element with text and a child, then with its tag
      # suppressed (tag = None) around a comment and a PI.
      root = ET.Element("tag")
      root.text = "text"
      self.serialize_check(root, '<tag>text</tag>')
      child = ET.SubElement(root, "subtag")
      child.text = "subtext"
      self.serialize_check(root, '<tag>text<subtag>subtext</subtag></tag>')

      # Test tag suppression
      root.tag = None
      self.serialize_check(root, 'text<subtag>subtext</subtag>')
      root.insert(0, ET.Comment("comment"))
      self.serialize_check(
          root,
          'text<!--comment--><subtag>subtext</subtag>')  # assumes 1.3

      root[0] = ET.PI("key", "value")
      self.serialize_check(root, 'text<?key value?><subtag>subtext</subtag>')
  def test_custom_builder(self):
      # Feed documents to XMLParser with list-based targets that record
      # the callbacks they receive.
      class Builder(list):
          # Records element start/end callbacks; character data is ignored.
          def start(self, tag, attrib):
              self.append(("start", tag))

          def end(self, tag):
              self.append(("end", tag))

          def data(self, text):
              pass

      class NamespaceBuilder(Builder):
          # Additionally records PI, comment and namespace callbacks.
          def pi(self, target, data):
              self.append(("pi", target, data))

          def comment(self, data):
              self.append(("comment", data))

          def start_ns(self, prefix, uri):
              self.append(("start-ns", prefix, uri))

          def end_ns(self, prefix):
              self.append(("end-ns", prefix))

      with open(SIMPLE_XMLFILE) as f:
          data = f.read()
      recorder = Builder()
      parser = ET.XMLParser(target=recorder)
      parser.feed(data)
      self.assertEqual(recorder, [
          ('start', 'root'),
          ('start', 'element'),
          ('end', 'element'),
          ('start', 'element'),
          ('end', 'element'),
          ('start', 'empty-element'),
          ('end', 'empty-element'),
          ('end', 'root'),
      ])

      with open(SIMPLE_NS_XMLFILE) as f:
          data = f.read()
      recorder = NamespaceBuilder()
      parser = ET.XMLParser(target=recorder)
      parser.feed(data)
      self.assertEqual(recorder, [
          ('pi', 'pi', 'data'),
          ('comment', ' comment '),
          ('start-ns', '', 'namespace'),
          ('start', '{namespace}root'),
          ('start', '{namespace}element'),
          ('end', '{namespace}element'),
          ('start', '{namespace}element'),
          ('end', '{namespace}element'),
          ('start', '{namespace}empty-element'),
          ('end', '{namespace}empty-element'),
          ('end', '{namespace}root'),
          ('end-ns', ''),
      ])
  def test_custom_builder_only_end_ns(self):
      # A target that defines only end_ns() must still receive the
      # end-of-namespace callbacks.
      class Builder(list):
          def end_ns(self, prefix):
              self.append(("end-ns", prefix))

      recorder = Builder()
      parser = ET.XMLParser(target=recorder)
      document = textwrap.dedent("""\
          <?pi data?>
          <!-- comment -->
          <root xmlns='namespace' xmlns:p='pns' xmlns:a='ans'>
          <a:element key='value'>text</a:element>
          <p:element>text</p:element>tail
          <empty-element/>
          </root>
          """)
      parser.feed(document)
      self.assertEqual(recorder, [
          ('end-ns', 'a'),
          ('end-ns', 'p'),
          ('end-ns', ''),
      ])
  def test_initialize_parser_without_target(self):
      # With target=None, or with no target at all, the parser's target
      # must be a TreeBuilder.
      explicit = ET.XMLParser(target=None)
      self.assertIsInstance(explicit.target, ET.TreeBuilder)

      implicit = ET.XMLParser()
      self.assertIsInstance(implicit.target, ET.TreeBuilder)
  def test_children(self):
      # Iterate over element children and manipulate them through
      # slicing, slice deletion and slice assignment.
      with open(SIMPLE_XMLFILE, "rb") as f:
          tree = ET.parse(f)
      summaries = [
          ['element', 'element', 'empty-element'],
          [],
          [],
          [],
      ]
      # Iterating from the root element and from the tree must agree.
      self.assertEqual(
          [summarize_list(node) for node in tree.getroot().iter()],
          summaries)
      self.assertEqual(
          [summarize_list(node) for node in tree.iter()],
          summaries)

      root = ET.XML(SAMPLE_XML)
      self.assertEqual(len(list(root)), 3)
      self.assertEqual(len(list(root[2])), 1)
      self.assertEqual(root[:], list(root))
      first = root[0]
      third = root[2]
      del root[1:2]
      self.assertEqual(len(list(root)), 2)
      self.assertEqual(first, root[0])
      self.assertEqual(third, root[1])
      # Swap the two remaining children through slice assignment.
      root[0:2] = [third, first]
      self.assertEqual(third, root[0])
      self.assertEqual(first, root[1])
      self.assertNotEqual(first, root[0])
      root.clear()
      self.assertEqual(list(root), [])
  def test_writestring(self):
      # tostring() output for trees built by XML() and fromstring().
      markup = "<html><body>text</body></html>"
      for parse in (ET.XML, ET.fromstring):
          root = parse(markup)
          self.assertEqual(ET.tostring(root),
                           b'<html><body>text</body></html>')
  714. def test_indent(self):
  715. elem = ET.XML("<root></root>")
  716. ET.indent(elem)
  717. self.assertEqual(ET.tostring(elem), b'<root />')
  718. elem = ET.XML("<html><body>text</body></html>")
  719. ET.indent(elem)
  720. self.assertEqual(ET.tostring(elem), b'<html>\n <body>text</body>\n</html>')
  721. elem = ET.XML("<html> <body>text</body> </html>")
  722. ET.indent(elem)
  723. self.assertEqual(ET.tostring(elem), b'<html>\n <body>text</body>\n</html>')
  724. elem = ET.XML("<html><body>text</body>tail</html>")
  725. ET.indent(elem)
  726. self.assertEqual(ET.tostring(elem), b'<html>\n <body>text</body>tail</html>')
  727. elem = ET.XML("<html><body><p>par</p>\n<p>text</p>\t<p><br/></p></body></html>")
  728. ET.indent(elem)
  729. self.assertEqual(
  730. ET.tostring(elem),
  731. b'<html>\n'
  732. b' <body>\n'
  733. b' <p>par</p>\n'
  734. b' <p>text</p>\n'
  735. b' <p>\n'
  736. b' <br />\n'
  737. b' </p>\n'
  738. b' </body>\n'
  739. b'</html>'
  740. )
  741. elem = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>")
  742. ET.indent(elem)
  743. self.assertEqual(
  744. ET.tostring(elem),
  745. b'<html>\n'
  746. b' <body>\n'
  747. b' <p>pre<br />post</p>\n'
  748. b' <p>text</p>\n'
  749. b' </body>\n'
  750. b'</html>'
  751. )
  def test_indent_space(self):
      # ET.indent() with a custom indentation unit: a tab, then the
      # empty string (line breaks only).
      markup = "<html><body><p>pre<br/>post</p><p>text</p></body></html>"

      tree = ET.XML(markup)
      ET.indent(tree, space='\t')
      self.assertEqual(
          ET.tostring(tree),
          b'<html>\n'
          b'\t<body>\n'
          b'\t\t<p>pre<br />post</p>\n'
          b'\t\t<p>text</p>\n'
          b'\t</body>\n'
          b'</html>'
      )

      tree = ET.XML(markup)
      ET.indent(tree, space='')
      self.assertEqual(
          ET.tostring(tree),
          b'<html>\n'
          b'<body>\n'
          b'<p>pre<br />post</p>\n'
          b'<p>text</p>\n'
          b'</body>\n'
          b'</html>'
      )
  775. def test_indent_space_caching(self):
  776. elem = ET.XML("<html><body><p>par</p><p>text</p><p><br/></p><p /></body></html>")
  777. ET.indent(elem)
  778. self.assertEqual(
  779. {el.tail for el in elem.iter()},
  780. {None, "\n", "\n ", "\n "}
  781. )
  782. self.assertEqual(
  783. {el.text for el in elem.iter()},
  784. {None, "\n ", "\n ", "\n ", "par", "text"}
  785. )
  786. self.assertEqual(
  787. len({el.tail for el in elem.iter()}),
  788. len({id(el.tail) for el in elem.iter()}),
  789. )
  790. def test_indent_level(self):
  791. elem = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>")
  792. with self.assertRaises(ValueError):
  793. ET.indent(elem, level=-1)
  794. self.assertEqual(
  795. ET.tostring(elem),
  796. b"<html><body><p>pre<br />post</p><p>text</p></body></html>"
  797. )
  798. ET.indent(elem, level=2)
  799. self.assertEqual(
  800. ET.tostring(elem),
  801. b'<html>\n'
  802. b' <body>\n'
  803. b' <p>pre<br />post</p>\n'
  804. b' <p>text</p>\n'
  805. b' </body>\n'
  806. b' </html>'
  807. )
  808. elem = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>")
  809. ET.indent(elem, level=1, space=' ')
  810. self.assertEqual(
  811. ET.tostring(elem),
  812. b'<html>\n'
  813. b' <body>\n'
  814. b' <p>pre<br />post</p>\n'
  815. b' <p>text</p>\n'
  816. b' </body>\n'
  817. b' </html>'
  818. )
  def test_tostring_default_namespace(self):
      # Without default_namespace a generated prefix is used; with it,
      # the namespace is emitted as the default (prefix-less) one.
      uri = 'http://effbot.org/ns'
      root = ET.XML('<body xmlns="http://effbot.org/ns"><tag/></body>')

      prefixed = ET.tostring(root, encoding='unicode')
      self.assertEqual(
          prefixed,
          '<ns0:body xmlns:ns0="http://effbot.org/ns"><ns0:tag /></ns0:body>'
      )

      unprefixed = ET.tostring(root, encoding='unicode',
                               default_namespace=uri)
      self.assertEqual(
          unprefixed,
          '<body xmlns="http://effbot.org/ns"><tag /></body>'
      )
  def test_tostring_default_namespace_different_namespace(self):
      # A default_namespace that differs from the element's namespace is
      # declared as the default while the element keeps a prefix.
      root = ET.XML('<body xmlns="http://effbot.org/ns"><tag/></body>')
      rendered = ET.tostring(root, encoding='unicode',
                             default_namespace='foobar')
      self.assertEqual(
          rendered,
          '<ns1:body xmlns="foobar" xmlns:ns1="http://effbot.org/ns">'
          '<ns1:tag /></ns1:body>'
      )
  def test_tostring_default_namespace_original_no_namespace(self):
      # Unqualified element names cannot be serialized together with a
      # default_namespace: ValueError is expected.
      root = ET.XML('<body><tag/></body>')
      pattern = (
          '^cannot use non-qualified names with default_namespace option$')
      with self.assertRaisesRegex(ValueError, pattern):
          ET.tostring(root, encoding='unicode', default_namespace='foobar')
  def test_tostring_no_xml_declaration(self):
      # Text output carries no XML declaration unless one is requested.
      root = ET.XML('<body><tag/></body>')
      rendered = ET.tostring(root, encoding='unicode')
      self.assertEqual(rendered, '<body><tag /></body>')
  def test_tostring_xml_declaration(self):
      # xml_declaration=True prepends a declaration naming the encoding
      # exactly as it was passed in.
      root = ET.XML('<body><tag/></body>')
      rendered = ET.tostring(root, encoding='utf8', xml_declaration=True)
      self.assertEqual(
          rendered,
          b"<?xml version='1.0' encoding='utf8'?>\n<body><tag /></body>"
      )
  def test_tostring_xml_declaration_unicode_encoding(self):
      # With encoding='unicode' the requested declaration names utf-8.
      root = ET.XML('<body><tag/></body>')
      rendered = ET.tostring(root, encoding='unicode', xml_declaration=True)
      self.assertEqual(
          rendered,
          "<?xml version='1.0' encoding='utf-8'?>\n<body><tag /></body>"
      )
  def test_tostring_xml_declaration_cases(self):
      # Matrix of tostring() results over (encoding, xml_declaration),
      # using a document with one non-ASCII character.
      root = ET.XML('<body><tag>ø</tag></body>')
      cases = [
          # (expected_retval, encoding, xml_declaration)
          # ... xml_declaration = None
          (b'<body><tag>&#248;</tag></body>', None, None),
          (b'<body><tag>\xc3\xb8</tag></body>', 'UTF-8', None),
          (b'<body><tag>&#248;</tag></body>', 'US-ASCII', None),
          (b"<?xml version='1.0' encoding='ISO-8859-1'?>\n"
           b"<body><tag>\xf8</tag></body>", 'ISO-8859-1', None),
          ('<body><tag>ø</tag></body>', 'unicode', None),

          # ... xml_declaration = False
          (b"<body><tag>&#248;</tag></body>", None, False),
          (b"<body><tag>\xc3\xb8</tag></body>", 'UTF-8', False),
          (b"<body><tag>&#248;</tag></body>", 'US-ASCII', False),
          (b"<body><tag>\xf8</tag></body>", 'ISO-8859-1', False),
          ("<body><tag>ø</tag></body>", 'unicode', False),

          # ... xml_declaration = True
          (b"<?xml version='1.0' encoding='us-ascii'?>\n"
           b"<body><tag>&#248;</tag></body>", None, True),
          (b"<?xml version='1.0' encoding='UTF-8'?>\n"
           b"<body><tag>\xc3\xb8</tag></body>", 'UTF-8', True),
          (b"<?xml version='1.0' encoding='US-ASCII'?>\n"
           b"<body><tag>&#248;</tag></body>", 'US-ASCII', True),
          (b"<?xml version='1.0' encoding='ISO-8859-1'?>\n"
           b"<body><tag>\xf8</tag></body>", 'ISO-8859-1', True),
          ("<?xml version='1.0' encoding='utf-8'?>\n"
           "<body><tag>ø</tag></body>", 'unicode', True),
      ]
      for wanted, enc, declaration in cases:
          label = f'encoding={enc} xml_declaration={declaration}'
          with self.subTest(label):
              rendered = ET.tostring(root, encoding=enc,
                                     xml_declaration=declaration)
              self.assertEqual(rendered, wanted)
  def test_tostringlist_default_namespace(self):
      # tostringlist() counterpart of the default_namespace checks: the
      # joined chunks must equal the tostring() forms.
      uri = 'http://effbot.org/ns'
      root = ET.XML('<body xmlns="http://effbot.org/ns"><tag/></body>')

      chunks = ET.tostringlist(root, encoding='unicode')
      self.assertEqual(
          ''.join(chunks),
          '<ns0:body xmlns:ns0="http://effbot.org/ns"><ns0:tag /></ns0:body>'
      )

      chunks = ET.tostringlist(root, encoding='unicode',
                               default_namespace=uri)
      self.assertEqual(
          ''.join(chunks),
          '<body xmlns="http://effbot.org/ns"><tag /></body>'
      )
  908. def test_tostringlist_xml_declaration(self):
  909. elem = ET.XML('<body><tag/></body>')
  910. self.assertEqual(
  911. ''.join(ET.tostringlist(elem, encoding='unicode')),
  912. '<body><tag /></body>'
  913. )
  914. self.assertEqual(
  915. b''.join(ET.tostringlist(elem, xml_declaration=True)),
  916. b"<?xml version='1.0' encoding='us-ascii'?>\n<body><tag /></body>"
  917. )
  918. stringlist = ET.tostringlist(elem, encoding='unicode', xml_declaration=True)
  919. self.assertEqual(
  920. ''.join(stringlist),
  921. "<?xml version='1.0' encoding='utf-8'?>\n<body><tag /></body>"
  922. )
  923. self.assertRegex(stringlist[0], r"^<\?xml version='1.0' encoding='.+'?>")
  924. self.assertEqual(['<body', '>', '<tag', ' />', '</body>'], stringlist[1:])
  def test_encoding(self):
      # Round-trip text through several declared encodings, then check
      # which declared encodings the parser accepts and how it rejects
      # the others.
      def check(encoding, body=''):
          document = ("<?xml version='1.0' encoding='%s'?><xml>%s</xml>"
                      % (encoding, body))
          # Parse both the encoded bytes and the plain text form.
          self.assertEqual(ET.XML(document.encode(encoding)).text, body)
          self.assertEqual(ET.XML(document).text, body)

      round_trips = [
          ("ascii", 'a'),
          ("us-ascii", 'a'),
          ("iso-8859-1", '\xbd'),
          ("iso-8859-15", '\u20ac'),
          ("cp437", '\u221a'),
          ("mac-roman", '\u02da'),
      ]
      for name, body in round_trips:
          check(name, body)

      def xml(encoding):
          return "<?xml version='1.0' encoding='%s'?><xml />" % encoding

      def bxml(encoding):
          return xml(encoding).encode(encoding)

      supported_encodings = [
          'ascii', 'utf-8', 'utf-8-sig', 'utf-16', 'utf-16be', 'utf-16le',
          'iso8859-1', 'iso8859-2', 'iso8859-3', 'iso8859-4', 'iso8859-5',
          'iso8859-6', 'iso8859-7', 'iso8859-8', 'iso8859-9', 'iso8859-10',
          'iso8859-13', 'iso8859-14', 'iso8859-15', 'iso8859-16',
          'cp437', 'cp720', 'cp737', 'cp775', 'cp850', 'cp852',
          'cp855', 'cp856', 'cp857', 'cp858', 'cp860', 'cp861', 'cp862',
          'cp863', 'cp865', 'cp866', 'cp869', 'cp874', 'cp1006', 'cp1125',
          'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
          'cp1256', 'cp1257', 'cp1258',
          'mac-cyrillic', 'mac-greek', 'mac-iceland', 'mac-latin2',
          'mac-roman', 'mac-turkish',
          'iso2022-jp', 'iso2022-jp-1', 'iso2022-jp-2', 'iso2022-jp-2004',
          'iso2022-jp-3', 'iso2022-jp-ext',
          'koi8-r', 'koi8-t', 'koi8-u', 'kz1048',
          'hz', 'ptcp154',
      ]
      for name in supported_encodings:
          self.assertEqual(ET.tostring(ET.XML(bxml(name))), b'<xml />')

      unsupported_ascii_compatible_encodings = [
          'big5', 'big5hkscs',
          'cp932', 'cp949', 'cp950',
          'euc-jp', 'euc-jis-2004', 'euc-jisx0213', 'euc-kr',
          'gb2312', 'gbk', 'gb18030',
          'iso2022-kr', 'johab',
          'shift-jis', 'shift-jis-2004', 'shift-jisx0213',
          'utf-7',
      ]
      for name in unsupported_ascii_compatible_encodings:
          self.assertRaises(ValueError, ET.XML, bxml(name))

      unsupported_ascii_incompatible_encodings = [
          'cp037', 'cp424', 'cp500', 'cp864', 'cp875', 'cp1026', 'cp1140',
          'utf_32', 'utf_32_be', 'utf_32_le',
      ]
      for name in unsupported_ascii_incompatible_encodings:
          self.assertRaises(ET.ParseError, ET.XML, bxml(name))

      # Declared encodings that are not usable codecs at all.
      self.assertRaises(ValueError, ET.XML, xml('undefined').encode('ascii'))
      self.assertRaises(LookupError, ET.XML, xml('xxx').encode('ascii'))
  def test_methods(self):
      # Compare the xml (default, None and explicit), html and text
      # serialization methods on the same tree.
      root = ET.XML("<html><link/><script>1 &lt; 2</script></html>")
      root.tail = "\n"
      as_xml = '<html><link /><script>1 &lt; 2</script></html>\n'

      self.assertEqual(serialize(root), as_xml)
      self.assertEqual(serialize(root, method=None), as_xml)
      self.assertEqual(serialize(root, method="xml"), as_xml)
      self.assertEqual(serialize(root, method="html"),
                       '<html><link><script>1 < 2</script></html>\n')
      self.assertEqual(serialize(root, method="text"), '1 < 2\n')
  def test_issue18347(self):
      # Mixed-case tag names must be preserved by both the xml and the
      # html serializers.
      root = ET.XML('<html><CamelCase>text</CamelCase></html>')
      wanted = '<html><CamelCase>text</CamelCase></html>'
      self.assertEqual(serialize(root), wanted)
      self.assertEqual(serialize(root, method="html"), wanted)
  def test_entity(self):
      # Entity handling: character references, undefined entities,
      # parser-level custom entities and external (SYSTEM) entities.

      # 1) good entities
      doc = ET.XML("<document title='&#x8230;'>test</document>")
      self.assertEqual(serialize(doc, encoding="us-ascii"),
                       b'<document title="&#33328;">test</document>')
      self.serialize_check(doc, '<document title="\u8230">test</document>')

      # 2) bad entities
      with self.assertRaises(ET.ParseError) as caught:
          ET.XML("<document>&entity;</document>")
      self.assertEqual(str(caught.exception),
                       'undefined entity: line 1, column 10')

      with self.assertRaises(ET.ParseError) as caught:
          ET.XML(ENTITY_XML)
      self.assertEqual(str(caught.exception),
                       'undefined entity &entity;: line 5, column 10')

      # 3) custom entity
      parser = ET.XMLParser()
      parser.entity["entity"] = "text"
      parser.feed(ENTITY_XML)
      self.serialize_check(parser.close(), '<document>text</document>')

      # 4) external (SYSTEM) entity
      with self.assertRaises(ET.ParseError) as caught:
          ET.XML(EXTERNAL_ENTITY_XML)
      self.assertEqual(str(caught.exception),
                       'undefined entity &entity;: line 4, column 10')
  def test_namespace(self):
      # Namespace prefixes chosen by the serializer for the xml
      # namespace, for other namespaces, and for the sample document.

      # 1) xml namespace
      root = ET.XML("<tag xml:lang='en' />")
      self.serialize_check(root, '<tag xml:lang="en" />')  # 1.1

      # 2) other "well-known" namespaces
      root = ET.XML(
          "<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#' />")
      self.serialize_check(
          root,
          '<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" />')  # 2.1

      root = ET.XML("<html:html xmlns:html='http://www.w3.org/1999/xhtml' />")
      self.serialize_check(
          root,
          '<html:html xmlns:html="http://www.w3.org/1999/xhtml" />')  # 2.2

      root = ET.XML(
          "<soap:Envelope xmlns:soap='http://schemas.xmlsoap.org/soap/envelope' />")
      self.serialize_check(
          root,
          '<ns0:Envelope xmlns:ns0="http://schemas.xmlsoap.org/soap/envelope" />')  # 2.3

      # 3) unknown namespaces
      # NOTE(review): the single-space indentation inside this expected
      # literal may be an artifact of whitespace collapsing in this copy
      # of the file -- confirm against SAMPLE_XML_NS.
      root = ET.XML(SAMPLE_XML_NS)
      self.serialize_check(
          root,
          '<ns0:body xmlns:ns0="http://effbot.org/ns">\n'
          ' <ns0:tag>text</ns0:tag>\n'
          ' <ns0:tag />\n'
          ' <ns0:section>\n'
          ' <ns0:tag>subtext</ns0:tag>\n'
          ' </ns0:section>\n'
          '</ns0:body>')
  def test_qname(self):
      # QName handling in tags, attribute names and attribute values,
      # plus direct QName string conversion and comparison.
      bare = '<ns0:tag xmlns:ns0="uri" />'
      with_key = '<ns0:tag xmlns:ns0="uri" ns0:key="value" />'

      # 1) decorated tags
      root = ET.Element("{uri}tag")
      self.serialize_check(root, bare)  # 1.1
      root = ET.Element(ET.QName("{uri}tag"))
      self.serialize_check(root, bare)  # 1.2
      root = ET.Element(ET.QName("uri", "tag"))
      self.serialize_check(root, bare)  # 1.3
      root = ET.Element(ET.QName("uri", "tag"))
      ET.SubElement(root, ET.QName("uri", "tag1"))
      ET.SubElement(root, ET.QName("uri", "tag2"))
      self.serialize_check(
          root,
          '<ns0:tag xmlns:ns0="uri"><ns0:tag1 /><ns0:tag2 /></ns0:tag>')  # 1.4

      # 2) decorated attributes
      root.clear()
      root.attrib["{uri}key"] = "value"
      self.serialize_check(root, with_key)  # 2.1

      root.clear()
      root.attrib[ET.QName("{uri}key")] = "value"
      self.serialize_check(root, with_key)  # 2.2

      # 3) decorated values are not converted by default, but the
      # QName wrapper can be used for values
      root.clear()
      root.attrib["{uri}key"] = "{uri}value"
      self.serialize_check(
          root,
          '<ns0:tag xmlns:ns0="uri" ns0:key="{uri}value" />')  # 3.1

      root.clear()
      root.attrib["{uri}key"] = ET.QName("{uri}value")
      self.serialize_check(
          root,
          '<ns0:tag xmlns:ns0="uri" ns0:key="ns0:value" />')  # 3.2

      # The same child is appended twice on purpose.
      root.clear()
      child = ET.Element("tag")
      child.attrib["{uri1}key"] = ET.QName("{uri2}value")
      root.append(child)
      root.append(child)
      self.serialize_check(
          root,
          '<ns0:tag xmlns:ns0="uri" xmlns:ns1="uri1" xmlns:ns2="uri2">'
          '<tag ns1:key="ns2:value" />'
          '<tag ns1:key="ns2:value" />'
          '</ns0:tag>')  # 3.3

      # 4) Direct QName tests
      self.assertEqual(str(ET.QName('ns', 'tag')), '{ns}tag')
      self.assertEqual(str(ET.QName('{ns}tag')), '{ns}tag')
      left = ET.QName('ns', 'tag')
      right = ET.QName('ns', 'tag')
      self.assertEqual(left, right)
      right = ET.QName('ns', 'other-tag')
      self.assertNotEqual(left, right)
      self.assertNotEqual(left, 'ns:tag')
      self.assertEqual(left, '{ns}tag')
  1103. def test_doctype_public(self):
  1104. # Test PUBLIC doctype.
  1105. elem = ET.XML('<!DOCTYPE html PUBLIC'
  1106. ' "-//W3C//DTD XHTML 1.0 Transitional//EN"'
  1107. ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">'
  1108. '<html>text</html>')
  1109. def test_xpath_tokenizer(self):
  1110. # Test the XPath tokenizer.
  1111. from xml.etree import ElementPath
  1112. def check(p, expected, namespaces=None):
  1113. self.assertEqual([op or tag
  1114. for op, tag in ElementPath.xpath_tokenizer(p, namespaces)],
  1115. expected)
  1116. # tests from the xml specification
  1117. check("*", ['*'])
  1118. check("text()", ['text', '()'])
  1119. check("@name", ['@', 'name'])
  1120. check("@*", ['@', '*'])
  1121. check("para[1]", ['para', '[', '1', ']'])
  1122. check("para[last()]", ['para', '[', 'last', '()', ']'])
  1123. check("*/para", ['*', '/', 'para'])
  1124. check("/doc/chapter[5]/section[2]",
  1125. ['/', 'doc', '/', 'chapter', '[', '5', ']',
  1126. '/', 'section', '[', '2', ']'])
  1127. check("chapter//para", ['chapter', '//', 'para'])
  1128. check("//para", ['//', 'para'])
  1129. check("//olist/item", ['//', 'olist', '/', 'item'])
  1130. check(".", ['.'])
  1131. check(".//para", ['.', '//', 'para'])
  1132. check("..", ['..'])
  1133. check("../@lang", ['..', '/', '@', 'lang'])
  1134. check("chapter[title]", ['chapter', '[', 'title', ']'])
  1135. check("employee[@secretary and @assistant]", ['employee',
  1136. '[', '@', 'secretary', '', 'and', '', '@', 'assistant', ']'])
  1137. # additional tests
  1138. check("@{ns}attr", ['@', '{ns}attr'])
  1139. check("{http://spam}egg", ['{http://spam}egg'])
  1140. check("./spam.egg", ['.', '/', 'spam.egg'])
  1141. check(".//{http://spam}egg", ['.', '//', '{http://spam}egg'])
  1142. # wildcard tags
  1143. check("{ns}*", ['{ns}*'])
  1144. check("{}*", ['{}*'])
  1145. check("{*}tag", ['{*}tag'])
  1146. check("{*}*", ['{*}*'])
  1147. check(".//{*}tag", ['.', '//', '{*}tag'])
  1148. # namespace prefix resolution
  1149. check("./xsd:type", ['.', '/', '{http://www.w3.org/2001/XMLSchema}type'],
  1150. {'xsd': 'http://www.w3.org/2001/XMLSchema'})
  1151. check("type", ['{http://www.w3.org/2001/XMLSchema}type'],
  1152. {'': 'http://www.w3.org/2001/XMLSchema'})
  1153. check("@xsd:type", ['@', '{http://www.w3.org/2001/XMLSchema}type'],
  1154. {'xsd': 'http://www.w3.org/2001/XMLSchema'})
  1155. check("@type", ['@', 'type'],
  1156. {'': 'http://www.w3.org/2001/XMLSchema'})
  1157. check("@{*}type", ['@', '{*}type'],
  1158. {'': 'http://www.w3.org/2001/XMLSchema'})
  1159. check("@{ns}attr", ['@', '{ns}attr'],
  1160. {'': 'http://www.w3.org/2001/XMLSchema',
  1161. 'ns': 'http://www.w3.org/2001/XMLSchema'})
  def test_processinginstruction(self):
      # Serialize processing instructions created through both the
      # ProcessingInstruction factory and its PI alias.
      for factory in (ET.ProcessingInstruction, ET.PI):
          self.assertEqual(ET.tostring(factory('test', 'instruction')),
                           b'<?test instruction?>')

      # Issue #2746
      markup_pi = ET.PI('test', '<testing&>')
      self.assertEqual(ET.tostring(markup_pi), b'<?test <testing&>?>')
      latin_pi = ET.PI('test', '<testing&>\xe3')
      self.assertEqual(
          ET.tostring(latin_pi, 'latin-1'),
          b"<?xml version='1.0' encoding='latin-1'?>\n"
          b"<?test <testing&>\xe3?>")
  def test_html_empty_elems_serialization(self):
      # issue 15970
      # from http://www.w3.org/TR/html401/index/elements.html
      # Each listed element, in upper and lower case, must serialize in
      # html mode as a lone start tag whether written self-closed or as
      # an empty start/end pair.
      empty_elements = [
          'AREA', 'BASE', 'BASEFONT', 'BR', 'COL', 'EMBED', 'FRAME',
          'HR', 'IMG', 'INPUT', 'ISINDEX', 'LINK', 'META', 'PARAM',
          'SOURCE', 'TRACK', 'WBR',
      ]
      for name in empty_elements:
          for tag in (name, name.lower()):
              wanted = '<%s>' % tag
              sources = ('<%s />' % tag, '<%s></%s>' % (tag, tag))
              for markup in sources:
                  rendered = serialize(ET.XML(markup), method='html')
                  self.assertEqual(rendered, wanted)
  1187. def test_dump_attribute_order(self):
  1188. # See BPO 34160
  1189. e = ET.Element('cirriculum', status='public', company='example')
  1190. with support.captured_stdout() as stdout:
  1191. ET.dump(e)
  1192. self.assertEqual(stdout.getvalue(),
  1193. '<cirriculum status="public" company="example" />\n')
  1194. def test_tree_write_attribute_order(self):
  1195. # See BPO 34160
  1196. root = ET.Element('cirriculum', status='public', company='example')
  1197. self.assertEqual(serialize(root),
  1198. '<cirriculum status="public" company="example" />')
  1199. self.assertEqual(serialize(root, method='html'),
  1200. '<cirriculum status="public" company="example"></cirriculum>')
  1201. def test_attlist_default(self):
  1202. # Test default attribute values; See BPO 42151.
  1203. root = ET.fromstring(ATTLIST_XML)
  1204. self.assertEqual(root[0].attrib,
  1205. {'{http://www.w3.org/XML/1998/namespace}lang': 'eng'})
  1206. class XMLPullParserTest(unittest.TestCase):
  1207. def _feed(self, parser, data, chunk_size=None):
  1208. if chunk_size is None:
  1209. parser.feed(data)
  1210. else:
  1211. for i in range(0, len(data), chunk_size):
  1212. parser.feed(data[i:i+chunk_size])
  1213. def assert_events(self, parser, expected, max_events=None):
  1214. self.assertEqual(
  1215. [(event, (elem.tag, elem.text))
  1216. for event, elem in islice(parser.read_events(), max_events)],
  1217. expected)
  1218. def assert_event_tuples(self, parser, expected, max_events=None):
  1219. self.assertEqual(
  1220. list(islice(parser.read_events(), max_events)),
  1221. expected)
  1222. def assert_event_tags(self, parser, expected, max_events=None):
  1223. events = islice(parser.read_events(), max_events)
  1224. self.assertEqual([(action, elem.tag) for action, elem in events],
  1225. expected)
  1226. def test_simple_xml(self):
  1227. for chunk_size in (None, 1, 5):
  1228. with self.subTest(chunk_size=chunk_size):
  1229. parser = ET.XMLPullParser()
  1230. self.assert_event_tags(parser, [])
  1231. self._feed(parser, "<!-- comment -->\n", chunk_size)
  1232. self.assert_event_tags(parser, [])
  1233. self._feed(parser,
  1234. "<root>\n <element key='value'>text</element",
  1235. chunk_size)
  1236. self.assert_event_tags(parser, [])
  1237. self._feed(parser, ">\n", chunk_size)
  1238. self.assert_event_tags(parser, [('end', 'element')])
  1239. self._feed(parser, "<element>text</element>tail\n", chunk_size)
  1240. self._feed(parser, "<empty-element/>\n", chunk_size)
  1241. self.assert_event_tags(parser, [
  1242. ('end', 'element'),
  1243. ('end', 'empty-element'),
  1244. ])
  1245. self._feed(parser, "</root>\n", chunk_size)
  1246. self.assert_event_tags(parser, [('end', 'root')])
  1247. self.assertIsNone(parser.close())
  1248. def test_feed_while_iterating(self):
  1249. parser = ET.XMLPullParser()
  1250. it = parser.read_events()
  1251. self._feed(parser, "<root>\n <element key='value'>text</element>\n")
  1252. action, elem = next(it)
  1253. self.assertEqual((action, elem.tag), ('end', 'element'))
  1254. self._feed(parser, "</root>\n")
  1255. action, elem = next(it)
  1256. self.assertEqual((action, elem.tag), ('end', 'root'))
  1257. with self.assertRaises(StopIteration):
  1258. next(it)
  1259. def test_simple_xml_with_ns(self):
  1260. parser = ET.XMLPullParser()
  1261. self.assert_event_tags(parser, [])
  1262. self._feed(parser, "<!-- comment -->\n")
  1263. self.assert_event_tags(parser, [])
  1264. self._feed(parser, "<root xmlns='namespace'>\n")
  1265. self.assert_event_tags(parser, [])
  1266. self._feed(parser, "<element key='value'>text</element")
  1267. self.assert_event_tags(parser, [])
  1268. self._feed(parser, ">\n")
  1269. self.assert_event_tags(parser, [('end', '{namespace}element')])
  1270. self._feed(parser, "<element>text</element>tail\n")
  1271. self._feed(parser, "<empty-element/>\n")
  1272. self.assert_event_tags(parser, [
  1273. ('end', '{namespace}element'),
  1274. ('end', '{namespace}empty-element'),
  1275. ])
  1276. self._feed(parser, "</root>\n")
  1277. self.assert_event_tags(parser, [('end', '{namespace}root')])
  1278. self.assertIsNone(parser.close())
  1279. def test_ns_events(self):
  1280. parser = ET.XMLPullParser(events=('start-ns', 'end-ns'))
  1281. self._feed(parser, "<!-- comment -->\n")
  1282. self._feed(parser, "<root xmlns='namespace'>\n")
  1283. self.assertEqual(
  1284. list(parser.read_events()),
  1285. [('start-ns', ('', 'namespace'))])
  1286. self._feed(parser, "<element key='value'>text</element")
  1287. self._feed(parser, ">\n")
  1288. self._feed(parser, "<element>text</element>tail\n")
  1289. self._feed(parser, "<empty-element/>\n")
  1290. self._feed(parser, "</root>\n")
  1291. self.assertEqual(list(parser.read_events()), [('end-ns', None)])
  1292. self.assertIsNone(parser.close())
  1293. def test_ns_events_start(self):
  1294. parser = ET.XMLPullParser(events=('start-ns', 'start', 'end'))
  1295. self._feed(parser, "<tag xmlns='abc' xmlns:p='xyz'>\n")
  1296. self.assert_event_tuples(parser, [
  1297. ('start-ns', ('', 'abc')),
  1298. ('start-ns', ('p', 'xyz')),
  1299. ], max_events=2)
  1300. self.assert_event_tags(parser, [
  1301. ('start', '{abc}tag'),
  1302. ], max_events=1)
  1303. self._feed(parser, "<child />\n")
  1304. self.assert_event_tags(parser, [
  1305. ('start', '{abc}child'),
  1306. ('end', '{abc}child'),
  1307. ])
  1308. self._feed(parser, "</tag>\n")
  1309. parser.close()
  1310. self.assert_event_tags(parser, [
  1311. ('end', '{abc}tag'),
  1312. ])
  1313. def test_ns_events_start_end(self):
  1314. parser = ET.XMLPullParser(events=('start-ns', 'start', 'end', 'end-ns'))
  1315. self._feed(parser, "<tag xmlns='abc' xmlns:p='xyz'>\n")
  1316. self.assert_event_tuples(parser, [
  1317. ('start-ns', ('', 'abc')),
  1318. ('start-ns', ('p', 'xyz')),
  1319. ], max_events=2)
  1320. self.assert_event_tags(parser, [
  1321. ('start', '{abc}tag'),
  1322. ], max_events=1)
  1323. self._feed(parser, "<child />\n")
  1324. self.assert_event_tags(parser, [
  1325. ('start', '{abc}child'),
  1326. ('end', '{abc}child'),
  1327. ])
  1328. self._feed(parser, "</tag>\n")
  1329. parser.close()
  1330. self.assert_event_tags(parser, [
  1331. ('end', '{abc}tag'),
  1332. ], max_events=1)
  1333. self.assert_event_tuples(parser, [
  1334. ('end-ns', None),
  1335. ('end-ns', None),
  1336. ])
  1337. def test_events(self):
  1338. parser = ET.XMLPullParser(events=())
  1339. self._feed(parser, "<root/>\n")
  1340. self.assert_event_tags(parser, [])
  1341. parser = ET.XMLPullParser(events=('start', 'end'))
  1342. self._feed(parser, "<!-- text here -->\n")
  1343. self.assert_events(parser, [])
  1344. parser = ET.XMLPullParser(events=('start', 'end'))
  1345. self._feed(parser, "<root>\n")
  1346. self.assert_event_tags(parser, [('start', 'root')])
  1347. self._feed(parser, "<element key='value'>text</element")
  1348. self.assert_event_tags(parser, [('start', 'element')])
  1349. self._feed(parser, ">\n")
  1350. self.assert_event_tags(parser, [('end', 'element')])
  1351. self._feed(parser,
  1352. "<element xmlns='foo'>text<empty-element/></element>tail\n")
  1353. self.assert_event_tags(parser, [
  1354. ('start', '{foo}element'),
  1355. ('start', '{foo}empty-element'),
  1356. ('end', '{foo}empty-element'),
  1357. ('end', '{foo}element'),
  1358. ])
  1359. self._feed(parser, "</root>")
  1360. self.assertIsNone(parser.close())
  1361. self.assert_event_tags(parser, [('end', 'root')])
  1362. parser = ET.XMLPullParser(events=('start',))
  1363. self._feed(parser, "<!-- comment -->\n")
  1364. self.assert_event_tags(parser, [])
  1365. self._feed(parser, "<root>\n")
  1366. self.assert_event_tags(parser, [('start', 'root')])
  1367. self._feed(parser, "<element key='value'>text</element")
  1368. self.assert_event_tags(parser, [('start', 'element')])
  1369. self._feed(parser, ">\n")
  1370. self.assert_event_tags(parser, [])
  1371. self._feed(parser,
  1372. "<element xmlns='foo'>text<empty-element/></element>tail\n")
  1373. self.assert_event_tags(parser, [
  1374. ('start', '{foo}element'),
  1375. ('start', '{foo}empty-element'),
  1376. ])
  1377. self._feed(parser, "</root>")
  1378. self.assertIsNone(parser.close())
  1379. def test_events_comment(self):
  1380. parser = ET.XMLPullParser(events=('start', 'comment', 'end'))
  1381. self._feed(parser, "<!-- text here -->\n")
  1382. self.assert_events(parser, [('comment', (ET.Comment, ' text here '))])
  1383. self._feed(parser, "<!-- more text here -->\n")
  1384. self.assert_events(parser, [('comment', (ET.Comment, ' more text here '))])
  1385. self._feed(parser, "<root-tag>text")
  1386. self.assert_event_tags(parser, [('start', 'root-tag')])
  1387. self._feed(parser, "<!-- inner comment-->\n")
  1388. self.assert_events(parser, [('comment', (ET.Comment, ' inner comment'))])
  1389. self._feed(parser, "</root-tag>\n")
  1390. self.assert_event_tags(parser, [('end', 'root-tag')])
  1391. self._feed(parser, "<!-- outer comment -->\n")
  1392. self.assert_events(parser, [('comment', (ET.Comment, ' outer comment '))])
  1393. parser = ET.XMLPullParser(events=('comment',))
  1394. self._feed(parser, "<!-- text here -->\n")
  1395. self.assert_events(parser, [('comment', (ET.Comment, ' text here '))])
  1396. def test_events_pi(self):
  1397. parser = ET.XMLPullParser(events=('start', 'pi', 'end'))
  1398. self._feed(parser, "<?pitarget?>\n")
  1399. self.assert_events(parser, [('pi', (ET.PI, 'pitarget'))])
  1400. parser = ET.XMLPullParser(events=('pi',))
  1401. self._feed(parser, "<?pitarget some text ?>\n")
  1402. self.assert_events(parser, [('pi', (ET.PI, 'pitarget some text '))])
  1403. def test_events_sequence(self):
  1404. # Test that events can be some sequence that's not just a tuple or list
  1405. eventset = {'end', 'start'}
  1406. parser = ET.XMLPullParser(events=eventset)
  1407. self._feed(parser, "<foo>bar</foo>")
  1408. self.assert_event_tags(parser, [('start', 'foo'), ('end', 'foo')])
  1409. class DummyIter:
  1410. def __init__(self):
  1411. self.events = iter(['start', 'end', 'start-ns'])
  1412. def __iter__(self):
  1413. return self
  1414. def __next__(self):
  1415. return next(self.events)
  1416. parser = ET.XMLPullParser(events=DummyIter())
  1417. self._feed(parser, "<foo>bar</foo>")
  1418. self.assert_event_tags(parser, [('start', 'foo'), ('end', 'foo')])
  1419. def test_unknown_event(self):
  1420. with self.assertRaises(ValueError):
  1421. ET.XMLPullParser(events=('start', 'end', 'bogus'))
#
# xinclude tests (samples from appendix C of the xinclude specification)

# In-memory resources for the XInclude tests, keyed by the href under which
# they are requested (see XIncludeTest.xinclude_loader).
# NOTE(review): nested lines inside these literals carry no leading
# whitespace here, while the expected serializations in XIncludeTest start
# such lines with a space -- confirm the literals' indentation against the
# upstream file.
XINCLUDE = {}

# C.1: document that includes another XML document.
XINCLUDE["C1.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
<p>120 Mz is adequate for an average home user.</p>
<xi:include href="disclaimer.xml"/>
</document>
"""

# Target of the include in C1.xml.
XINCLUDE["disclaimer.xml"] = """\
<?xml version='1.0'?>
<disclaimer>
<p>The opinions represented herein represent those of the individual
and should not be interpreted as official policy endorsed by this
organization.</p>
</disclaimer>
"""

# C.2: include with parse="text".
XINCLUDE["C2.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
<p>This document has been accessed
<xi:include href="count.txt" parse="text"/> times.</p>
</document>
"""

# Plain-text resource included by C2.xml and C2b.xml.
XINCLUDE["count.txt"] = "324387"

# Variant of C.2 in which the text include follows a sibling element.
XINCLUDE["C2b.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
<p>This document has been <em>accessed</em>
<xi:include href="count.txt" parse="text"/> times.</p>
</document>
"""

# C.3: an XML document included as text.
XINCLUDE["C3.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
<p>The following is the source of the "data.xml" resource:</p>
<example><xi:include href="data.xml" parse="text"/></example>
</document>
"""

# Target of the text include in C3.xml.
XINCLUDE["data.xml"] = """\
<?xml version='1.0'?>
<data>
<item><![CDATA[Brooks & Shields]]></item>
</data>
"""

# C.5: nested xi:fallback elements; neither example.txt nor
# fallback-example.txt is defined in this mapping.
XINCLUDE["C5.xml"] = """\
<?xml version='1.0'?>
<div xmlns:xi="http://www.w3.org/2001/XInclude">
<xi:include href="example.txt" parse="text">
<xi:fallback>
<xi:include href="fallback-example.txt" parse="text">
<xi:fallback><a href="mailto:bob@example.org">Report error</a></xi:fallback>
</xi:include>
</xi:fallback>
</xi:include>
</div>
"""

# Document whose include href is filled in with the HTML-escaped value of
# SIMPLE_XMLFILE (quotes are escaped as well, hence the True argument).
XINCLUDE["default.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
<p>Example.</p>
<xi:include href="{}"/>
</document>
""".format(html.escape(SIMPLE_XMLFILE, True))

# Document that includes C1.xml four times.
XINCLUDE["include_c1_repeated.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
<p>The following is the source code of Recursive1.xml:</p>
<xi:include href="C1.xml"/>
<xi:include href="C1.xml"/>
<xi:include href="C1.xml"/>
<xi:include href="C1.xml"/>
</document>
"""

#
# badly formatted xi:include tags

# Documents that misuse the XInclude elements.
XINCLUDE_BAD = {}

# xi:include with an unsupported parse attribute value.
XINCLUDE_BAD["B1.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
<p>120 Mz is adequate for an average home user.</p>
<xi:include href="disclaimer.xml" parse="BAD_TYPE"/>
</document>
"""

# xi:fallback that is not a child of xi:include.
XINCLUDE_BAD["B2.xml"] = """\
<?xml version='1.0'?>
<div xmlns:xi="http://www.w3.org/2001/XInclude">
<xi:fallback></xi:fallback>
</div>
"""

# Recursive1 -> Recursive2 -> Recursive3 -> Recursive1: an include cycle.
XINCLUDE["Recursive1.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
<p>The following is the source code of Recursive2.xml:</p>
<xi:include href="Recursive2.xml"/>
</document>
"""

XINCLUDE["Recursive2.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
<p>The following is the source code of Recursive3.xml:</p>
<xi:include href="Recursive3.xml"/>
</document>
"""

XINCLUDE["Recursive3.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
<p>The following is the source code of Recursive1.xml:</p>
<xi:include href="Recursive1.xml"/>
</document>
"""
  1534. class XIncludeTest(unittest.TestCase):
  1535. def xinclude_loader(self, href, parse="xml", encoding=None):
  1536. try:
  1537. data = XINCLUDE[href]
  1538. except KeyError:
  1539. raise OSError("resource not found")
  1540. if parse == "xml":
  1541. data = ET.XML(data)
  1542. return data
  1543. def none_loader(self, href, parser, encoding=None):
  1544. return None
  1545. def _my_loader(self, href, parse):
  1546. # Used to avoid a test-dependency problem where the default loader
  1547. # of ElementInclude uses the pyET parser for cET tests.
  1548. if parse == 'xml':
  1549. with open(href, 'rb') as f:
  1550. return ET.parse(f).getroot()
  1551. else:
  1552. return None
  1553. def test_xinclude_default(self):
  1554. from xml.etree import ElementInclude
  1555. doc = self.xinclude_loader('default.xml')
  1556. ElementInclude.include(doc, self._my_loader)
  1557. self.assertEqual(serialize(doc),
  1558. '<document>\n'
  1559. ' <p>Example.</p>\n'
  1560. ' <root>\n'
  1561. ' <element key="value">text</element>\n'
  1562. ' <element>text</element>tail\n'
  1563. ' <empty-element />\n'
  1564. '</root>\n'
  1565. '</document>')
  1566. def test_xinclude(self):
  1567. from xml.etree import ElementInclude
  1568. # Basic inclusion example (XInclude C.1)
  1569. document = self.xinclude_loader("C1.xml")
  1570. ElementInclude.include(document, self.xinclude_loader)
  1571. self.assertEqual(serialize(document),
  1572. '<document>\n'
  1573. ' <p>120 Mz is adequate for an average home user.</p>\n'
  1574. ' <disclaimer>\n'
  1575. ' <p>The opinions represented herein represent those of the individual\n'
  1576. ' and should not be interpreted as official policy endorsed by this\n'
  1577. ' organization.</p>\n'
  1578. '</disclaimer>\n'
  1579. '</document>') # C1
  1580. # Textual inclusion example (XInclude C.2)
  1581. document = self.xinclude_loader("C2.xml")
  1582. ElementInclude.include(document, self.xinclude_loader)
  1583. self.assertEqual(serialize(document),
  1584. '<document>\n'
  1585. ' <p>This document has been accessed\n'
  1586. ' 324387 times.</p>\n'
  1587. '</document>') # C2
  1588. # Textual inclusion after sibling element (based on modified XInclude C.2)
  1589. document = self.xinclude_loader("C2b.xml")
  1590. ElementInclude.include(document, self.xinclude_loader)
  1591. self.assertEqual(serialize(document),
  1592. '<document>\n'
  1593. ' <p>This document has been <em>accessed</em>\n'
  1594. ' 324387 times.</p>\n'
  1595. '</document>') # C2b
  1596. # Textual inclusion of XML example (XInclude C.3)
  1597. document = self.xinclude_loader("C3.xml")
  1598. ElementInclude.include(document, self.xinclude_loader)
  1599. self.assertEqual(serialize(document),
  1600. '<document>\n'
  1601. ' <p>The following is the source of the "data.xml" resource:</p>\n'
  1602. " <example>&lt;?xml version='1.0'?&gt;\n"
  1603. '&lt;data&gt;\n'
  1604. ' &lt;item&gt;&lt;![CDATA[Brooks &amp; Shields]]&gt;&lt;/item&gt;\n'
  1605. '&lt;/data&gt;\n'
  1606. '</example>\n'
  1607. '</document>') # C3
  1608. # Fallback example (XInclude C.5)
  1609. # Note! Fallback support is not yet implemented
  1610. document = self.xinclude_loader("C5.xml")
  1611. with self.assertRaises(OSError) as cm:
  1612. ElementInclude.include(document, self.xinclude_loader)
  1613. self.assertEqual(str(cm.exception), 'resource not found')
  1614. self.assertEqual(serialize(document),
  1615. '<div xmlns:ns0="http://www.w3.org/2001/XInclude">\n'
  1616. ' <ns0:include href="example.txt" parse="text">\n'
  1617. ' <ns0:fallback>\n'
  1618. ' <ns0:include href="fallback-example.txt" parse="text">\n'
  1619. ' <ns0:fallback><a href="mailto:bob@example.org">Report error</a></ns0:fallback>\n'
  1620. ' </ns0:include>\n'
  1621. ' </ns0:fallback>\n'
  1622. ' </ns0:include>\n'
  1623. '</div>') # C5
  1624. def test_xinclude_repeated(self):
  1625. from xml.etree import ElementInclude
  1626. document = self.xinclude_loader("include_c1_repeated.xml")
  1627. ElementInclude.include(document, self.xinclude_loader)
  1628. self.assertEqual(1+4*2, len(document.findall(".//p")))
  1629. def test_xinclude_failures(self):
  1630. from xml.etree import ElementInclude
  1631. # Test failure to locate included XML file.
  1632. document = ET.XML(XINCLUDE["C1.xml"])
  1633. with self.assertRaises(ElementInclude.FatalIncludeError) as cm:
  1634. ElementInclude.include(document, loader=self.none_loader)
  1635. self.assertEqual(str(cm.exception),
  1636. "cannot load 'disclaimer.xml' as 'xml'")
  1637. # Test failure to locate included text file.
  1638. document = ET.XML(XINCLUDE["C2.xml"])
  1639. with self.assertRaises(ElementInclude.FatalIncludeError) as cm:
  1640. ElementInclude.include(document, loader=self.none_loader)
  1641. self.assertEqual(str(cm.exception),
  1642. "cannot load 'count.txt' as 'text'")
  1643. # Test bad parse type.
  1644. document = ET.XML(XINCLUDE_BAD["B1.xml"])
  1645. with self.assertRaises(ElementInclude.FatalIncludeError) as cm:
  1646. ElementInclude.include(document, loader=self.none_loader)
  1647. self.assertEqual(str(cm.exception),
  1648. "unknown parse type in xi:include tag ('BAD_TYPE')")
  1649. # Test xi:fallback outside xi:include.
  1650. document = ET.XML(XINCLUDE_BAD["B2.xml"])
  1651. with self.assertRaises(ElementInclude.FatalIncludeError) as cm:
  1652. ElementInclude.include(document, loader=self.none_loader)
  1653. self.assertEqual(str(cm.exception),
  1654. "xi:fallback tag must be child of xi:include "
  1655. "('{http://www.w3.org/2001/XInclude}fallback')")
  1656. # Test infinitely recursive includes.
  1657. document = self.xinclude_loader("Recursive1.xml")
  1658. with self.assertRaises(ElementInclude.FatalIncludeError) as cm:
  1659. ElementInclude.include(document, self.xinclude_loader)
  1660. self.assertEqual(str(cm.exception),
  1661. "recursive include of Recursive2.xml")
  1662. # Test 'max_depth' limitation.
  1663. document = self.xinclude_loader("Recursive1.xml")
  1664. with self.assertRaises(ElementInclude.FatalIncludeError) as cm:
  1665. ElementInclude.include(document, self.xinclude_loader, max_depth=None)
  1666. self.assertEqual(str(cm.exception),
  1667. "recursive include of Recursive2.xml")
  1668. document = self.xinclude_loader("Recursive1.xml")
  1669. with self.assertRaises(ElementInclude.LimitedRecursiveIncludeError) as cm:
  1670. ElementInclude.include(document, self.xinclude_loader, max_depth=0)
  1671. self.assertEqual(str(cm.exception),
  1672. "maximum xinclude depth reached when including file Recursive2.xml")
  1673. document = self.xinclude_loader("Recursive1.xml")
  1674. with self.assertRaises(ElementInclude.LimitedRecursiveIncludeError) as cm:
  1675. ElementInclude.include(document, self.xinclude_loader, max_depth=1)
  1676. self.assertEqual(str(cm.exception),
  1677. "maximum xinclude depth reached when including file Recursive3.xml")
  1678. document = self.xinclude_loader("Recursive1.xml")
  1679. with self.assertRaises(ElementInclude.LimitedRecursiveIncludeError) as cm:
  1680. ElementInclude.include(document, self.xinclude_loader, max_depth=2)
  1681. self.assertEqual(str(cm.exception),
  1682. "maximum xinclude depth reached when including file Recursive1.xml")
  1683. document = self.xinclude_loader("Recursive1.xml")
  1684. with self.assertRaises(ElementInclude.FatalIncludeError) as cm:
  1685. ElementInclude.include(document, self.xinclude_loader, max_depth=3)
  1686. self.assertEqual(str(cm.exception),
  1687. "recursive include of Recursive2.xml")
  1688. # --------------------------------------------------------------------
  1689. # reported bugs
  1690. class BugsTest(unittest.TestCase):
  1691. def test_bug_xmltoolkit21(self):
  1692. # marshaller gives obscure errors for non-string values
  1693. def check(elem):
  1694. with self.assertRaises(TypeError) as cm:
  1695. serialize(elem)
  1696. self.assertEqual(str(cm.exception),
  1697. 'cannot serialize 123 (type int)')
  1698. elem = ET.Element(123)
  1699. check(elem) # tag
  1700. elem = ET.Element("elem")
  1701. elem.text = 123
  1702. check(elem) # text
  1703. elem = ET.Element("elem")
  1704. elem.tail = 123
  1705. check(elem) # tail
  1706. elem = ET.Element("elem")
  1707. elem.set(123, "123")
  1708. check(elem) # attribute key
  1709. elem = ET.Element("elem")
  1710. elem.set("123", 123)
  1711. check(elem) # attribute value
  1712. def test_bug_xmltoolkit25(self):
  1713. # typo in ElementTree.findtext
  1714. elem = ET.XML(SAMPLE_XML)
  1715. tree = ET.ElementTree(elem)
  1716. self.assertEqual(tree.findtext("tag"), 'text')
  1717. self.assertEqual(tree.findtext("section/tag"), 'subtext')
  1718. def test_bug_xmltoolkit28(self):
  1719. # .//tag causes exceptions
  1720. tree = ET.XML("<doc><table><tbody/></table></doc>")
  1721. self.assertEqual(summarize_list(tree.findall(".//thead")), [])
  1722. self.assertEqual(summarize_list(tree.findall(".//tbody")), ['tbody'])
  1723. def test_bug_xmltoolkitX1(self):
  1724. # dump() doesn't flush the output buffer
  1725. tree = ET.XML("<doc><table><tbody/></table></doc>")
  1726. with support.captured_stdout() as stdout:
  1727. ET.dump(tree)
  1728. self.assertEqual(stdout.getvalue(), '<doc><table><tbody /></table></doc>\n')
  1729. def test_bug_xmltoolkit39(self):
  1730. # non-ascii element and attribute names doesn't work
  1731. tree = ET.XML(b"<?xml version='1.0' encoding='iso-8859-1'?><t\xe4g />")
  1732. self.assertEqual(ET.tostring(tree, "utf-8"), b'<t\xc3\xa4g />')
  1733. tree = ET.XML(b"<?xml version='1.0' encoding='iso-8859-1'?>"
  1734. b"<tag \xe4ttr='v&#228;lue' />")
  1735. self.assertEqual(tree.attrib, {'\xe4ttr': 'v\xe4lue'})
  1736. self.assertEqual(ET.tostring(tree, "utf-8"),
  1737. b'<tag \xc3\xa4ttr="v\xc3\xa4lue" />')
  1738. tree = ET.XML(b"<?xml version='1.0' encoding='iso-8859-1'?>"
  1739. b'<t\xe4g>text</t\xe4g>')
  1740. self.assertEqual(ET.tostring(tree, "utf-8"),
  1741. b'<t\xc3\xa4g>text</t\xc3\xa4g>')
  1742. tree = ET.Element("t\u00e4g")
  1743. self.assertEqual(ET.tostring(tree, "utf-8"), b'<t\xc3\xa4g />')
  1744. tree = ET.Element("tag")
  1745. tree.set("\u00e4ttr", "v\u00e4lue")
  1746. self.assertEqual(ET.tostring(tree, "utf-8"),
  1747. b'<tag \xc3\xa4ttr="v\xc3\xa4lue" />')
  1748. def test_bug_xmltoolkit54(self):
  1749. # problems handling internally defined entities
  1750. e = ET.XML("<!DOCTYPE doc [<!ENTITY ldots '&#x8230;'>]>"
  1751. '<doc>&ldots;</doc>')
  1752. self.assertEqual(serialize(e, encoding="us-ascii"),
  1753. b'<doc>&#33328;</doc>')
  1754. self.assertEqual(serialize(e), '<doc>\u8230</doc>')
  1755. def test_bug_xmltoolkit55(self):
  1756. # make sure we're reporting the first error, not the last
  1757. with self.assertRaises(ET.ParseError) as cm:
  1758. ET.XML(b"<!DOCTYPE doc SYSTEM 'doc.dtd'>"
  1759. b'<doc>&ldots;&ndots;&rdots;</doc>')
  1760. self.assertEqual(str(cm.exception),
  1761. 'undefined entity &ldots;: line 1, column 36')
  1762. def test_bug_xmltoolkit60(self):
  1763. # Handle crash in stream source.
  1764. class ExceptionFile:
  1765. def read(self, x):
  1766. raise OSError
  1767. self.assertRaises(OSError, ET.parse, ExceptionFile())
  1768. def test_bug_xmltoolkit62(self):
  1769. # Don't crash when using custom entities.
  1770. ENTITIES = {'rsquo': '\u2019', 'lsquo': '\u2018'}
  1771. parser = ET.XMLParser()
  1772. parser.entity.update(ENTITIES)
  1773. parser.feed("""<?xml version="1.0" encoding="UTF-8"?>
  1774. <!DOCTYPE patent-application-publication SYSTEM "pap-v15-2001-01-31.dtd" []>
  1775. <patent-application-publication>
  1776. <subdoc-abstract>
  1777. <paragraph id="A-0001" lvl="0">A new cultivar of Begonia plant named &lsquo;BCT9801BEG&rsquo;.</paragraph>
  1778. </subdoc-abstract>
  1779. </patent-application-publication>""")
  1780. t = parser.close()
  1781. self.assertEqual(t.find('.//paragraph').text,
  1782. 'A new cultivar of Begonia plant named \u2018BCT9801BEG\u2019.')
  1783. @unittest.skipIf(sys.gettrace(), "Skips under coverage.")
  1784. def test_bug_xmltoolkit63(self):
  1785. # Check reference leak.
  1786. def xmltoolkit63():
  1787. tree = ET.TreeBuilder()
  1788. tree.start("tag", {})
  1789. tree.data("text")
  1790. tree.end("tag")
  1791. xmltoolkit63()
  1792. count = sys.getrefcount(None)
  1793. for i in range(1000):
  1794. xmltoolkit63()
  1795. self.assertEqual(sys.getrefcount(None), count)
  1796. def test_bug_200708_newline(self):
  1797. # Preserve newlines in attributes.
  1798. e = ET.Element('SomeTag', text="def _f():\n return 3\n")
  1799. self.assertEqual(ET.tostring(e),
  1800. b'<SomeTag text="def _f():&#10; return 3&#10;" />')
  1801. self.assertEqual(ET.XML(ET.tostring(e)).get("text"),
  1802. 'def _f():\n return 3\n')
  1803. self.assertEqual(ET.tostring(ET.XML(ET.tostring(e))),
  1804. b'<SomeTag text="def _f():&#10; return 3&#10;" />')
  1805. def test_bug_200708_close(self):
  1806. # Test default builder.
  1807. parser = ET.XMLParser() # default
  1808. parser.feed("<element>some text</element>")
  1809. self.assertEqual(parser.close().tag, 'element')
  1810. # Test custom builder.
  1811. class EchoTarget:
  1812. def close(self):
  1813. return ET.Element("element") # simulate root
  1814. parser = ET.XMLParser(target=EchoTarget())
  1815. parser.feed("<element>some text</element>")
  1816. self.assertEqual(parser.close().tag, 'element')
  1817. def test_bug_200709_default_namespace(self):
  1818. e = ET.Element("{default}elem")
  1819. s = ET.SubElement(e, "{default}elem")
  1820. self.assertEqual(serialize(e, default_namespace="default"), # 1
  1821. '<elem xmlns="default"><elem /></elem>')
  1822. e = ET.Element("{default}elem")
  1823. s = ET.SubElement(e, "{default}elem")
  1824. s = ET.SubElement(e, "{not-default}elem")
  1825. self.assertEqual(serialize(e, default_namespace="default"), # 2
  1826. '<elem xmlns="default" xmlns:ns1="not-default">'
  1827. '<elem />'
  1828. '<ns1:elem />'
  1829. '</elem>')
  1830. e = ET.Element("{default}elem")
  1831. s = ET.SubElement(e, "{default}elem")
  1832. s = ET.SubElement(e, "elem") # unprefixed name
  1833. with self.assertRaises(ValueError) as cm:
  1834. serialize(e, default_namespace="default") # 3
  1835. self.assertEqual(str(cm.exception),
  1836. 'cannot use non-qualified names with default_namespace option')
  1837. def test_bug_200709_register_namespace(self):
  1838. e = ET.Element("{http://namespace.invalid/does/not/exist/}title")
  1839. self.assertEqual(ET.tostring(e),
  1840. b'<ns0:title xmlns:ns0="http://namespace.invalid/does/not/exist/" />')
  1841. ET.register_namespace("foo", "http://namespace.invalid/does/not/exist/")
  1842. e = ET.Element("{http://namespace.invalid/does/not/exist/}title")
  1843. self.assertEqual(ET.tostring(e),
  1844. b'<foo:title xmlns:foo="http://namespace.invalid/does/not/exist/" />')
  1845. # And the Dublin Core namespace is in the default list:
  1846. e = ET.Element("{http://purl.org/dc/elements/1.1/}title")
  1847. self.assertEqual(ET.tostring(e),
  1848. b'<dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" />')
  1849. def test_bug_200709_element_comment(self):
  1850. # Not sure if this can be fixed, really (since the serializer needs
  1851. # ET.Comment, not cET.comment).
  1852. a = ET.Element('a')
  1853. a.append(ET.Comment('foo'))
  1854. self.assertEqual(a[0].tag, ET.Comment)
  1855. a = ET.Element('a')
  1856. a.append(ET.PI('foo'))
  1857. self.assertEqual(a[0].tag, ET.PI)
  1858. def test_bug_200709_element_insert(self):
  1859. a = ET.Element('a')
  1860. b = ET.SubElement(a, 'b')
  1861. c = ET.SubElement(a, 'c')
  1862. d = ET.Element('d')
  1863. a.insert(0, d)
  1864. self.assertEqual(summarize_list(a), ['d', 'b', 'c'])
  1865. a.insert(-1, d)
  1866. self.assertEqual(summarize_list(a), ['d', 'b', 'd', 'c'])
  1867. def test_bug_200709_iter_comment(self):
  1868. a = ET.Element('a')
  1869. b = ET.SubElement(a, 'b')
  1870. comment_b = ET.Comment("TEST-b")
  1871. b.append(comment_b)
  1872. self.assertEqual(summarize_list(a.iter(ET.Comment)), [ET.Comment])
  1873. # --------------------------------------------------------------------
  1874. # reported on bugs.python.org
  1875. def test_bug_1534630(self):
  1876. bob = ET.TreeBuilder()
  1877. e = bob.data("data")
  1878. e = bob.start("tag", {})
  1879. e = bob.end("tag")
  1880. e = bob.close()
  1881. self.assertEqual(serialize(e), '<tag />')
  1882. def test_issue6233(self):
  1883. e = ET.XML(b"<?xml version='1.0' encoding='utf-8'?>"
  1884. b'<body>t\xc3\xa3g</body>')
  1885. self.assertEqual(ET.tostring(e, 'ascii'),
  1886. b"<?xml version='1.0' encoding='ascii'?>\n"
  1887. b'<body>t&#227;g</body>')
  1888. e = ET.XML(b"<?xml version='1.0' encoding='iso-8859-1'?>"
  1889. b'<body>t\xe3g</body>')
  1890. self.assertEqual(ET.tostring(e, 'ascii'),
  1891. b"<?xml version='1.0' encoding='ascii'?>\n"
  1892. b'<body>t&#227;g</body>')
  1893. def test_issue6565(self):
  1894. elem = ET.XML("<body><tag/></body>")
  1895. self.assertEqual(summarize_list(elem), ['tag'])
  1896. newelem = ET.XML(SAMPLE_XML)
  1897. elem[:] = newelem[:]
  1898. self.assertEqual(summarize_list(elem), ['tag', 'tag', 'section'])
  1899. def test_issue10777(self):
  1900. # Registering a namespace twice caused a "dictionary changed size during
  1901. # iteration" bug.
  1902. ET.register_namespace('test10777', 'http://myuri/')
  1903. ET.register_namespace('test10777', 'http://myuri/')
  1904. def test_lost_text(self):
  1905. # Issue #25902: Borrowed text can disappear
  1906. class Text:
  1907. def __bool__(self):
  1908. e.text = 'changed'
  1909. return True
  1910. e = ET.Element('tag')
  1911. e.text = Text()
  1912. i = e.itertext()
  1913. t = next(i)
  1914. self.assertIsInstance(t, Text)
  1915. self.assertIsInstance(e.text, str)
  1916. self.assertEqual(e.text, 'changed')
  1917. def test_lost_tail(self):
  1918. # Issue #25902: Borrowed tail can disappear
  1919. class Text:
  1920. def __bool__(self):
  1921. e[0].tail = 'changed'
  1922. return True
  1923. e = ET.Element('root')
  1924. e.append(ET.Element('tag'))
  1925. e[0].tail = Text()
  1926. i = e.itertext()
  1927. t = next(i)
  1928. self.assertIsInstance(t, Text)
  1929. self.assertIsInstance(e[0].tail, str)
  1930. self.assertEqual(e[0].tail, 'changed')
  1931. def test_lost_elem(self):
  1932. # Issue #25902: Borrowed element can disappear
  1933. class Tag:
  1934. def __eq__(self, other):
  1935. e[0] = ET.Element('changed')
  1936. next(i)
  1937. return True
  1938. e = ET.Element('root')
  1939. e.append(ET.Element(Tag()))
  1940. e.append(ET.Element('tag'))
  1941. i = e.iter('tag')
  1942. try:
  1943. t = next(i)
  1944. except ValueError:
  1945. self.skipTest('generators are not reentrant')
  1946. self.assertIsInstance(t.tag, Tag)
  1947. self.assertIsInstance(e[0].tag, str)
  1948. self.assertEqual(e[0].tag, 'changed')
  1949. def check_expat224_utf8_bug(self, text):
  1950. xml = b'<a b="%s"/>' % text
  1951. root = ET.XML(xml)
  1952. self.assertEqual(root.get('b'), text.decode('utf-8'))
  1953. def test_expat224_utf8_bug(self):
  1954. # bpo-31170: Expat 2.2.3 had a bug in its UTF-8 decoder.
  1955. # Check that Expat 2.2.4 fixed the bug.
  1956. #
  1957. # Test buffer bounds at odd and even positions.
  1958. text = b'\xc3\xa0' * 1024
  1959. self.check_expat224_utf8_bug(text)
  1960. text = b'x' + b'\xc3\xa0' * 1024
  1961. self.check_expat224_utf8_bug(text)
  1962. def test_expat224_utf8_bug_file(self):
  1963. with open(UTF8_BUG_XMLFILE, 'rb') as fp:
  1964. raw = fp.read()
  1965. root = ET.fromstring(raw)
  1966. xmlattr = root.get('b')
  1967. # "Parse" manually the XML file to extract the value of the 'b'
  1968. # attribute of the <a b='xxx' /> XML element
  1969. text = raw.decode('utf-8').strip()
  1970. text = text.replace('\r\n', ' ')
  1971. text = text[6:-4]
  1972. self.assertEqual(root.get('b'), text)
  1973. def test_39495_treebuilder_start(self):
  1974. self.assertRaises(TypeError, ET.TreeBuilder().start, "tag")
  1975. self.assertRaises(TypeError, ET.TreeBuilder().start, "tag", None)
  1976. # --------------------------------------------------------------------
  1977. class BasicElementTest(ElementTestCase, unittest.TestCase):
  1978. def test___init__(self):
  1979. tag = "foo"
  1980. attrib = { "zix": "wyp" }
  1981. element_foo = ET.Element(tag, attrib)
  1982. # traits of an element
  1983. self.assertIsInstance(element_foo, ET.Element)
  1984. self.assertIn("tag", dir(element_foo))
  1985. self.assertIn("attrib", dir(element_foo))
  1986. self.assertIn("text", dir(element_foo))
  1987. self.assertIn("tail", dir(element_foo))
  1988. # string attributes have expected values
  1989. self.assertEqual(element_foo.tag, tag)
  1990. self.assertIsNone(element_foo.text)
  1991. self.assertIsNone(element_foo.tail)
  1992. # attrib is a copy
  1993. self.assertIsNot(element_foo.attrib, attrib)
  1994. self.assertEqual(element_foo.attrib, attrib)
  1995. # attrib isn't linked
  1996. attrib["bar"] = "baz"
  1997. self.assertIsNot(element_foo.attrib, attrib)
  1998. self.assertNotEqual(element_foo.attrib, attrib)
  1999. def test_copy(self):
  2000. # Only run this test if Element.copy() is defined.
  2001. if "copy" not in dir(ET.Element):
  2002. raise unittest.SkipTest("Element.copy() not present")
  2003. element_foo = ET.Element("foo", { "zix": "wyp" })
  2004. element_foo.append(ET.Element("bar", { "baz": "qix" }))
  2005. with self.assertWarns(DeprecationWarning):
  2006. element_foo2 = element_foo.copy()
  2007. # elements are not the same
  2008. self.assertIsNot(element_foo2, element_foo)
  2009. # string attributes are equal
  2010. self.assertEqual(element_foo2.tag, element_foo.tag)
  2011. self.assertEqual(element_foo2.text, element_foo.text)
  2012. self.assertEqual(element_foo2.tail, element_foo.tail)
  2013. # number of children is the same
  2014. self.assertEqual(len(element_foo2), len(element_foo))
  2015. # children are the same
  2016. for (child1, child2) in itertools.zip_longest(element_foo, element_foo2):
  2017. self.assertIs(child1, child2)
  2018. # attrib is a copy
  2019. self.assertEqual(element_foo2.attrib, element_foo.attrib)
  2020. def test___copy__(self):
  2021. element_foo = ET.Element("foo", { "zix": "wyp" })
  2022. element_foo.append(ET.Element("bar", { "baz": "qix" }))
  2023. element_foo2 = copy.copy(element_foo)
  2024. # elements are not the same
  2025. self.assertIsNot(element_foo2, element_foo)
  2026. # string attributes are equal
  2027. self.assertEqual(element_foo2.tag, element_foo.tag)
  2028. self.assertEqual(element_foo2.text, element_foo.text)
  2029. self.assertEqual(element_foo2.tail, element_foo.tail)
  2030. # number of children is the same
  2031. self.assertEqual(len(element_foo2), len(element_foo))
  2032. # children are the same
  2033. for (child1, child2) in itertools.zip_longest(element_foo, element_foo2):
  2034. self.assertIs(child1, child2)
  2035. # attrib is a copy
  2036. self.assertEqual(element_foo2.attrib, element_foo.attrib)
  2037. def test___deepcopy__(self):
  2038. element_foo = ET.Element("foo", { "zix": "wyp" })
  2039. element_foo.append(ET.Element("bar", { "baz": "qix" }))
  2040. element_foo2 = copy.deepcopy(element_foo)
  2041. # elements are not the same
  2042. self.assertIsNot(element_foo2, element_foo)
  2043. # string attributes are equal
  2044. self.assertEqual(element_foo2.tag, element_foo.tag)
  2045. self.assertEqual(element_foo2.text, element_foo.text)
  2046. self.assertEqual(element_foo2.tail, element_foo.tail)
  2047. # number of children is the same
  2048. self.assertEqual(len(element_foo2), len(element_foo))
  2049. # children are not the same
  2050. for (child1, child2) in itertools.zip_longest(element_foo, element_foo2):
  2051. self.assertIsNot(child1, child2)
  2052. # attrib is a copy
  2053. self.assertIsNot(element_foo2.attrib, element_foo.attrib)
  2054. self.assertEqual(element_foo2.attrib, element_foo.attrib)
  2055. # attrib isn't linked
  2056. element_foo.attrib["bar"] = "baz"
  2057. self.assertIsNot(element_foo2.attrib, element_foo.attrib)
  2058. self.assertNotEqual(element_foo2.attrib, element_foo.attrib)
  2059. def test_augmentation_type_errors(self):
  2060. e = ET.Element('joe')
  2061. self.assertRaises(TypeError, e.append, 'b')
  2062. self.assertRaises(TypeError, e.extend, [ET.Element('bar'), 'foo'])
  2063. self.assertRaises(TypeError, e.insert, 0, 'foo')
  2064. e[:] = [ET.Element('bar')]
  2065. with self.assertRaises(TypeError):
  2066. e[0] = 'foo'
  2067. with self.assertRaises(TypeError):
  2068. e[:] = [ET.Element('bar'), 'foo']
  2069. if hasattr(e, '__setstate__'):
  2070. state = {
  2071. 'tag': 'tag',
  2072. '_children': [None], # non-Element
  2073. 'attrib': 'attr',
  2074. 'tail': 'tail',
  2075. 'text': 'text',
  2076. }
  2077. self.assertRaises(TypeError, e.__setstate__, state)
  2078. if hasattr(e, '__deepcopy__'):
  2079. class E(ET.Element):
  2080. def __deepcopy__(self, memo):
  2081. return None # non-Element
  2082. e[:] = [E('bar')]
  2083. self.assertRaises(TypeError, copy.deepcopy, e)
  2084. def test_cyclic_gc(self):
  2085. class Dummy:
  2086. pass
  2087. # Test the shortest cycle: d->element->d
  2088. d = Dummy()
  2089. d.dummyref = ET.Element('joe', attr=d)
  2090. wref = weakref.ref(d)
  2091. del d
  2092. gc_collect()
  2093. self.assertIsNone(wref())
  2094. # A longer cycle: d->e->e2->d
  2095. e = ET.Element('joe')
  2096. d = Dummy()
  2097. d.dummyref = e
  2098. wref = weakref.ref(d)
  2099. e2 = ET.SubElement(e, 'foo', attr=d)
  2100. del d, e, e2
  2101. gc_collect()
  2102. self.assertIsNone(wref())
  2103. # A cycle between Element objects as children of one another
  2104. # e1->e2->e3->e1
  2105. e1 = ET.Element('e1')
  2106. e2 = ET.Element('e2')
  2107. e3 = ET.Element('e3')
  2108. e3.append(e1)
  2109. e2.append(e3)
  2110. e1.append(e2)
  2111. wref = weakref.ref(e1)
  2112. del e1, e2, e3
  2113. gc_collect()
  2114. self.assertIsNone(wref())
  2115. def test_weakref(self):
  2116. flag = False
  2117. def wref_cb(w):
  2118. nonlocal flag
  2119. flag = True
  2120. e = ET.Element('e')
  2121. wref = weakref.ref(e, wref_cb)
  2122. self.assertEqual(wref().tag, 'e')
  2123. del e
  2124. gc_collect() # For PyPy or other GCs.
  2125. self.assertEqual(flag, True)
  2126. self.assertEqual(wref(), None)
  2127. def test_get_keyword_args(self):
  2128. e1 = ET.Element('foo' , x=1, y=2, z=3)
  2129. self.assertEqual(e1.get('x', default=7), 1)
  2130. self.assertEqual(e1.get('w', default=7), 7)
  2131. def test_pickle(self):
  2132. # issue #16076: the C implementation wasn't pickleable.
  2133. for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
  2134. for dumper, loader in product(self.modules, repeat=2):
  2135. e = dumper.Element('foo', bar=42)
  2136. e.text = "text goes here"
  2137. e.tail = "opposite of head"
  2138. dumper.SubElement(e, 'child').append(dumper.Element('grandchild'))
  2139. e.append(dumper.Element('child'))
  2140. e.findall('.//grandchild')[0].set('attr', 'other value')
  2141. e2 = self.pickleRoundTrip(e, 'xml.etree.ElementTree',
  2142. dumper, loader, proto)
  2143. self.assertEqual(e2.tag, 'foo')
  2144. self.assertEqual(e2.attrib['bar'], 42)
  2145. self.assertEqual(len(e2), 2)
  2146. self.assertEqualElements(e, e2)
  2147. def test_pickle_issue18997(self):
  2148. for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
  2149. for dumper, loader in product(self.modules, repeat=2):
  2150. XMLTEXT = """<?xml version="1.0"?>
  2151. <group><dogs>4</dogs>
  2152. </group>"""
  2153. e1 = dumper.fromstring(XMLTEXT)
  2154. self.assertEqual(e1.__getstate__()['tag'], 'group')
  2155. e2 = self.pickleRoundTrip(e1, 'xml.etree.ElementTree',
  2156. dumper, loader, proto)
  2157. self.assertEqual(e2.tag, 'group')
  2158. self.assertEqual(e2[0].tag, 'dogs')
  2159. class BadElementTest(ElementTestCase, unittest.TestCase):
  2160. def test_extend_mutable_list(self):
  2161. class X:
  2162. @property
  2163. def __class__(self):
  2164. L[:] = [ET.Element('baz')]
  2165. return ET.Element
  2166. L = [X()]
  2167. e = ET.Element('foo')
  2168. try:
  2169. e.extend(L)
  2170. except TypeError:
  2171. pass
  2172. class Y(X, ET.Element):
  2173. pass
  2174. L = [Y('x')]
  2175. e = ET.Element('foo')
  2176. e.extend(L)
  2177. def test_extend_mutable_list2(self):
  2178. class X:
  2179. @property
  2180. def __class__(self):
  2181. del L[:]
  2182. return ET.Element
  2183. L = [X(), ET.Element('baz')]
  2184. e = ET.Element('foo')
  2185. try:
  2186. e.extend(L)
  2187. except TypeError:
  2188. pass
  2189. class Y(X, ET.Element):
  2190. pass
  2191. L = [Y('bar'), ET.Element('baz')]
  2192. e = ET.Element('foo')
  2193. e.extend(L)
  2194. def test_remove_with_mutating(self):
  2195. class X(ET.Element):
  2196. def __eq__(self, o):
  2197. del e[:]
  2198. return False
  2199. e = ET.Element('foo')
  2200. e.extend([X('bar')])
  2201. self.assertRaises(ValueError, e.remove, ET.Element('baz'))
  2202. e = ET.Element('foo')
  2203. e.extend([ET.Element('bar')])
  2204. self.assertRaises(ValueError, e.remove, X('baz'))
  2205. def test_recursive_repr(self):
  2206. # Issue #25455
  2207. e = ET.Element('foo')
  2208. with swap_attr(e, 'tag', e):
  2209. with self.assertRaises(RuntimeError):
  2210. repr(e) # Should not crash
  2211. def test_element_get_text(self):
  2212. # Issue #27863
  2213. class X(str):
  2214. def __del__(self):
  2215. try:
  2216. elem.text
  2217. except NameError:
  2218. pass
  2219. b = ET.TreeBuilder()
  2220. b.start('tag', {})
  2221. b.data('ABCD')
  2222. b.data(X('EFGH'))
  2223. b.data('IJKL')
  2224. b.end('tag')
  2225. elem = b.close()
  2226. self.assertEqual(elem.text, 'ABCDEFGHIJKL')
  2227. def test_element_get_tail(self):
  2228. # Issue #27863
  2229. class X(str):
  2230. def __del__(self):
  2231. try:
  2232. elem[0].tail
  2233. except NameError:
  2234. pass
  2235. b = ET.TreeBuilder()
  2236. b.start('root', {})
  2237. b.start('tag', {})
  2238. b.end('tag')
  2239. b.data('ABCD')
  2240. b.data(X('EFGH'))
  2241. b.data('IJKL')
  2242. b.end('root')
  2243. elem = b.close()
  2244. self.assertEqual(elem[0].tail, 'ABCDEFGHIJKL')
  2245. def test_subscr(self):
  2246. # Issue #27863
  2247. class X:
  2248. def __index__(self):
  2249. del e[:]
  2250. return 1
  2251. e = ET.Element('elem')
  2252. e.append(ET.Element('child'))
  2253. e[:X()] # shouldn't crash
  2254. e.append(ET.Element('child'))
  2255. e[0:10:X()] # shouldn't crash
  2256. def test_ass_subscr(self):
  2257. # Issue #27863
  2258. class X:
  2259. def __index__(self):
  2260. e[:] = []
  2261. return 1
  2262. e = ET.Element('elem')
  2263. for _ in range(10):
  2264. e.insert(0, ET.Element('child'))
  2265. e[0:10:X()] = [] # shouldn't crash
  2266. def test_treebuilder_start(self):
  2267. # Issue #27863
  2268. def element_factory(x, y):
  2269. return []
  2270. b = ET.TreeBuilder(element_factory=element_factory)
  2271. b.start('tag', {})
  2272. b.data('ABCD')
  2273. self.assertRaises(AttributeError, b.start, 'tag2', {})
  2274. del b
  2275. gc_collect()
  2276. def test_treebuilder_end(self):
  2277. # Issue #27863
  2278. def element_factory(x, y):
  2279. return []
  2280. b = ET.TreeBuilder(element_factory=element_factory)
  2281. b.start('tag', {})
  2282. b.data('ABCD')
  2283. self.assertRaises(AttributeError, b.end, 'tag')
  2284. del b
  2285. gc_collect()
  2286. class MutatingElementPath(str):
  2287. def __new__(cls, elem, *args):
  2288. self = str.__new__(cls, *args)
  2289. self.elem = elem
  2290. return self
  2291. def __eq__(self, o):
  2292. del self.elem[:]
  2293. return True
  2294. MutatingElementPath.__hash__ = str.__hash__
  2295. class BadElementPath(str):
  2296. def __eq__(self, o):
  2297. raise 1/0
  2298. BadElementPath.__hash__ = str.__hash__
  2299. class BadElementPathTest(ElementTestCase, unittest.TestCase):
  2300. def setUp(self):
  2301. super().setUp()
  2302. from xml.etree import ElementPath
  2303. self.path_cache = ElementPath._cache
  2304. ElementPath._cache = {}
  2305. def tearDown(self):
  2306. from xml.etree import ElementPath
  2307. ElementPath._cache = self.path_cache
  2308. super().tearDown()
  2309. def test_find_with_mutating(self):
  2310. e = ET.Element('foo')
  2311. e.extend([ET.Element('bar')])
  2312. e.find(MutatingElementPath(e, 'x'))
  2313. def test_find_with_error(self):
  2314. e = ET.Element('foo')
  2315. e.extend([ET.Element('bar')])
  2316. try:
  2317. e.find(BadElementPath('x'))
  2318. except ZeroDivisionError:
  2319. pass
  2320. def test_findtext_with_mutating(self):
  2321. e = ET.Element('foo')
  2322. e.extend([ET.Element('bar')])
  2323. e.findtext(MutatingElementPath(e, 'x'))
  2324. def test_findtext_with_error(self):
  2325. e = ET.Element('foo')
  2326. e.extend([ET.Element('bar')])
  2327. try:
  2328. e.findtext(BadElementPath('x'))
  2329. except ZeroDivisionError:
  2330. pass
  2331. def test_findtext_with_falsey_text_attribute(self):
  2332. root_elem = ET.Element('foo')
  2333. sub_elem = ET.SubElement(root_elem, 'bar')
  2334. falsey = ["", 0, False, [], (), {}]
  2335. for val in falsey:
  2336. sub_elem.text = val
  2337. self.assertEqual(root_elem.findtext('./bar'), val)
  2338. def test_findtext_with_none_text_attribute(self):
  2339. root_elem = ET.Element('foo')
  2340. sub_elem = ET.SubElement(root_elem, 'bar')
  2341. sub_elem.text = None
  2342. self.assertEqual(root_elem.findtext('./bar'), '')
  2343. def test_findall_with_mutating(self):
  2344. e = ET.Element('foo')
  2345. e.extend([ET.Element('bar')])
  2346. e.findall(MutatingElementPath(e, 'x'))
  2347. def test_findall_with_error(self):
  2348. e = ET.Element('foo')
  2349. e.extend([ET.Element('bar')])
  2350. try:
  2351. e.findall(BadElementPath('x'))
  2352. except ZeroDivisionError:
  2353. pass
  2354. class ElementTreeTypeTest(unittest.TestCase):
  2355. def test_istype(self):
  2356. self.assertIsInstance(ET.ParseError, type)
  2357. self.assertIsInstance(ET.QName, type)
  2358. self.assertIsInstance(ET.ElementTree, type)
  2359. self.assertIsInstance(ET.Element, type)
  2360. self.assertIsInstance(ET.TreeBuilder, type)
  2361. self.assertIsInstance(ET.XMLParser, type)
  2362. def test_Element_subclass_trivial(self):
  2363. class MyElement(ET.Element):
  2364. pass
  2365. mye = MyElement('foo')
  2366. self.assertIsInstance(mye, ET.Element)
  2367. self.assertIsInstance(mye, MyElement)
  2368. self.assertEqual(mye.tag, 'foo')
  2369. # test that attribute assignment works (issue 14849)
  2370. mye.text = "joe"
  2371. self.assertEqual(mye.text, "joe")
  2372. def test_Element_subclass_constructor(self):
  2373. class MyElement(ET.Element):
  2374. def __init__(self, tag, attrib={}, **extra):
  2375. super(MyElement, self).__init__(tag + '__', attrib, **extra)
  2376. mye = MyElement('foo', {'a': 1, 'b': 2}, c=3, d=4)
  2377. self.assertEqual(mye.tag, 'foo__')
  2378. self.assertEqual(sorted(mye.items()),
  2379. [('a', 1), ('b', 2), ('c', 3), ('d', 4)])
  2380. def test_Element_subclass_new_method(self):
  2381. class MyElement(ET.Element):
  2382. def newmethod(self):
  2383. return self.tag
  2384. mye = MyElement('joe')
  2385. self.assertEqual(mye.newmethod(), 'joe')
  2386. def test_Element_subclass_find(self):
  2387. class MyElement(ET.Element):
  2388. pass
  2389. e = ET.Element('foo')
  2390. e.text = 'text'
  2391. sub = MyElement('bar')
  2392. sub.text = 'subtext'
  2393. e.append(sub)
  2394. self.assertEqual(e.findtext('bar'), 'subtext')
  2395. self.assertEqual(e.find('bar').tag, 'bar')
  2396. found = list(e.findall('bar'))
  2397. self.assertEqual(len(found), 1, found)
  2398. self.assertEqual(found[0].tag, 'bar')
  2399. class ElementFindTest(unittest.TestCase):
  2400. def test_find_simple(self):
  2401. e = ET.XML(SAMPLE_XML)
  2402. self.assertEqual(e.find('tag').tag, 'tag')
  2403. self.assertEqual(e.find('section/tag').tag, 'tag')
  2404. self.assertEqual(e.find('./tag').tag, 'tag')
  2405. e[2] = ET.XML(SAMPLE_SECTION)
  2406. self.assertEqual(e.find('section/nexttag').tag, 'nexttag')
  2407. self.assertEqual(e.findtext('./tag'), 'text')
  2408. self.assertEqual(e.findtext('section/tag'), 'subtext')
  2409. # section/nexttag is found but has no text
  2410. self.assertEqual(e.findtext('section/nexttag'), '')
  2411. self.assertEqual(e.findtext('section/nexttag', 'default'), '')
  2412. # tog doesn't exist and 'default' kicks in
  2413. self.assertIsNone(e.findtext('tog'))
  2414. self.assertEqual(e.findtext('tog', 'default'), 'default')
  2415. # Issue #16922
  2416. self.assertEqual(ET.XML('<tag><empty /></tag>').findtext('empty'), '')
  2417. def test_find_xpath(self):
  2418. LINEAR_XML = '''
  2419. <body>
  2420. <tag class='a'/>
  2421. <tag class='b'/>
  2422. <tag class='c'/>
  2423. <tag class='d'/>
  2424. </body>'''
  2425. e = ET.XML(LINEAR_XML)
  2426. # Test for numeric indexing and last()
  2427. self.assertEqual(e.find('./tag[1]').attrib['class'], 'a')
  2428. self.assertEqual(e.find('./tag[2]').attrib['class'], 'b')
  2429. self.assertEqual(e.find('./tag[last()]').attrib['class'], 'd')
  2430. self.assertEqual(e.find('./tag[last()-1]').attrib['class'], 'c')
  2431. self.assertEqual(e.find('./tag[last()-2]').attrib['class'], 'b')
  2432. self.assertRaisesRegex(SyntaxError, 'XPath', e.find, './tag[0]')
  2433. self.assertRaisesRegex(SyntaxError, 'XPath', e.find, './tag[-1]')
  2434. self.assertRaisesRegex(SyntaxError, 'XPath', e.find, './tag[last()-0]')
  2435. self.assertRaisesRegex(SyntaxError, 'XPath', e.find, './tag[last()+1]')
  2436. def test_findall(self):
  2437. e = ET.XML(SAMPLE_XML)
  2438. e[2] = ET.XML(SAMPLE_SECTION)
  2439. self.assertEqual(summarize_list(e.findall('.')), ['body'])
  2440. self.assertEqual(summarize_list(e.findall('tag')), ['tag', 'tag'])
  2441. self.assertEqual(summarize_list(e.findall('tog')), [])
  2442. self.assertEqual(summarize_list(e.findall('tog/foo')), [])
  2443. self.assertEqual(summarize_list(e.findall('*')),
  2444. ['tag', 'tag', 'section'])
  2445. self.assertEqual(summarize_list(e.findall('.//tag')),
  2446. ['tag'] * 4)
  2447. self.assertEqual(summarize_list(e.findall('section/tag')), ['tag'])
  2448. self.assertEqual(summarize_list(e.findall('section//tag')), ['tag'] * 2)
  2449. self.assertEqual(summarize_list(e.findall('section/*')),
  2450. ['tag', 'nexttag', 'nextsection'])
  2451. self.assertEqual(summarize_list(e.findall('section//*')),
  2452. ['tag', 'nexttag', 'nextsection', 'tag'])
  2453. self.assertEqual(summarize_list(e.findall('section/.//*')),
  2454. ['tag', 'nexttag', 'nextsection', 'tag'])
  2455. self.assertEqual(summarize_list(e.findall('*/*')),
  2456. ['tag', 'nexttag', 'nextsection'])
  2457. self.assertEqual(summarize_list(e.findall('*//*')),
  2458. ['tag', 'nexttag', 'nextsection', 'tag'])
  2459. self.assertEqual(summarize_list(e.findall('*/tag')), ['tag'])
  2460. self.assertEqual(summarize_list(e.findall('*/./tag')), ['tag'])
  2461. self.assertEqual(summarize_list(e.findall('./tag')), ['tag'] * 2)
  2462. self.assertEqual(summarize_list(e.findall('././tag')), ['tag'] * 2)
  2463. self.assertEqual(summarize_list(e.findall('.//tag[@class]')),
  2464. ['tag'] * 3)
  2465. self.assertEqual(summarize_list(e.findall('.//tag[@class="a"]')),
  2466. ['tag'])
  2467. self.assertEqual(summarize_list(e.findall('.//tag[@class!="a"]')),
  2468. ['tag'] * 2)
  2469. self.assertEqual(summarize_list(e.findall('.//tag[@class="b"]')),
  2470. ['tag'] * 2)
  2471. self.assertEqual(summarize_list(e.findall('.//tag[@class!="b"]')),
  2472. ['tag'])
  2473. self.assertEqual(summarize_list(e.findall('.//tag[@id]')),
  2474. ['tag'])
  2475. self.assertEqual(summarize_list(e.findall('.//section[tag]')),
  2476. ['section'])
  2477. self.assertEqual(summarize_list(e.findall('.//section[element]')), [])
  2478. self.assertEqual(summarize_list(e.findall('../tag')), [])
  2479. self.assertEqual(summarize_list(e.findall('section/../tag')),
  2480. ['tag'] * 2)
  2481. self.assertEqual(e.findall('section//'), e.findall('section//*'))
  2482. self.assertEqual(summarize_list(e.findall(".//section[tag='subtext']")),
  2483. ['section'])
  2484. self.assertEqual(summarize_list(e.findall(".//section[tag ='subtext']")),
  2485. ['section'])
  2486. self.assertEqual(summarize_list(e.findall(".//section[tag= 'subtext']")),
  2487. ['section'])
  2488. self.assertEqual(summarize_list(e.findall(".//section[tag = 'subtext']")),
  2489. ['section'])
  2490. self.assertEqual(summarize_list(e.findall(".//section[ tag = 'subtext' ]")),
  2491. ['section'])
  2492. # Negations of above tests. They match nothing because the sole section
  2493. # tag has subtext.
  2494. self.assertEqual(summarize_list(e.findall(".//section[tag!='subtext']")),
  2495. [])
  2496. self.assertEqual(summarize_list(e.findall(".//section[tag !='subtext']")),
  2497. [])
  2498. self.assertEqual(summarize_list(e.findall(".//section[tag!= 'subtext']")),
  2499. [])
  2500. self.assertEqual(summarize_list(e.findall(".//section[tag != 'subtext']")),
  2501. [])
  2502. self.assertEqual(summarize_list(e.findall(".//section[ tag != 'subtext' ]")),
  2503. [])
  2504. self.assertEqual(summarize_list(e.findall(".//tag[.='subtext']")),
  2505. ['tag'])
  2506. self.assertEqual(summarize_list(e.findall(".//tag[. ='subtext']")),
  2507. ['tag'])
  2508. self.assertEqual(summarize_list(e.findall('.//tag[.= "subtext"]')),
  2509. ['tag'])
  2510. self.assertEqual(summarize_list(e.findall('.//tag[ . = "subtext" ]')),
  2511. ['tag'])
  2512. self.assertEqual(summarize_list(e.findall(".//tag[. = 'subtext']")),
  2513. ['tag'])
  2514. self.assertEqual(summarize_list(e.findall(".//tag[. = 'subtext ']")),
  2515. [])
  2516. self.assertEqual(summarize_list(e.findall(".//tag[.= ' subtext']")),
  2517. [])
  2518. # Negations of above tests.
  2519. # Matches everything but the tag containing subtext
  2520. self.assertEqual(summarize_list(e.findall(".//tag[.!='subtext']")),
  2521. ['tag'] * 3)
  2522. self.assertEqual(summarize_list(e.findall(".//tag[. !='subtext']")),
  2523. ['tag'] * 3)
  2524. self.assertEqual(summarize_list(e.findall('.//tag[.!= "subtext"]')),
  2525. ['tag'] * 3)
  2526. self.assertEqual(summarize_list(e.findall('.//tag[ . != "subtext" ]')),
  2527. ['tag'] * 3)
  2528. self.assertEqual(summarize_list(e.findall(".//tag[. != 'subtext']")),
  2529. ['tag'] * 3)
  2530. # Matches all tags.
  2531. self.assertEqual(summarize_list(e.findall(".//tag[. != 'subtext ']")),
  2532. ['tag'] * 4)
  2533. self.assertEqual(summarize_list(e.findall(".//tag[.!= ' subtext']")),
  2534. ['tag'] * 4)
  2535. # duplicate section => 2x tag matches
  2536. e[1] = e[2]
  2537. self.assertEqual(summarize_list(e.findall(".//section[tag = 'subtext']")),
  2538. ['section', 'section'])
  2539. self.assertEqual(summarize_list(e.findall(".//tag[. = 'subtext']")),
  2540. ['tag', 'tag'])
  2541. def test_test_find_with_ns(self):
  2542. e = ET.XML(SAMPLE_XML_NS)
  2543. self.assertEqual(summarize_list(e.findall('tag')), [])
  2544. self.assertEqual(
  2545. summarize_list(e.findall("{http://effbot.org/ns}tag")),
  2546. ['{http://effbot.org/ns}tag'] * 2)
  2547. self.assertEqual(
  2548. summarize_list(e.findall(".//{http://effbot.org/ns}tag")),
  2549. ['{http://effbot.org/ns}tag'] * 3)
  2550. def test_findall_different_nsmaps(self):
  2551. root = ET.XML('''
  2552. <a xmlns:x="X" xmlns:y="Y">
  2553. <x:b><c/></x:b>
  2554. <b/>
  2555. <c><x:b/><b/></c><y:b/>
  2556. </a>''')
  2557. nsmap = {'xx': 'X'}
  2558. self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 2)
  2559. self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 2)
  2560. nsmap = {'xx': 'Y'}
  2561. self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 1)
  2562. self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 2)
  2563. nsmap = {'xx': 'X', '': 'Y'}
  2564. self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 2)
  2565. self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 1)
  2566. def test_findall_wildcard(self):
  2567. root = ET.XML('''
  2568. <a xmlns:x="X" xmlns:y="Y">
  2569. <x:b><c/></x:b>
  2570. <b/>
  2571. <c><x:b/><b/></c><y:b/>
  2572. </a>''')
  2573. root.append(ET.Comment('test'))
  2574. self.assertEqual(summarize_list(root.findall("{*}b")),
  2575. ['{X}b', 'b', '{Y}b'])
  2576. self.assertEqual(summarize_list(root.findall("{*}c")),
  2577. ['c'])
  2578. self.assertEqual(summarize_list(root.findall("{X}*")),
  2579. ['{X}b'])
  2580. self.assertEqual(summarize_list(root.findall("{Y}*")),
  2581. ['{Y}b'])
  2582. self.assertEqual(summarize_list(root.findall("{}*")),
  2583. ['b', 'c'])
  2584. self.assertEqual(summarize_list(root.findall("{}b")), # only for consistency
  2585. ['b'])
  2586. self.assertEqual(summarize_list(root.findall("{}b")),
  2587. summarize_list(root.findall("b")))
  2588. self.assertEqual(summarize_list(root.findall("{*}*")),
  2589. ['{X}b', 'b', 'c', '{Y}b'])
  2590. # This is an unfortunate difference, but that's how find('*') works.
  2591. self.assertEqual(summarize_list(root.findall("{*}*") + [root[-1]]),
  2592. summarize_list(root.findall("*")))
  2593. self.assertEqual(summarize_list(root.findall(".//{*}b")),
  2594. ['{X}b', 'b', '{X}b', 'b', '{Y}b'])
  2595. self.assertEqual(summarize_list(root.findall(".//{*}c")),
  2596. ['c', 'c'])
  2597. self.assertEqual(summarize_list(root.findall(".//{X}*")),
  2598. ['{X}b', '{X}b'])
  2599. self.assertEqual(summarize_list(root.findall(".//{Y}*")),
  2600. ['{Y}b'])
  2601. self.assertEqual(summarize_list(root.findall(".//{}*")),
  2602. ['c', 'b', 'c', 'b'])
  2603. self.assertEqual(summarize_list(root.findall(".//{}b")), # only for consistency
  2604. ['b', 'b'])
  2605. self.assertEqual(summarize_list(root.findall(".//{}b")),
  2606. summarize_list(root.findall(".//b")))
  2607. def test_bad_find(self):
  2608. e = ET.XML(SAMPLE_XML)
  2609. with self.assertRaisesRegex(SyntaxError, 'cannot use absolute path'):
  2610. e.findall('/tag')
  2611. def test_find_through_ElementTree(self):
  2612. e = ET.XML(SAMPLE_XML)
  2613. self.assertEqual(ET.ElementTree(e).find('tag').tag, 'tag')
  2614. self.assertEqual(ET.ElementTree(e).findtext('tag'), 'text')
  2615. self.assertEqual(summarize_list(ET.ElementTree(e).findall('tag')),
  2616. ['tag'] * 2)
  2617. # this produces a warning
  2618. msg = ("This search is broken in 1.3 and earlier, and will be fixed "
  2619. "in a future version. If you rely on the current behaviour, "
  2620. "change it to '.+'")
  2621. with self.assertWarnsRegex(FutureWarning, msg):
  2622. it = ET.ElementTree(e).findall('//tag')
  2623. self.assertEqual(summarize_list(it), ['tag'] * 3)
  2624. class ElementIterTest(unittest.TestCase):
  2625. def _ilist(self, elem, tag=None):
  2626. return summarize_list(elem.iter(tag))
  2627. def test_basic(self):
  2628. doc = ET.XML("<html><body>this is a <i>paragraph</i>.</body>..</html>")
  2629. self.assertEqual(self._ilist(doc), ['html', 'body', 'i'])
  2630. self.assertEqual(self._ilist(doc.find('body')), ['body', 'i'])
  2631. self.assertEqual(next(doc.iter()).tag, 'html')
  2632. self.assertEqual(''.join(doc.itertext()), 'this is a paragraph...')
  2633. self.assertEqual(''.join(doc.find('body').itertext()),
  2634. 'this is a paragraph.')
  2635. self.assertEqual(next(doc.itertext()), 'this is a ')
  2636. # iterparse should return an iterator
  2637. sourcefile = serialize(doc, to_string=False)
  2638. self.assertEqual(next(ET.iterparse(sourcefile))[0], 'end')
  2639. # With an explicit parser too (issue #9708)
  2640. sourcefile = serialize(doc, to_string=False)
  2641. parser = ET.XMLParser(target=ET.TreeBuilder())
  2642. self.assertEqual(next(ET.iterparse(sourcefile, parser=parser))[0],
  2643. 'end')
  2644. tree = ET.ElementTree(None)
  2645. self.assertRaises(AttributeError, tree.iter)
  2646. # Issue #16913
  2647. doc = ET.XML("<root>a&amp;<sub>b&amp;</sub>c&amp;</root>")
  2648. self.assertEqual(''.join(doc.itertext()), 'a&b&c&')
  2649. def test_corners(self):
  2650. # single root, no subelements
  2651. a = ET.Element('a')
  2652. self.assertEqual(self._ilist(a), ['a'])
  2653. # one child
  2654. b = ET.SubElement(a, 'b')
  2655. self.assertEqual(self._ilist(a), ['a', 'b'])
  2656. # one child and one grandchild
  2657. c = ET.SubElement(b, 'c')
  2658. self.assertEqual(self._ilist(a), ['a', 'b', 'c'])
  2659. # two children, only first with grandchild
  2660. d = ET.SubElement(a, 'd')
  2661. self.assertEqual(self._ilist(a), ['a', 'b', 'c', 'd'])
  2662. # replace first child by second
  2663. a[0] = a[1]
  2664. del a[1]
  2665. self.assertEqual(self._ilist(a), ['a', 'd'])
  2666. def test_iter_by_tag(self):
  2667. doc = ET.XML('''
  2668. <document>
  2669. <house>
  2670. <room>bedroom1</room>
  2671. <room>bedroom2</room>
  2672. </house>
  2673. <shed>nothing here
  2674. </shed>
  2675. <house>
  2676. <room>bedroom8</room>
  2677. </house>
  2678. </document>''')
  2679. self.assertEqual(self._ilist(doc, 'room'), ['room'] * 3)
  2680. self.assertEqual(self._ilist(doc, 'house'), ['house'] * 2)
  2681. # test that iter also accepts 'tag' as a keyword arg
  2682. self.assertEqual(
  2683. summarize_list(doc.iter(tag='room')),
  2684. ['room'] * 3)
  2685. # make sure both tag=None and tag='*' return all tags
  2686. all_tags = ['document', 'house', 'room', 'room',
  2687. 'shed', 'house', 'room']
  2688. self.assertEqual(summarize_list(doc.iter()), all_tags)
  2689. self.assertEqual(self._ilist(doc), all_tags)
  2690. self.assertEqual(self._ilist(doc, '*'), all_tags)
  2691. def test_copy(self):
  2692. a = ET.Element('a')
  2693. it = a.iter()
  2694. with self.assertRaises(TypeError):
  2695. copy.copy(it)
  2696. def test_pickle(self):
  2697. a = ET.Element('a')
  2698. it = a.iter()
  2699. for proto in range(pickle.HIGHEST_PROTOCOL + 1):
  2700. with self.assertRaises((TypeError, pickle.PicklingError)):
  2701. pickle.dumps(it, proto)
  2702. class TreeBuilderTest(unittest.TestCase):
  2703. sample1 = ('<!DOCTYPE html PUBLIC'
  2704. ' "-//W3C//DTD XHTML 1.0 Transitional//EN"'
  2705. ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">'
  2706. '<html>text<div>subtext</div>tail</html>')
  2707. sample2 = '''<toplevel>sometext</toplevel>'''
  2708. def _check_sample1_element(self, e):
  2709. self.assertEqual(e.tag, 'html')
  2710. self.assertEqual(e.text, 'text')
  2711. self.assertEqual(e.tail, None)
  2712. self.assertEqual(e.attrib, {})
  2713. children = list(e)
  2714. self.assertEqual(len(children), 1)
  2715. child = children[0]
  2716. self.assertEqual(child.tag, 'div')
  2717. self.assertEqual(child.text, 'subtext')
  2718. self.assertEqual(child.tail, 'tail')
  2719. self.assertEqual(child.attrib, {})
  2720. def test_dummy_builder(self):
  2721. class BaseDummyBuilder:
  2722. def close(self):
  2723. return 42
  2724. class DummyBuilder(BaseDummyBuilder):
  2725. data = start = end = lambda *a: None
  2726. parser = ET.XMLParser(target=DummyBuilder())
  2727. parser.feed(self.sample1)
  2728. self.assertEqual(parser.close(), 42)
  2729. parser = ET.XMLParser(target=BaseDummyBuilder())
  2730. parser.feed(self.sample1)
  2731. self.assertEqual(parser.close(), 42)
  2732. parser = ET.XMLParser(target=object())
  2733. parser.feed(self.sample1)
  2734. self.assertIsNone(parser.close())
  2735. def test_treebuilder_comment(self):
  2736. b = ET.TreeBuilder()
  2737. self.assertEqual(b.comment('ctext').tag, ET.Comment)
  2738. self.assertEqual(b.comment('ctext').text, 'ctext')
  2739. b = ET.TreeBuilder(comment_factory=ET.Comment)
  2740. self.assertEqual(b.comment('ctext').tag, ET.Comment)
  2741. self.assertEqual(b.comment('ctext').text, 'ctext')
  2742. b = ET.TreeBuilder(comment_factory=len)
  2743. self.assertEqual(b.comment('ctext'), len('ctext'))
  2744. def test_treebuilder_pi(self):
  2745. b = ET.TreeBuilder()
  2746. self.assertEqual(b.pi('target', None).tag, ET.PI)
  2747. self.assertEqual(b.pi('target', None).text, 'target')
  2748. b = ET.TreeBuilder(pi_factory=ET.PI)
  2749. self.assertEqual(b.pi('target').tag, ET.PI)
  2750. self.assertEqual(b.pi('target').text, "target")
  2751. self.assertEqual(b.pi('pitarget', ' text ').tag, ET.PI)
  2752. self.assertEqual(b.pi('pitarget', ' text ').text, "pitarget text ")
  2753. b = ET.TreeBuilder(pi_factory=lambda target, text: (len(target), text))
  2754. self.assertEqual(b.pi('target'), (len('target'), None))
  2755. self.assertEqual(b.pi('pitarget', ' text '), (len('pitarget'), ' text '))
  2756. def test_late_tail(self):
  2757. # Issue #37399: The tail of an ignored comment could overwrite the text before it.
  2758. class TreeBuilderSubclass(ET.TreeBuilder):
  2759. pass
  2760. xml = "<a>text<!-- comment -->tail</a>"
  2761. a = ET.fromstring(xml)
  2762. self.assertEqual(a.text, "texttail")
  2763. parser = ET.XMLParser(target=TreeBuilderSubclass())
  2764. parser.feed(xml)
  2765. a = parser.close()
  2766. self.assertEqual(a.text, "texttail")
  2767. xml = "<a>text<?pi data?>tail</a>"
  2768. a = ET.fromstring(xml)
  2769. self.assertEqual(a.text, "texttail")
  2770. xml = "<a>text<?pi data?>tail</a>"
  2771. parser = ET.XMLParser(target=TreeBuilderSubclass())
  2772. parser.feed(xml)
  2773. a = parser.close()
  2774. self.assertEqual(a.text, "texttail")
  2775. def test_late_tail_mix_pi_comments(self):
  2776. # Issue #37399: The tail of an ignored comment could overwrite the text before it.
  2777. # Test appending tails to comments/pis.
  2778. class TreeBuilderSubclass(ET.TreeBuilder):
  2779. pass
  2780. xml = "<a>text<?pi1?> <!-- comment -->\n<?pi2?>tail</a>"
  2781. parser = ET.XMLParser(target=ET.TreeBuilder(insert_comments=True))
  2782. parser.feed(xml)
  2783. a = parser.close()
  2784. self.assertEqual(a[0].text, ' comment ')
  2785. self.assertEqual(a[0].tail, '\ntail')
  2786. self.assertEqual(a.text, "text ")
  2787. parser = ET.XMLParser(target=TreeBuilderSubclass(insert_comments=True))
  2788. parser.feed(xml)
  2789. a = parser.close()
  2790. self.assertEqual(a[0].text, ' comment ')
  2791. self.assertEqual(a[0].tail, '\ntail')
  2792. self.assertEqual(a.text, "text ")
  2793. xml = "<a>text<!-- comment -->\n<?pi data?>tail</a>"
  2794. parser = ET.XMLParser(target=ET.TreeBuilder(insert_pis=True))
  2795. parser.feed(xml)
  2796. a = parser.close()
  2797. self.assertEqual(a[0].text, 'pi data')
  2798. self.assertEqual(a[0].tail, 'tail')
  2799. self.assertEqual(a.text, "text\n")
  2800. parser = ET.XMLParser(target=TreeBuilderSubclass(insert_pis=True))
  2801. parser.feed(xml)
  2802. a = parser.close()
  2803. self.assertEqual(a[0].text, 'pi data')
  2804. self.assertEqual(a[0].tail, 'tail')
  2805. self.assertEqual(a.text, "text\n")
  2806. def test_treebuilder_elementfactory_none(self):
  2807. parser = ET.XMLParser(target=ET.TreeBuilder(element_factory=None))
  2808. parser.feed(self.sample1)
  2809. e = parser.close()
  2810. self._check_sample1_element(e)
  2811. def test_subclass(self):
  2812. class MyTreeBuilder(ET.TreeBuilder):
  2813. def foobar(self, x):
  2814. return x * 2
  2815. tb = MyTreeBuilder()
  2816. self.assertEqual(tb.foobar(10), 20)
  2817. parser = ET.XMLParser(target=tb)
  2818. parser.feed(self.sample1)
  2819. e = parser.close()
  2820. self._check_sample1_element(e)
  2821. def test_subclass_comment_pi(self):
  2822. class MyTreeBuilder(ET.TreeBuilder):
  2823. def foobar(self, x):
  2824. return x * 2
  2825. tb = MyTreeBuilder(comment_factory=ET.Comment, pi_factory=ET.PI)
  2826. self.assertEqual(tb.foobar(10), 20)
  2827. parser = ET.XMLParser(target=tb)
  2828. parser.feed(self.sample1)
  2829. parser.feed('<!-- a comment--><?and a pi?>')
  2830. e = parser.close()
  2831. self._check_sample1_element(e)
  2832. def test_element_factory(self):
  2833. lst = []
  2834. def myfactory(tag, attrib):
  2835. nonlocal lst
  2836. lst.append(tag)
  2837. return ET.Element(tag, attrib)
  2838. tb = ET.TreeBuilder(element_factory=myfactory)
  2839. parser = ET.XMLParser(target=tb)
  2840. parser.feed(self.sample2)
  2841. parser.close()
  2842. self.assertEqual(lst, ['toplevel'])
  2843. def _check_element_factory_class(self, cls):
  2844. tb = ET.TreeBuilder(element_factory=cls)
  2845. parser = ET.XMLParser(target=tb)
  2846. parser.feed(self.sample1)
  2847. e = parser.close()
  2848. self.assertIsInstance(e, cls)
  2849. self._check_sample1_element(e)
  2850. def test_element_factory_subclass(self):
  2851. class MyElement(ET.Element):
  2852. pass
  2853. self._check_element_factory_class(MyElement)
  2854. def test_element_factory_pure_python_subclass(self):
  2855. # Mimic SimpleTAL's behaviour (issue #16089): both versions of
  2856. # TreeBuilder should be able to cope with a subclass of the
  2857. # pure Python Element class.
  2858. base = ET._Element_Py
  2859. # Not from a C extension
  2860. self.assertEqual(base.__module__, 'xml.etree.ElementTree')
  2861. # Force some multiple inheritance with a C class to make things
  2862. # more interesting.
  2863. class MyElement(base, ValueError):
  2864. pass
  2865. self._check_element_factory_class(MyElement)
  2866. def test_doctype(self):
  2867. class DoctypeParser:
  2868. _doctype = None
  2869. def doctype(self, name, pubid, system):
  2870. self._doctype = (name, pubid, system)
  2871. def close(self):
  2872. return self._doctype
  2873. parser = ET.XMLParser(target=DoctypeParser())
  2874. parser.feed(self.sample1)
  2875. self.assertEqual(parser.close(),
  2876. ('html', '-//W3C//DTD XHTML 1.0 Transitional//EN',
  2877. 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'))
  2878. def test_builder_lookup_errors(self):
  2879. class RaisingBuilder:
  2880. def __init__(self, raise_in=None, what=ValueError):
  2881. self.raise_in = raise_in
  2882. self.what = what
  2883. def __getattr__(self, name):
  2884. if name == self.raise_in:
  2885. raise self.what(self.raise_in)
  2886. def handle(*args):
  2887. pass
  2888. return handle
  2889. ET.XMLParser(target=RaisingBuilder())
  2890. # cET also checks for 'close' and 'doctype', PyET does it only at need
  2891. for event in ('start', 'data', 'end', 'comment', 'pi'):
  2892. with self.assertRaisesRegex(ValueError, event):
  2893. ET.XMLParser(target=RaisingBuilder(event))
  2894. ET.XMLParser(target=RaisingBuilder(what=AttributeError))
  2895. for event in ('start', 'data', 'end', 'comment', 'pi'):
  2896. parser = ET.XMLParser(target=RaisingBuilder(event, what=AttributeError))
  2897. parser.feed(self.sample1)
  2898. self.assertIsNone(parser.close())
  2899. class XMLParserTest(unittest.TestCase):
  2900. sample1 = b'<file><line>22</line></file>'
  2901. sample2 = (b'<!DOCTYPE html PUBLIC'
  2902. b' "-//W3C//DTD XHTML 1.0 Transitional//EN"'
  2903. b' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">'
  2904. b'<html>text</html>')
  2905. sample3 = ('<?xml version="1.0" encoding="iso-8859-1"?>\n'
  2906. '<money value="$\xa3\u20ac\U0001017b">$\xa3\u20ac\U0001017b</money>')
  2907. def _check_sample_element(self, e):
  2908. self.assertEqual(e.tag, 'file')
  2909. self.assertEqual(e[0].tag, 'line')
  2910. self.assertEqual(e[0].text, '22')
  2911. def test_constructor_args(self):
  2912. parser2 = ET.XMLParser(encoding='utf-8',
  2913. target=ET.TreeBuilder())
  2914. parser2.feed(self.sample1)
  2915. self._check_sample_element(parser2.close())
  2916. def test_subclass(self):
  2917. class MyParser(ET.XMLParser):
  2918. pass
  2919. parser = MyParser()
  2920. parser.feed(self.sample1)
  2921. self._check_sample_element(parser.close())
  2922. def test_doctype_warning(self):
  2923. with warnings.catch_warnings():
  2924. warnings.simplefilter('error', DeprecationWarning)
  2925. parser = ET.XMLParser()
  2926. parser.feed(self.sample2)
  2927. parser.close()
  2928. def test_subclass_doctype(self):
  2929. _doctype = None
  2930. class MyParserWithDoctype(ET.XMLParser):
  2931. def doctype(self, *args, **kwargs):
  2932. nonlocal _doctype
  2933. _doctype = (args, kwargs)
  2934. parser = MyParserWithDoctype()
  2935. with self.assertWarnsRegex(RuntimeWarning, 'doctype'):
  2936. parser.feed(self.sample2)
  2937. parser.close()
  2938. self.assertIsNone(_doctype)
  2939. _doctype = _doctype2 = None
  2940. with warnings.catch_warnings():
  2941. warnings.simplefilter('error', DeprecationWarning)
  2942. warnings.simplefilter('error', RuntimeWarning)
  2943. class DoctypeParser:
  2944. def doctype(self, name, pubid, system):
  2945. nonlocal _doctype2
  2946. _doctype2 = (name, pubid, system)
  2947. parser = MyParserWithDoctype(target=DoctypeParser())
  2948. parser.feed(self.sample2)
  2949. parser.close()
  2950. self.assertIsNone(_doctype)
  2951. self.assertEqual(_doctype2,
  2952. ('html', '-//W3C//DTD XHTML 1.0 Transitional//EN',
  2953. 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'))
  2954. def test_inherited_doctype(self):
  2955. '''Ensure that ordinary usage is not deprecated (Issue 19176)'''
  2956. with warnings.catch_warnings():
  2957. warnings.simplefilter('error', DeprecationWarning)
  2958. warnings.simplefilter('error', RuntimeWarning)
  2959. class MyParserWithoutDoctype(ET.XMLParser):
  2960. pass
  2961. parser = MyParserWithoutDoctype()
  2962. parser.feed(self.sample2)
  2963. parser.close()
  2964. def test_parse_string(self):
  2965. parser = ET.XMLParser(target=ET.TreeBuilder())
  2966. parser.feed(self.sample3)
  2967. e = parser.close()
  2968. self.assertEqual(e.tag, 'money')
  2969. self.assertEqual(e.attrib['value'], '$\xa3\u20ac\U0001017b')
  2970. self.assertEqual(e.text, '$\xa3\u20ac\U0001017b')
  2971. class NamespaceParseTest(unittest.TestCase):
  2972. def test_find_with_namespace(self):
  2973. nsmap = {'h': 'hello', 'f': 'foo'}
  2974. doc = ET.fromstring(SAMPLE_XML_NS_ELEMS)
  2975. self.assertEqual(len(doc.findall('{hello}table', nsmap)), 1)
  2976. self.assertEqual(len(doc.findall('.//{hello}td', nsmap)), 2)
  2977. self.assertEqual(len(doc.findall('.//{foo}name', nsmap)), 1)
  2978. class ElementSlicingTest(unittest.TestCase):
  2979. def _elem_tags(self, elemlist):
  2980. return [e.tag for e in elemlist]
  2981. def _subelem_tags(self, elem):
  2982. return self._elem_tags(list(elem))
  2983. def _make_elem_with_children(self, numchildren):
  2984. """Create an Element with a tag 'a', with the given amount of children
  2985. named 'a0', 'a1' ... and so on.
  2986. """
  2987. e = ET.Element('a')
  2988. for i in range(numchildren):
  2989. ET.SubElement(e, 'a%s' % i)
  2990. return e
  2991. def test_getslice_single_index(self):
  2992. e = self._make_elem_with_children(10)
  2993. self.assertEqual(e[1].tag, 'a1')
  2994. self.assertEqual(e[-2].tag, 'a8')
  2995. self.assertRaises(IndexError, lambda: e[12])
  2996. self.assertRaises(IndexError, lambda: e[-12])
  2997. def test_getslice_range(self):
  2998. e = self._make_elem_with_children(6)
  2999. self.assertEqual(self._elem_tags(e[3:]), ['a3', 'a4', 'a5'])
  3000. self.assertEqual(self._elem_tags(e[3:6]), ['a3', 'a4', 'a5'])
  3001. self.assertEqual(self._elem_tags(e[3:16]), ['a3', 'a4', 'a5'])
  3002. self.assertEqual(self._elem_tags(e[3:5]), ['a3', 'a4'])
  3003. self.assertEqual(self._elem_tags(e[3:-1]), ['a3', 'a4'])
  3004. self.assertEqual(self._elem_tags(e[:2]), ['a0', 'a1'])
  3005. def test_getslice_steps(self):
  3006. e = self._make_elem_with_children(10)
  3007. self.assertEqual(self._elem_tags(e[8:10:1]), ['a8', 'a9'])
  3008. self.assertEqual(self._elem_tags(e[::3]), ['a0', 'a3', 'a6', 'a9'])
  3009. self.assertEqual(self._elem_tags(e[::8]), ['a0', 'a8'])
  3010. self.assertEqual(self._elem_tags(e[1::8]), ['a1', 'a9'])
  3011. self.assertEqual(self._elem_tags(e[3::sys.maxsize]), ['a3'])
  3012. self.assertEqual(self._elem_tags(e[3::sys.maxsize<<64]), ['a3'])
  3013. def test_getslice_negative_steps(self):
  3014. e = self._make_elem_with_children(4)
  3015. self.assertEqual(self._elem_tags(e[::-1]), ['a3', 'a2', 'a1', 'a0'])
  3016. self.assertEqual(self._elem_tags(e[::-2]), ['a3', 'a1'])
  3017. self.assertEqual(self._elem_tags(e[3::-sys.maxsize]), ['a3'])
  3018. self.assertEqual(self._elem_tags(e[3::-sys.maxsize-1]), ['a3'])
  3019. self.assertEqual(self._elem_tags(e[3::-sys.maxsize<<64]), ['a3'])
  3020. def test_delslice(self):
  3021. e = self._make_elem_with_children(4)
  3022. del e[0:2]
  3023. self.assertEqual(self._subelem_tags(e), ['a2', 'a3'])
  3024. e = self._make_elem_with_children(4)
  3025. del e[0:]
  3026. self.assertEqual(self._subelem_tags(e), [])
  3027. e = self._make_elem_with_children(4)
  3028. del e[::-1]
  3029. self.assertEqual(self._subelem_tags(e), [])
  3030. e = self._make_elem_with_children(4)
  3031. del e[::-2]
  3032. self.assertEqual(self._subelem_tags(e), ['a0', 'a2'])
  3033. e = self._make_elem_with_children(4)
  3034. del e[1::2]
  3035. self.assertEqual(self._subelem_tags(e), ['a0', 'a2'])
  3036. e = self._make_elem_with_children(2)
  3037. del e[::2]
  3038. self.assertEqual(self._subelem_tags(e), ['a1'])
  3039. def test_setslice_single_index(self):
  3040. e = self._make_elem_with_children(4)
  3041. e[1] = ET.Element('b')
  3042. self.assertEqual(self._subelem_tags(e), ['a0', 'b', 'a2', 'a3'])
  3043. e[-2] = ET.Element('c')
  3044. self.assertEqual(self._subelem_tags(e), ['a0', 'b', 'c', 'a3'])
  3045. with self.assertRaises(IndexError):
  3046. e[5] = ET.Element('d')
  3047. with self.assertRaises(IndexError):
  3048. e[-5] = ET.Element('d')
  3049. self.assertEqual(self._subelem_tags(e), ['a0', 'b', 'c', 'a3'])
  3050. def test_setslice_range(self):
  3051. e = self._make_elem_with_children(4)
  3052. e[1:3] = [ET.Element('b%s' % i) for i in range(2)]
  3053. self.assertEqual(self._subelem_tags(e), ['a0', 'b0', 'b1', 'a3'])
  3054. e = self._make_elem_with_children(4)
  3055. e[1:3] = [ET.Element('b')]
  3056. self.assertEqual(self._subelem_tags(e), ['a0', 'b', 'a3'])
  3057. e = self._make_elem_with_children(4)
  3058. e[1:3] = [ET.Element('b%s' % i) for i in range(3)]
  3059. self.assertEqual(self._subelem_tags(e), ['a0', 'b0', 'b1', 'b2', 'a3'])
  3060. def test_setslice_steps(self):
  3061. e = self._make_elem_with_children(6)
  3062. e[1:5:2] = [ET.Element('b%s' % i) for i in range(2)]
  3063. self.assertEqual(self._subelem_tags(e), ['a0', 'b0', 'a2', 'b1', 'a4', 'a5'])
  3064. e = self._make_elem_with_children(6)
  3065. with self.assertRaises(ValueError):
  3066. e[1:5:2] = [ET.Element('b')]
  3067. with self.assertRaises(ValueError):
  3068. e[1:5:2] = [ET.Element('b%s' % i) for i in range(3)]
  3069. with self.assertRaises(ValueError):
  3070. e[1:5:2] = []
  3071. self.assertEqual(self._subelem_tags(e), ['a0', 'a1', 'a2', 'a3', 'a4', 'a5'])
  3072. e = self._make_elem_with_children(4)
  3073. e[1::sys.maxsize] = [ET.Element('b')]
  3074. self.assertEqual(self._subelem_tags(e), ['a0', 'b', 'a2', 'a3'])
  3075. e[1::sys.maxsize<<64] = [ET.Element('c')]
  3076. self.assertEqual(self._subelem_tags(e), ['a0', 'c', 'a2', 'a3'])
  3077. def test_setslice_negative_steps(self):
  3078. e = self._make_elem_with_children(4)
  3079. e[2:0:-1] = [ET.Element('b%s' % i) for i in range(2)]
  3080. self.assertEqual(self._subelem_tags(e), ['a0', 'b1', 'b0', 'a3'])
  3081. e = self._make_elem_with_children(4)
  3082. with self.assertRaises(ValueError):
  3083. e[2:0:-1] = [ET.Element('b')]
  3084. with self.assertRaises(ValueError):
  3085. e[2:0:-1] = [ET.Element('b%s' % i) for i in range(3)]
  3086. with self.assertRaises(ValueError):
  3087. e[2:0:-1] = []
  3088. self.assertEqual(self._subelem_tags(e), ['a0', 'a1', 'a2', 'a3'])
  3089. e = self._make_elem_with_children(4)
  3090. e[1::-sys.maxsize] = [ET.Element('b')]
  3091. self.assertEqual(self._subelem_tags(e), ['a0', 'b', 'a2', 'a3'])
  3092. e[1::-sys.maxsize-1] = [ET.Element('c')]
  3093. self.assertEqual(self._subelem_tags(e), ['a0', 'c', 'a2', 'a3'])
  3094. e[1::-sys.maxsize<<64] = [ET.Element('d')]
  3095. self.assertEqual(self._subelem_tags(e), ['a0', 'd', 'a2', 'a3'])
  3096. class IOTest(unittest.TestCase):
  3097. def test_encoding(self):
  3098. # Test encoding issues.
  3099. elem = ET.Element("tag")
  3100. elem.text = "abc"
  3101. self.assertEqual(serialize(elem), '<tag>abc</tag>')
  3102. for enc in ("utf-8", "us-ascii"):
  3103. with self.subTest(enc):
  3104. self.assertEqual(serialize(elem, encoding=enc),
  3105. b'<tag>abc</tag>')
  3106. self.assertEqual(serialize(elem, encoding=enc.upper()),
  3107. b'<tag>abc</tag>')
  3108. for enc in ("iso-8859-1", "utf-16", "utf-32"):
  3109. with self.subTest(enc):
  3110. self.assertEqual(serialize(elem, encoding=enc),
  3111. ("<?xml version='1.0' encoding='%s'?>\n"
  3112. "<tag>abc</tag>" % enc).encode(enc))
  3113. upper = enc.upper()
  3114. self.assertEqual(serialize(elem, encoding=upper),
  3115. ("<?xml version='1.0' encoding='%s'?>\n"
  3116. "<tag>abc</tag>" % upper).encode(enc))
  3117. elem = ET.Element("tag")
  3118. elem.text = "<&\"\'>"
  3119. self.assertEqual(serialize(elem), '<tag>&lt;&amp;"\'&gt;</tag>')
  3120. self.assertEqual(serialize(elem, encoding="utf-8"),
  3121. b'<tag>&lt;&amp;"\'&gt;</tag>')
  3122. self.assertEqual(serialize(elem, encoding="us-ascii"),
  3123. b'<tag>&lt;&amp;"\'&gt;</tag>')
  3124. for enc in ("iso-8859-1", "utf-16", "utf-32"):
  3125. self.assertEqual(serialize(elem, encoding=enc),
  3126. ("<?xml version='1.0' encoding='%s'?>\n"
  3127. "<tag>&lt;&amp;\"'&gt;</tag>" % enc).encode(enc))
  3128. elem = ET.Element("tag")
  3129. elem.attrib["key"] = "<&\"\'>"
  3130. self.assertEqual(serialize(elem), '<tag key="&lt;&amp;&quot;\'&gt;" />')
  3131. self.assertEqual(serialize(elem, encoding="utf-8"),
  3132. b'<tag key="&lt;&amp;&quot;\'&gt;" />')
  3133. self.assertEqual(serialize(elem, encoding="us-ascii"),
  3134. b'<tag key="&lt;&amp;&quot;\'&gt;" />')
  3135. for enc in ("iso-8859-1", "utf-16", "utf-32"):
  3136. self.assertEqual(serialize(elem, encoding=enc),
  3137. ("<?xml version='1.0' encoding='%s'?>\n"
  3138. "<tag key=\"&lt;&amp;&quot;'&gt;\" />" % enc).encode(enc))
  3139. elem = ET.Element("tag")
  3140. elem.text = '\xe5\xf6\xf6<>'
  3141. self.assertEqual(serialize(elem), '<tag>\xe5\xf6\xf6&lt;&gt;</tag>')
  3142. self.assertEqual(serialize(elem, encoding="utf-8"),
  3143. b'<tag>\xc3\xa5\xc3\xb6\xc3\xb6&lt;&gt;</tag>')
  3144. self.assertEqual(serialize(elem, encoding="us-ascii"),
  3145. b'<tag>&#229;&#246;&#246;&lt;&gt;</tag>')
  3146. for enc in ("iso-8859-1", "utf-16", "utf-32"):
  3147. self.assertEqual(serialize(elem, encoding=enc),
  3148. ("<?xml version='1.0' encoding='%s'?>\n"
  3149. "<tag>åöö&lt;&gt;</tag>" % enc).encode(enc))
  3150. elem = ET.Element("tag")
  3151. elem.attrib["key"] = '\xe5\xf6\xf6<>'
  3152. self.assertEqual(serialize(elem), '<tag key="\xe5\xf6\xf6&lt;&gt;" />')
  3153. self.assertEqual(serialize(elem, encoding="utf-8"),
  3154. b'<tag key="\xc3\xa5\xc3\xb6\xc3\xb6&lt;&gt;" />')
  3155. self.assertEqual(serialize(elem, encoding="us-ascii"),
  3156. b'<tag key="&#229;&#246;&#246;&lt;&gt;" />')
  3157. for enc in ("iso-8859-1", "utf-16", "utf-16le", "utf-16be", "utf-32"):
  3158. self.assertEqual(serialize(elem, encoding=enc),
  3159. ("<?xml version='1.0' encoding='%s'?>\n"
  3160. "<tag key=\"åöö&lt;&gt;\" />" % enc).encode(enc))
  3161. def test_write_to_filename(self):
  3162. self.addCleanup(os_helper.unlink, TESTFN)
  3163. tree = ET.ElementTree(ET.XML('''<site>\xf8</site>'''))
  3164. tree.write(TESTFN)
  3165. with open(TESTFN, 'rb') as f:
  3166. self.assertEqual(f.read(), b'''<site>&#248;</site>''')
  3167. def test_write_to_filename_with_encoding(self):
  3168. self.addCleanup(os_helper.unlink, TESTFN)
  3169. tree = ET.ElementTree(ET.XML('''<site>\xf8</site>'''))
  3170. tree.write(TESTFN, encoding='utf-8')
  3171. with open(TESTFN, 'rb') as f:
  3172. self.assertEqual(f.read(), b'''<site>\xc3\xb8</site>''')
  3173. tree.write(TESTFN, encoding='ISO-8859-1')
  3174. with open(TESTFN, 'rb') as f:
  3175. self.assertEqual(f.read(), convlinesep(
  3176. b'''<?xml version='1.0' encoding='ISO-8859-1'?>\n'''
  3177. b'''<site>\xf8</site>'''))
  3178. def test_write_to_filename_as_unicode(self):
  3179. self.addCleanup(os_helper.unlink, TESTFN)
  3180. with open(TESTFN, 'w') as f:
  3181. encoding = f.encoding
  3182. os_helper.unlink(TESTFN)
  3183. tree = ET.ElementTree(ET.XML('''<site>\xf8</site>'''))
  3184. tree.write(TESTFN, encoding='unicode')
  3185. with open(TESTFN, 'rb') as f:
  3186. self.assertEqual(f.read(), b"<site>\xc3\xb8</site>")
  3187. def test_write_to_text_file(self):
  3188. self.addCleanup(os_helper.unlink, TESTFN)
  3189. tree = ET.ElementTree(ET.XML('''<site>\xf8</site>'''))
  3190. with open(TESTFN, 'w', encoding='utf-8') as f:
  3191. tree.write(f, encoding='unicode')
  3192. self.assertFalse(f.closed)
  3193. with open(TESTFN, 'rb') as f:
  3194. self.assertEqual(f.read(), b'''<site>\xc3\xb8</site>''')
  3195. with open(TESTFN, 'w', encoding='ascii', errors='xmlcharrefreplace') as f:
  3196. tree.write(f, encoding='unicode')
  3197. self.assertFalse(f.closed)
  3198. with open(TESTFN, 'rb') as f:
  3199. self.assertEqual(f.read(), b'''<site>&#248;</site>''')
  3200. with open(TESTFN, 'w', encoding='ISO-8859-1') as f:
  3201. tree.write(f, encoding='unicode')
  3202. self.assertFalse(f.closed)
  3203. with open(TESTFN, 'rb') as f:
  3204. self.assertEqual(f.read(), b'''<site>\xf8</site>''')
  3205. def test_write_to_binary_file(self):
  3206. self.addCleanup(os_helper.unlink, TESTFN)
  3207. tree = ET.ElementTree(ET.XML('''<site>\xf8</site>'''))
  3208. with open(TESTFN, 'wb') as f:
  3209. tree.write(f)
  3210. self.assertFalse(f.closed)
  3211. with open(TESTFN, 'rb') as f:
  3212. self.assertEqual(f.read(), b'''<site>&#248;</site>''')
  3213. def test_write_to_binary_file_with_encoding(self):
  3214. self.addCleanup(os_helper.unlink, TESTFN)
  3215. tree = ET.ElementTree(ET.XML('''<site>\xf8</site>'''))
  3216. with open(TESTFN, 'wb') as f:
  3217. tree.write(f, encoding='utf-8')
  3218. self.assertFalse(f.closed)
  3219. with open(TESTFN, 'rb') as f:
  3220. self.assertEqual(f.read(), b'''<site>\xc3\xb8</site>''')
  3221. with open(TESTFN, 'wb') as f:
  3222. tree.write(f, encoding='ISO-8859-1')
  3223. self.assertFalse(f.closed)
  3224. with open(TESTFN, 'rb') as f:
  3225. self.assertEqual(f.read(),
  3226. b'''<?xml version='1.0' encoding='ISO-8859-1'?>\n'''
  3227. b'''<site>\xf8</site>''')
  3228. def test_write_to_binary_file_with_bom(self):
  3229. self.addCleanup(os_helper.unlink, TESTFN)
  3230. tree = ET.ElementTree(ET.XML('''<site>\xf8</site>'''))
  3231. # test BOM writing to buffered file
  3232. with open(TESTFN, 'wb') as f:
  3233. tree.write(f, encoding='utf-16')
  3234. self.assertFalse(f.closed)
  3235. with open(TESTFN, 'rb') as f:
  3236. self.assertEqual(f.read(),
  3237. '''<?xml version='1.0' encoding='utf-16'?>\n'''
  3238. '''<site>\xf8</site>'''.encode("utf-16"))
  3239. # test BOM writing to non-buffered file
  3240. with open(TESTFN, 'wb', buffering=0) as f:
  3241. tree.write(f, encoding='utf-16')
  3242. self.assertFalse(f.closed)
  3243. with open(TESTFN, 'rb') as f:
  3244. self.assertEqual(f.read(),
  3245. '''<?xml version='1.0' encoding='utf-16'?>\n'''
  3246. '''<site>\xf8</site>'''.encode("utf-16"))
  3247. def test_read_from_stringio(self):
  3248. tree = ET.ElementTree()
  3249. stream = io.StringIO('''<?xml version="1.0"?><site></site>''')
  3250. tree.parse(stream)
  3251. self.assertEqual(tree.getroot().tag, 'site')
  3252. def test_write_to_stringio(self):
  3253. tree = ET.ElementTree(ET.XML('''<site>\xf8</site>'''))
  3254. stream = io.StringIO()
  3255. tree.write(stream, encoding='unicode')
  3256. self.assertEqual(stream.getvalue(), '''<site>\xf8</site>''')
  3257. def test_read_from_bytesio(self):
  3258. tree = ET.ElementTree()
  3259. raw = io.BytesIO(b'''<?xml version="1.0"?><site></site>''')
  3260. tree.parse(raw)
  3261. self.assertEqual(tree.getroot().tag, 'site')
  3262. def test_write_to_bytesio(self):
  3263. tree = ET.ElementTree(ET.XML('''<site>\xf8</site>'''))
  3264. raw = io.BytesIO()
  3265. tree.write(raw)
  3266. self.assertEqual(raw.getvalue(), b'''<site>&#248;</site>''')
  3267. class dummy:
  3268. pass
  3269. def test_read_from_user_text_reader(self):
  3270. stream = io.StringIO('''<?xml version="1.0"?><site></site>''')
  3271. reader = self.dummy()
  3272. reader.read = stream.read
  3273. tree = ET.ElementTree()
  3274. tree.parse(reader)
  3275. self.assertEqual(tree.getroot().tag, 'site')
  3276. def test_write_to_user_text_writer(self):
  3277. tree = ET.ElementTree(ET.XML('''<site>\xf8</site>'''))
  3278. stream = io.StringIO()
  3279. writer = self.dummy()
  3280. writer.write = stream.write
  3281. tree.write(writer, encoding='unicode')
  3282. self.assertEqual(stream.getvalue(), '''<site>\xf8</site>''')
  3283. def test_read_from_user_binary_reader(self):
  3284. raw = io.BytesIO(b'''<?xml version="1.0"?><site></site>''')
  3285. reader = self.dummy()
  3286. reader.read = raw.read
  3287. tree = ET.ElementTree()
  3288. tree.parse(reader)
  3289. self.assertEqual(tree.getroot().tag, 'site')
  3290. tree = ET.ElementTree()
  3291. def test_write_to_user_binary_writer(self):
  3292. tree = ET.ElementTree(ET.XML('''<site>\xf8</site>'''))
  3293. raw = io.BytesIO()
  3294. writer = self.dummy()
  3295. writer.write = raw.write
  3296. tree.write(writer)
  3297. self.assertEqual(raw.getvalue(), b'''<site>&#248;</site>''')
  3298. def test_write_to_user_binary_writer_with_bom(self):
  3299. tree = ET.ElementTree(ET.XML('''<site />'''))
  3300. raw = io.BytesIO()
  3301. writer = self.dummy()
  3302. writer.write = raw.write
  3303. writer.seekable = lambda: True
  3304. writer.tell = raw.tell
  3305. tree.write(writer, encoding="utf-16")
  3306. self.assertEqual(raw.getvalue(),
  3307. '''<?xml version='1.0' encoding='utf-16'?>\n'''
  3308. '''<site />'''.encode("utf-16"))
  3309. def test_tostringlist_invariant(self):
  3310. root = ET.fromstring('<tag>foo</tag>')
  3311. self.assertEqual(
  3312. ET.tostring(root, 'unicode'),
  3313. ''.join(ET.tostringlist(root, 'unicode')))
  3314. self.assertEqual(
  3315. ET.tostring(root, 'utf-16'),
  3316. b''.join(ET.tostringlist(root, 'utf-16')))
  3317. def test_short_empty_elements(self):
  3318. root = ET.fromstring('<tag>a<x />b<y></y>c</tag>')
  3319. self.assertEqual(
  3320. ET.tostring(root, 'unicode'),
  3321. '<tag>a<x />b<y />c</tag>')
  3322. self.assertEqual(
  3323. ET.tostring(root, 'unicode', short_empty_elements=True),
  3324. '<tag>a<x />b<y />c</tag>')
  3325. self.assertEqual(
  3326. ET.tostring(root, 'unicode', short_empty_elements=False),
  3327. '<tag>a<x></x>b<y></y>c</tag>')
  3328. class ParseErrorTest(unittest.TestCase):
  3329. def test_subclass(self):
  3330. self.assertIsInstance(ET.ParseError(), SyntaxError)
  3331. def _get_error(self, s):
  3332. try:
  3333. ET.fromstring(s)
  3334. except ET.ParseError as e:
  3335. return e
  3336. def test_error_position(self):
  3337. self.assertEqual(self._get_error('foo').position, (1, 0))
  3338. self.assertEqual(self._get_error('<tag>&foo;</tag>').position, (1, 5))
  3339. self.assertEqual(self._get_error('foobar<').position, (1, 6))
  3340. def test_error_code(self):
  3341. import xml.parsers.expat.errors as ERRORS
  3342. self.assertEqual(self._get_error('foo').code,
  3343. ERRORS.codes[ERRORS.XML_ERROR_SYNTAX])
  class KeywordArgsTest(unittest.TestCase):
      # Keyword-argument handling in the ET.Element constructor and in the
      # find*() family of Element methods.

      def test_issue14818(self):
          root = ET.XML("<a>foo</a>")

          # Each lookup must give the same answer whether its arguments are
          # passed positionally or by keyword.
          by_position = root.find('a', None)
          by_keyword = root.find(path='a', namespaces=None)
          self.assertEqual(by_position, by_keyword)

          by_position = root.findtext('a', None, None)
          by_keyword = root.findtext(path='a', default=None, namespaces=None)
          self.assertEqual(by_position, by_keyword)

          by_position = root.findall('a', None)
          by_keyword = root.findall(path='a', namespaces=None)
          self.assertEqual(by_position, by_keyword)

          by_position = list(root.iterfind('a', None))
          by_keyword = list(root.iterfind(path='a', namespaces=None))
          self.assertEqual(by_position, by_keyword)

          # No attributes given: attrib is an empty dict.
          self.assertEqual(ET.Element('a').attrib, {})

          # Five spellings of "tag 'a' with href and id attributes".
          wanted = {'href': '#', 'id': 'foo'}
          candidates = [
              ET.Element('a', dict(href="#", id="foo")),
              ET.Element('a', attrib=dict(href="#", id="foo")),
              ET.Element('a', dict(href="#"), id="foo"),
              ET.Element('a', href="#", id="foo"),
              ET.Element('a', dict(href="#", id="foo"), href="#", id="foo"),
          ]
          for candidate in candidates:
              self.assertEqual(candidate.tag, 'a')
              self.assertEqual(candidate.attrib, wanted)

          child = ET.SubElement(candidates[0], 'foobar',
                                attrib={'key1': 'value1'})
          self.assertEqual(child.attrib['key1'], 'value1')

          # A string in place of the attrib dict is a TypeError, whether it
          # is passed positionally or by keyword.
          with self.assertRaisesRegex(TypeError, 'must be dict, not str'):
              ET.Element('a', "I'm not a dict")
          with self.assertRaisesRegex(TypeError, 'must be dict, not str'):
              ET.Element('a', attrib="I'm not a dict")
  3374. # --------------------------------------------------------------------
  class NoAcceleratorTest(unittest.TestCase):
      # Checks that pyET was loaded without the C accelerator.

      def setUp(self):
          if pyET:
              return
          raise unittest.SkipTest('only for the Python version')

      def test_correct_import_pyET(self):
          # A method written in Python is a types.FunctionType, whereas one
          # provided by _elementtree is a <class 'wrapper_descriptor'>.
          for cls in (pyET.Element, pyET.XMLParser):
              self.assertIsInstance(cls.__init__, types.FunctionType)
  3386. # --------------------------------------------------------------------
  def c14n_roundtrip(xml, **options):
      """Return *xml* as canonicalized by pyET.canonicalize().

      Keyword *options* are passed through to pyET.canonicalize() unchanged.
      """
      canonical = pyET.canonicalize(xml, **options)
      return canonical
  3389. class C14NTest(unittest.TestCase):
  # Tests for XML canonicalization: string round-trips through
  # c14n_roundtrip() and file-based runs through ET.canonicalize().
  # NOTE(review): this copy has lost its indentation and carries a listing
  # number at the start of every line; runs of spaces inside the string
  # literals below look collapsed as well -- confirm the literals against the
  # upstream file before relying on them.
  # maxDiff = None asks unittest not to truncate assertion diffs.
  3390. maxDiff = None
  3391. #
  3392. # simple roundtrip tests (from c14n.py)
  3393. def test_simple_roundtrip(self):
  # Each assertion passes one XML string to c14n_roundtrip() and compares the
  # returned text with the expected literal.
  3394. # Basics
  3395. self.assertEqual(c14n_roundtrip("<doc/>"), '<doc></doc>')
  3396. self.assertEqual(c14n_roundtrip("<doc xmlns='uri'/>"), # FIXME
  3397. '<doc xmlns="uri"></doc>')
  3398. self.assertEqual(c14n_roundtrip("<prefix:doc xmlns:prefix='uri'/>"),
  3399. '<prefix:doc xmlns:prefix="uri"></prefix:doc>')
  3400. self.assertEqual(c14n_roundtrip("<doc xmlns:prefix='uri'><prefix:bar/></doc>"),
  3401. '<doc><prefix:bar xmlns:prefix="uri"></prefix:bar></doc>')
  3402. self.assertEqual(c14n_roundtrip("<elem xmlns:wsu='http://docs.oasis-open.org/wss/2004/01/oasis-200401-wss-wssecurity-utility-1.0.xsd' xmlns:SOAP-ENV='http://schemas.xmlsoap.org/soap/envelope/' />"),
  3403. '<elem></elem>')
  3404. # C14N spec
  3405. self.assertEqual(c14n_roundtrip("<doc>Hello, world!<!-- Comment 1 --></doc>"),
  3406. '<doc>Hello, world!</doc>')
  3407. self.assertEqual(c14n_roundtrip("<value>&#x32;</value>"),
  3408. '<value>2</value>')
  3409. self.assertEqual(c14n_roundtrip('<compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>'),
  3410. '<compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute>')
  3411. self.assertEqual(c14n_roundtrip('''<compute expr='value>"0" &amp;&amp; value&lt;"10" ?"valid":"error"'>valid</compute>'''),
  3412. '<compute expr="value>&quot;0&quot; &amp;&amp; value&lt;&quot;10&quot; ?&quot;valid&quot;:&quot;error&quot;">valid</compute>')
  3413. self.assertEqual(c14n_roundtrip("<norm attr=' &apos; &#x20;&#13;&#xa;&#9; &apos; '/>"),
  3414. '<norm attr=" \' &#xD;&#xA;&#x9; \' "></norm>')
  3415. self.assertEqual(c14n_roundtrip("<normNames attr=' A &#x20;&#13;&#xa;&#9; B '/>"),
  3416. '<normNames attr=" A &#xD;&#xA;&#x9; B "></normNames>')
  3417. self.assertEqual(c14n_roundtrip("<normId id=' &apos; &#x20;&#13;&#xa;&#9; &apos; '/>"),
  3418. '<normId id=" \' &#xD;&#xA;&#x9; \' "></normId>')
  3419. # fragments from PJ's tests
  3420. #self.assertEqual(c14n_roundtrip("<doc xmlns:x='http://example.com/x' xmlns='http://example.com/default'><b y:a1='1' xmlns='http://example.com/default' a3='3' xmlns:y='http://example.com/y' y:a2='2'/></doc>"),
  3421. #'<doc xmlns:x="http://example.com/x"><b xmlns:y="http://example.com/y" a3="3" y:a1="1" y:a2="2"></b></doc>')
  3422. # Namespace issues
  # The three documents below are expected to come back unchanged.
  3423. xml = '<X xmlns="http://nps/a"><Y targets="abc,xyz"></Y></X>'
  3424. self.assertEqual(c14n_roundtrip(xml), xml)
  3425. xml = '<X xmlns="http://nps/a"><Y xmlns="http://nsp/b" targets="abc,xyz"></Y></X>'
  3426. self.assertEqual(c14n_roundtrip(xml), xml)
  3427. xml = '<X xmlns="http://nps/a"><Y xmlns:b="http://nsp/b" b:targets="abc,xyz"></Y></X>'
  3428. self.assertEqual(c14n_roundtrip(xml), xml)
  3429. def test_c14n_exclusion(self):
  # Canonicalizes one sample document under different combinations of the
  # strip_text, exclude_attrs and exclude_tags options.
  # NOTE(review): the expected strings of the calls made without strip_text
  # contain whitespace taken from the sample document, so they depend on the
  # indentation inside the triple-quoted literal -- confirm against upstream.
  3430. xml = textwrap.dedent("""\
  3431. <root xmlns:x="http://example.com/x">
  3432. <a x:attr="attrx">
  3433. <b>abtext</b>
  3434. </a>
  3435. <b>btext</b>
  3436. <c>
  3437. <x:d>dtext</x:d>
  3438. </c>
  3439. </root>
  3440. """)
  3441. self.assertEqual(
  3442. c14n_roundtrip(xml, strip_text=True),
  3443. '<root>'
  3444. '<a xmlns:x="http://example.com/x" x:attr="attrx"><b>abtext</b></a>'
  3445. '<b>btext</b>'
  3446. '<c><x:d xmlns:x="http://example.com/x">dtext</x:d></c>'
  3447. '</root>')
  3448. self.assertEqual(
  3449. c14n_roundtrip(xml, strip_text=True, exclude_attrs=['{http://example.com/x}attr']),
  3450. '<root>'
  3451. '<a><b>abtext</b></a>'
  3452. '<b>btext</b>'
  3453. '<c><x:d xmlns:x="http://example.com/x">dtext</x:d></c>'
  3454. '</root>')
  3455. self.assertEqual(
  3456. c14n_roundtrip(xml, strip_text=True, exclude_tags=['{http://example.com/x}d']),
  3457. '<root>'
  3458. '<a xmlns:x="http://example.com/x" x:attr="attrx"><b>abtext</b></a>'
  3459. '<b>btext</b>'
  3460. '<c></c>'
  3461. '</root>')
  3462. self.assertEqual(
  3463. c14n_roundtrip(xml, strip_text=True, exclude_attrs=['{http://example.com/x}attr'],
  3464. exclude_tags=['{http://example.com/x}d']),
  3465. '<root>'
  3466. '<a><b>abtext</b></a>'
  3467. '<b>btext</b>'
  3468. '<c></c>'
  3469. '</root>')
  3470. self.assertEqual(
  3471. c14n_roundtrip(xml, strip_text=True, exclude_tags=['a', 'b']),
  3472. '<root>'
  3473. '<c><x:d xmlns:x="http://example.com/x">dtext</x:d></c>'
  3474. '</root>')
  3475. self.assertEqual(
  3476. c14n_roundtrip(xml, exclude_tags=['a', 'b']),
  3477. '<root>\n'
  3478. ' \n'
  3479. ' \n'
  3480. ' <c>\n'
  3481. ' <x:d xmlns:x="http://example.com/x">dtext</x:d>\n'
  3482. ' </c>\n'
  3483. '</root>')
  3484. self.assertEqual(
  3485. c14n_roundtrip(xml, strip_text=True, exclude_tags=['{http://example.com/x}d', 'b']),
  3486. '<root>'
  3487. '<a xmlns:x="http://example.com/x" x:attr="attrx"></a>'
  3488. '<c></c>'
  3489. '</root>')
  3490. self.assertEqual(
  3491. c14n_roundtrip(xml, exclude_tags=['{http://example.com/x}d', 'b']),
  3492. '<root>\n'
  3493. ' <a xmlns:x="http://example.com/x" x:attr="attrx">\n'
  3494. ' \n'
  3495. ' </a>\n'
  3496. ' \n'
  3497. ' <c>\n'
  3498. ' \n'
  3499. ' </c>\n'
  3500. '</root>')
  3501. #
  3502. # basic method=c14n tests from the c14n 2.0 specification. uses
  3503. # test files under xmltestdata/c14n-20.
  3504. # note that this uses generated C14N versions of the standard ET.write
  3505. # output, not roundtripped C14N (see above).
  3506. def test_xml_c14n2(self):
  # Data-driven test: pairs every "in*" input file with its "out_*" expected
  # files and the "c14n*" option file named by the last "_"-separated part of
  # the output file name, then compares ET.canonicalize() output with the
  # expected file content.
  3507. datadir = findfile("c14n-20", subdir="xmltestdata")
  3508. full_path = partial(os.path.join, datadir)
  # Names of all .xml files in the data directory, sorted, with the four
  # character extension cut off.
  3509. files = [filename[:-4] for filename in sorted(os.listdir(datadir))
  3510. if filename.endswith('.xml')]
  3511. input_files = [
  3512. filename for filename in files
  3513. if filename.startswith('in')
  3514. ]
  # configs maps each option file name to a dict keyed by the local tag name of
  # each child of its root element; every value is a pair of
  # (stripped element text, element).
  3515. configs = {
  3516. filename: {
  3517. # <c14n2:PrefixRewrite>sequential</c14n2:PrefixRewrite>
  3518. option.tag.split('}')[-1]: ((option.text or '').strip(), option)
  3519. for option in ET.parse(full_path(filename) + ".xml").getroot()
  3520. }
  3521. for filename in files
  3522. if filename.startswith('c14n')
  3523. }
  # tests maps each input file name to a list of
  # (expected output file name, config dict) pairs.
  3524. tests = {
  3525. input_file: [
  3526. (filename, configs[filename.rsplit('_', 1)[-1]])
  3527. for filename in files
  3528. if filename.startswith(f'out_{input_file}_')
  3529. and filename.rsplit('_', 1)[-1] in configs
  3530. ]
  3531. for input_file in input_files
  3532. }
  3533. # Make sure we found all test cases.
  3534. self.assertEqual(30, len([
  3535. output_file for output_files in tests.values()
  3536. for output_file in output_files]))
  # get_option returns the first item of the pair stored under option_name, or
  # default when the option is absent from config.
  3537. def get_option(config, option_name, default=None):
  3538. return config.get(option_name, (default, ()))[0]
  3539. for input_file, output_files in tests.items():
  3540. for output_file, config in output_files:
  # keep_comments is later passed as with_comments; it is true exactly when the
  # IgnoreComments option has the text 'true' (the inline remark marks this
  # mapping as deliberate).
  3541. keep_comments = get_option(
  3542. config, 'IgnoreComments') == 'true' # no, it's right :)
  3543. strip_text = get_option(
  3544. config, 'TrimTextNodes') == 'true'
  3545. rewrite_prefixes = get_option(
  3546. config, 'PrefixRewrite') == 'sequential'
  # Collect "{NS}Name" strings from the QualifiedAttr and Element children of
  # the QNameAware option, when that option is present.
  3547. if 'QNameAware' in config:
  3548. qattrs = [
  3549. f"{{{el.get('NS')}}}{el.get('Name')}"
  3550. for el in config['QNameAware'][1].findall(
  3551. '{http://www.w3.org/2010/xml-c14n2}QualifiedAttr')
  3552. ]
  3553. qtags = [
  3554. f"{{{el.get('NS')}}}{el.get('Name')}"
  3555. for el in config['QNameAware'][1].findall(
  3556. '{http://www.w3.org/2010/xml-c14n2}Element')
  3557. ]
  3558. else:
  3559. qtags = qattrs = None
  3560. # Build subtest description from config.
  3561. config_descr = ','.join(
  3562. f"{name}={value or ','.join(c.tag.split('}')[-1] for c in children)}"
  3563. for name, (value, children) in sorted(config.items())
  3564. )
  3565. with self.subTest(f"{output_file}({config_descr})"):
  # Skip the combinations whose skip messages below describe them as not
  # supported.
  3566. if input_file == 'inNsRedecl' and not rewrite_prefixes:
  3567. self.skipTest(
  3568. f"Redeclared namespace handling is not supported in {output_file}")
  3569. if input_file == 'inNsSuperfluous' and not rewrite_prefixes:
  3570. self.skipTest(
  3571. f"Redeclared namespace handling is not supported in {output_file}")
  3572. if 'QNameAware' in config and config['QNameAware'][1].find(
  3573. '{http://www.w3.org/2010/xml-c14n2}XPathElement') is not None:
  3574. self.skipTest(
  3575. f"QName rewriting in XPath text is not supported in {output_file}")
  3576. f = full_path(input_file + ".xml")
  3577. if input_file == 'inC14N5':
  3578. # Hack: avoid setting up external entity resolution in the parser.
  # The b'&ent2;' reference in the input is replaced with the bytes of
  # world.txt and f is rebound to an in-memory io.BytesIO holding the result.
  3579. with open(full_path('world.txt'), 'rb') as entity_file:
  3580. with open(f, 'rb') as f:
  3581. f = io.BytesIO(f.read().replace(b'&ent2;', entity_file.read()))
  3582. text = ET.canonicalize(
  3583. from_file=f,
  3584. with_comments=keep_comments,
  3585. strip_text=strip_text,
  3586. rewrite_prefixes=rewrite_prefixes,
  3587. qname_aware_tags=qtags, qname_aware_attrs=qattrs)
  3588. with open(full_path(output_file + ".xml"), 'r', encoding='utf8') as f:
  3589. expected = f.read()
  3590. if input_file == 'inC14N3':
  3591. # FIXME: cET resolves default attributes but ET does not!
  # ' attr="default"' is removed from both sides before comparing.
  3592. expected = expected.replace(' attr="default"', '')
  3593. text = text.replace(' attr="default"', '')
  3594. self.assertEqual(expected, text)
  3595. # --------------------------------------------------------------------
  def test_main(module=None):
      """Run the ElementTree test classes against *module*.

      Without *module*, the pure-Python ElementTree (loaded with _elementtree
      blocked) is the module under test; otherwise the given module is.  The
      module globals ET and pyET are set for the duration of the run and reset
      to None afterwards.
      """
      global ET, pyET

      pyET = import_fresh_module('xml.etree.ElementTree',
                                 blocked=['_elementtree'])
      ET = pyET if module is None else module

      suites = [
          ModuleTest,
          ElementSlicingTest,
          BasicElementTest,
          BadElementTest,
          BadElementPathTest,
          ElementTreeTest,
          IOTest,
          ParseErrorTest,
          XIncludeTest,
          ElementTreeTypeTest,
          ElementFindTest,
          ElementIterTest,
          TreeBuilderTest,
          XMLParserTest,
          XMLPullParserTest,
          BugsTest,
          KeywordArgsTest,
          C14NTest,
      ]

      # NoAcceleratorTest is added only when the module under test is not
      # pyET itself.  skipUnless cannot be used for this, because pyET is
      # filled in only after the module is loaded.
      if ET is not pyET:
          suites.append(NoAcceleratorTest)

      # Snapshot the default namespace mapping, and let the run work on a
      # copy of the ElementPath cache (which should be empty).
      from xml.etree import ElementPath
      namespace_map = ET.register_namespace._namespace_map
      saved_namespaces = namespace_map.copy()
      saved_cache = ElementPath._cache
      ElementPath._cache = saved_cache.copy()

      # Align the Comment/PI factories when the module offers the hook.
      saved_factories = (ET._set_factories(ET.Comment, ET.PI)
                         if hasattr(ET, '_set_factories') else None)

      try:
          support.run_unittest(*suites)
      finally:
          from xml.etree import ElementPath
          # Put the namespace mapping and the path cache back.
          namespace_map.clear()
          namespace_map.update(saved_namespaces)
          ElementPath._cache = saved_cache
          if saved_factories is not None:
              ET._set_factories(*saved_factories)
          # don't interfere with subsequent tests
          ET = None
          pyET = None
  # Script entry point: test_main() without a module runs the Python ET tests.
  if __name__ == "__main__":
      test_main()