test_tokenize.py 99 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432243324342435243624372438243924402441244224432444244524462447244824492450245124522453245424552456245724582459246024612462246324642465246624672468246924702471247224732474247524762477247824792480248124822483248424852486248724882489249024912492249324942495249624972498249925002501250225032504250525062507250825092510251125122513251425152516251725182519252025212522252325242525252625272528252925302531253225332534253525362537253825392540254125422543254425452546254725482549255025512552255325542555255625572558255925602561256225632564256525662567256825692570257125722573257425752576257725782579258025812582258325842585258625872588258925902591259225932594259525962597259825992600260126022603260426052606260726082609261026112612261326142615261626172618261926202621262226232624262526262627262826292630263126322633263426352636263726382639264026412642264326442645264626472648264926502651
  1. from test import support
  2. from test.support import os_helper
  3. from tokenize import (tokenize, _tokenize, untokenize, NUMBER, NAME, OP,
  4. STRING, ENDMARKER, ENCODING, tok_name, detect_encoding,
  5. open as tokenize_open, Untokenizer, generate_tokens,
  6. NEWLINE, _generate_tokens_from_c_tokenizer, DEDENT)
  7. from io import BytesIO, StringIO
  8. import unittest
  9. from textwrap import dedent
  10. from unittest import TestCase, mock
  11. from test.test_grammar import (VALID_UNDERSCORE_LITERALS,
  12. INVALID_UNDERSCORE_LITERALS)
  13. from test.support import os_helper
  14. from test.support.script_helper import run_test_script, make_script
  15. import os
  16. import token
  17. # Converts a source string into a list of textual representation
  18. # of the tokens such as:
  19. # ` NAME 'if' (1, 0) (1, 2)`
  20. # to make writing tests easier.
  21. def stringify_tokens_from_source(token_generator, source_string):
  22. result = []
  23. num_lines = len(source_string.splitlines())
  24. missing_trailing_nl = source_string[-1] not in '\r\n'
  25. for type, token, start, end, line in token_generator:
  26. if type == ENDMARKER:
  27. break
  28. # Ignore the new line on the last line if the input lacks one
  29. if missing_trailing_nl and type == NEWLINE and end[0] == num_lines:
  30. continue
  31. type = tok_name[type]
  32. result.append(f" {type:10} {token!r:13} {start} {end}")
  33. return result
  34. class TokenizeTest(TestCase):
  35. # Tests for the tokenize module.
  36. # The tests can be really simple. Given a small fragment of source
  37. # code, print out a table with tokens. The ENDMARKER, ENCODING and
  38. # final NEWLINE are omitted for brevity.
  39. def check_tokenize(self, s, expected):
  40. # Format the tokens in s in a table format.
  41. # The ENDMARKER and final NEWLINE are omitted.
  42. f = BytesIO(s.encode('utf-8'))
  43. result = stringify_tokens_from_source(tokenize(f.readline), s)
  44. self.assertEqual(result,
  45. [" ENCODING 'utf-8' (0, 0) (0, 0)"] +
  46. expected.rstrip().splitlines())
  47. def test_implicit_newline(self):
  48. # Make sure that the tokenizer puts in an implicit NEWLINE
  49. # when the input lacks a trailing new line.
  50. f = BytesIO("x".encode('utf-8'))
  51. tokens = list(tokenize(f.readline))
  52. self.assertEqual(tokens[-2].type, NEWLINE)
  53. self.assertEqual(tokens[-1].type, ENDMARKER)
  54. def test_basic(self):
  55. self.check_tokenize("1 + 1", """\
  56. NUMBER '1' (1, 0) (1, 1)
  57. OP '+' (1, 2) (1, 3)
  58. NUMBER '1' (1, 4) (1, 5)
  59. """)
  60. self.check_tokenize("if False:\n"
  61. " # NL\n"
  62. " \n"
  63. " True = False # NEWLINE\n", """\
  64. NAME 'if' (1, 0) (1, 2)
  65. NAME 'False' (1, 3) (1, 8)
  66. OP ':' (1, 8) (1, 9)
  67. NEWLINE '\\n' (1, 9) (1, 10)
  68. COMMENT '# NL' (2, 4) (2, 8)
  69. NL '\\n' (2, 8) (2, 9)
  70. NL '\\n' (3, 4) (3, 5)
  71. INDENT ' ' (4, 0) (4, 4)
  72. NAME 'True' (4, 4) (4, 8)
  73. OP '=' (4, 9) (4, 10)
  74. NAME 'False' (4, 11) (4, 16)
  75. COMMENT '# NEWLINE' (4, 17) (4, 26)
  76. NEWLINE '\\n' (4, 26) (4, 27)
  77. DEDENT '' (5, 0) (5, 0)
  78. """)
  79. indent_error_file = b"""\
  80. def k(x):
  81. x += 2
  82. x += 5
  83. """
  84. readline = BytesIO(indent_error_file).readline
  85. with self.assertRaisesRegex(IndentationError,
  86. "unindent does not match any "
  87. "outer indentation level"):
  88. for tok in tokenize(readline):
  89. pass
    def test_int(self):
        """Integer literals in hex, binary and octal bases with operators."""
        # Ordinary integers and binary operators
        self.check_tokenize("0xff <= 255", """\
NUMBER '0xff' (1, 0) (1, 4)
OP '<=' (1, 5) (1, 7)
NUMBER '255' (1, 8) (1, 11)
""")
        self.check_tokenize("0b10 <= 255", """\
NUMBER '0b10' (1, 0) (1, 4)
OP '<=' (1, 5) (1, 7)
NUMBER '255' (1, 8) (1, 11)
""")
        # Octal with both lowercase and uppercase base prefix.
        self.check_tokenize("0o123 <= 0O123", """\
NUMBER '0o123' (1, 0) (1, 5)
OP '<=' (1, 6) (1, 8)
NUMBER '0O123' (1, 9) (1, 14)
""")
        # Unary ~ binds to the following number, emitted as a separate OP.
        self.check_tokenize("1234567 > ~0x15", """\
NUMBER '1234567' (1, 0) (1, 7)
OP '>' (1, 8) (1, 9)
OP '~' (1, 10) (1, 11)
NUMBER '0x15' (1, 11) (1, 15)
""")
        self.check_tokenize("2134568 != 1231515", """\
NUMBER '2134568' (1, 0) (1, 7)
OP '!=' (1, 8) (1, 10)
NUMBER '1231515' (1, 11) (1, 18)
""")
        # A leading minus is tokenized as OP, not part of the NUMBER.
        self.check_tokenize("(-124561-1) & 200000000", """\
OP '(' (1, 0) (1, 1)
OP '-' (1, 1) (1, 2)
NUMBER '124561' (1, 2) (1, 8)
OP '-' (1, 8) (1, 9)
NUMBER '1' (1, 9) (1, 10)
OP ')' (1, 10) (1, 11)
OP '&' (1, 12) (1, 13)
NUMBER '200000000' (1, 14) (1, 23)
""")
        self.check_tokenize("0xdeadbeef != -1", """\
NUMBER '0xdeadbeef' (1, 0) (1, 10)
OP '!=' (1, 11) (1, 13)
OP '-' (1, 14) (1, 15)
NUMBER '1' (1, 15) (1, 16)
""")
        self.check_tokenize("0xdeadc0de & 12345", """\
NUMBER '0xdeadc0de' (1, 0) (1, 10)
OP '&' (1, 11) (1, 12)
NUMBER '12345' (1, 13) (1, 18)
""")
        self.check_tokenize("0xFF & 0x15 | 1234", """\
NUMBER '0xFF' (1, 0) (1, 4)
OP '&' (1, 5) (1, 6)
NUMBER '0x15' (1, 7) (1, 11)
OP '|' (1, 12) (1, 13)
NUMBER '1234' (1, 14) (1, 18)
""")
    def test_long(self):
        """Arbitrarily long integer literals are single NUMBER tokens."""
        # Long integers
        self.check_tokenize("x = 0", """\
NAME 'x' (1, 0) (1, 1)
OP '=' (1, 2) (1, 3)
NUMBER '0' (1, 4) (1, 5)
""")
        self.check_tokenize("x = 0xfffffffffff", """\
NAME 'x' (1, 0) (1, 1)
OP '=' (1, 2) (1, 3)
NUMBER '0xfffffffffff' (1, 4) (1, 17)
""")
        self.check_tokenize("x = 123141242151251616110", """\
NAME 'x' (1, 0) (1, 1)
OP '=' (1, 2) (1, 3)
NUMBER '123141242151251616110' (1, 4) (1, 25)
""")
        # The sign is a separate OP token preceding the NUMBER.
        self.check_tokenize("x = -15921590215012591", """\
NAME 'x' (1, 0) (1, 1)
OP '=' (1, 2) (1, 3)
OP '-' (1, 4) (1, 5)
NUMBER '15921590215012591' (1, 5) (1, 22)
""")
    def test_float(self):
        """Float literals: bare point, leading point, and exponent forms."""
        # Floating point numbers
        self.check_tokenize("x = 3.14159", """\
NAME 'x' (1, 0) (1, 1)
OP '=' (1, 2) (1, 3)
NUMBER '3.14159' (1, 4) (1, 11)
""")
        self.check_tokenize("x = 314159.", """\
NAME 'x' (1, 0) (1, 1)
OP '=' (1, 2) (1, 3)
NUMBER '314159.' (1, 4) (1, 11)
""")
        self.check_tokenize("x = .314159", """\
NAME 'x' (1, 0) (1, 1)
OP '=' (1, 2) (1, 3)
NUMBER '.314159' (1, 4) (1, 11)
""")
        self.check_tokenize("x = 3e14159", """\
NAME 'x' (1, 0) (1, 1)
OP '=' (1, 2) (1, 3)
NUMBER '3e14159' (1, 4) (1, 11)
""")
        self.check_tokenize("x = 3E123", """\
NAME 'x' (1, 0) (1, 1)
OP '=' (1, 2) (1, 3)
NUMBER '3E123' (1, 4) (1, 9)
""")
        # A signed exponent stays inside the single NUMBER token.
        self.check_tokenize("x+y = 3e-1230", """\
NAME 'x' (1, 0) (1, 1)
OP '+' (1, 1) (1, 2)
NAME 'y' (1, 2) (1, 3)
OP '=' (1, 4) (1, 5)
NUMBER '3e-1230' (1, 6) (1, 13)
""")
        self.check_tokenize("x = 3.14e159", """\
NAME 'x' (1, 0) (1, 1)
OP '=' (1, 2) (1, 3)
NUMBER '3.14e159' (1, 4) (1, 12)
""")
  208. def test_underscore_literals(self):
  209. def number_token(s):
  210. f = BytesIO(s.encode('utf-8'))
  211. for toktype, token, start, end, line in tokenize(f.readline):
  212. if toktype == NUMBER:
  213. return token
  214. return 'invalid token'
  215. for lit in VALID_UNDERSCORE_LITERALS:
  216. if '(' in lit:
  217. # this won't work with compound complex inputs
  218. continue
  219. self.assertEqual(number_token(lit), lit)
  220. for lit in INVALID_UNDERSCORE_LITERALS:
  221. self.assertNotEqual(number_token(lit), lit)
    def test_string(self):
        """String literals: quoting styles, all prefixes, continuations."""
        # String literals
        self.check_tokenize("x = ''; y = \"\"", """\
NAME 'x' (1, 0) (1, 1)
OP '=' (1, 2) (1, 3)
STRING "''" (1, 4) (1, 6)
OP ';' (1, 6) (1, 7)
NAME 'y' (1, 8) (1, 9)
OP '=' (1, 10) (1, 11)
STRING '""' (1, 12) (1, 14)
""")
        # Quote characters embedded in the opposite quoting style.
        self.check_tokenize("x = '\"'; y = \"'\"", """\
NAME 'x' (1, 0) (1, 1)
OP '=' (1, 2) (1, 3)
STRING '\\'"\\'' (1, 4) (1, 7)
OP ';' (1, 7) (1, 8)
NAME 'y' (1, 9) (1, 10)
OP '=' (1, 11) (1, 12)
STRING '"\\'"' (1, 13) (1, 16)
""")
        # Adjacent quotes close/open strings rather than nesting.
        self.check_tokenize("x = \"doesn't \"shrink\", does it\"", """\
NAME 'x' (1, 0) (1, 1)
OP '=' (1, 2) (1, 3)
STRING '"doesn\\'t "' (1, 4) (1, 14)
NAME 'shrink' (1, 14) (1, 20)
STRING '", does it"' (1, 20) (1, 31)
""")
        self.check_tokenize("x = 'abc' + 'ABC'", """\
NAME 'x' (1, 0) (1, 1)
OP '=' (1, 2) (1, 3)
STRING "'abc'" (1, 4) (1, 9)
OP '+' (1, 10) (1, 11)
STRING "'ABC'" (1, 12) (1, 17)
""")
        self.check_tokenize('y = "ABC" + "ABC"', """\
NAME 'y' (1, 0) (1, 1)
OP '=' (1, 2) (1, 3)
STRING '"ABC"' (1, 4) (1, 9)
OP '+' (1, 10) (1, 11)
STRING '"ABC"' (1, 12) (1, 17)
""")
        # Raw-string prefixes, both cases, single quotes.
        self.check_tokenize("x = r'abc' + r'ABC' + R'ABC' + R'ABC'", """\
NAME 'x' (1, 0) (1, 1)
OP '=' (1, 2) (1, 3)
STRING "r'abc'" (1, 4) (1, 10)
OP '+' (1, 11) (1, 12)
STRING "r'ABC'" (1, 13) (1, 19)
OP '+' (1, 20) (1, 21)
STRING "R'ABC'" (1, 22) (1, 28)
OP '+' (1, 29) (1, 30)
STRING "R'ABC'" (1, 31) (1, 37)
""")
        self.check_tokenize('y = r"abc" + r"ABC" + R"ABC" + R"ABC"', """\
NAME 'y' (1, 0) (1, 1)
OP '=' (1, 2) (1, 3)
STRING 'r"abc"' (1, 4) (1, 10)
OP '+' (1, 11) (1, 12)
STRING 'r"ABC"' (1, 13) (1, 19)
OP '+' (1, 20) (1, 21)
STRING 'R"ABC"' (1, 22) (1, 28)
OP '+' (1, 29) (1, 30)
STRING 'R"ABC"' (1, 31) (1, 37)
""")
        # Legacy unicode prefixes.
        self.check_tokenize("u'abc' + U'abc'", """\
STRING "u'abc'" (1, 0) (1, 6)
OP '+' (1, 7) (1, 8)
STRING "U'abc'" (1, 9) (1, 15)
""")
        self.check_tokenize('u"abc" + U"abc"', """\
STRING 'u"abc"' (1, 0) (1, 6)
OP '+' (1, 7) (1, 8)
STRING 'U"abc"' (1, 9) (1, 15)
""")
        # Bytes prefixes.
        self.check_tokenize("b'abc' + B'abc'", """\
STRING "b'abc'" (1, 0) (1, 6)
OP '+' (1, 7) (1, 8)
STRING "B'abc'" (1, 9) (1, 15)
""")
        self.check_tokenize('b"abc" + B"abc"', """\
STRING 'b"abc"' (1, 0) (1, 6)
OP '+' (1, 7) (1, 8)
STRING 'B"abc"' (1, 9) (1, 15)
""")
        # Raw-bytes prefixes in every case combination, both orders.
        self.check_tokenize("br'abc' + bR'abc' + Br'abc' + BR'abc'", """\
STRING "br'abc'" (1, 0) (1, 7)
OP '+' (1, 8) (1, 9)
STRING "bR'abc'" (1, 10) (1, 17)
OP '+' (1, 18) (1, 19)
STRING "Br'abc'" (1, 20) (1, 27)
OP '+' (1, 28) (1, 29)
STRING "BR'abc'" (1, 30) (1, 37)
""")
        self.check_tokenize('br"abc" + bR"abc" + Br"abc" + BR"abc"', """\
STRING 'br"abc"' (1, 0) (1, 7)
OP '+' (1, 8) (1, 9)
STRING 'bR"abc"' (1, 10) (1, 17)
OP '+' (1, 18) (1, 19)
STRING 'Br"abc"' (1, 20) (1, 27)
OP '+' (1, 28) (1, 29)
STRING 'BR"abc"' (1, 30) (1, 37)
""")
        self.check_tokenize("rb'abc' + rB'abc' + Rb'abc' + RB'abc'", """\
STRING "rb'abc'" (1, 0) (1, 7)
OP '+' (1, 8) (1, 9)
STRING "rB'abc'" (1, 10) (1, 17)
OP '+' (1, 18) (1, 19)
STRING "Rb'abc'" (1, 20) (1, 27)
OP '+' (1, 28) (1, 29)
STRING "RB'abc'" (1, 30) (1, 37)
""")
        self.check_tokenize('rb"abc" + rB"abc" + Rb"abc" + RB"abc"', """\
STRING 'rb"abc"' (1, 0) (1, 7)
OP '+' (1, 8) (1, 9)
STRING 'rB"abc"' (1, 10) (1, 17)
OP '+' (1, 18) (1, 19)
STRING 'Rb"abc"' (1, 20) (1, 27)
OP '+' (1, 28) (1, 29)
STRING 'RB"abc"' (1, 30) (1, 37)
""")
        # Check 0, 1, and 2 character string prefixes.
        self.check_tokenize(r'"a\
de\
fg"', """\
STRING '"a\\\\\\nde\\\\\\nfg"\' (1, 0) (3, 3)
""")
        self.check_tokenize(r'u"a\
de"', """\
STRING 'u"a\\\\\\nde"\' (1, 0) (2, 3)
""")
        self.check_tokenize(r'rb"a\
d"', """\
STRING 'rb"a\\\\\\nd"\' (1, 0) (2, 2)
""")
        self.check_tokenize(r'"""a\
b"""', """\
STRING '\"\""a\\\\\\nb\"\""' (1, 0) (2, 4)
""")
        self.check_tokenize(r'u"""a\
b"""', """\
STRING 'u\"\""a\\\\\\nb\"\""' (1, 0) (2, 4)
""")
        self.check_tokenize(r'rb"""a\
b\
c"""', """\
STRING 'rb"\""a\\\\\\nb\\\\\\nc"\""' (1, 0) (3, 4)
""")
        # f-string prefixes; the whole f-string is one STRING token here.
        self.check_tokenize('f"abc"', """\
STRING 'f"abc"' (1, 0) (1, 6)
""")
        self.check_tokenize('fR"a{b}c"', """\
STRING 'fR"a{b}c"' (1, 0) (1, 9)
""")
        self.check_tokenize('f"""abc"""', """\
STRING 'f\"\"\"abc\"\"\"' (1, 0) (1, 10)
""")
        self.check_tokenize(r'f"abc\
def"', """\
STRING 'f"abc\\\\\\ndef"' (1, 0) (2, 4)
""")
        self.check_tokenize(r'Rf"abc\
def"', """\
STRING 'Rf"abc\\\\\\ndef"' (1, 0) (2, 4)
""")
    def test_function(self):
        """Function headers: defaults, *args/**kwargs, and annotations."""
        self.check_tokenize("def d22(a, b, c=2, d=2, *k): pass", """\
NAME 'def' (1, 0) (1, 3)
NAME 'd22' (1, 4) (1, 7)
OP '(' (1, 7) (1, 8)
NAME 'a' (1, 8) (1, 9)
OP ',' (1, 9) (1, 10)
NAME 'b' (1, 11) (1, 12)
OP ',' (1, 12) (1, 13)
NAME 'c' (1, 14) (1, 15)
OP '=' (1, 15) (1, 16)
NUMBER '2' (1, 16) (1, 17)
OP ',' (1, 17) (1, 18)
NAME 'd' (1, 19) (1, 20)
OP '=' (1, 20) (1, 21)
NUMBER '2' (1, 21) (1, 22)
OP ',' (1, 22) (1, 23)
OP '*' (1, 24) (1, 25)
NAME 'k' (1, 25) (1, 26)
OP ')' (1, 26) (1, 27)
OP ':' (1, 27) (1, 28)
NAME 'pass' (1, 29) (1, 33)
""")
        # ** is a single two-character OP token.
        self.check_tokenize("def d01v_(a=1, *k, **w): pass", """\
NAME 'def' (1, 0) (1, 3)
NAME 'd01v_' (1, 4) (1, 9)
OP '(' (1, 9) (1, 10)
NAME 'a' (1, 10) (1, 11)
OP '=' (1, 11) (1, 12)
NUMBER '1' (1, 12) (1, 13)
OP ',' (1, 13) (1, 14)
OP '*' (1, 15) (1, 16)
NAME 'k' (1, 16) (1, 17)
OP ',' (1, 17) (1, 18)
OP '**' (1, 19) (1, 21)
NAME 'w' (1, 21) (1, 22)
OP ')' (1, 22) (1, 23)
OP ':' (1, 23) (1, 24)
NAME 'pass' (1, 25) (1, 29)
""")
        # Parameter and return annotations; -> is one OP token.
        self.check_tokenize("def d23(a: str, b: int=3) -> int: pass", """\
NAME 'def' (1, 0) (1, 3)
NAME 'd23' (1, 4) (1, 7)
OP '(' (1, 7) (1, 8)
NAME 'a' (1, 8) (1, 9)
OP ':' (1, 9) (1, 10)
NAME 'str' (1, 11) (1, 14)
OP ',' (1, 14) (1, 15)
NAME 'b' (1, 16) (1, 17)
OP ':' (1, 17) (1, 18)
NAME 'int' (1, 19) (1, 22)
OP '=' (1, 22) (1, 23)
NUMBER '3' (1, 23) (1, 24)
OP ')' (1, 24) (1, 25)
OP '->' (1, 26) (1, 28)
NAME 'int' (1, 29) (1, 32)
OP ':' (1, 32) (1, 33)
NAME 'pass' (1, 34) (1, 38)
""")
    def test_comparison(self):
        """Comparison operators plus membership/identity keyword operators."""
        # Comparison
        self.check_tokenize("if 1 < 1 > 1 == 1 >= 5 <= 0x15 <= 0x12 != "
                            "1 and 5 in 1 not in 1 is 1 or 5 is not 1: pass", """\
NAME 'if' (1, 0) (1, 2)
NUMBER '1' (1, 3) (1, 4)
OP '<' (1, 5) (1, 6)
NUMBER '1' (1, 7) (1, 8)
OP '>' (1, 9) (1, 10)
NUMBER '1' (1, 11) (1, 12)
OP '==' (1, 13) (1, 15)
NUMBER '1' (1, 16) (1, 17)
OP '>=' (1, 18) (1, 20)
NUMBER '5' (1, 21) (1, 22)
OP '<=' (1, 23) (1, 25)
NUMBER '0x15' (1, 26) (1, 30)
OP '<=' (1, 31) (1, 33)
NUMBER '0x12' (1, 34) (1, 38)
OP '!=' (1, 39) (1, 41)
NUMBER '1' (1, 42) (1, 43)
NAME 'and' (1, 44) (1, 47)
NUMBER '5' (1, 48) (1, 49)
NAME 'in' (1, 50) (1, 52)
NUMBER '1' (1, 53) (1, 54)
NAME 'not' (1, 55) (1, 58)
NAME 'in' (1, 59) (1, 61)
NUMBER '1' (1, 62) (1, 63)
NAME 'is' (1, 64) (1, 66)
NUMBER '1' (1, 67) (1, 68)
NAME 'or' (1, 69) (1, 71)
NUMBER '5' (1, 72) (1, 73)
NAME 'is' (1, 74) (1, 76)
NAME 'not' (1, 77) (1, 80)
NUMBER '1' (1, 81) (1, 82)
OP ':' (1, 82) (1, 83)
NAME 'pass' (1, 84) (1, 88)
""")
    def test_shift(self):
        """Bit-shift operators << and >> are single OP tokens."""
        # Shift
        self.check_tokenize("x = 1 << 1 >> 5", """\
NAME 'x' (1, 0) (1, 1)
OP '=' (1, 2) (1, 3)
NUMBER '1' (1, 4) (1, 5)
OP '<<' (1, 6) (1, 8)
NUMBER '1' (1, 9) (1, 10)
OP '>>' (1, 11) (1, 13)
NUMBER '5' (1, 14) (1, 15)
""")
    def test_additive(self):
        """Additive operators mixed with names and a subscript."""
        # Additive
        self.check_tokenize("x = 1 - y + 15 - 1 + 0x124 + z + a[5]", """\
NAME 'x' (1, 0) (1, 1)
OP '=' (1, 2) (1, 3)
NUMBER '1' (1, 4) (1, 5)
OP '-' (1, 6) (1, 7)
NAME 'y' (1, 8) (1, 9)
OP '+' (1, 10) (1, 11)
NUMBER '15' (1, 12) (1, 14)
OP '-' (1, 15) (1, 16)
NUMBER '1' (1, 17) (1, 18)
OP '+' (1, 19) (1, 20)
NUMBER '0x124' (1, 21) (1, 26)
OP '+' (1, 27) (1, 28)
NAME 'z' (1, 29) (1, 30)
OP '+' (1, 31) (1, 32)
NAME 'a' (1, 33) (1, 34)
OP '[' (1, 34) (1, 35)
NUMBER '5' (1, 35) (1, 36)
OP ']' (1, 36) (1, 37)
""")
    def test_multiplicative(self):
        """Multiplicative operators, including // and the @ matrix operator."""
        # Multiplicative
        self.check_tokenize("x = 1//1*1/5*12%0x12@42", """\
NAME 'x' (1, 0) (1, 1)
OP '=' (1, 2) (1, 3)
NUMBER '1' (1, 4) (1, 5)
OP '//' (1, 5) (1, 7)
NUMBER '1' (1, 7) (1, 8)
OP '*' (1, 8) (1, 9)
NUMBER '1' (1, 9) (1, 10)
OP '/' (1, 10) (1, 11)
NUMBER '5' (1, 11) (1, 12)
OP '*' (1, 12) (1, 13)
NUMBER '12' (1, 13) (1, 15)
OP '%' (1, 15) (1, 16)
NUMBER '0x12' (1, 16) (1, 20)
OP '@' (1, 20) (1, 21)
NUMBER '42' (1, 21) (1, 23)
""")
    def test_unary(self):
        """Unary operators: each prefix sign is its own OP token."""
        # Unary
        self.check_tokenize("~1 ^ 1 & 1 |1 ^ -1", """\
OP '~' (1, 0) (1, 1)
NUMBER '1' (1, 1) (1, 2)
OP '^' (1, 3) (1, 4)
NUMBER '1' (1, 5) (1, 6)
OP '&' (1, 7) (1, 8)
NUMBER '1' (1, 9) (1, 10)
OP '|' (1, 11) (1, 12)
NUMBER '1' (1, 12) (1, 13)
OP '^' (1, 14) (1, 15)
OP '-' (1, 16) (1, 17)
NUMBER '1' (1, 17) (1, 18)
""")
        # Stacked unary minuses each produce a separate OP token.
        self.check_tokenize("-1*1/1+1*1//1 - ---1**1", """\
OP '-' (1, 0) (1, 1)
NUMBER '1' (1, 1) (1, 2)
OP '*' (1, 2) (1, 3)
NUMBER '1' (1, 3) (1, 4)
OP '/' (1, 4) (1, 5)
NUMBER '1' (1, 5) (1, 6)
OP '+' (1, 6) (1, 7)
NUMBER '1' (1, 7) (1, 8)
OP '*' (1, 8) (1, 9)
NUMBER '1' (1, 9) (1, 10)
OP '//' (1, 10) (1, 12)
NUMBER '1' (1, 12) (1, 13)
OP '-' (1, 14) (1, 15)
OP '-' (1, 16) (1, 17)
OP '-' (1, 17) (1, 18)
OP '-' (1, 18) (1, 19)
NUMBER '1' (1, 19) (1, 20)
OP '**' (1, 20) (1, 22)
NUMBER '1' (1, 22) (1, 23)
""")
    def test_selector(self):
        """Attribute access, subscripting and calls across two lines."""
        # Selector
        self.check_tokenize("import sys, time\nx = sys.modules['time'].time()", """\
NAME 'import' (1, 0) (1, 6)
NAME 'sys' (1, 7) (1, 10)
OP ',' (1, 10) (1, 11)
NAME 'time' (1, 12) (1, 16)
NEWLINE '\\n' (1, 16) (1, 17)
NAME 'x' (2, 0) (2, 1)
OP '=' (2, 2) (2, 3)
NAME 'sys' (2, 4) (2, 7)
OP '.' (2, 7) (2, 8)
NAME 'modules' (2, 8) (2, 15)
OP '[' (2, 15) (2, 16)
STRING "'time'" (2, 16) (2, 22)
OP ']' (2, 22) (2, 23)
OP '.' (2, 23) (2, 24)
NAME 'time' (2, 24) (2, 28)
OP '(' (2, 28) (2, 29)
OP ')' (2, 29) (2, 30)
""")
    def test_method(self):
        """A decorator line followed by a def on the next line."""
        # Methods
        self.check_tokenize("@staticmethod\ndef foo(x,y): pass", """\
OP '@' (1, 0) (1, 1)
NAME 'staticmethod' (1, 1) (1, 13)
NEWLINE '\\n' (1, 13) (1, 14)
NAME 'def' (2, 0) (2, 3)
NAME 'foo' (2, 4) (2, 7)
OP '(' (2, 7) (2, 8)
NAME 'x' (2, 8) (2, 9)
OP ',' (2, 9) (2, 10)
NAME 'y' (2, 10) (2, 11)
OP ')' (2, 11) (2, 12)
OP ':' (2, 12) (2, 13)
NAME 'pass' (2, 14) (2, 18)
""")
    def test_tabs(self):
        """Mixed tab/space indentation produces the documented INDENTs."""
        # Evil tabs
        self.check_tokenize("def f():\n"
                            "\tif x\n"
                            " \tpass", """\
NAME 'def' (1, 0) (1, 3)
NAME 'f' (1, 4) (1, 5)
OP '(' (1, 5) (1, 6)
OP ')' (1, 6) (1, 7)
OP ':' (1, 7) (1, 8)
NEWLINE '\\n' (1, 8) (1, 9)
INDENT '\\t' (2, 0) (2, 1)
NAME 'if' (2, 1) (2, 3)
NAME 'x' (2, 4) (2, 5)
NEWLINE '\\n' (2, 5) (2, 6)
INDENT ' \\t' (3, 0) (3, 9)
NAME 'pass' (3, 9) (3, 13)
DEDENT '' (4, 0) (4, 0)
DEDENT '' (4, 0) (4, 0)
""")
    def test_non_ascii_identifiers(self):
        """Identifiers containing non-ASCII letters tokenize as NAME."""
        # Non-ascii identifiers
        self.check_tokenize("Örter = 'places'\ngrün = 'green'", """\
NAME 'Örter' (1, 0) (1, 5)
OP '=' (1, 6) (1, 7)
STRING "'places'" (1, 8) (1, 16)
NEWLINE '\\n' (1, 16) (1, 17)
NAME 'grün' (2, 0) (2, 4)
OP '=' (2, 5) (2, 6)
STRING "'green'" (2, 7) (2, 14)
""")
    def test_unicode(self):
        """Legacy u/U string prefixes combined with non-ASCII names."""
        # Legacy unicode literals:
        self.check_tokenize("Örter = u'places'\ngrün = U'green'", """\
NAME 'Örter' (1, 0) (1, 5)
OP '=' (1, 6) (1, 7)
STRING "u'places'" (1, 8) (1, 17)
NEWLINE '\\n' (1, 17) (1, 18)
NAME 'grün' (2, 0) (2, 4)
OP '=' (2, 5) (2, 6)
STRING "U'green'" (2, 7) (2, 15)
""")
def test_async(self):
    """Tokenize 'async'/'await' in the various contexts the tokenizer handles.

    'async' and 'await' are emitted as plain NAME tokens here; the expected
    tables below pin both the token types and the exact coordinates.

    NOTE(review): whitespace inside the string literals in this method
    (both the source snippets and the expected tables) appears to have been
    collapsed by extraction — e.g. several expected INDENT tokens imply
    leading spaces that the corresponding source literals no longer show.
    Restore the literals from the upstream file before relying on them.
    """
    # Async/await extension:
    self.check_tokenize("async = 1", """\
NAME 'async' (1, 0) (1, 5)
OP '=' (1, 6) (1, 7)
NUMBER '1' (1, 8) (1, 9)
""")

    self.check_tokenize("a = (async = 1)", """\
NAME 'a' (1, 0) (1, 1)
OP '=' (1, 2) (1, 3)
OP '(' (1, 4) (1, 5)
NAME 'async' (1, 5) (1, 10)
OP '=' (1, 11) (1, 12)
NUMBER '1' (1, 13) (1, 14)
OP ')' (1, 14) (1, 15)
""")

    self.check_tokenize("async()", """\
NAME 'async' (1, 0) (1, 5)
OP '(' (1, 5) (1, 6)
OP ')' (1, 6) (1, 7)
""")

    self.check_tokenize("class async(Bar):pass", """\
NAME 'class' (1, 0) (1, 5)
NAME 'async' (1, 6) (1, 11)
OP '(' (1, 11) (1, 12)
NAME 'Bar' (1, 12) (1, 15)
OP ')' (1, 15) (1, 16)
OP ':' (1, 16) (1, 17)
NAME 'pass' (1, 17) (1, 21)
""")

    self.check_tokenize("class async:pass", """\
NAME 'class' (1, 0) (1, 5)
NAME 'async' (1, 6) (1, 11)
OP ':' (1, 11) (1, 12)
NAME 'pass' (1, 12) (1, 16)
""")

    self.check_tokenize("await = 1", """\
NAME 'await' (1, 0) (1, 5)
OP '=' (1, 6) (1, 7)
NUMBER '1' (1, 8) (1, 9)
""")

    self.check_tokenize("foo.async", """\
NAME 'foo' (1, 0) (1, 3)
OP '.' (1, 3) (1, 4)
NAME 'async' (1, 4) (1, 9)
""")

    # 'async' used as a statement keyword prefix:
    self.check_tokenize("async for a in b: pass", """\
NAME 'async' (1, 0) (1, 5)
NAME 'for' (1, 6) (1, 9)
NAME 'a' (1, 10) (1, 11)
NAME 'in' (1, 12) (1, 14)
NAME 'b' (1, 15) (1, 16)
OP ':' (1, 16) (1, 17)
NAME 'pass' (1, 18) (1, 22)
""")

    self.check_tokenize("async with a as b: pass", """\
NAME 'async' (1, 0) (1, 5)
NAME 'with' (1, 6) (1, 10)
NAME 'a' (1, 11) (1, 12)
NAME 'as' (1, 13) (1, 15)
NAME 'b' (1, 16) (1, 17)
OP ':' (1, 17) (1, 18)
NAME 'pass' (1, 19) (1, 23)
""")

    self.check_tokenize("async.foo", """\
NAME 'async' (1, 0) (1, 5)
OP '.' (1, 5) (1, 6)
NAME 'foo' (1, 6) (1, 9)
""")

    self.check_tokenize("async", """\
NAME 'async' (1, 0) (1, 5)
""")

    self.check_tokenize("async\n#comment\nawait", """\
NAME 'async' (1, 0) (1, 5)
NEWLINE '\\n' (1, 5) (1, 6)
COMMENT '#comment' (2, 0) (2, 8)
NL '\\n' (2, 8) (2, 9)
NAME 'await' (3, 0) (3, 5)
""")

    self.check_tokenize("async\n...\nawait", """\
NAME 'async' (1, 0) (1, 5)
NEWLINE '\\n' (1, 5) (1, 6)
OP '...' (2, 0) (2, 3)
NEWLINE '\\n' (2, 3) (2, 4)
NAME 'await' (3, 0) (3, 5)
""")

    self.check_tokenize("async\nawait", """\
NAME 'async' (1, 0) (1, 5)
NEWLINE '\\n' (1, 5) (1, 6)
NAME 'await' (2, 0) (2, 5)
""")

    self.check_tokenize("foo.async + 1", """\
NAME 'foo' (1, 0) (1, 3)
OP '.' (1, 3) (1, 4)
NAME 'async' (1, 4) (1, 9)
OP '+' (1, 10) (1, 11)
NUMBER '1' (1, 12) (1, 13)
""")

    self.check_tokenize("async def foo(): pass", """\
NAME 'async' (1, 0) (1, 5)
NAME 'def' (1, 6) (1, 9)
NAME 'foo' (1, 10) (1, 13)
OP '(' (1, 13) (1, 14)
OP ')' (1, 14) (1, 15)
OP ':' (1, 15) (1, 16)
NAME 'pass' (1, 17) (1, 21)
""")

    # Multi-line async def with nested defs; INDENT/DEDENT positions are
    # part of the expected output.
    self.check_tokenize('''\
async def foo():
def foo(await):
await = 1
if 1:
await
async += 1
''', """\
NAME 'async' (1, 0) (1, 5)
NAME 'def' (1, 6) (1, 9)
NAME 'foo' (1, 10) (1, 13)
OP '(' (1, 13) (1, 14)
OP ')' (1, 14) (1, 15)
OP ':' (1, 15) (1, 16)
NEWLINE '\\n' (1, 16) (1, 17)
INDENT ' ' (2, 0) (2, 2)
NAME 'def' (2, 2) (2, 5)
NAME 'foo' (2, 6) (2, 9)
OP '(' (2, 9) (2, 10)
NAME 'await' (2, 10) (2, 15)
OP ')' (2, 15) (2, 16)
OP ':' (2, 16) (2, 17)
NEWLINE '\\n' (2, 17) (2, 18)
INDENT ' ' (3, 0) (3, 4)
NAME 'await' (3, 4) (3, 9)
OP '=' (3, 10) (3, 11)
NUMBER '1' (3, 12) (3, 13)
NEWLINE '\\n' (3, 13) (3, 14)
DEDENT '' (4, 2) (4, 2)
NAME 'if' (4, 2) (4, 4)
NUMBER '1' (4, 5) (4, 6)
OP ':' (4, 6) (4, 7)
NEWLINE '\\n' (4, 7) (4, 8)
INDENT ' ' (5, 0) (5, 4)
NAME 'await' (5, 4) (5, 9)
NEWLINE '\\n' (5, 9) (5, 10)
DEDENT '' (6, 0) (6, 0)
DEDENT '' (6, 0) (6, 0)
NAME 'async' (6, 0) (6, 5)
OP '+=' (6, 6) (6, 8)
NUMBER '1' (6, 9) (6, 10)
NEWLINE '\\n' (6, 10) (6, 11)
""")

    self.check_tokenize('''\
async def foo():
async for i in 1: pass''', """\
NAME 'async' (1, 0) (1, 5)
NAME 'def' (1, 6) (1, 9)
NAME 'foo' (1, 10) (1, 13)
OP '(' (1, 13) (1, 14)
OP ')' (1, 14) (1, 15)
OP ':' (1, 15) (1, 16)
NEWLINE '\\n' (1, 16) (1, 17)
INDENT ' ' (2, 0) (2, 2)
NAME 'async' (2, 2) (2, 7)
NAME 'for' (2, 8) (2, 11)
NAME 'i' (2, 12) (2, 13)
NAME 'in' (2, 14) (2, 16)
NUMBER '1' (2, 17) (2, 18)
OP ':' (2, 18) (2, 19)
NAME 'pass' (2, 20) (2, 24)
DEDENT '' (3, 0) (3, 0)
""")

    self.check_tokenize('''async def foo(async): await''', """\
NAME 'async' (1, 0) (1, 5)
NAME 'def' (1, 6) (1, 9)
NAME 'foo' (1, 10) (1, 13)
OP '(' (1, 13) (1, 14)
NAME 'async' (1, 14) (1, 19)
OP ')' (1, 19) (1, 20)
OP ':' (1, 20) (1, 21)
NAME 'await' (1, 22) (1, 27)
""")

    # Nested defs inside a plain def; the NL tokens at lines 2 and 5 imply
    # blank lines that the source literal no longer shows (see NOTE above).
    self.check_tokenize('''\
def f():
def baz(): pass
async def bar(): pass
await = 2''', """\
NAME 'def' (1, 0) (1, 3)
NAME 'f' (1, 4) (1, 5)
OP '(' (1, 5) (1, 6)
OP ')' (1, 6) (1, 7)
OP ':' (1, 7) (1, 8)
NEWLINE '\\n' (1, 8) (1, 9)
NL '\\n' (2, 0) (2, 1)
INDENT ' ' (3, 0) (3, 2)
NAME 'def' (3, 2) (3, 5)
NAME 'baz' (3, 6) (3, 9)
OP '(' (3, 9) (3, 10)
OP ')' (3, 10) (3, 11)
OP ':' (3, 11) (3, 12)
NAME 'pass' (3, 13) (3, 17)
NEWLINE '\\n' (3, 17) (3, 18)
NAME 'async' (4, 2) (4, 7)
NAME 'def' (4, 8) (4, 11)
NAME 'bar' (4, 12) (4, 15)
OP '(' (4, 15) (4, 16)
OP ')' (4, 16) (4, 17)
OP ':' (4, 17) (4, 18)
NAME 'pass' (4, 19) (4, 23)
NEWLINE '\\n' (4, 23) (4, 24)
NL '\\n' (5, 0) (5, 1)
NAME 'await' (6, 2) (6, 7)
OP '=' (6, 8) (6, 9)
NUMBER '2' (6, 10) (6, 11)
DEDENT '' (7, 0) (7, 0)
""")

    # Same shape, but the outer def is itself async.
    self.check_tokenize('''\
async def f():
def baz(): pass
async def bar(): pass
await = 2''', """\
NAME 'async' (1, 0) (1, 5)
NAME 'def' (1, 6) (1, 9)
NAME 'f' (1, 10) (1, 11)
OP '(' (1, 11) (1, 12)
OP ')' (1, 12) (1, 13)
OP ':' (1, 13) (1, 14)
NEWLINE '\\n' (1, 14) (1, 15)
NL '\\n' (2, 0) (2, 1)
INDENT ' ' (3, 0) (3, 2)
NAME 'def' (3, 2) (3, 5)
NAME 'baz' (3, 6) (3, 9)
OP '(' (3, 9) (3, 10)
OP ')' (3, 10) (3, 11)
OP ':' (3, 11) (3, 12)
NAME 'pass' (3, 13) (3, 17)
NEWLINE '\\n' (3, 17) (3, 18)
NAME 'async' (4, 2) (4, 7)
NAME 'def' (4, 8) (4, 11)
NAME 'bar' (4, 12) (4, 15)
OP '(' (4, 15) (4, 16)
OP ')' (4, 16) (4, 17)
OP ':' (4, 17) (4, 18)
NAME 'pass' (4, 19) (4, 23)
NEWLINE '\\n' (4, 23) (4, 24)
NL '\\n' (5, 0) (5, 1)
NAME 'await' (6, 2) (6, 7)
OP '=' (6, 8) (6, 9)
NUMBER '2' (6, 10) (6, 11)
DEDENT '' (7, 0) (7, 0)
""")
  897. class GenerateTokensTest(TokenizeTest):
  898. def check_tokenize(self, s, expected):
  899. # Format the tokens in s in a table format.
  900. # The ENDMARKER and final NEWLINE are omitted.
  901. f = StringIO(s)
  902. result = stringify_tokens_from_source(generate_tokens(f.readline), s)
  903. self.assertEqual(result, expected.rstrip().splitlines())
  904. def decistmt(s):
  905. result = []
  906. g = tokenize(BytesIO(s.encode('utf-8')).readline) # tokenize the string
  907. for toknum, tokval, _, _, _ in g:
  908. if toknum == NUMBER and '.' in tokval: # replace NUMBER tokens
  909. result.extend([
  910. (NAME, 'Decimal'),
  911. (OP, '('),
  912. (STRING, repr(tokval)),
  913. (OP, ')')
  914. ])
  915. else:
  916. result.append((toknum, tokval))
  917. return untokenize(result).decode('utf-8')
class TestMisc(TestCase):
    """Miscellaneous tokenize tests."""

    def test_decistmt(self):
        """Check the decistmt() docs example: float literals become Decimal."""
        # Substitute Decimals for floats in a string of statements.
        # This is an example from the docs.
        from decimal import Decimal
        s = '+21.3e-5*-.1234/81.7'
        self.assertEqual(decistmt(s),
                         "+Decimal ('21.3e-5')*-Decimal ('.1234')/Decimal ('81.7')")

        # The format of the exponent is inherited from the platform C library.
        # Known cases are "e-007" (Windows) and "e-07" (not Windows). Since
        # we're only showing 11 digits, and the 12th isn't close to 5, the
        # rest of the output should be platform-independent.
        self.assertRegex(repr(eval(s)), '-3.2171603427[0-9]*e-0+7')

        # Output from calculations with Decimal should be identical across all
        # platforms.
        self.assertEqual(eval(decistmt(s)),
                         Decimal('-3.217160342717258261933904529E-7'))
class TestTokenizerAdheresToPep0263(TestCase):
    """
    Test that tokenizer adheres to the coding behaviour stipulated in PEP 0263.
    """

    def _testFile(self, filename):
        # Round-trip the named fixture file (resolved relative to this test
        # module's directory); any decoding problem surfaces as a SyntaxError
        # out of tokenize/detect_encoding.
        path = os.path.join(os.path.dirname(__file__), filename)
        TestRoundtrip.check_roundtrip(self, open(path, 'rb'))

    def test_utf8_coding_cookie_and_no_utf8_bom(self):
        # utf-8 cookie without a BOM must decode and round-trip cleanly.
        f = 'tokenize_tests-utf8-coding-cookie-and-no-utf8-bom-sig.txt'
        self._testFile(f)

    def test_latin1_coding_cookie_and_utf8_bom(self):
        """
        As per PEP 0263, if a file starts with a utf-8 BOM signature, the only
        allowed encoding for the comment is 'utf-8'. The text file used in
        this test starts with a BOM signature, but specifies latin1 as the
        coding, so verify that a SyntaxError is raised, which matches the
        behaviour of the interpreter when it encounters a similar condition.
        """
        f = 'tokenize_tests-latin1-coding-cookie-and-utf8-bom-sig.txt'
        self.assertRaises(SyntaxError, self._testFile, f)

    def test_no_coding_cookie_and_utf8_bom(self):
        # A BOM alone selects utf-8-sig; must round-trip.
        f = 'tokenize_tests-no-coding-cookie-and-utf8-bom-sig-only.txt'
        self._testFile(f)

    def test_utf8_coding_cookie_and_utf8_bom(self):
        # BOM plus a *matching* utf-8 cookie is the allowed combination.
        f = 'tokenize_tests-utf8-coding-cookie-and-utf8-bom-sig.txt'
        self._testFile(f)

    def test_bad_coding_cookie(self):
        # Unknown or invalid encodings in the cookie raise SyntaxError.
        self.assertRaises(SyntaxError, self._testFile, 'bad_coding.py')
        self.assertRaises(SyntaxError, self._testFile, 'bad_coding2.py')
class Test_Tokenize(TestCase):
    """Tests for the private _tokenize() helper's encoding handling."""

    def test__tokenize_decodes_with_specified_encoding(self):
        literal = '"ЉЊЈЁЂ"'
        line = literal.encode('utf-8')
        first = False  # flipped to True once the single source line was served
        def readline():
            nonlocal first
            if not first:
                first = True
                return line
            else:
                return b''

        # skip the initial encoding token and the end tokens
        # NOTE(review): the [1:-2] slice assumes one leading ENCODING token
        # and exactly two trailing tokens; confirm against the _tokenize()
        # implementation in the targeted CPython version.
        tokens = list(_tokenize(readline, encoding='utf-8'))[1:-2]
        # 3 == token.STRING; the 5-tuple is (type, string, start, end, line).
        expected_tokens = [(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')]
        self.assertEqual(tokens, expected_tokens,
                         "bytes not decoded with encoding")

    def test__tokenize_does_not_decode_with_encoding_none(self):
        literal = '"ЉЊЈЁЂ"'
        first = False  # flipped to True once the single source line was served
        def readline():
            nonlocal first
            if not first:
                first = True
                return literal  # str line here, bytes sentinel below
            else:
                return b''

        # skip the end tokens (no ENCODING token is emitted when encoding=None)
        tokens = list(_tokenize(readline, encoding=None))[:-2]
        # 3 == token.STRING
        expected_tokens = [(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')]
        self.assertEqual(tokens, expected_tokens,
                         "string not tokenized when encoding is None")
class TestDetectEncoding(TestCase):
    """Tests for tokenize.detect_encoding(): PEP 263 cookies and the UTF-8 BOM."""

    def get_readline(self, lines):
        # Build a readline() that serves the given byte lines one at a time,
        # then raises StopIteration (mimicking an exhausted file).
        index = 0
        def readline():
            nonlocal index
            if index == len(lines):
                raise StopIteration
            line = lines[index]
            index += 1
            return line
        return readline

    def test_no_bom_no_encoding_cookie(self):
        # Default is utf-8; only the first two lines are consumed while
        # searching for a cookie.
        lines = (
            b'# something\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'utf-8')
        self.assertEqual(consumed_lines, list(lines[:2]))

    def test_bom_no_cookie(self):
        # A BOM with no cookie yields utf-8-sig; the BOM is stripped from
        # the consumed lines.
        lines = (
            b'\xef\xbb\xbf# something\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'utf-8-sig')
        self.assertEqual(consumed_lines,
                         [b'# something\n', b'print(something)\n'])

    def test_cookie_first_line_no_bom(self):
        # A first-line cookie is honored and normalized (latin-1 -> iso-8859-1).
        lines = (
            b'# -*- coding: latin-1 -*-\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'iso-8859-1')
        self.assertEqual(consumed_lines, [b'# -*- coding: latin-1 -*-\n'])

    def test_matched_bom_and_cookie_first_line(self):
        # BOM + matching utf-8 cookie -> utf-8-sig.
        lines = (
            b'\xef\xbb\xbf# coding=utf-8\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'utf-8-sig')
        self.assertEqual(consumed_lines, [b'# coding=utf-8\n'])

    def test_mismatched_bom_and_cookie_first_line_raises_syntaxerror(self):
        # BOM + non-utf-8 cookie is a conflict per PEP 263.
        lines = (
            b'\xef\xbb\xbf# vim: set fileencoding=ascii :\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        readline = self.get_readline(lines)
        self.assertRaises(SyntaxError, detect_encoding, readline)

    def test_cookie_second_line_no_bom(self):
        # The cookie may appear on the second line when the first is a comment.
        lines = (
            b'#! something\n',
            b'# vim: set fileencoding=ascii :\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'ascii')
        expected = [b'#! something\n', b'# vim: set fileencoding=ascii :\n']
        self.assertEqual(consumed_lines, expected)

    def test_matched_bom_and_cookie_second_line(self):
        # NOTE(review): the leading 'f' on the second line below looks odd
        # but it matches the upstream fixture; with the BOM present the
        # result is utf-8-sig either way.
        lines = (
            b'\xef\xbb\xbf#! something\n',
            b'f# coding=utf-8\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'utf-8-sig')
        self.assertEqual(consumed_lines,
                         [b'#! something\n', b'f# coding=utf-8\n'])

    def test_mismatched_bom_and_cookie_second_line_raises_syntaxerror(self):
        # BOM + non-utf-8 cookie on the second line is also a conflict.
        lines = (
            b'\xef\xbb\xbf#! something\n',
            b'# vim: set fileencoding=ascii :\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        readline = self.get_readline(lines)
        self.assertRaises(SyntaxError, detect_encoding, readline)

    def test_cookie_second_line_noncommented_first_line(self):
        # A cookie on line two counts only if line one is a comment;
        # here line one is code, so the default utf-8 wins.
        lines = (
            b"print('\xc2\xa3')\n",
            b'# vim: set fileencoding=iso8859-15 :\n',
            b"print('\xe2\x82\xac')\n"
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'utf-8')
        expected = [b"print('\xc2\xa3')\n"]
        self.assertEqual(consumed_lines, expected)

    def test_cookie_second_line_commented_first_line(self):
        # Line one is a comment, so the second-line cookie is honored.
        lines = (
            b"#print('\xc2\xa3')\n",
            b'# vim: set fileencoding=iso8859-15 :\n',
            b"print('\xe2\x82\xac')\n"
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'iso8859-15')
        expected = [b"#print('\xc2\xa3')\n", b'# vim: set fileencoding=iso8859-15 :\n']
        self.assertEqual(consumed_lines, expected)

    def test_cookie_second_line_empty_first_line(self):
        # A blank first line also allows the second-line cookie.
        lines = (
            b'\n',
            b'# vim: set fileencoding=iso8859-15 :\n',
            b"print('\xe2\x82\xac')\n"
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'iso8859-15')
        expected = [b'\n', b'# vim: set fileencoding=iso8859-15 :\n']
        self.assertEqual(consumed_lines, expected)

    def test_latin1_normalization(self):
        # See get_normal_name() in tokenizer.c.
        encodings = ("latin-1", "iso-8859-1", "iso-latin-1", "latin-1-unix",
                     "iso-8859-1-unix", "iso-latin-1-mac")
        for encoding in encodings:
            for rep in ("-", "_"):
                # Try both dash and underscore spellings of each alias.
                enc = encoding.replace("-", rep)
                lines = (b"#!/usr/bin/python\n",
                         b"# coding: " + enc.encode("ascii") + b"\n",
                         b"print(things)\n",
                         b"do_something += 4\n")
                rl = self.get_readline(lines)
                found, consumed_lines = detect_encoding(rl)
                self.assertEqual(found, "iso-8859-1")

    def test_syntaxerror_latin1(self):
        # Issue 14629: need to raise SyntaxError if the first
        # line(s) have non-UTF-8 characters
        lines = (
            b'print("\xdf")', # Latin-1: LATIN SMALL LETTER SHARP S
        )
        readline = self.get_readline(lines)
        self.assertRaises(SyntaxError, detect_encoding, readline)

    def test_utf8_normalization(self):
        # See get_normal_name() in tokenizer.c.
        encodings = ("utf-8", "utf-8-mac", "utf-8-unix")
        for encoding in encodings:
            for rep in ("-", "_"):
                enc = encoding.replace("-", rep)
                lines = (b"#!/usr/bin/python\n",
                         b"# coding: " + enc.encode("ascii") + b"\n",
                         b"1 + 3\n")
                rl = self.get_readline(lines)
                found, consumed_lines = detect_encoding(rl)
                self.assertEqual(found, "utf-8")

    def test_short_files(self):
        # One-line file, no cookie.
        readline = self.get_readline((b'print(something)\n',))
        encoding, consumed_lines = detect_encoding(readline)
        self.assertEqual(encoding, 'utf-8')
        self.assertEqual(consumed_lines, [b'print(something)\n'])

        # Completely empty input.
        encoding, consumed_lines = detect_encoding(self.get_readline(()))
        self.assertEqual(encoding, 'utf-8')
        self.assertEqual(consumed_lines, [])

        # One line with a BOM.
        readline = self.get_readline((b'\xef\xbb\xbfprint(something)\n',))
        encoding, consumed_lines = detect_encoding(readline)
        self.assertEqual(encoding, 'utf-8-sig')
        self.assertEqual(consumed_lines, [b'print(something)\n'])

        # Only a BOM, nothing else.
        readline = self.get_readline((b'\xef\xbb\xbf',))
        encoding, consumed_lines = detect_encoding(readline)
        self.assertEqual(encoding, 'utf-8-sig')
        self.assertEqual(consumed_lines, [])

        # Unknown cookie value.
        readline = self.get_readline((b'# coding: bad\n',))
        self.assertRaises(SyntaxError, detect_encoding, readline)

    def test_false_encoding(self):
        # Issue 18873: "Encoding" detected in non-comment lines
        readline = self.get_readline((b'print("#coding=fake")',))
        encoding, consumed_lines = detect_encoding(readline)
        self.assertEqual(encoding, 'utf-8')
        self.assertEqual(consumed_lines, [b'print("#coding=fake")'])

    def test_open(self):
        filename = os_helper.TESTFN + '.py'
        self.addCleanup(os_helper.unlink, filename)

        # test coding cookie
        for encoding in ('iso-8859-15', 'utf-8'):
            with open(filename, 'w', encoding=encoding) as fp:
                print("# coding: %s" % encoding, file=fp)
                print("print('euro:\u20ac')", file=fp)
            with tokenize_open(filename) as fp:
                self.assertEqual(fp.encoding, encoding)
                self.assertEqual(fp.mode, 'r')

        # test BOM (no coding cookie)
        with open(filename, 'w', encoding='utf-8-sig') as fp:
            print("print('euro:\u20ac')", file=fp)
        with tokenize_open(filename) as fp:
            self.assertEqual(fp.encoding, 'utf-8-sig')
            self.assertEqual(fp.mode, 'r')

    def test_filename_in_exception(self):
        # When possible, include the file name in the exception.
        path = 'some_file_path'
        lines = (
            b'print("\xdf")', # Latin-1: LATIN SMALL LETTER SHARP S
        )
        class Bunk:
            # Minimal file-like object exposing .name and .readline().
            def __init__(self, lines, path):
                self.name = path
                self._lines = lines
                self._index = 0

            def readline(self):
                if self._index == len(lines):
                    raise StopIteration
                line = lines[self._index]
                self._index += 1
                return line

        with self.assertRaises(SyntaxError):
            ins = Bunk(lines, path)
            # Make sure lacking a name isn't an issue.
            del ins.name
            detect_encoding(ins.readline)
        with self.assertRaisesRegex(SyntaxError, '.*{}'.format(path)):
            ins = Bunk(lines, path)
            detect_encoding(ins.readline)

    def test_open_error(self):
        # Issue #23840: open() must close the binary file on error
        m = BytesIO(b'#coding:xxx')
        with mock.patch('tokenize._builtin_open', return_value=m):
            self.assertRaises(SyntaxError, tokenize_open, 'foobar')
        self.assertTrue(m.closed)
  1219. class TestTokenize(TestCase):
  1220. def test_tokenize(self):
  1221. import tokenize as tokenize_module
  1222. encoding = object()
  1223. encoding_used = None
  1224. def mock_detect_encoding(readline):
  1225. return encoding, [b'first', b'second']
  1226. def mock__tokenize(readline, encoding):
  1227. nonlocal encoding_used
  1228. encoding_used = encoding
  1229. out = []
  1230. while True:
  1231. next_line = readline()
  1232. if next_line:
  1233. out.append(next_line)
  1234. continue
  1235. return out
  1236. counter = 0
  1237. def mock_readline():
  1238. nonlocal counter
  1239. counter += 1
  1240. if counter == 5:
  1241. return b''
  1242. return str(counter).encode()
  1243. orig_detect_encoding = tokenize_module.detect_encoding
  1244. orig__tokenize = tokenize_module._tokenize
  1245. tokenize_module.detect_encoding = mock_detect_encoding
  1246. tokenize_module._tokenize = mock__tokenize
  1247. try:
  1248. results = tokenize(mock_readline)
  1249. self.assertEqual(list(results),
  1250. [b'first', b'second', b'1', b'2', b'3', b'4'])
  1251. finally:
  1252. tokenize_module.detect_encoding = orig_detect_encoding
  1253. tokenize_module._tokenize = orig__tokenize
  1254. self.assertEqual(encoding_used, encoding)
  1255. def test_oneline_defs(self):
  1256. buf = []
  1257. for i in range(500):
  1258. buf.append('def i{i}(): return {i}'.format(i=i))
  1259. buf.append('OK')
  1260. buf = '\n'.join(buf)
  1261. # Test that 500 consequent, one-line defs is OK
  1262. toks = list(tokenize(BytesIO(buf.encode('utf-8')).readline))
  1263. self.assertEqual(toks[-3].string, 'OK') # [-1] is always ENDMARKER
  1264. # [-2] is always NEWLINE
  1265. def assertExactTypeEqual(self, opstr, *optypes):
  1266. tokens = list(tokenize(BytesIO(opstr.encode('utf-8')).readline))
  1267. num_optypes = len(optypes)
  1268. self.assertEqual(len(tokens), 3 + num_optypes)
  1269. self.assertEqual(tok_name[tokens[0].exact_type],
  1270. tok_name[ENCODING])
  1271. for i in range(num_optypes):
  1272. self.assertEqual(tok_name[tokens[i + 1].exact_type],
  1273. tok_name[optypes[i]])
  1274. self.assertEqual(tok_name[tokens[1 + num_optypes].exact_type],
  1275. tok_name[token.NEWLINE])
  1276. self.assertEqual(tok_name[tokens[2 + num_optypes].exact_type],
  1277. tok_name[token.ENDMARKER])
  1278. def test_exact_type(self):
  1279. self.assertExactTypeEqual('()', token.LPAR, token.RPAR)
  1280. self.assertExactTypeEqual('[]', token.LSQB, token.RSQB)
  1281. self.assertExactTypeEqual(':', token.COLON)
  1282. self.assertExactTypeEqual(',', token.COMMA)
  1283. self.assertExactTypeEqual(';', token.SEMI)
  1284. self.assertExactTypeEqual('+', token.PLUS)
  1285. self.assertExactTypeEqual('-', token.MINUS)
  1286. self.assertExactTypeEqual('*', token.STAR)
  1287. self.assertExactTypeEqual('/', token.SLASH)
  1288. self.assertExactTypeEqual('|', token.VBAR)
  1289. self.assertExactTypeEqual('&', token.AMPER)
  1290. self.assertExactTypeEqual('<', token.LESS)
  1291. self.assertExactTypeEqual('>', token.GREATER)
  1292. self.assertExactTypeEqual('=', token.EQUAL)
  1293. self.assertExactTypeEqual('.', token.DOT)
  1294. self.assertExactTypeEqual('%', token.PERCENT)
  1295. self.assertExactTypeEqual('{}', token.LBRACE, token.RBRACE)
  1296. self.assertExactTypeEqual('==', token.EQEQUAL)
  1297. self.assertExactTypeEqual('!=', token.NOTEQUAL)
  1298. self.assertExactTypeEqual('<=', token.LESSEQUAL)
  1299. self.assertExactTypeEqual('>=', token.GREATEREQUAL)
  1300. self.assertExactTypeEqual('~', token.TILDE)
  1301. self.assertExactTypeEqual('^', token.CIRCUMFLEX)
  1302. self.assertExactTypeEqual('<<', token.LEFTSHIFT)
  1303. self.assertExactTypeEqual('>>', token.RIGHTSHIFT)
  1304. self.assertExactTypeEqual('**', token.DOUBLESTAR)
  1305. self.assertExactTypeEqual('+=', token.PLUSEQUAL)
  1306. self.assertExactTypeEqual('-=', token.MINEQUAL)
  1307. self.assertExactTypeEqual('*=', token.STAREQUAL)
  1308. self.assertExactTypeEqual('/=', token.SLASHEQUAL)
  1309. self.assertExactTypeEqual('%=', token.PERCENTEQUAL)
  1310. self.assertExactTypeEqual('&=', token.AMPEREQUAL)
  1311. self.assertExactTypeEqual('|=', token.VBAREQUAL)
  1312. self.assertExactTypeEqual('^=', token.CIRCUMFLEXEQUAL)
  1313. self.assertExactTypeEqual('^=', token.CIRCUMFLEXEQUAL)
  1314. self.assertExactTypeEqual('<<=', token.LEFTSHIFTEQUAL)
  1315. self.assertExactTypeEqual('>>=', token.RIGHTSHIFTEQUAL)
  1316. self.assertExactTypeEqual('**=', token.DOUBLESTAREQUAL)
  1317. self.assertExactTypeEqual('//', token.DOUBLESLASH)
  1318. self.assertExactTypeEqual('//=', token.DOUBLESLASHEQUAL)
  1319. self.assertExactTypeEqual(':=', token.COLONEQUAL)
  1320. self.assertExactTypeEqual('...', token.ELLIPSIS)
  1321. self.assertExactTypeEqual('->', token.RARROW)
  1322. self.assertExactTypeEqual('@', token.AT)
  1323. self.assertExactTypeEqual('@=', token.ATEQUAL)
  1324. self.assertExactTypeEqual('a**2+b**2==c**2',
  1325. NAME, token.DOUBLESTAR, NUMBER,
  1326. token.PLUS,
  1327. NAME, token.DOUBLESTAR, NUMBER,
  1328. token.EQEQUAL,
  1329. NAME, token.DOUBLESTAR, NUMBER)
  1330. self.assertExactTypeEqual('{1, 2, 3}',
  1331. token.LBRACE,
  1332. token.NUMBER, token.COMMA,
  1333. token.NUMBER, token.COMMA,
  1334. token.NUMBER,
  1335. token.RBRACE)
  1336. self.assertExactTypeEqual('^(x & 0x1)',
  1337. token.CIRCUMFLEX,
  1338. token.LPAR,
  1339. token.NAME, token.AMPER, token.NUMBER,
  1340. token.RPAR)
  1341. def test_pathological_trailing_whitespace(self):
  1342. # See http://bugs.python.org/issue16152
  1343. self.assertExactTypeEqual('@ ', token.AT)
  1344. def test_comment_at_the_end_of_the_source_without_newline(self):
  1345. # See http://bugs.python.org/issue44667
  1346. source = 'b = 1\n\n#test'
  1347. expected_tokens = [token.NAME, token.EQUAL, token.NUMBER, token.NEWLINE, token.NL, token.COMMENT]
  1348. tokens = list(tokenize(BytesIO(source.encode('utf-8')).readline))
  1349. self.assertEqual(tok_name[tokens[0].exact_type], tok_name[ENCODING])
  1350. for i in range(6):
  1351. self.assertEqual(tok_name[tokens[i + 1].exact_type], tok_name[expected_tokens[i]])
  1352. self.assertEqual(tok_name[tokens[-1].exact_type], tok_name[token.ENDMARKER])
  1353. class UntokenizeTest(TestCase):
  1354. def test_bad_input_order(self):
  1355. # raise if previous row
  1356. u = Untokenizer()
  1357. u.prev_row = 2
  1358. u.prev_col = 2
  1359. with self.assertRaises(ValueError) as cm:
  1360. u.add_whitespace((1,3))
  1361. self.assertEqual(cm.exception.args[0],
  1362. 'start (1,3) precedes previous end (2,2)')
  1363. # raise if previous column in row
  1364. self.assertRaises(ValueError, u.add_whitespace, (2,1))
  1365. def test_backslash_continuation(self):
  1366. # The problem is that <whitespace>\<newline> leaves no token
  1367. u = Untokenizer()
  1368. u.prev_row = 1
  1369. u.prev_col = 1
  1370. u.tokens = []
  1371. u.add_whitespace((2, 0))
  1372. self.assertEqual(u.tokens, ['\\\n'])
  1373. u.prev_row = 2
  1374. u.add_whitespace((4, 4))
  1375. self.assertEqual(u.tokens, ['\\\n', '\\\n\\\n', ' '])
  1376. TestRoundtrip.check_roundtrip(self, 'a\n b\n c\n \\\n c\n')
  1377. def test_iter_compat(self):
  1378. u = Untokenizer()
  1379. token = (NAME, 'Hello')
  1380. tokens = [(ENCODING, 'utf-8'), token]
  1381. u.compat(token, iter([]))
  1382. self.assertEqual(u.tokens, ["Hello "])
  1383. u = Untokenizer()
  1384. self.assertEqual(u.untokenize(iter([token])), 'Hello ')
  1385. u = Untokenizer()
  1386. self.assertEqual(u.untokenize(iter(tokens)), 'Hello ')
  1387. self.assertEqual(u.encoding, 'utf-8')
  1388. self.assertEqual(untokenize(iter(tokens)), b'Hello ')
  1389. class TestRoundtrip(TestCase):
  1390. def check_roundtrip(self, f):
  1391. """
  1392. Test roundtrip for `untokenize`. `f` is an open file or a string.
  1393. The source code in f is tokenized to both 5- and 2-tuples.
  1394. Both sequences are converted back to source code via
  1395. tokenize.untokenize(), and the latter tokenized again to 2-tuples.
  1396. The test fails if the 3 pair tokenizations do not match.
  1397. When untokenize bugs are fixed, untokenize with 5-tuples should
  1398. reproduce code that does not contain a backslash continuation
  1399. following spaces. A proper test should test this.
  1400. """
  1401. # Get source code and original tokenizations
  1402. if isinstance(f, str):
  1403. code = f.encode('utf-8')
  1404. else:
  1405. code = f.read()
  1406. f.close()
  1407. readline = iter(code.splitlines(keepends=True)).__next__
  1408. tokens5 = list(tokenize(readline))
  1409. tokens2 = [tok[:2] for tok in tokens5]
  1410. # Reproduce tokens2 from pairs
  1411. bytes_from2 = untokenize(tokens2)
  1412. readline2 = iter(bytes_from2.splitlines(keepends=True)).__next__
  1413. tokens2_from2 = [tok[:2] for tok in tokenize(readline2)]
  1414. self.assertEqual(tokens2_from2, tokens2)
  1415. # Reproduce tokens2 from 5-tuples
  1416. bytes_from5 = untokenize(tokens5)
  1417. readline5 = iter(bytes_from5.splitlines(keepends=True)).__next__
  1418. tokens2_from5 = [tok[:2] for tok in tokenize(readline5)]
  1419. self.assertEqual(tokens2_from5, tokens2)
def test_roundtrip(self):
    """Round-trip a collection of small, formatting-tricky sources.

    NOTE(review): leading whitespace inside several of the source-string
    literals below appears collapsed by extraction (e.g. the line commented
    '3-space indent' no longer carries that indent); restore the literals
    from upstream before relying on their exact content.
    """
    # There are some standard formatting practices that are easy to get right.
    self.check_roundtrip("if x == 1:\n"
                         " print(x)\n")
    self.check_roundtrip("# This is a comment\n"
                         "# This also\n")

    # Some people use different formatting conventions, which makes
    # untokenize a little trickier. Note that this test involves trailing
    # whitespace after the colon. Note that we use hex escapes to make the
    # two trailing blanks apparent in the expected output.
    self.check_roundtrip("if x == 1 : \n"
                         " print(x)\n")
    fn = support.findfile("tokenize_tests.txt")
    with open(fn, 'rb') as f:
        self.check_roundtrip(f)
    self.check_roundtrip("if x == 1:\n"
                         " # A comment by itself.\n"
                         " print(x) # Comment here, too.\n"
                         " # Another comment.\n"
                         "after_if = True\n")
    self.check_roundtrip("if (x # The comments need to go in the right place\n"
                         " == 1):\n"
                         " print('x==1')\n")
    self.check_roundtrip("class Test: # A comment here\n"
                         " # A comment with weird indent\n"
                         " after_com = 5\n"
                         " def x(m): return m*5 # a one liner\n"
                         " def y(m): # A whitespace after the colon\n"
                         " return y*4 # 3-space indent\n")

    # Some error-handling code
    # NOTE(review): the ')' placed after the embedded '\n' below matches the
    # upstream fixture; the concatenated source is deliberately odd.
    self.check_roundtrip("try: import somemodule\n"
                         "except ImportError: # comment\n"
                         " print('Can not import' # comment2\n)"
                         "else: print('Loaded')\n")
def test_continuation(self):
    """Round-trip sources whose logical lines span bracketed continuations."""
    # Balancing continuation
    self.check_roundtrip("a = (3,4, \n"
                         "5,6)\n"
                         "y = [3, 4,\n"
                         "5]\n"
                         "z = {'a': 5,\n"
                         "'b':15, 'c':True}\n"
                         "x = len(y) + 5 - a[\n"
                         "3] - a[2]\n"
                         "+ len(z) - z[\n"
                         "'b']\n")
def test_backslash_continuation(self):
    """Round-trip backslash continuations, including inside comments."""
    # Backslash means line continuation, except for comments
    self.check_roundtrip("x=1+\\\n"
                         "1\n"
                         "# This is a comment\\\n"
                         "# This also\n")
    # A comment ending in a backslash, then code with no trailing newline.
    self.check_roundtrip("# Comment \\\n"
                         "x = 0")
  1474. def test_string_concatenation(self):
  1475. # Two string literals on the same line
  1476. self.check_roundtrip("'' ''")
def test_random_files(self):
    """Round-trip a sample of the test suite's own modules.

    Processes only a random sample of 10 files unless the 'cpu'
    resource is enabled (pass -ucpu), in which case every matching
    file in the directory is processed.
    """
    # Test roundtrip on random python modules.
    # pass the '-ucpu' option to process the full directory.
    import glob, random
    fn = support.findfile("tokenize_tests.txt")
    tempdir = os.path.dirname(fn) or os.curdir
    testfiles = glob.glob(os.path.join(glob.escape(tempdir), "test*.py"))

    # Tokenize is broken on test_pep3131.py because regular expressions are
    # broken on the obscure unicode identifiers in it. *sigh*
    # With roundtrip extended to test the 5-tuple mode of untokenize,
    # 7 more testfiles fail. Remove them also until the failure is diagnosed.
    testfiles.remove(os.path.join(tempdir, "test_unicode_identifiers.py"))
    for f in ('buffer', 'builtin', 'fileio', 'inspect', 'os', 'platform', 'sys'):
        testfiles.remove(os.path.join(tempdir, "test_%s.py") % f)

    if not support.is_resource_enabled("cpu"):
        testfiles = random.sample(testfiles, 10)

    for testfile in testfiles:
        if support.verbose >= 2:
            print('tokenize', testfile)
        with open(testfile, 'rb') as f:
            with self.subTest(file=testfile):
                self.check_roundtrip(f)
  1499. def roundtrip(self, code):
  1500. if isinstance(code, str):
  1501. code = code.encode('utf-8')
  1502. return untokenize(tokenize(BytesIO(code).readline)).decode('utf-8')
  1503. def test_indentation_semantics_retained(self):
  1504. """
  1505. Ensure that although whitespace might be mutated in a roundtrip,
  1506. the semantic meaning of the indentation remains consistent.
  1507. """
  1508. code = "if False:\n\tx=3\n\tx=3\n"
  1509. codelines = self.roundtrip(code).split('\n')
  1510. self.assertEqual(codelines[1], codelines[2])
  1511. self.check_roundtrip(code)
  1512. class CTokenizeTest(TestCase):
  1513. def check_tokenize(self, s, expected):
  1514. # Format the tokens in s in a table format.
  1515. # The ENDMARKER and final NEWLINE are omitted.
  1516. with self.subTest(source=s):
  1517. result = stringify_tokens_from_source(
  1518. _generate_tokens_from_c_tokenizer(s), s
  1519. )
  1520. self.assertEqual(result, expected.rstrip().splitlines())
def test_int(self):
    """C tokenizer: integer literals in hex/binary/octal and mixed contexts."""
    self.check_tokenize('0xff <= 255', """\
NUMBER '0xff' (1, 0) (1, 4)
LESSEQUAL '<=' (1, 5) (1, 7)
NUMBER '255' (1, 8) (1, 11)
""")
    self.check_tokenize('0b10 <= 255', """\
NUMBER '0b10' (1, 0) (1, 4)
LESSEQUAL '<=' (1, 5) (1, 7)
NUMBER '255' (1, 8) (1, 11)
""")
    self.check_tokenize('0o123 <= 0O123', """\
NUMBER '0o123' (1, 0) (1, 5)
LESSEQUAL '<=' (1, 6) (1, 8)
NUMBER '0O123' (1, 9) (1, 14)
""")
    self.check_tokenize('1234567 > ~0x15', """\
NUMBER '1234567' (1, 0) (1, 7)
GREATER '>' (1, 8) (1, 9)
TILDE '~' (1, 10) (1, 11)
NUMBER '0x15' (1, 11) (1, 15)
""")
    self.check_tokenize('2134568 != 1231515', """\
NUMBER '2134568' (1, 0) (1, 7)
NOTEQUAL '!=' (1, 8) (1, 10)
NUMBER '1231515' (1, 11) (1, 18)
""")
    self.check_tokenize('(-124561-1) & 200000000', """\
LPAR '(' (1, 0) (1, 1)
MINUS '-' (1, 1) (1, 2)
NUMBER '124561' (1, 2) (1, 8)
MINUS '-' (1, 8) (1, 9)
NUMBER '1' (1, 9) (1, 10)
RPAR ')' (1, 10) (1, 11)
AMPER '&' (1, 12) (1, 13)
NUMBER '200000000' (1, 14) (1, 23)
""")
    self.check_tokenize('0xdeadbeef != -1', """\
NUMBER '0xdeadbeef' (1, 0) (1, 10)
NOTEQUAL '!=' (1, 11) (1, 13)
MINUS '-' (1, 14) (1, 15)
NUMBER '1' (1, 15) (1, 16)
""")
    self.check_tokenize('0xdeadc0de & 12345', """\
NUMBER '0xdeadc0de' (1, 0) (1, 10)
AMPER '&' (1, 11) (1, 12)
NUMBER '12345' (1, 13) (1, 18)
""")
    self.check_tokenize('0xFF & 0x15 | 1234', """\
NUMBER '0xFF' (1, 0) (1, 4)
AMPER '&' (1, 5) (1, 6)
NUMBER '0x15' (1, 7) (1, 11)
VBAR '|' (1, 12) (1, 13)
NUMBER '1234' (1, 14) (1, 18)
""")
def test_float(self):
    """C tokenizer: float literals, including bare '.' and exponent forms."""
    self.check_tokenize('x = 3.14159', """\
NAME 'x' (1, 0) (1, 1)
EQUAL '=' (1, 2) (1, 3)
NUMBER '3.14159' (1, 4) (1, 11)
""")
    self.check_tokenize('x = 314159.', """\
NAME 'x' (1, 0) (1, 1)
EQUAL '=' (1, 2) (1, 3)
NUMBER '314159.' (1, 4) (1, 11)
""")
    self.check_tokenize('x = .314159', """\
NAME 'x' (1, 0) (1, 1)
EQUAL '=' (1, 2) (1, 3)
NUMBER '.314159' (1, 4) (1, 11)
""")
    self.check_tokenize('x = 3e14159', """\
NAME 'x' (1, 0) (1, 1)
EQUAL '=' (1, 2) (1, 3)
NUMBER '3e14159' (1, 4) (1, 11)
""")
    self.check_tokenize('x = 3E123', """\
NAME 'x' (1, 0) (1, 1)
EQUAL '=' (1, 2) (1, 3)
NUMBER '3E123' (1, 4) (1, 9)
""")
    self.check_tokenize('x+y = 3e-1230', """\
NAME 'x' (1, 0) (1, 1)
PLUS '+' (1, 1) (1, 2)
NAME 'y' (1, 2) (1, 3)
EQUAL '=' (1, 4) (1, 5)
NUMBER '3e-1230' (1, 6) (1, 13)
""")
    self.check_tokenize('x = 3.14e159', """\
NAME 'x' (1, 0) (1, 1)
EQUAL '=' (1, 2) (1, 3)
NUMBER '3.14e159' (1, 4) (1, 12)
""")
def test_string(self):
    """C tokenizer: string literals — quoting styles, prefixes (r/u/b/f and
    combinations), backslash line continuations, and triple quotes."""
    self.check_tokenize('x = \'\'; y = ""', """\
NAME 'x' (1, 0) (1, 1)
EQUAL '=' (1, 2) (1, 3)
STRING "''" (1, 4) (1, 6)
SEMI ';' (1, 6) (1, 7)
NAME 'y' (1, 8) (1, 9)
EQUAL '=' (1, 10) (1, 11)
STRING '""' (1, 12) (1, 14)
""")
    self.check_tokenize('x = \'"\'; y = "\'"', """\
NAME 'x' (1, 0) (1, 1)
EQUAL '=' (1, 2) (1, 3)
STRING '\\'"\\'' (1, 4) (1, 7)
SEMI ';' (1, 7) (1, 8)
NAME 'y' (1, 9) (1, 10)
EQUAL '=' (1, 11) (1, 12)
STRING '"\\'"' (1, 13) (1, 16)
""")
    self.check_tokenize('x = "doesn\'t "shrink", does it"', """\
NAME 'x' (1, 0) (1, 1)
EQUAL '=' (1, 2) (1, 3)
STRING '"doesn\\'t "' (1, 4) (1, 14)
NAME 'shrink' (1, 14) (1, 20)
STRING '", does it"' (1, 20) (1, 31)
""")
    self.check_tokenize("x = 'abc' + 'ABC'", """\
NAME 'x' (1, 0) (1, 1)
EQUAL '=' (1, 2) (1, 3)
STRING "'abc'" (1, 4) (1, 9)
PLUS '+' (1, 10) (1, 11)
STRING "'ABC'" (1, 12) (1, 17)
""")
    self.check_tokenize('y = "ABC" + "ABC"', """\
NAME 'y' (1, 0) (1, 1)
EQUAL '=' (1, 2) (1, 3)
STRING '"ABC"' (1, 4) (1, 9)
PLUS '+' (1, 10) (1, 11)
STRING '"ABC"' (1, 12) (1, 17)
""")
    self.check_tokenize("x = r'abc' + r'ABC' + R'ABC' + R'ABC'", """\
NAME 'x' (1, 0) (1, 1)
EQUAL '=' (1, 2) (1, 3)
STRING "r'abc'" (1, 4) (1, 10)
PLUS '+' (1, 11) (1, 12)
STRING "r'ABC'" (1, 13) (1, 19)
PLUS '+' (1, 20) (1, 21)
STRING "R'ABC'" (1, 22) (1, 28)
PLUS '+' (1, 29) (1, 30)
STRING "R'ABC'" (1, 31) (1, 37)
""")
    self.check_tokenize('y = r"abc" + r"ABC" + R"ABC" + R"ABC"', """\
NAME 'y' (1, 0) (1, 1)
EQUAL '=' (1, 2) (1, 3)
STRING 'r"abc"' (1, 4) (1, 10)
PLUS '+' (1, 11) (1, 12)
STRING 'r"ABC"' (1, 13) (1, 19)
PLUS '+' (1, 20) (1, 21)
STRING 'R"ABC"' (1, 22) (1, 28)
PLUS '+' (1, 29) (1, 30)
STRING 'R"ABC"' (1, 31) (1, 37)
""")
    self.check_tokenize("u'abc' + U'abc'", """\
STRING "u'abc'" (1, 0) (1, 6)
PLUS '+' (1, 7) (1, 8)
STRING "U'abc'" (1, 9) (1, 15)
""")
    self.check_tokenize('u"abc" + U"abc"', """\
STRING 'u"abc"' (1, 0) (1, 6)
PLUS '+' (1, 7) (1, 8)
STRING 'U"abc"' (1, 9) (1, 15)
""")
    self.check_tokenize("b'abc' + B'abc'", """\
STRING "b'abc'" (1, 0) (1, 6)
PLUS '+' (1, 7) (1, 8)
STRING "B'abc'" (1, 9) (1, 15)
""")
    self.check_tokenize('b"abc" + B"abc"', """\
STRING 'b"abc"' (1, 0) (1, 6)
PLUS '+' (1, 7) (1, 8)
STRING 'B"abc"' (1, 9) (1, 15)
""")
    self.check_tokenize("br'abc' + bR'abc' + Br'abc' + BR'abc'", """\
STRING "br'abc'" (1, 0) (1, 7)
PLUS '+' (1, 8) (1, 9)
STRING "bR'abc'" (1, 10) (1, 17)
PLUS '+' (1, 18) (1, 19)
STRING "Br'abc'" (1, 20) (1, 27)
PLUS '+' (1, 28) (1, 29)
STRING "BR'abc'" (1, 30) (1, 37)
""")
    self.check_tokenize('br"abc" + bR"abc" + Br"abc" + BR"abc"', """\
STRING 'br"abc"' (1, 0) (1, 7)
PLUS '+' (1, 8) (1, 9)
STRING 'bR"abc"' (1, 10) (1, 17)
PLUS '+' (1, 18) (1, 19)
STRING 'Br"abc"' (1, 20) (1, 27)
PLUS '+' (1, 28) (1, 29)
STRING 'BR"abc"' (1, 30) (1, 37)
""")
    self.check_tokenize("rb'abc' + rB'abc' + Rb'abc' + RB'abc'", """\
STRING "rb'abc'" (1, 0) (1, 7)
PLUS '+' (1, 8) (1, 9)
STRING "rB'abc'" (1, 10) (1, 17)
PLUS '+' (1, 18) (1, 19)
STRING "Rb'abc'" (1, 20) (1, 27)
PLUS '+' (1, 28) (1, 29)
STRING "RB'abc'" (1, 30) (1, 37)
""")
    self.check_tokenize('rb"abc" + rB"abc" + Rb"abc" + RB"abc"', """\
STRING 'rb"abc"' (1, 0) (1, 7)
PLUS '+' (1, 8) (1, 9)
STRING 'rB"abc"' (1, 10) (1, 17)
PLUS '+' (1, 18) (1, 19)
STRING 'Rb"abc"' (1, 20) (1, 27)
PLUS '+' (1, 28) (1, 29)
STRING 'RB"abc"' (1, 30) (1, 37)
""")
    # Backslash-newline continuations inside a single-quoted string.
    self.check_tokenize('"a\\\nde\\\nfg"', """\
STRING '"a\\\\\\nde\\\\\\nfg"\' (1, 0) (3, 3)
""")
    self.check_tokenize('u"a\\\nde"', """\
STRING 'u"a\\\\\\nde"\' (1, 0) (2, 3)
""")
    self.check_tokenize('rb"a\\\nd"', """\
STRING 'rb"a\\\\\\nd"\' (1, 0) (2, 2)
""")
    self.check_tokenize(r'"""a\
b"""', """\
STRING '\"\""a\\\\\\nb\"\""' (1, 0) (2, 4)
""")
    self.check_tokenize(r'u"""a\
b"""', """\
STRING 'u\"\""a\\\\\\nb\"\""' (1, 0) (2, 4)
""")
    self.check_tokenize(r'rb"""a\
b\
c"""', """\
STRING 'rb"\""a\\\\\\nb\\\\\\nc"\""' (1, 0) (3, 4)
""")
    # f-strings are a single STRING token for the C tokenizer here.
    self.check_tokenize('f"abc"', """\
STRING 'f"abc"' (1, 0) (1, 6)
""")
    self.check_tokenize('fR"a{b}c"', """\
STRING 'fR"a{b}c"' (1, 0) (1, 9)
""")
    self.check_tokenize('f"""abc"""', """\
STRING 'f\"\"\"abc\"\"\"' (1, 0) (1, 10)
""")
    self.check_tokenize(r'f"abc\
def"', """\
STRING 'f"abc\\\\\\ndef"' (1, 0) (2, 4)
""")
    self.check_tokenize(r'Rf"abc\
def"', """\
STRING 'Rf"abc\\\\\\ndef"' (1, 0) (2, 4)
""")
def test_function(self):
    """C tokenizer: def headers with defaults, *args/**kwargs, annotations."""
    self.check_tokenize('def d22(a, b, c=2, d=2, *k): pass', """\
NAME 'def' (1, 0) (1, 3)
NAME 'd22' (1, 4) (1, 7)
LPAR '(' (1, 7) (1, 8)
NAME 'a' (1, 8) (1, 9)
COMMA ',' (1, 9) (1, 10)
NAME 'b' (1, 11) (1, 12)
COMMA ',' (1, 12) (1, 13)
NAME 'c' (1, 14) (1, 15)
EQUAL '=' (1, 15) (1, 16)
NUMBER '2' (1, 16) (1, 17)
COMMA ',' (1, 17) (1, 18)
NAME 'd' (1, 19) (1, 20)
EQUAL '=' (1, 20) (1, 21)
NUMBER '2' (1, 21) (1, 22)
COMMA ',' (1, 22) (1, 23)
STAR '*' (1, 24) (1, 25)
NAME 'k' (1, 25) (1, 26)
RPAR ')' (1, 26) (1, 27)
COLON ':' (1, 27) (1, 28)
NAME 'pass' (1, 29) (1, 33)
""")
    self.check_tokenize('def d01v_(a=1, *k, **w): pass', """\
NAME 'def' (1, 0) (1, 3)
NAME 'd01v_' (1, 4) (1, 9)
LPAR '(' (1, 9) (1, 10)
NAME 'a' (1, 10) (1, 11)
EQUAL '=' (1, 11) (1, 12)
NUMBER '1' (1, 12) (1, 13)
COMMA ',' (1, 13) (1, 14)
STAR '*' (1, 15) (1, 16)
NAME 'k' (1, 16) (1, 17)
COMMA ',' (1, 17) (1, 18)
DOUBLESTAR '**' (1, 19) (1, 21)
NAME 'w' (1, 21) (1, 22)
RPAR ')' (1, 22) (1, 23)
COLON ':' (1, 23) (1, 24)
NAME 'pass' (1, 25) (1, 29)
""")
    self.check_tokenize('def d23(a: str, b: int=3) -> int: pass', """\
NAME 'def' (1, 0) (1, 3)
NAME 'd23' (1, 4) (1, 7)
LPAR '(' (1, 7) (1, 8)
NAME 'a' (1, 8) (1, 9)
COLON ':' (1, 9) (1, 10)
NAME 'str' (1, 11) (1, 14)
COMMA ',' (1, 14) (1, 15)
NAME 'b' (1, 16) (1, 17)
COLON ':' (1, 17) (1, 18)
NAME 'int' (1, 19) (1, 22)
EQUAL '=' (1, 22) (1, 23)
NUMBER '3' (1, 23) (1, 24)
RPAR ')' (1, 24) (1, 25)
RARROW '->' (1, 26) (1, 28)
NAME 'int' (1, 29) (1, 32)
COLON ':' (1, 32) (1, 33)
NAME 'pass' (1, 34) (1, 38)
""")
def test_comparison(self):
    """C tokenizer: comparison and boolean/membership/identity operators."""
    self.check_tokenize("if 1 < 1 > 1 == 1 >= 5 <= 0x15 <= 0x12 != "
                        "1 and 5 in 1 not in 1 is 1 or 5 is not 1: pass", """\
NAME 'if' (1, 0) (1, 2)
NUMBER '1' (1, 3) (1, 4)
LESS '<' (1, 5) (1, 6)
NUMBER '1' (1, 7) (1, 8)
GREATER '>' (1, 9) (1, 10)
NUMBER '1' (1, 11) (1, 12)
EQEQUAL '==' (1, 13) (1, 15)
NUMBER '1' (1, 16) (1, 17)
GREATEREQUAL '>=' (1, 18) (1, 20)
NUMBER '5' (1, 21) (1, 22)
LESSEQUAL '<=' (1, 23) (1, 25)
NUMBER '0x15' (1, 26) (1, 30)
LESSEQUAL '<=' (1, 31) (1, 33)
NUMBER '0x12' (1, 34) (1, 38)
NOTEQUAL '!=' (1, 39) (1, 41)
NUMBER '1' (1, 42) (1, 43)
NAME 'and' (1, 44) (1, 47)
NUMBER '5' (1, 48) (1, 49)
NAME 'in' (1, 50) (1, 52)
NUMBER '1' (1, 53) (1, 54)
NAME 'not' (1, 55) (1, 58)
NAME 'in' (1, 59) (1, 61)
NUMBER '1' (1, 62) (1, 63)
NAME 'is' (1, 64) (1, 66)
NUMBER '1' (1, 67) (1, 68)
NAME 'or' (1, 69) (1, 71)
NUMBER '5' (1, 72) (1, 73)
NAME 'is' (1, 74) (1, 76)
NAME 'not' (1, 77) (1, 80)
NUMBER '1' (1, 81) (1, 82)
COLON ':' (1, 82) (1, 83)
NAME 'pass' (1, 84) (1, 88)
""")
def test_additive(self):
    """C tokenizer: + and - operators with names, numbers and subscripts."""
    self.check_tokenize('x = 1 - y + 15 - 1 + 0x124 + z + a[5]', """\
NAME 'x' (1, 0) (1, 1)
EQUAL '=' (1, 2) (1, 3)
NUMBER '1' (1, 4) (1, 5)
MINUS '-' (1, 6) (1, 7)
NAME 'y' (1, 8) (1, 9)
PLUS '+' (1, 10) (1, 11)
NUMBER '15' (1, 12) (1, 14)
MINUS '-' (1, 15) (1, 16)
NUMBER '1' (1, 17) (1, 18)
PLUS '+' (1, 19) (1, 20)
NUMBER '0x124' (1, 21) (1, 26)
PLUS '+' (1, 27) (1, 28)
NAME 'z' (1, 29) (1, 30)
PLUS '+' (1, 31) (1, 32)
NAME 'a' (1, 33) (1, 34)
LSQB '[' (1, 34) (1, 35)
NUMBER '5' (1, 35) (1, 36)
RSQB ']' (1, 36) (1, 37)
""")
def test_multiplicative(self):
    """C tokenizer: *, /, //, % and @ (matrix-multiply) operators."""
    self.check_tokenize('x = 1//1*1/5*12%0x12@42', """\
NAME 'x' (1, 0) (1, 1)
EQUAL '=' (1, 2) (1, 3)
NUMBER '1' (1, 4) (1, 5)
DOUBLESLASH '//' (1, 5) (1, 7)
NUMBER '1' (1, 7) (1, 8)
STAR '*' (1, 8) (1, 9)
NUMBER '1' (1, 9) (1, 10)
SLASH '/' (1, 10) (1, 11)
NUMBER '5' (1, 11) (1, 12)
STAR '*' (1, 12) (1, 13)
NUMBER '12' (1, 13) (1, 15)
PERCENT '%' (1, 15) (1, 16)
NUMBER '0x12' (1, 16) (1, 20)
AT '@' (1, 20) (1, 21)
NUMBER '42' (1, 21) (1, 23)
""")
def test_unary(self):
    """C tokenizer: unary ~ and -, bitwise ops, and chained unary minus."""
    self.check_tokenize('~1 ^ 1 & 1 |1 ^ -1', """\
TILDE '~' (1, 0) (1, 1)
NUMBER '1' (1, 1) (1, 2)
CIRCUMFLEX '^' (1, 3) (1, 4)
NUMBER '1' (1, 5) (1, 6)
AMPER '&' (1, 7) (1, 8)
NUMBER '1' (1, 9) (1, 10)
VBAR '|' (1, 11) (1, 12)
NUMBER '1' (1, 12) (1, 13)
CIRCUMFLEX '^' (1, 14) (1, 15)
MINUS '-' (1, 16) (1, 17)
NUMBER '1' (1, 17) (1, 18)
""")
    self.check_tokenize('-1*1/1+1*1//1 - ---1**1', """\
MINUS '-' (1, 0) (1, 1)
NUMBER '1' (1, 1) (1, 2)
STAR '*' (1, 2) (1, 3)
NUMBER '1' (1, 3) (1, 4)
SLASH '/' (1, 4) (1, 5)
NUMBER '1' (1, 5) (1, 6)
PLUS '+' (1, 6) (1, 7)
NUMBER '1' (1, 7) (1, 8)
STAR '*' (1, 8) (1, 9)
NUMBER '1' (1, 9) (1, 10)
DOUBLESLASH '//' (1, 10) (1, 12)
NUMBER '1' (1, 12) (1, 13)
MINUS '-' (1, 14) (1, 15)
MINUS '-' (1, 16) (1, 17)
MINUS '-' (1, 17) (1, 18)
MINUS '-' (1, 18) (1, 19)
NUMBER '1' (1, 19) (1, 20)
DOUBLESTAR '**' (1, 20) (1, 22)
NUMBER '1' (1, 22) (1, 23)
""")
def test_selector(self):
    """C tokenizer: attribute access, subscripting and calls across lines."""
    self.check_tokenize("import sys, time\nx = sys.modules['time'].time()", """\
NAME 'import' (1, 0) (1, 6)
NAME 'sys' (1, 7) (1, 10)
COMMA ',' (1, 10) (1, 11)
NAME 'time' (1, 12) (1, 16)
NEWLINE '' (1, 16) (1, 16)
NAME 'x' (2, 0) (2, 1)
EQUAL '=' (2, 2) (2, 3)
NAME 'sys' (2, 4) (2, 7)
DOT '.' (2, 7) (2, 8)
NAME 'modules' (2, 8) (2, 15)
LSQB '[' (2, 15) (2, 16)
STRING "'time'" (2, 16) (2, 22)
RSQB ']' (2, 22) (2, 23)
DOT '.' (2, 23) (2, 24)
NAME 'time' (2, 24) (2, 28)
LPAR '(' (2, 28) (2, 29)
RPAR ')' (2, 29) (2, 30)
""")
def test_method(self):
    """C tokenizer: a decorated method definition."""
    self.check_tokenize('@staticmethod\ndef foo(x,y): pass', """\
AT '@' (1, 0) (1, 1)
NAME 'staticmethod' (1, 1) (1, 13)
NEWLINE '' (1, 13) (1, 13)
NAME 'def' (2, 0) (2, 3)
NAME 'foo' (2, 4) (2, 7)
LPAR '(' (2, 7) (2, 8)
NAME 'x' (2, 8) (2, 9)
COMMA ',' (2, 9) (2, 10)
NAME 'y' (2, 10) (2, 11)
RPAR ')' (2, 11) (2, 12)
COLON ':' (2, 12) (2, 13)
NAME 'pass' (2, 14) (2, 18)
""")
def test_tabs(self):
    """C tokenizer: same decorated-def input as test_method.

    NOTE(review): despite the name, the input contains no tabs and is
    identical to test_method above — presumably a copy-paste that was
    meant to use tab indentation; confirm against upstream before changing.
    """
    self.check_tokenize('@staticmethod\ndef foo(x,y): pass', """\
AT '@' (1, 0) (1, 1)
NAME 'staticmethod' (1, 1) (1, 13)
NEWLINE '' (1, 13) (1, 13)
NAME 'def' (2, 0) (2, 3)
NAME 'foo' (2, 4) (2, 7)
LPAR '(' (2, 7) (2, 8)
NAME 'x' (2, 8) (2, 9)
COMMA ',' (2, 9) (2, 10)
NAME 'y' (2, 10) (2, 11)
RPAR ')' (2, 11) (2, 12)
COLON ':' (2, 12) (2, 13)
NAME 'pass' (2, 14) (2, 18)
""")
def test_async(self):
    """C tokenizer: 'async'/'await' produce ASYNC/AWAIT tokens in all
    contexts, including their soft-keyword (identifier-like) uses.

    NOTE(review): indentation inside the triple-quoted sources below was
    reconstructed from the expected token coordinates — confirm against
    upstream.
    """
    self.check_tokenize('async = 1', """\
ASYNC 'async' (1, 0) (1, 5)
EQUAL '=' (1, 6) (1, 7)
NUMBER '1' (1, 8) (1, 9)
""")
    self.check_tokenize('a = (async = 1)', """\
NAME 'a' (1, 0) (1, 1)
EQUAL '=' (1, 2) (1, 3)
LPAR '(' (1, 4) (1, 5)
ASYNC 'async' (1, 5) (1, 10)
EQUAL '=' (1, 11) (1, 12)
NUMBER '1' (1, 13) (1, 14)
RPAR ')' (1, 14) (1, 15)
""")
    self.check_tokenize('async()', """\
ASYNC 'async' (1, 0) (1, 5)
LPAR '(' (1, 5) (1, 6)
RPAR ')' (1, 6) (1, 7)
""")
    self.check_tokenize('class async(Bar):pass', """\
NAME 'class' (1, 0) (1, 5)
ASYNC 'async' (1, 6) (1, 11)
LPAR '(' (1, 11) (1, 12)
NAME 'Bar' (1, 12) (1, 15)
RPAR ')' (1, 15) (1, 16)
COLON ':' (1, 16) (1, 17)
NAME 'pass' (1, 17) (1, 21)
""")
    self.check_tokenize('class async:pass', """\
NAME 'class' (1, 0) (1, 5)
ASYNC 'async' (1, 6) (1, 11)
COLON ':' (1, 11) (1, 12)
NAME 'pass' (1, 12) (1, 16)
""")
    self.check_tokenize('await = 1', """\
AWAIT 'await' (1, 0) (1, 5)
EQUAL '=' (1, 6) (1, 7)
NUMBER '1' (1, 8) (1, 9)
""")
    self.check_tokenize('foo.async', """\
NAME 'foo' (1, 0) (1, 3)
DOT '.' (1, 3) (1, 4)
ASYNC 'async' (1, 4) (1, 9)
""")
    self.check_tokenize('async for a in b: pass', """\
ASYNC 'async' (1, 0) (1, 5)
NAME 'for' (1, 6) (1, 9)
NAME 'a' (1, 10) (1, 11)
NAME 'in' (1, 12) (1, 14)
NAME 'b' (1, 15) (1, 16)
COLON ':' (1, 16) (1, 17)
NAME 'pass' (1, 18) (1, 22)
""")
    self.check_tokenize('async with a as b: pass', """\
ASYNC 'async' (1, 0) (1, 5)
NAME 'with' (1, 6) (1, 10)
NAME 'a' (1, 11) (1, 12)
NAME 'as' (1, 13) (1, 15)
NAME 'b' (1, 16) (1, 17)
COLON ':' (1, 17) (1, 18)
NAME 'pass' (1, 19) (1, 23)
""")
    self.check_tokenize('async.foo', """\
ASYNC 'async' (1, 0) (1, 5)
DOT '.' (1, 5) (1, 6)
NAME 'foo' (1, 6) (1, 9)
""")
    self.check_tokenize('async', """\
ASYNC 'async' (1, 0) (1, 5)
""")
    self.check_tokenize('async\n#comment\nawait', """\
ASYNC 'async' (1, 0) (1, 5)
NEWLINE '' (1, 5) (1, 5)
AWAIT 'await' (3, 0) (3, 5)
""")
    self.check_tokenize('async\n...\nawait', """\
ASYNC 'async' (1, 0) (1, 5)
NEWLINE '' (1, 5) (1, 5)
ELLIPSIS '...' (2, 0) (2, 3)
NEWLINE '' (2, 3) (2, 3)
AWAIT 'await' (3, 0) (3, 5)
""")
    self.check_tokenize('async\nawait', """\
ASYNC 'async' (1, 0) (1, 5)
NEWLINE '' (1, 5) (1, 5)
AWAIT 'await' (2, 0) (2, 5)
""")
    self.check_tokenize('foo.async + 1', """\
NAME 'foo' (1, 0) (1, 3)
DOT '.' (1, 3) (1, 4)
ASYNC 'async' (1, 4) (1, 9)
PLUS '+' (1, 10) (1, 11)
NUMBER '1' (1, 12) (1, 13)
""")
    self.check_tokenize('async def foo(): pass', """\
ASYNC 'async' (1, 0) (1, 5)
NAME 'def' (1, 6) (1, 9)
NAME 'foo' (1, 10) (1, 13)
LPAR '(' (1, 13) (1, 14)
RPAR ')' (1, 14) (1, 15)
COLON ':' (1, 15) (1, 16)
NAME 'pass' (1, 17) (1, 21)
""")
    self.check_tokenize('''\
async def foo():
  def foo(await):
    await = 1
    if 1:
      await
async += 1
''', """\
ASYNC 'async' (1, 0) (1, 5)
NAME 'def' (1, 6) (1, 9)
NAME 'foo' (1, 10) (1, 13)
LPAR '(' (1, 13) (1, 14)
RPAR ')' (1, 14) (1, 15)
COLON ':' (1, 15) (1, 16)
NEWLINE '' (1, 16) (1, 16)
INDENT '' (2, -1) (2, -1)
NAME 'def' (2, 2) (2, 5)
NAME 'foo' (2, 6) (2, 9)
LPAR '(' (2, 9) (2, 10)
AWAIT 'await' (2, 10) (2, 15)
RPAR ')' (2, 15) (2, 16)
COLON ':' (2, 16) (2, 17)
NEWLINE '' (2, 17) (2, 17)
INDENT '' (3, -1) (3, -1)
AWAIT 'await' (3, 4) (3, 9)
EQUAL '=' (3, 10) (3, 11)
NUMBER '1' (3, 12) (3, 13)
NEWLINE '' (3, 13) (3, 13)
DEDENT '' (4, -1) (4, -1)
NAME 'if' (4, 2) (4, 4)
NUMBER '1' (4, 5) (4, 6)
COLON ':' (4, 6) (4, 7)
NEWLINE '' (4, 7) (4, 7)
INDENT '' (5, -1) (5, -1)
AWAIT 'await' (5, 4) (5, 9)
NEWLINE '' (5, 9) (5, 9)
DEDENT '' (6, -1) (6, -1)
DEDENT '' (6, -1) (6, -1)
ASYNC 'async' (6, 0) (6, 5)
PLUSEQUAL '+=' (6, 6) (6, 8)
NUMBER '1' (6, 9) (6, 10)
NEWLINE '' (6, 10) (6, 10)
""")
    self.check_tokenize('async def foo():\n  async for i in 1: pass', """\
ASYNC 'async' (1, 0) (1, 5)
NAME 'def' (1, 6) (1, 9)
NAME 'foo' (1, 10) (1, 13)
LPAR '(' (1, 13) (1, 14)
RPAR ')' (1, 14) (1, 15)
COLON ':' (1, 15) (1, 16)
NEWLINE '' (1, 16) (1, 16)
INDENT '' (2, -1) (2, -1)
ASYNC 'async' (2, 2) (2, 7)
NAME 'for' (2, 8) (2, 11)
NAME 'i' (2, 12) (2, 13)
NAME 'in' (2, 14) (2, 16)
NUMBER '1' (2, 17) (2, 18)
COLON ':' (2, 18) (2, 19)
NAME 'pass' (2, 20) (2, 24)
DEDENT '' (2, -1) (2, -1)
""")
    self.check_tokenize('async def foo(async): await', """\
ASYNC 'async' (1, 0) (1, 5)
NAME 'def' (1, 6) (1, 9)
NAME 'foo' (1, 10) (1, 13)
LPAR '(' (1, 13) (1, 14)
ASYNC 'async' (1, 14) (1, 19)
RPAR ')' (1, 19) (1, 20)
COLON ':' (1, 20) (1, 21)
AWAIT 'await' (1, 22) (1, 27)
""")
    self.check_tokenize('''\
def f():

  def baz(): pass
  async def bar(): pass

  await = 2''', """\
NAME 'def' (1, 0) (1, 3)
NAME 'f' (1, 4) (1, 5)
LPAR '(' (1, 5) (1, 6)
RPAR ')' (1, 6) (1, 7)
COLON ':' (1, 7) (1, 8)
NEWLINE '' (1, 8) (1, 8)
INDENT '' (3, -1) (3, -1)
NAME 'def' (3, 2) (3, 5)
NAME 'baz' (3, 6) (3, 9)
LPAR '(' (3, 9) (3, 10)
RPAR ')' (3, 10) (3, 11)
COLON ':' (3, 11) (3, 12)
NAME 'pass' (3, 13) (3, 17)
NEWLINE '' (3, 17) (3, 17)
ASYNC 'async' (4, 2) (4, 7)
NAME 'def' (4, 8) (4, 11)
NAME 'bar' (4, 12) (4, 15)
LPAR '(' (4, 15) (4, 16)
RPAR ')' (4, 16) (4, 17)
COLON ':' (4, 17) (4, 18)
NAME 'pass' (4, 19) (4, 23)
NEWLINE '' (4, 23) (4, 23)
AWAIT 'await' (6, 2) (6, 7)
EQUAL '=' (6, 8) (6, 9)
NUMBER '2' (6, 10) (6, 11)
DEDENT '' (6, -1) (6, -1)
""")
    self.check_tokenize('''\
async def f():

  def baz(): pass
  async def bar(): pass

  await = 2''', """\
ASYNC 'async' (1, 0) (1, 5)
NAME 'def' (1, 6) (1, 9)
NAME 'f' (1, 10) (1, 11)
LPAR '(' (1, 11) (1, 12)
RPAR ')' (1, 12) (1, 13)
COLON ':' (1, 13) (1, 14)
NEWLINE '' (1, 14) (1, 14)
INDENT '' (3, -1) (3, -1)
NAME 'def' (3, 2) (3, 5)
NAME 'baz' (3, 6) (3, 9)
LPAR '(' (3, 9) (3, 10)
RPAR ')' (3, 10) (3, 11)
COLON ':' (3, 11) (3, 12)
NAME 'pass' (3, 13) (3, 17)
NEWLINE '' (3, 17) (3, 17)
ASYNC 'async' (4, 2) (4, 7)
NAME 'def' (4, 8) (4, 11)
NAME 'bar' (4, 12) (4, 15)
LPAR '(' (4, 15) (4, 16)
RPAR ')' (4, 16) (4, 17)
COLON ':' (4, 17) (4, 18)
NAME 'pass' (4, 19) (4, 23)
NEWLINE '' (4, 23) (4, 23)
AWAIT 'await' (6, 2) (6, 7)
EQUAL '=' (6, 8) (6, 9)
NUMBER '2' (6, 10) (6, 11)
DEDENT '' (6, -1) (6, -1)
""")
def test_unicode(self):
    """C tokenizer: non-ASCII identifiers tokenize as NAME."""
    self.check_tokenize("Örter = u'places'\ngrün = U'green'", """\
NAME 'Örter' (1, 0) (1, 6)
EQUAL '=' (1, 7) (1, 8)
STRING "u'places'" (1, 9) (1, 18)
NEWLINE '' (1, 18) (1, 18)
NAME 'grün' (2, 0) (2, 5)
EQUAL '=' (2, 6) (2, 7)
STRING "U'green'" (2, 8) (2, 16)
""")
  2240. def test_invalid_syntax(self):
  2241. def get_tokens(string):
  2242. return list(_generate_tokens_from_c_tokenizer(string))
  2243. self.assertRaises(SyntaxError, get_tokens, "(1+2]")
  2244. self.assertRaises(SyntaxError, get_tokens, "(1+2}")
  2245. self.assertRaises(SyntaxError, get_tokens, "{1+2]")
  2246. self.assertRaises(SyntaxError, get_tokens, "1_")
  2247. self.assertRaises(SyntaxError, get_tokens, "1.2_")
  2248. self.assertRaises(SyntaxError, get_tokens, "1e2_")
  2249. self.assertRaises(SyntaxError, get_tokens, "1e+")
  2250. self.assertRaises(SyntaxError, get_tokens, "\xa0")
  2251. self.assertRaises(SyntaxError, get_tokens, "€")
  2252. self.assertRaises(SyntaxError, get_tokens, "0b12")
  2253. self.assertRaises(SyntaxError, get_tokens, "0b1_2")
  2254. self.assertRaises(SyntaxError, get_tokens, "0b2")
  2255. self.assertRaises(SyntaxError, get_tokens, "0b1_")
  2256. self.assertRaises(SyntaxError, get_tokens, "0b")
  2257. self.assertRaises(SyntaxError, get_tokens, "0o18")
  2258. self.assertRaises(SyntaxError, get_tokens, "0o1_8")
  2259. self.assertRaises(SyntaxError, get_tokens, "0o8")
  2260. self.assertRaises(SyntaxError, get_tokens, "0o1_")
  2261. self.assertRaises(SyntaxError, get_tokens, "0o")
  2262. self.assertRaises(SyntaxError, get_tokens, "0x1_")
  2263. self.assertRaises(SyntaxError, get_tokens, "0x")
  2264. self.assertRaises(SyntaxError, get_tokens, "1_")
  2265. self.assertRaises(SyntaxError, get_tokens, "012")
  2266. self.assertRaises(SyntaxError, get_tokens, "1.2_")
  2267. self.assertRaises(SyntaxError, get_tokens, "1e2_")
  2268. self.assertRaises(SyntaxError, get_tokens, "1e+")
  2269. self.assertRaises(SyntaxError, get_tokens, "'sdfsdf")
  2270. self.assertRaises(SyntaxError, get_tokens, "'''sdfsdf''")
  2271. self.assertRaises(SyntaxError, get_tokens, "("*1000+"a"+")"*1000)
  2272. self.assertRaises(SyntaxError, get_tokens, "]")
def test_max_indent(self):
    """Sources at the tokenizer's maximum indentation depth.

    Tokenizing succeeds either way here; only compile() rejects the
    over-indented source with IndentationError.
    """
    # Tokenizer's indentation-stack limit (MAXINDENT in the C tokenizer).
    MAXINDENT = 100

    def generate_source(indents):
        # Nested 'if True:' blocks, one per indentation level, ending in 'pass'.
        source = ''.join((' ' * x) + 'if True:\n' for x in range(indents))
        source += ' ' * indents + 'pass\n'
        return source

    valid = generate_source(MAXINDENT - 1)
    tokens = list(_generate_tokens_from_c_tokenizer(valid))
    self.assertEqual(tokens[-1].type, DEDENT)
    compile(valid, "<string>", "exec")

    invalid = generate_source(MAXINDENT)
    tokens = list(_generate_tokens_from_c_tokenizer(invalid))
    # Tokenization stops at the NEWLINE; only compilation reports the error.
    self.assertEqual(tokens[-1].type, NEWLINE)
    self.assertRaises(
        IndentationError, compile, invalid, "<string>", "exec"
    )
def test_continuation_lines_indentation(self):
    """Backslash continuation lines must not affect INDENT/DEDENT tokens.

    Each continued source must yield the same (kind, string) token pairs
    as the equivalent source without continuation lines.

    NOTE(review): whitespace inside the dedent() literals below was
    reconstructed from the expected token coordinates — confirm against
    upstream.
    """
    def get_tokens(string):
        # Positions are dropped so sources with different layouts compare equal.
        return [(kind, string) for (kind, string, *_) in _generate_tokens_from_c_tokenizer(string)]

    code = dedent("""
def fib(n):
    \\
'''Print a Fibonacci series up to n.'''
    \\
a, b = 0, 1
""")

    self.check_tokenize(code, """\
NAME 'def' (2, 0) (2, 3)
NAME 'fib' (2, 4) (2, 7)
LPAR '(' (2, 7) (2, 8)
NAME 'n' (2, 8) (2, 9)
RPAR ')' (2, 9) (2, 10)
COLON ':' (2, 10) (2, 11)
NEWLINE '' (2, 11) (2, 11)
INDENT '' (4, -1) (4, -1)
STRING "'''Print a Fibonacci series up to n.'''" (4, 0) (4, 39)
NEWLINE '' (4, 39) (4, 39)
NAME 'a' (6, 0) (6, 1)
COMMA ',' (6, 1) (6, 2)
NAME 'b' (6, 3) (6, 4)
EQUAL '=' (6, 5) (6, 6)
NUMBER '0' (6, 7) (6, 8)
COMMA ',' (6, 8) (6, 9)
NUMBER '1' (6, 10) (6, 11)
NEWLINE '' (6, 11) (6, 11)
DEDENT '' (6, -1) (6, -1)
""")

    code_no_cont = dedent("""
def fib(n):
    '''Print a Fibonacci series up to n.'''
    a, b = 0, 1
""")

    self.assertEqual(get_tokens(code), get_tokens(code_no_cont))

    code = dedent("""
pass
\\

pass
""")

    self.check_tokenize(code, """\
NAME 'pass' (2, 0) (2, 4)
NEWLINE '' (2, 4) (2, 4)
NAME 'pass' (5, 0) (5, 4)
NEWLINE '' (5, 4) (5, 4)
""")

    code_no_cont = dedent("""
pass
pass
""")

    self.assertEqual(get_tokens(code), get_tokens(code_no_cont))

    code = dedent("""
if x:
    y = 1
    \\
    \\
    \\
    \\
    foo = 1
""")

    self.check_tokenize(code, """\
NAME 'if' (2, 0) (2, 2)
NAME 'x' (2, 3) (2, 4)
COLON ':' (2, 4) (2, 5)
NEWLINE '' (2, 5) (2, 5)
INDENT '' (3, -1) (3, -1)
NAME 'y' (3, 4) (3, 5)
EQUAL '=' (3, 6) (3, 7)
NUMBER '1' (3, 8) (3, 9)
NEWLINE '' (3, 9) (3, 9)
NAME 'foo' (8, 4) (8, 7)
EQUAL '=' (8, 8) (8, 9)
NUMBER '1' (8, 10) (8, 11)
NEWLINE '' (8, 11) (8, 11)
DEDENT '' (8, -1) (8, -1)
""")

    code_no_cont = dedent("""
if x:
    y = 1
    foo = 1
""")

    self.assertEqual(get_tokens(code), get_tokens(code_no_cont))
class CTokenizerBufferTests(unittest.TestCase):
    """Regression tests for the C tokenizer's internal line buffer."""

    def test_newline_at_the_end_of_buffer(self):
        # See issue 99581: Make sure that if we need to add a new line at the
        # end of the buffer, we have enough space in the buffer, specially when
        # the current line is as long as the buffer space available.
        test_script = f"""\
#coding: latin-1
#{"a"*10000}
#{"a"*10002}"""
        # Run the script in a subprocess so a tokenizer crash does not take
        # down the test process.
        with os_helper.temp_dir() as temp_dir:
            file_name = make_script(temp_dir, 'foo', test_script)
            run_test_script(file_name)
if __name__ == "__main__":
    # Allow running this test module directly.
    unittest.main()