dis.py 28 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775
  1. """Disassembler of Python byte code into mnemonics."""
  2. import sys
  3. import types
  4. import collections
  5. import io
  6. from opcode import *
  7. from opcode import (
  8. __all__ as _opcodes_all,
  9. _cache_format,
  10. _inline_cache_entries,
  11. _nb_ops,
  12. _specializations,
  13. _specialized_instructions,
  14. )
# Public API: the disassembler entry points defined in this module plus
# every name the opcode module exports through its own __all__.
__all__ = ["code_info", "dis", "disassemble", "distb", "disco",
           "findlinestarts", "findlabels", "show_code",
           "get_instructions", "Instruction", "Bytecode"] + _opcodes_all
del _opcodes_all

# Types of the members dis() disassembles when it is handed a class or
# module and walks its __dict__.
_have_code = (types.MethodType, types.FunctionType, types.CodeType,
              classmethod, staticmethod, type)

FORMAT_VALUE = opmap['FORMAT_VALUE']
# Indexed by the low two bits of a FORMAT_VALUE oparg; each entry is a
# (converter, display name) pair.
FORMAT_VALUE_CONVERTERS = (
    (None, ''),
    (str, 'str'),
    (repr, 'repr'),
    (ascii, 'ascii'),
)
MAKE_FUNCTION = opmap['MAKE_FUNCTION']
# Bit i of a MAKE_FUNCTION oparg selects entry i of this tuple.
MAKE_FUNCTION_FLAGS = ('defaults', 'kwdefaults', 'annotations', 'closure')

# Opcode numbers that the disassembler compares against directly.
LOAD_CONST = opmap['LOAD_CONST']
LOAD_GLOBAL = opmap['LOAD_GLOBAL']
BINARY_OP = opmap['BINARY_OP']
JUMP_BACKWARD = opmap['JUMP_BACKWARD']
CACHE = opmap["CACHE"]

# Private copies of the opcode tables, extended below with the specialized
# instructions so that they can be named in disassembly output.
_all_opname = list(opname)
_all_opmap = dict(opmap)
# Slots whose placeholder name starts with "<" are treated as free.
_empty_slot = [slot for slot, name in enumerate(_all_opname) if name.startswith("<")]
for spec_op, specialized in zip(_empty_slot, _specialized_instructions):
    # fill opname and opmap
    _all_opname[spec_op] = specialized
    _all_opmap[specialized] = spec_op

# Maps the name of each specialized instruction to the name of the base
# instruction whose family it belongs to.
deoptmap = {
    specialized: base for base, family in _specializations.items() for specialized in family
}
  45. def _try_compile(source, name):
  46. """Attempts to compile the given source, first as an expression and
  47. then as a statement if the first approach fails.
  48. Utility function to accept strings in functions that otherwise
  49. expect code objects
  50. """
  51. try:
  52. c = compile(source, name, 'eval')
  53. except SyntaxError:
  54. c = compile(source, name, 'exec')
  55. return c
  56. def dis(x=None, *, file=None, depth=None, show_caches=False, adaptive=False):
  57. """Disassemble classes, methods, functions, and other compiled objects.
  58. With no argument, disassemble the last traceback.
  59. Compiled objects currently include generator objects, async generator
  60. objects, and coroutine objects, all of which store their code object
  61. in a special attribute.
  62. """
  63. if x is None:
  64. distb(file=file, show_caches=show_caches, adaptive=adaptive)
  65. return
  66. # Extract functions from methods.
  67. if hasattr(x, '__func__'):
  68. x = x.__func__
  69. # Extract compiled code objects from...
  70. if hasattr(x, '__code__'): # ...a function, or
  71. x = x.__code__
  72. elif hasattr(x, 'gi_code'): #...a generator object, or
  73. x = x.gi_code
  74. elif hasattr(x, 'ag_code'): #...an asynchronous generator object, or
  75. x = x.ag_code
  76. elif hasattr(x, 'cr_code'): #...a coroutine.
  77. x = x.cr_code
  78. # Perform the disassembly.
  79. if hasattr(x, '__dict__'): # Class or module
  80. items = sorted(x.__dict__.items())
  81. for name, x1 in items:
  82. if isinstance(x1, _have_code):
  83. print("Disassembly of %s:" % name, file=file)
  84. try:
  85. dis(x1, file=file, depth=depth, show_caches=show_caches, adaptive=adaptive)
  86. except TypeError as msg:
  87. print("Sorry:", msg, file=file)
  88. print(file=file)
  89. elif hasattr(x, 'co_code'): # Code object
  90. _disassemble_recursive(x, file=file, depth=depth, show_caches=show_caches, adaptive=adaptive)
  91. elif isinstance(x, (bytes, bytearray)): # Raw bytecode
  92. _disassemble_bytes(x, file=file, show_caches=show_caches)
  93. elif isinstance(x, str): # Source code
  94. _disassemble_str(x, file=file, depth=depth, show_caches=show_caches, adaptive=adaptive)
  95. else:
  96. raise TypeError("don't know how to disassemble %s objects" %
  97. type(x).__name__)
  98. def distb(tb=None, *, file=None, show_caches=False, adaptive=False):
  99. """Disassemble a traceback (default: last traceback)."""
  100. if tb is None:
  101. try:
  102. tb = sys.last_traceback
  103. except AttributeError:
  104. raise RuntimeError("no last traceback to disassemble") from None
  105. while tb.tb_next: tb = tb.tb_next
  106. disassemble(tb.tb_frame.f_code, tb.tb_lasti, file=file, show_caches=show_caches, adaptive=adaptive)
  107. # The inspect module interrogates this dictionary to build its
  108. # list of CO_* constants. It is also used by pretty_flags to
  109. # turn the co_flags field into a human readable list.
  110. COMPILER_FLAG_NAMES = {
  111. 1: "OPTIMIZED",
  112. 2: "NEWLOCALS",
  113. 4: "VARARGS",
  114. 8: "VARKEYWORDS",
  115. 16: "NESTED",
  116. 32: "GENERATOR",
  117. 64: "NOFREE",
  118. 128: "COROUTINE",
  119. 256: "ITERABLE_COROUTINE",
  120. 512: "ASYNC_GENERATOR",
  121. }
  122. def pretty_flags(flags):
  123. """Return pretty representation of code flags."""
  124. names = []
  125. for i in range(32):
  126. flag = 1<<i
  127. if flags & flag:
  128. names.append(COMPILER_FLAG_NAMES.get(flag, hex(flag)))
  129. flags ^= flag
  130. if not flags:
  131. break
  132. else:
  133. names.append(hex(flags))
  134. return ", ".join(names)
  135. class _Unknown:
  136. def __repr__(self):
  137. return "<unknown>"
  138. # Sentinel to represent values that cannot be calculated
  139. UNKNOWN = _Unknown()
  140. def _get_code_object(x):
  141. """Helper to handle methods, compiled or raw code objects, and strings."""
  142. # Extract functions from methods.
  143. if hasattr(x, '__func__'):
  144. x = x.__func__
  145. # Extract compiled code objects from...
  146. if hasattr(x, '__code__'): # ...a function, or
  147. x = x.__code__
  148. elif hasattr(x, 'gi_code'): #...a generator object, or
  149. x = x.gi_code
  150. elif hasattr(x, 'ag_code'): #...an asynchronous generator object, or
  151. x = x.ag_code
  152. elif hasattr(x, 'cr_code'): #...a coroutine.
  153. x = x.cr_code
  154. # Handle source code.
  155. if isinstance(x, str):
  156. x = _try_compile(x, "<disassembly>")
  157. # By now, if we don't have a code object, we can't disassemble x.
  158. if hasattr(x, 'co_code'):
  159. return x
  160. raise TypeError("don't know how to disassemble %s objects" %
  161. type(x).__name__)
  162. def _deoptop(op):
  163. name = _all_opname[op]
  164. return _all_opmap[deoptmap[name]] if name in deoptmap else op
  165. def _get_code_array(co, adaptive):
  166. return co._co_code_adaptive if adaptive else co.co_code
  167. def code_info(x):
  168. """Formatted details of methods, functions, or code."""
  169. return _format_code_info(_get_code_object(x))
  170. def _format_code_info(co):
  171. lines = []
  172. lines.append("Name: %s" % co.co_name)
  173. lines.append("Filename: %s" % co.co_filename)
  174. lines.append("Argument count: %s" % co.co_argcount)
  175. lines.append("Positional-only arguments: %s" % co.co_posonlyargcount)
  176. lines.append("Kw-only arguments: %s" % co.co_kwonlyargcount)
  177. lines.append("Number of locals: %s" % co.co_nlocals)
  178. lines.append("Stack size: %s" % co.co_stacksize)
  179. lines.append("Flags: %s" % pretty_flags(co.co_flags))
  180. if co.co_consts:
  181. lines.append("Constants:")
  182. for i_c in enumerate(co.co_consts):
  183. lines.append("%4d: %r" % i_c)
  184. if co.co_names:
  185. lines.append("Names:")
  186. for i_n in enumerate(co.co_names):
  187. lines.append("%4d: %s" % i_n)
  188. if co.co_varnames:
  189. lines.append("Variable names:")
  190. for i_n in enumerate(co.co_varnames):
  191. lines.append("%4d: %s" % i_n)
  192. if co.co_freevars:
  193. lines.append("Free variables:")
  194. for i_n in enumerate(co.co_freevars):
  195. lines.append("%4d: %s" % i_n)
  196. if co.co_cellvars:
  197. lines.append("Cell variables:")
  198. for i_n in enumerate(co.co_cellvars):
  199. lines.append("%4d: %s" % i_n)
  200. return "\n".join(lines)
  201. def show_code(co, *, file=None):
  202. """Print details of methods, functions, or code to *file*.
  203. If *file* is not provided, the output is printed on stdout.
  204. """
  205. print(code_info(co), file=file)
  206. Positions = collections.namedtuple(
  207. 'Positions',
  208. [
  209. 'lineno',
  210. 'end_lineno',
  211. 'col_offset',
  212. 'end_col_offset',
  213. ],
  214. defaults=[None] * 4
  215. )
  216. _Instruction = collections.namedtuple(
  217. "_Instruction",
  218. [
  219. 'opname',
  220. 'opcode',
  221. 'arg',
  222. 'argval',
  223. 'argrepr',
  224. 'offset',
  225. 'starts_line',
  226. 'is_jump_target',
  227. 'positions'
  228. ],
  229. defaults=[None]
  230. )
  231. _Instruction.opname.__doc__ = "Human readable name for operation"
  232. _Instruction.opcode.__doc__ = "Numeric code for operation"
  233. _Instruction.arg.__doc__ = "Numeric argument to operation (if any), otherwise None"
  234. _Instruction.argval.__doc__ = "Resolved arg value (if known), otherwise same as arg"
  235. _Instruction.argrepr.__doc__ = "Human readable description of operation argument"
  236. _Instruction.offset.__doc__ = "Start index of operation within bytecode sequence"
  237. _Instruction.starts_line.__doc__ = "Line started by this opcode (if any), otherwise None"
  238. _Instruction.is_jump_target.__doc__ = "True if other code jumps to here, otherwise False"
  239. _Instruction.positions.__doc__ = "dis.Positions object holding the span of source code covered by this instruction"
  240. _ExceptionTableEntry = collections.namedtuple("_ExceptionTableEntry",
  241. "start end target depth lasti")
  242. _OPNAME_WIDTH = 20
  243. _OPARG_WIDTH = 5
  244. class Instruction(_Instruction):
  245. """Details for a bytecode operation
  246. Defined fields:
  247. opname - human readable name for operation
  248. opcode - numeric code for operation
  249. arg - numeric argument to operation (if any), otherwise None
  250. argval - resolved arg value (if known), otherwise same as arg
  251. argrepr - human readable description of operation argument
  252. offset - start index of operation within bytecode sequence
  253. starts_line - line started by this opcode (if any), otherwise None
  254. is_jump_target - True if other code jumps to here, otherwise False
  255. positions - Optional dis.Positions object holding the span of source code
  256. covered by this instruction
  257. """
  258. def _disassemble(self, lineno_width=3, mark_as_current=False, offset_width=4):
  259. """Format instruction details for inclusion in disassembly output
  260. *lineno_width* sets the width of the line number field (0 omits it)
  261. *mark_as_current* inserts a '-->' marker arrow as part of the line
  262. *offset_width* sets the width of the instruction offset field
  263. """
  264. fields = []
  265. # Column: Source code line number
  266. if lineno_width:
  267. if self.starts_line is not None:
  268. lineno_fmt = "%%%dd" % lineno_width
  269. fields.append(lineno_fmt % self.starts_line)
  270. else:
  271. fields.append(' ' * lineno_width)
  272. # Column: Current instruction indicator
  273. if mark_as_current:
  274. fields.append('-->')
  275. else:
  276. fields.append(' ')
  277. # Column: Jump target marker
  278. if self.is_jump_target:
  279. fields.append('>>')
  280. else:
  281. fields.append(' ')
  282. # Column: Instruction offset from start of code sequence
  283. fields.append(repr(self.offset).rjust(offset_width))
  284. # Column: Opcode name
  285. fields.append(self.opname.ljust(_OPNAME_WIDTH))
  286. # Column: Opcode argument
  287. if self.arg is not None:
  288. fields.append(repr(self.arg).rjust(_OPARG_WIDTH))
  289. # Column: Opcode argument details
  290. if self.argrepr:
  291. fields.append('(' + self.argrepr + ')')
  292. return ' '.join(fields).rstrip()
  293. def get_instructions(x, *, first_line=None, show_caches=False, adaptive=False):
  294. """Iterator for the opcodes in methods, functions or code
  295. Generates a series of Instruction named tuples giving the details of
  296. each operations in the supplied code.
  297. If *first_line* is not None, it indicates the line number that should
  298. be reported for the first source line in the disassembled code.
  299. Otherwise, the source line information (if any) is taken directly from
  300. the disassembled code object.
  301. """
  302. co = _get_code_object(x)
  303. linestarts = dict(findlinestarts(co))
  304. if first_line is not None:
  305. line_offset = first_line - co.co_firstlineno
  306. else:
  307. line_offset = 0
  308. return _get_instructions_bytes(_get_code_array(co, adaptive),
  309. co._varname_from_oparg,
  310. co.co_names, co.co_consts,
  311. linestarts, line_offset,
  312. co_positions=co.co_positions(),
  313. show_caches=show_caches)
  314. def _get_const_value(op, arg, co_consts):
  315. """Helper to get the value of the const in a hasconst op.
  316. Returns the dereferenced constant if this is possible.
  317. Otherwise (if it is a LOAD_CONST and co_consts is not
  318. provided) returns the dis.UNKNOWN sentinel.
  319. """
  320. assert op in hasconst
  321. argval = UNKNOWN
  322. if op == LOAD_CONST:
  323. if co_consts is not None:
  324. argval = co_consts[arg]
  325. return argval
  326. def _get_const_info(op, arg, co_consts):
  327. """Helper to get optional details about const references
  328. Returns the dereferenced constant and its repr if the value
  329. can be calculated.
  330. Otherwise returns the sentinel value dis.UNKNOWN for the value
  331. and an empty string for its repr.
  332. """
  333. argval = _get_const_value(op, arg, co_consts)
  334. argrepr = repr(argval) if argval is not UNKNOWN else ''
  335. return argval, argrepr
  336. def _get_name_info(name_index, get_name, **extrainfo):
  337. """Helper to get optional details about named references
  338. Returns the dereferenced name as both value and repr if the name
  339. list is defined.
  340. Otherwise returns the sentinel value dis.UNKNOWN for the value
  341. and an empty string for its repr.
  342. """
  343. if get_name is not None:
  344. argval = get_name(name_index, **extrainfo)
  345. return argval, argval
  346. else:
  347. return UNKNOWN, ''
  348. def _parse_varint(iterator):
  349. b = next(iterator)
  350. val = b & 63
  351. while b&64:
  352. val <<= 6
  353. b = next(iterator)
  354. val |= b&63
  355. return val
  356. def _parse_exception_table(code):
  357. iterator = iter(code.co_exceptiontable)
  358. entries = []
  359. try:
  360. while True:
  361. start = _parse_varint(iterator)*2
  362. length = _parse_varint(iterator)*2
  363. end = start + length
  364. target = _parse_varint(iterator)*2
  365. dl = _parse_varint(iterator)
  366. depth = dl >> 1
  367. lasti = bool(dl&1)
  368. entries.append(_ExceptionTableEntry(start, end, target, depth, lasti))
  369. except StopIteration:
  370. return entries
  371. def _is_backward_jump(op):
  372. return 'JUMP_BACKWARD' in opname[op]
  373. def _get_instructions_bytes(code, varname_from_oparg=None,
  374. names=None, co_consts=None,
  375. linestarts=None, line_offset=0,
  376. exception_entries=(), co_positions=None,
  377. show_caches=False):
  378. """Iterate over the instructions in a bytecode string.
  379. Generates a sequence of Instruction namedtuples giving the details of each
  380. opcode. Additional information about the code's runtime environment
  381. (e.g. variable names, co_consts) can be specified using optional
  382. arguments.
  383. """
  384. co_positions = co_positions or iter(())
  385. get_name = None if names is None else names.__getitem__
  386. labels = set(findlabels(code))
  387. for start, end, target, _, _ in exception_entries:
  388. for i in range(start, end):
  389. labels.add(target)
  390. starts_line = None
  391. for offset, op, arg in _unpack_opargs(code):
  392. if linestarts is not None:
  393. starts_line = linestarts.get(offset, None)
  394. if starts_line is not None:
  395. starts_line += line_offset
  396. is_jump_target = offset in labels
  397. argval = None
  398. argrepr = ''
  399. positions = Positions(*next(co_positions, ()))
  400. deop = _deoptop(op)
  401. if arg is not None:
  402. # Set argval to the dereferenced value of the argument when
  403. # available, and argrepr to the string representation of argval.
  404. # _disassemble_bytes needs the string repr of the
  405. # raw name index for LOAD_GLOBAL, LOAD_CONST, etc.
  406. argval = arg
  407. if deop in hasconst:
  408. argval, argrepr = _get_const_info(deop, arg, co_consts)
  409. elif deop in hasname:
  410. if deop == LOAD_GLOBAL:
  411. argval, argrepr = _get_name_info(arg//2, get_name)
  412. if (arg & 1) and argrepr:
  413. argrepr = "NULL + " + argrepr
  414. else:
  415. argval, argrepr = _get_name_info(arg, get_name)
  416. elif deop in hasjabs:
  417. argval = arg*2
  418. argrepr = "to " + repr(argval)
  419. elif deop in hasjrel:
  420. signed_arg = -arg if _is_backward_jump(deop) else arg
  421. argval = offset + 2 + signed_arg*2
  422. argrepr = "to " + repr(argval)
  423. elif deop in haslocal or deop in hasfree:
  424. argval, argrepr = _get_name_info(arg, varname_from_oparg)
  425. elif deop in hascompare:
  426. argval = cmp_op[arg]
  427. argrepr = argval
  428. elif deop == FORMAT_VALUE:
  429. argval, argrepr = FORMAT_VALUE_CONVERTERS[arg & 0x3]
  430. argval = (argval, bool(arg & 0x4))
  431. if argval[1]:
  432. if argrepr:
  433. argrepr += ', '
  434. argrepr += 'with format'
  435. elif deop == MAKE_FUNCTION:
  436. argrepr = ', '.join(s for i, s in enumerate(MAKE_FUNCTION_FLAGS)
  437. if arg & (1<<i))
  438. elif deop == BINARY_OP:
  439. _, argrepr = _nb_ops[arg]
  440. yield Instruction(_all_opname[op], op,
  441. arg, argval, argrepr,
  442. offset, starts_line, is_jump_target, positions)
  443. caches = _inline_cache_entries[deop]
  444. if not caches:
  445. continue
  446. if not show_caches:
  447. # We still need to advance the co_positions iterator:
  448. for _ in range(caches):
  449. next(co_positions, ())
  450. continue
  451. for name, size in _cache_format[opname[deop]].items():
  452. for i in range(size):
  453. offset += 2
  454. # Only show the fancy argrepr for a CACHE instruction when it's
  455. # the first entry for a particular cache value and the
  456. # instruction using it is actually quickened:
  457. if i == 0 and op != deop:
  458. data = code[offset: offset + 2 * size]
  459. argrepr = f"{name}: {int.from_bytes(data, sys.byteorder)}"
  460. else:
  461. argrepr = ""
  462. yield Instruction(
  463. "CACHE", CACHE, 0, None, argrepr, offset, None, False,
  464. Positions(*next(co_positions, ()))
  465. )
  466. def disassemble(co, lasti=-1, *, file=None, show_caches=False, adaptive=False):
  467. """Disassemble a code object."""
  468. linestarts = dict(findlinestarts(co))
  469. exception_entries = _parse_exception_table(co)
  470. _disassemble_bytes(_get_code_array(co, adaptive),
  471. lasti, co._varname_from_oparg,
  472. co.co_names, co.co_consts, linestarts, file=file,
  473. exception_entries=exception_entries,
  474. co_positions=co.co_positions(), show_caches=show_caches)
  475. def _disassemble_recursive(co, *, file=None, depth=None, show_caches=False, adaptive=False):
  476. disassemble(co, file=file, show_caches=show_caches, adaptive=adaptive)
  477. if depth is None or depth > 0:
  478. if depth is not None:
  479. depth = depth - 1
  480. for x in co.co_consts:
  481. if hasattr(x, 'co_code'):
  482. print(file=file)
  483. print("Disassembly of %r:" % (x,), file=file)
  484. _disassemble_recursive(
  485. x, file=file, depth=depth, show_caches=show_caches, adaptive=adaptive
  486. )
  487. def _disassemble_bytes(code, lasti=-1, varname_from_oparg=None,
  488. names=None, co_consts=None, linestarts=None,
  489. *, file=None, line_offset=0, exception_entries=(),
  490. co_positions=None, show_caches=False):
  491. # Omit the line number column entirely if we have no line number info
  492. show_lineno = bool(linestarts)
  493. if show_lineno:
  494. maxlineno = max(linestarts.values()) + line_offset
  495. if maxlineno >= 1000:
  496. lineno_width = len(str(maxlineno))
  497. else:
  498. lineno_width = 3
  499. else:
  500. lineno_width = 0
  501. maxoffset = len(code) - 2
  502. if maxoffset >= 10000:
  503. offset_width = len(str(maxoffset))
  504. else:
  505. offset_width = 4
  506. for instr in _get_instructions_bytes(code, varname_from_oparg, names,
  507. co_consts, linestarts,
  508. line_offset=line_offset,
  509. exception_entries=exception_entries,
  510. co_positions=co_positions,
  511. show_caches=show_caches):
  512. new_source_line = (show_lineno and
  513. instr.starts_line is not None and
  514. instr.offset > 0)
  515. if new_source_line:
  516. print(file=file)
  517. is_current_instr = instr.offset == lasti
  518. print(instr._disassemble(lineno_width, is_current_instr, offset_width),
  519. file=file)
  520. if exception_entries:
  521. print("ExceptionTable:", file=file)
  522. for entry in exception_entries:
  523. lasti = " lasti" if entry.lasti else ""
  524. end = entry.end-2
  525. print(f" {entry.start} to {end} -> {entry.target} [{entry.depth}]{lasti}", file=file)
  526. def _disassemble_str(source, **kwargs):
  527. """Compile the source string, then disassemble the code object."""
  528. _disassemble_recursive(_try_compile(source, '<dis>'), **kwargs)
disco = disassemble  # XXX For backwards compatibility: alias of disassemble()
  530. # Rely on C `int` being 32 bits for oparg
  531. _INT_BITS = 32
  532. # Value for c int when it overflows
  533. _INT_OVERFLOW = 2 ** (_INT_BITS - 1)
  534. def _unpack_opargs(code):
  535. extended_arg = 0
  536. caches = 0
  537. for i in range(0, len(code), 2):
  538. # Skip inline CACHE entries:
  539. if caches:
  540. caches -= 1
  541. continue
  542. op = code[i]
  543. deop = _deoptop(op)
  544. caches = _inline_cache_entries[deop]
  545. if deop >= HAVE_ARGUMENT:
  546. arg = code[i+1] | extended_arg
  547. extended_arg = (arg << 8) if deop == EXTENDED_ARG else 0
  548. # The oparg is stored as a signed integer
  549. # If the value exceeds its upper limit, it will overflow and wrap
  550. # to a negative integer
  551. if extended_arg >= _INT_OVERFLOW:
  552. extended_arg -= 2 * _INT_OVERFLOW
  553. else:
  554. arg = None
  555. extended_arg = 0
  556. yield (i, op, arg)
  557. def findlabels(code):
  558. """Detect all offsets in a byte code which are jump targets.
  559. Return the list of offsets.
  560. """
  561. labels = []
  562. for offset, op, arg in _unpack_opargs(code):
  563. if arg is not None:
  564. if op in hasjrel:
  565. if _is_backward_jump(op):
  566. arg = -arg
  567. label = offset + 2 + arg*2
  568. elif op in hasjabs:
  569. label = arg*2
  570. else:
  571. continue
  572. if label not in labels:
  573. labels.append(label)
  574. return labels
  575. def findlinestarts(code):
  576. """Find the offsets in a byte code which are start of lines in the source.
  577. Generate pairs (offset, lineno)
  578. """
  579. lastline = None
  580. for start, end, line in code.co_lines():
  581. if line is not None and line != lastline:
  582. lastline = line
  583. yield start, line
  584. return
  585. def _find_imports(co):
  586. """Find import statements in the code
  587. Generate triplets (name, level, fromlist) where
  588. name is the imported module and level, fromlist are
  589. the corresponding args to __import__.
  590. """
  591. IMPORT_NAME = opmap['IMPORT_NAME']
  592. LOAD_CONST = opmap['LOAD_CONST']
  593. consts = co.co_consts
  594. names = co.co_names
  595. opargs = [(op, arg) for _, op, arg in _unpack_opargs(co.co_code)
  596. if op != EXTENDED_ARG]
  597. for i, (op, oparg) in enumerate(opargs):
  598. if op == IMPORT_NAME and i >= 2:
  599. from_op = opargs[i-1]
  600. level_op = opargs[i-2]
  601. if (from_op[0] in hasconst and level_op[0] in hasconst):
  602. level = _get_const_value(level_op[0], level_op[1], consts)
  603. fromlist = _get_const_value(from_op[0], from_op[1], consts)
  604. yield (names[oparg], level, fromlist)
  605. def _find_store_names(co):
  606. """Find names of variables which are written in the code
  607. Generate sequence of strings
  608. """
  609. STORE_OPS = {
  610. opmap['STORE_NAME'],
  611. opmap['STORE_GLOBAL']
  612. }
  613. names = co.co_names
  614. for _, op, arg in _unpack_opargs(co.co_code):
  615. if op in STORE_OPS:
  616. yield names[arg]
  617. class Bytecode:
  618. """The bytecode operations of a piece of code
  619. Instantiate this with a function, method, other compiled object, string of
  620. code, or a code object (as returned by compile()).
  621. Iterating over this yields the bytecode operations as Instruction instances.
  622. """
  623. def __init__(self, x, *, first_line=None, current_offset=None, show_caches=False, adaptive=False):
  624. self.codeobj = co = _get_code_object(x)
  625. if first_line is None:
  626. self.first_line = co.co_firstlineno
  627. self._line_offset = 0
  628. else:
  629. self.first_line = first_line
  630. self._line_offset = first_line - co.co_firstlineno
  631. self._linestarts = dict(findlinestarts(co))
  632. self._original_object = x
  633. self.current_offset = current_offset
  634. self.exception_entries = _parse_exception_table(co)
  635. self.show_caches = show_caches
  636. self.adaptive = adaptive
  637. def __iter__(self):
  638. co = self.codeobj
  639. return _get_instructions_bytes(_get_code_array(co, self.adaptive),
  640. co._varname_from_oparg,
  641. co.co_names, co.co_consts,
  642. self._linestarts,
  643. line_offset=self._line_offset,
  644. exception_entries=self.exception_entries,
  645. co_positions=co.co_positions(),
  646. show_caches=self.show_caches)
  647. def __repr__(self):
  648. return "{}({!r})".format(self.__class__.__name__,
  649. self._original_object)
  650. @classmethod
  651. def from_traceback(cls, tb, *, show_caches=False, adaptive=False):
  652. """ Construct a Bytecode from the given traceback """
  653. while tb.tb_next:
  654. tb = tb.tb_next
  655. return cls(
  656. tb.tb_frame.f_code, current_offset=tb.tb_lasti, show_caches=show_caches, adaptive=adaptive
  657. )
  658. def info(self):
  659. """Return formatted information about the code object."""
  660. return _format_code_info(self.codeobj)
  661. def dis(self):
  662. """Return a formatted view of the bytecode operations."""
  663. co = self.codeobj
  664. if self.current_offset is not None:
  665. offset = self.current_offset
  666. else:
  667. offset = -1
  668. with io.StringIO() as output:
  669. _disassemble_bytes(_get_code_array(co, self.adaptive),
  670. varname_from_oparg=co._varname_from_oparg,
  671. names=co.co_names, co_consts=co.co_consts,
  672. linestarts=self._linestarts,
  673. line_offset=self._line_offset,
  674. file=output,
  675. lasti=offset,
  676. exception_entries=self.exception_entries,
  677. co_positions=co.co_positions(),
  678. show_caches=self.show_caches)
  679. return output.getvalue()
  680. def _test():
  681. """Simple test program to disassemble a file."""
  682. import argparse
  683. parser = argparse.ArgumentParser()
  684. parser.add_argument('infile', type=argparse.FileType('rb'), nargs='?', default='-')
  685. args = parser.parse_args()
  686. with args.infile as infile:
  687. source = infile.read()
  688. code = compile(source, args.infile.name, "exec")
  689. dis(code)
  690. if __name__ == "__main__":
  691. _test()