asm
This commit is contained in:
@@ -0,0 +1,24 @@
|
||||
# Copyright 2016 Grist Labs, Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""
|
||||
This module enhances the Python AST tree with token and source code information, sufficient to
|
||||
detect the source text of each AST node. This is helpful for tools that make source code
|
||||
transformations.
|
||||
"""
|
||||
|
||||
from .line_numbers import LineNumbers
|
||||
from .asttokens import ASTText, ASTTokens, supports_tokenless
|
||||
|
||||
__all__ = ['ASTText', 'ASTTokens', 'LineNumbers', 'supports_tokenless']
|
||||
Binary file not shown.
BIN
Binary file not shown.
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,18 @@
|
||||
# Compatibility shim: bind `astroid_node_classes`, `NodeNG` and `BaseContainer` from whichever
# astroid module layout can be imported, falling back to None placeholders when neither works.
try:
    # First choice: the `astroid.nodes` module.
    from astroid import nodes as astroid_node_classes

    # astroid_node_classes should be whichever module has the NodeNG class
    from astroid.nodes import NodeNG
    from astroid.nodes import BaseContainer
except Exception:
    try:
        # Second choice: the `astroid.node_classes` module, where the container base class is
        # only available under the private name `_BaseContainer`.
        from astroid import node_classes as astroid_node_classes
        from astroid.node_classes import NodeNG
        from astroid.node_classes import _BaseContainer as BaseContainer
    except Exception:  # pragma: no cover
        # Neither import path worked (presumably astroid is not installed -- TODO confirm);
        # expose None for every name so that importing this module never fails.
        astroid_node_classes = None
        NodeNG = None
        BaseContainer = None


__all__ = ["astroid_node_classes", "NodeNG", "BaseContainer"]
|
||||
@@ -0,0 +1,471 @@
|
||||
# Copyright 2016 Grist Labs, Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import abc
|
||||
import ast
|
||||
import bisect
|
||||
import sys
|
||||
import token
|
||||
from ast import Module
|
||||
from typing import Iterable, Iterator, List, Optional, Tuple, Any, cast, TYPE_CHECKING
|
||||
|
||||
import six
|
||||
from six.moves import xrange # pylint: disable=redefined-builtin
|
||||
|
||||
from .line_numbers import LineNumbers
|
||||
from .util import (
|
||||
Token, match_token, is_non_coding_token, patched_generate_tokens, last_stmt,
|
||||
annotate_fstring_nodes, generate_tokens, is_module, is_stmt
|
||||
)
|
||||
|
||||
if TYPE_CHECKING: # pragma: no cover
|
||||
from .util import AstNode, TokenInfo
|
||||
|
||||
|
||||
class ASTTextBase(six.with_metaclass(abc.ABCMeta, object)):
    """
    Abstract base for classes that map AST nodes back to ranges of source text.

    Stores the filename, the source text (converted with ``six.ensure_text``) and a
    ``LineNumbers`` index built over that text. Subclasses implement ``get_text_positions()``;
    ``get_text_range()`` and ``get_text()`` are derived from it here.
    """

    def __init__(self, source_text, filename):
        # type: (Any, str) -> None
        # FIXME: Strictly, source_text is one of the six string types, but that is hard to
        # express for mypy, see
        # https://mypy.readthedocs.io/en/stable/common_issues.html#variables-vs-type-aliases
        self._filename = filename

        # The text is decoded only here, after any parsing done by the caller, so that Python 2
        # can honour coding declarations. (If the encoding was not utf-8 compatible, this call
        # fails with a unicode error even when the source parsed correctly.)
        decoded_text = six.ensure_text(source_text)

        self._text = decoded_text
        self._line_numbers = LineNumbers(decoded_text)

    @abc.abstractmethod
    def get_text_positions(self, node, padded):
        # type: (AstNode, bool) -> Tuple[Tuple[int, int], Tuple[int, int]]
        """
        Return a pair of ``(lineno, col_offset)`` tuples: the start and the end of ``node``.
        When the positions cannot be determined, or the node does not correspond to any
        particular text, both tuples are ``(1, 0)``.

        ``padded`` has the meaning of the ``padded`` argument of ``ast.get_source_segment()``:
        when it is True and ``node`` is a multiline statement, the start position is moved
        back to include the leading whitespace.
        """
        raise NotImplementedError  # pragma: no cover

    def get_text_range(self, node, padded=True):
        # type: (AstNode, bool) -> Tuple[int, int]
        """
        Return the ``(startpos, endpos)`` character offsets in the source text that
        correspond to ``node``. Nodes (like `Load`) that don't correspond to any particular
        text yield ``(0, 0)``.

        See ``get_text_positions()`` for the meaning of ``padded``.
        """
        first, last = self.get_text_positions(node, padded)
        to_offset = self._line_numbers.line_to_offset
        return to_offset(*first), to_offset(*last)

    def get_text(self, node, padded=True):
        # type: (AstNode, bool) -> str
        """
        Return the source text corresponding to ``node``, or ``''`` for nodes (like `Load`)
        that don't correspond to any particular text.

        See ``get_text_positions()`` for the meaning of ``padded``.
        """
        begin, finish = self.get_text_range(node, padded)
        return self._text[begin:finish]
|
||||
|
||||
|
||||
class ASTTokens(ASTTextBase, object):
    """
    Keeps Python source code in several parallel forms -- the text itself, a line-number
    index and a list of tokens -- and uses them to mark AST nodes with token and position
    information.

    ``source_text`` must be a unicode string or UTF8-encoded bytes. When UTF8 bytes are
    passed in, remember that every offset you get refers to the unicode text, which is
    available as the ``.text`` property.

    If ``parse`` is set, ``source_text`` is parsed with ``ast.parse()`` and the resulting tree
    is marked with token info and made available as the ``.tree`` property.

    If ``tree`` is given, it is marked and made available as the ``.tree`` property. Besides
    trees produced by the ``ast`` module, trees produced by the ``astroid`` library
    <https://www.astroid.org> are marked as well.

    If ``tokens`` is given, those tokens are used instead of tokenizing the text.

    If only ``source_text`` is given, ``.mark_tokens(tree)`` may be used to mark the nodes of
    an AST tree created separately.
    """

    def __init__(self, source_text, parse=False, tree=None, filename='<unknown>', tokens=None):
        # type: (Any, bool, Optional[Module], str, Iterable[TokenInfo]) -> None
        # FIXME: Strictly, source_text is one of the six string types, but that is hard to
        # express for mypy, see
        # https://mypy.readthedocs.io/en/stable/common_issues.html#variables-vs-type-aliases
        super(ASTTokens, self).__init__(source_text, filename)

        # Parsing, when requested, is done on the text exactly as it was passed in.
        if parse:
            self._tree = ast.parse(source_text, filename)
        else:
            self._tree = tree

        # Tokenize the (decoded) text unless the caller supplied tokens.
        raw_tokens = generate_tokens(self._text) if tokens is None else tokens
        self._tokens = list(self._translate_tokens(raw_tokens))

        # Start offsets of all tokens, kept so that a position can be bisected to its token.
        self._token_offsets = [each.startpos for each in self._tokens]

        if self._tree:
            self.mark_tokens(self._tree)

    def mark_tokens(self, root_node):
        # type: (Module) -> None
        """
        Visit every node of the AST or Astroid tree rooted at ``root_node`` (produced from
        source_text) and mark it with token and position information by adding
        ``.first_token`` and ``.last_token`` attributes. The constructor does this
        automatically when the ``parse`` or ``tree`` arguments are set; call it manually for a
        separately created AST or Astroid tree.
        """
        # MarkTokens does the actual work; it is imported here to avoid an import loop.
        from .mark_tokens import MarkTokens
        marker = MarkTokens(self)
        marker.visit_tree(root_node)

    def _translate_tokens(self, original_tokens):
        # type: (Iterable[TokenInfo]) -> Iterator[Token]
        """
        Convert standard library tokens into our own ``Token`` representation, which also
        carries the token's index and its start/end character offsets.
        """
        patched = patched_generate_tokens(original_tokens)
        for index, (tok_type, tok_str, start, end, line) in enumerate(patched):
            start_offset = self._line_numbers.line_to_offset(start[0], start[1])
            end_offset = self._line_numbers.line_to_offset(end[0], end[1])
            yield Token(tok_type, tok_str, start, end, line, index, start_offset, end_offset)

    @property
    def text(self):
        # type: () -> str
        """The source code passed into the constructor."""
        return self._text

    @property
    def tokens(self):
        # type: () -> List[Token]
        """The list of tokens corresponding to the source code from the constructor."""
        return self._tokens

    @property
    def tree(self):
        # type: () -> Optional[Module]
        """The root of the AST tree passed into the constructor or parsed from the source code."""
        return self._tree

    @property
    def filename(self):
        # type: () -> str
        """The filename that was parsed"""
        return self._filename

    def get_token_from_offset(self, offset):
        # type: (int) -> Token
        """
        Return the token containing the given character offset (0-based position in the
        source text), or the preceding token if the position falls between tokens.
        """
        insertion_point = bisect.bisect_right(self._token_offsets, offset)
        return self._tokens[insertion_point - 1]

    def get_token(self, lineno, col_offset):
        # type: (int, int) -> Token
        """
        Return the token containing the given (lineno, col_offset) position, or the preceding
        token if the position falls between tokens.
        """
        # TODO: add test for multibyte unicode. Offsets from the ast module (which are in utf8)
        # need translating to offsets into the unicode text. The tokenize module seems to use
        # unicode offsets but isn't explicit about it.
        offset = self._line_numbers.line_to_offset(lineno, col_offset)
        return self.get_token_from_offset(offset)

    def get_token_from_utf8(self, lineno, col_offset):
        # type: (int, int) -> Token
        """
        Like get_token(), but treats col_offset as a UTF8 offset, which is what `ast` uses.
        """
        unicode_col = self._line_numbers.from_utf8_col(lineno, col_offset)
        return self.get_token(lineno, unicode_col)

    def next_token(self, tok, include_extra=False):
        # type: (Token, bool) -> Token
        """
        Return the token that follows ``tok``. Non-coding tokens from the tokenize module,
        such as NL and COMMENT, are skipped unless include_extra is True.
        """
        all_tokens = self._tokens
        position = tok.index + 1
        while not include_extra and is_non_coding_token(all_tokens[position].type):
            position += 1
        return all_tokens[position]

    def prev_token(self, tok, include_extra=False):
        # type: (Token, bool) -> Token
        """
        Return the token that precedes ``tok``. Non-coding tokens from the tokenize module,
        such as NL and COMMENT, are skipped unless include_extra is True.
        """
        all_tokens = self._tokens
        position = tok.index - 1
        while not include_extra and is_non_coding_token(all_tokens[position].type):
            position -= 1
        return all_tokens[position]

    def find_token(self, start_token, tok_type, tok_str=None, reverse=False):
        # type: (Token, int, Optional[str], bool) -> Token
        """
        Search from start_token (inclusive) for the first token that matches tok_type and, if
        given, the token string; search backwards when reverse is True. If nothing matches,
        the ENDMARKER token is returned (you can check for it with `token.ISEOF(t.type)`).
        """
        step = self.prev_token if reverse else self.next_token
        current = start_token
        while not (match_token(current, tok_type, tok_str) or token.ISEOF(current.type)):
            current = step(current, include_extra=True)
        return current

    def token_range(self, first_token, last_token, include_extra=False):
        # type: (Token, Token, bool) -> Iterator[Token]
        """
        Yield, in order, all tokens from first_token through and including last_token.
        Non-coding tokens such as tokenize.NL and .COMMENT are skipped unless include_extra is
        True.
        """
        for position in xrange(first_token.index, last_token.index + 1):
            candidate = self._tokens[position]
            if include_extra or not is_non_coding_token(candidate.type):
                yield candidate

    def get_tokens(self, node, include_extra=False):
        # type: (AstNode, bool) -> Iterator[Token]
        """
        Yield all tokens that make up the given node. Non-coding tokens such as tokenize.NL
        and .COMMENT are skipped unless include_extra is True.
        """
        return self.token_range(node.first_token, node.last_token, include_extra=include_extra)

    def get_text_positions(self, node, padded):
        # type: (AstNode, bool) -> Tuple[Tuple[int, int], Tuple[int, int]]
        """
        Return a pair of ``(lineno, col_offset)`` tuples: the start and the end of ``node``.
        When the positions cannot be determined, or the node does not correspond to any
        particular text, both tuples are ``(1, 0)``.

        ``padded`` has the meaning of the ``padded`` argument of ``ast.get_source_segment()``:
        when it is True and ``node`` is a multiline statement, the start position is moved
        back to include the leading whitespace.
        """
        # A node that was never marked has no token information to report.
        if not hasattr(node, 'first_token'):
            return (1, 0), (1, 0)

        begin = node.first_token.start
        finish = node.last_token.end
        if padded:
            for tok in self.get_tokens(node):
                if match_token(tok, token.NEWLINE):
                    # A NEWLINE token inside the node means a multiline statement: start at
                    # column 0 so that the leading indentation is included.
                    begin = (begin[0], 0)
                    break

        return begin, finish
|
||||
|
||||
|
||||
class ASTText(ASTTextBase, object):
    """
    Offers the same ``get_text*`` methods as ``ASTTokens``, but derives text positions from
    the AST itself rather than from tokens. This is faster than ``ASTTokens`` because less
    setup work is needed.

    It also (sometimes) supports nodes inside f-strings, which ``ASTTokens`` doesn't.

    Some node types and/or Python versions are not supported. For those, the ``get_text*``
    methods fall back to ``ASTTokens``, paying its usual setup cost on first use.
    To avoid that, check ``supports_tokenless(node)`` before calling ``get_text*`` methods.
    """

    def __init__(self, source_text, tree=None, filename='<unknown>'):
        # type: (Any, Optional[Module], str) -> None
        # FIXME: Strictly, source_text is one of the six string types, but that is hard to
        # express for mypy, see
        # https://mypy.readthedocs.io/en/stable/common_issues.html#variables-vs-type-aliases
        super(ASTText, self).__init__(source_text, filename)

        self._tree = tree
        if tree is not None:
            annotate_fstring_nodes(tree)

        # Built on demand by the ``asttokens`` property.
        self._asttokens = None  # type: Optional[ASTTokens]

    @property
    def tree(self):
        # type: () -> Module
        """The tree given to the constructor, or one parsed from the text on first access."""
        current = self._tree
        if current is None:
            current = ast.parse(self._text, self._filename)
            self._tree = current
            annotate_fstring_nodes(current)
        return current

    @property
    def asttokens(self):
        # type: () -> ASTTokens
        """An ``ASTTokens`` over the same text, tree and filename, created on first access."""
        if self._asttokens is None:
            self._asttokens = ASTTokens(self._text, tree=self.tree, filename=self._filename)
        return self._asttokens

    def _get_text_positions_tokenless(self, node, padded):
        # type: (AstNode, bool) -> Tuple[Tuple[int, int], Tuple[int, int]]
        """
        Version of ``get_text_positions()`` that doesn't use tokens.
        """
        if sys.version_info[:2] < (3, 8):  # pragma: no cover
            # This is just for mypy
            raise AssertionError("This method should only be called internally after checking supports_tokenless()")

        if is_module(node):
            # Modules carry no position info, so report the range of the whole text.
            # The token-using method does something different, but its behavior seems weird
            # and inconsistent: in a file with only comments, for example, it only returns the
            # first line. It's hard to imagine a case when this matters.
            whole_text_end = self._line_numbers.offset_to_line(len(self._text))
            return (1, 0), whole_text_end

        if getattr(node, 'lineno', None) is None:
            return (1, 0), (1, 0)

        assert node  # tell mypy that node is not None, which we allowed up to here for compatibility

        decorator_nodes = getattr(node, 'decorator_list', [])
        if not decorator_nodes:
            # Astroid uses node.decorators.nodes instead of node.decorator_list.
            astroid_decorators = getattr(node, 'decorators', None)
            decorator_nodes = getattr(astroid_decorators, 'nodes', [])

        # AST marks function/class definitions as starting at def/class rather than at the
        # first decorator. That doesn't match the token-using behavior, or
        # inspect.getsource(), and just seems weird -- so start at the first decorator.
        first_node = decorator_nodes[0] if decorator_nodes else node

        first_lineno = first_node.lineno
        final_stmt = last_stmt(node)

        # Leading indentation is included for multiline statements. That doesn't mean simple
        # statements that happen to span several lines, but compound statements where inner
        # indentation matters. So rather than comparing node.lineno with node.end_lineno, we
        # look for a contained statement that starts on a different line.
        include_indentation = False
        if padded:
            if first_lineno != final_stmt.lineno:
                include_indentation = True
            elif (
                # Astroid docstrings aren't treated as separate statements. To handle
                # function/class definitions with a docstring but no other body, check that
                # the node is a statement with a docstring that spans multiple lines in the
                # simple, literal sense.
                first_lineno != node.end_lineno
                and getattr(node, "doc_node", None)
                and is_stmt(node)
            ):
                include_indentation = True

        if include_indentation:
            first_col = 0
        else:
            first_col = self._line_numbers.from_utf8_col(first_lineno, first_node.col_offset)

        # To match the token-using behaviour, trailing semicolons and comments are excluded.
        # For blocks containing multiple statements this means taking end_lineno and
        # end_col_offset from the last statement instead of from the node itself.
        final_lineno = cast(int, final_stmt.end_lineno)
        final_utf8_col = cast(int, final_stmt.end_col_offset)
        final_col = self._line_numbers.from_utf8_col(final_lineno, final_utf8_col)

        return (first_lineno, first_col), (final_lineno, final_col)

    def get_text_positions(self, node, padded):
        # type: (AstNode, bool) -> Tuple[Tuple[int, int], Tuple[int, int]]
        """
        Return a pair of ``(lineno, col_offset)`` tuples: the start and the end of ``node``.
        When the positions cannot be determined, or the node does not correspond to any
        particular text, both tuples are ``(1, 0)``.

        ``padded`` has the meaning of the ``padded`` argument of ``ast.get_source_segment()``:
        when it is True and ``node`` is a multiline statement, the start position is moved
        back to include the leading whitespace.
        """
        if getattr(node, "_broken_positions", None):
            # util.annotate_fstring_nodes marked this node as having untrustworthy
            # lineno/col_offset.
            return (1, 0), (1, 0)

        if not supports_tokenless(node):
            # Unsupported node type or Python version: use the token-based implementation.
            return self.asttokens.get_text_positions(node, padded)

        return self._get_text_positions_tokenless(node, padded)
|
||||
|
||||
|
||||
# Names of node types that _get_text_positions_tokenless can't handle. The tuple stays empty
# before Python 3.8, where the tokenless code path is never used at all.
_unsupported_tokenless_types = ()  # type: Tuple[str, ...]
if sys.version_info[:2] >= (3, 8):
    # These node types have no lineno.
    _unsupported_tokenless_types = ("arguments", "Arguments", "withitem")
    if sys.version_info[:2] == (3, 8):
        _unsupported_tokenless_types += (
            # _get_text_positions_tokenless works incorrectly for these types due to bugs in
            # Python 3.8.
            "arg", "Starred",
            # These have no lineno in 3.8.
            "Slice", "ExtSlice", "Index", "keyword",
        )


def supports_tokenless(node=None):
    # type: (Any) -> bool
    """
    Tell whether the Python version and the node (if one is given) are handled by the
    ``get_text*`` methods of ``ASTText`` without falling back to ``ASTTokens``.
    See ``ASTText`` for why this matters.

    The following cases are not supported:

      - Python 3.7 and earlier
      - PyPy
      - ``ast.arguments`` / ``astroid.Arguments``
      - ``ast.withitem``
      - ``astroid.Comprehension`` (listed in the original documentation; this function
        makes no explicit check for it -- TODO confirm)
      - ``astroid.AssignName`` inside ``astroid.Arguments`` or ``astroid.ExceptHandler``
      - The following nodes in Python 3.8 only:
        - ``ast.arg``
        - ``ast.Starred``
        - ``ast.Slice``
        - ``ast.ExtSlice``
        - ``ast.Index``
        - ``ast.keyword``
    """
    type_name = type(node).__name__
    if type_name in _unsupported_tokenless_types:
        return False

    # Anything that is neither None nor an ast.AST instance is treated as an astroid node.
    is_astroid_node = not isinstance(node, ast.AST) and node is not None
    if is_astroid_node and type_name == "AssignName":
        if type(node.parent).__name__ in ("Arguments", "ExceptHandler"):
            return False

    if sys.version_info[:2] < (3, 8):
        return False

    return 'pypy' not in sys.version.lower()
|
||||
@@ -0,0 +1,76 @@
|
||||
# Copyright 2016 Grist Labs, Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import bisect
|
||||
import re
|
||||
from typing import Dict, List, Tuple
|
||||
|
||||
# Matches the empty string at the start of every line of a text.
_line_start_re = re.compile(r'^', re.M)


class LineNumbers(object):
    """
    Converts between character offsets into a text string and (line, column) pairs made of a
    1-based line number and a 0-based column number, as used by tokens and AST nodes.

    The text is expected to be unicode and all stored positions are unicode positions. Utf8
    column offsets, which are what ast parsing reports, can be translated as well.
    """

    def __init__(self, text):
        # type: (str) -> None
        # Character offset of the first character of each line, in line order.
        self._line_offsets = [match.start(0) for match in _line_start_re.finditer(text)]
        self._text_len = len(text)
        self._text = text
        # Filled lazily by from_utf8_col(): maps a line number to a list holding, for each
        # utf8 byte of that line, the index of the character the byte belongs to.
        self._utf8_offset_cache = {}  # type: Dict[int, List[int]]

    def from_utf8_col(self, line, utf8_column):
        # type: (int, int) -> int
        """
        Translate a 0-based utf8 column on the given 1-based line into a 0-based unicode
        column. Columns outside the line are clamped to its first or last position.
        """
        try:
            offsets = self._utf8_offset_cache[line]
        except KeyError:
            if line < len(self._line_offsets):
                end_offset = self._line_offsets[line]
            else:
                end_offset = self._text_len
            line_text = self._text[self._line_offsets[line - 1]:end_offset]

            # One entry per utf8 byte, each naming the character that byte is part of, plus
            # a final entry for the position just past the end of the line.
            offsets = []
            for char_index, char in enumerate(line_text):
                offsets.extend([char_index] * len(char.encode('utf8')))
            offsets.append(len(line_text))
            self._utf8_offset_cache[line] = offsets

        clamped = utf8_column
        highest = len(offsets) - 1
        if clamped > highest:
            clamped = highest
        if clamped < 0:
            clamped = 0
        return offsets[clamped]

    def line_to_offset(self, line, column):
        # type: (int, int) -> int
        """
        Convert a 1-based line number and 0-based column into a 0-based character offset into
        the text. Out-of-range positions are clamped to the bounds of the text.
        """
        line_index = line - 1
        if line_index < 0:
            return 0
        if line_index >= len(self._line_offsets):
            return self._text_len
        unclamped = self._line_offsets[line_index] + max(0, column)
        return min(unclamped, self._text_len)

    def offset_to_line(self, offset):
        # type: (int) -> Tuple[int, int]
        """
        Convert a 0-based character offset into a (line, col) pair of 1-based line and 0-based
        column numbers. Offsets outside the text are clamped to its bounds first.
        """
        clamped = max(0, min(self._text_len, offset))
        line_index = bisect.bisect(self._line_offsets, clamped) - 1
        column = clamped - self._line_offsets[line_index]
        return (line_index + 1, column)
|
||||
|
||||
|
||||
@@ -0,0 +1,505 @@
|
||||
# Copyright 2016 Grist Labs, Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import ast
|
||||
import numbers
|
||||
import sys
|
||||
import token
|
||||
from ast import Module
|
||||
from typing import Callable, List, Union, cast, Optional, Tuple, TYPE_CHECKING
|
||||
|
||||
import six
|
||||
|
||||
from . import util
|
||||
from .asttokens import ASTTokens
|
||||
from .util import AstConstant
|
||||
from .astroid_compat import astroid_node_classes as nc, BaseContainer as AstroidBaseContainer
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from .util import AstNode
|
||||
|
||||
|
||||
# Mappings between matching brackets, keyed by (token type, token string). To find a token
# here, look up token[:2].

# Opening bracket -> the closing bracket that matches it.
_matching_pairs_left = {
    (token.OP, opener): (token.OP, closer)
    for opener, closer in (('(', ')'), ('[', ']'), ('{', '}'))
}

# Closing bracket -> the opening bracket that matches it (the inverse of the mapping above).
_matching_pairs_right = {
    closing: opening for opening, closing in _matching_pairs_left.items()
}
|
||||
|
||||
|
||||
class MarkTokens(object):
|
||||
"""
|
||||
Helper that visits all nodes in the AST tree and assigns .first_token and .last_token attributes
|
||||
to each of them. This is the heart of the token-marking logic.
|
||||
"""
|
||||
def __init__(self, code):
    # type: (ASTTokens) -> None
    """
    Remember ``code`` (the ASTTokens object whose tokens are used for marking) and set up
    the lookup of node-specific visitor methods.
    """
    # Set by visit_tree() once the tree to visit is known.
    self._iter_children = None  # type: Optional[Callable]
    self._methods = util.NodeMethods()
    self._code = code
|
||||
|
||||
def visit_tree(self, node):
    # type: (Module) -> None
    """
    Walk the tree rooted at ``node``, calling ``_visit_before_children`` and
    ``_visit_after_children`` for the visited nodes via ``util.visit_tree``.
    """
    # Pick the child-iteration function for this tree before walking it.
    children_func = util.iter_children_func(node)
    self._iter_children = children_func
    util.visit_tree(node, self._visit_before_children, self._visit_after_children)
|
||||
|
||||
def _visit_before_children(self, node, parent_token):
    # type: (AstNode, Optional[util.Token], ) -> Tuple[Optional[util.Token], Optional[util.Token]]
    """
    Find the token at the node's own (lineno, col_offset), if it has one.

    Returns a pair: the token to hand to the node's children as their ``parent_token``, and
    the node's own token (possibly None), which becomes the ``token`` argument of
    ``_visit_after_children``.
    """
    utf8_col = getattr(node, 'col_offset', None)
    if utf8_col is None:
        own_token = None
    else:
        own_token = self._code.get_token_from_utf8(node.lineno, utf8_col)

    if not own_token and util.is_module(node):
        # We'll assume that a Module node starts at the start of the source code.
        own_token = self._code.get_token(1, 0)

    # Children receive our own token, or our parent's when we don't have one.
    return (own_token or parent_token, own_token)
|
||||
|
||||
def _visit_after_children(self, node, parent_token, token):
    # type: (AstNode, Optional[util.Token], Optional[util.Token]) -> None
    """
    Compute and store ``node.first_token`` and ``node.last_token``.

    Called after all children of ``node`` have been processed. ``token`` is the node's own
    token as found by ``_visit_before_children`` (possibly None), and ``parent_token`` is the
    token passed down from the enclosing node. Note that the ``token`` parameter shadows the
    ``token`` module inside this method.
    """
    # This processes the node generically first, after all children have been processed.

    # Get the first and last tokens that belong to children. Note how this doesn't assume that we
    # iterate through children in order that corresponds to occurrence in source code. This
    # assumption can fail (e.g. with return annotations).
    first = token
    last = None
    for child in cast(Callable, self._iter_children)(node):
        # astroid slices have especially wrong positions, we don't want them to corrupt their parents.
        if util.is_empty_astroid_slice(child):
            continue
        # Widen the range to the earliest first_token and the latest last_token of the children.
        if not first or child.first_token.index < first.index:
            first = child.first_token
        if not last or child.last_token.index > last.index:
            last = child.last_token

    # If we don't have a first token from _visit_before_children, and there were no children, then
    # use the parent's token as the first token.
    first = first or parent_token

    # If no children, set last token to the first one.
    last = last or first

    # Statements continue to before NEWLINE. This helps cover a few different cases at once.
    if util.is_stmt(node):
        last = self._find_last_in_stmt(cast(util.Token, last))

    # Capture any unmatched brackets.
    first, last = self._expand_to_matching_pairs(cast(util.Token, first), cast(util.Token, last), node)

    # Give a chance to node-specific methods to adjust.
    nfirst, nlast = self._methods.get(self, node.__class__)(node, first, last)

    if (nfirst, nlast) != (first, last):
        # If anything changed, expand again to capture any unmatched brackets.
        nfirst, nlast = self._expand_to_matching_pairs(nfirst, nlast, node)

    # Record the final range on the node itself.
    node.first_token = nfirst
    node.last_token = nlast
|
||||
|
||||
def _find_last_in_stmt(self, start_token):
    # type: (util.Token) -> util.Token
    """
    Scan forward from ``start_token`` (inclusive) to the first NEWLINE token, ``;`` operator
    or end-of-file token, and return the coding token that precedes it.
    """
    current = start_token
    while not (
        util.match_token(current, token.NEWLINE)
        or util.match_token(current, token.OP, ';')
        or token.ISEOF(current.type)
    ):
        current = self._code.next_token(current, include_extra=True)
    return self._code.prev_token(current)
|
||||
|
||||
def _expand_to_matching_pairs(self, first_token, last_token, node):
    # type: (util.Token, util.Token, AstNode) -> Tuple[util.Token, util.Token]
    """
    Scans every token in the [first_token, last_token] range and, for any bracket left
    unmatched inside it, tries to widen the range by one step to take in the partner bracket.
    Returns the adjusted ``(first_token, last_token)`` pair.

    ``node`` is part of the signature but is not used by this implementation.
    """
    # Closers still owed for openers seen in the range, innermost last.
    pending_closers = []  # type: List[Tuple[int, str]]
    # Openers owed for closers that had no opener inside the range.
    pending_openers = []
    for tok in self._code.token_range(first_token, last_token):
        key = tok[:2]  # the (type, string) prefix of the token
        if pending_closers and key == pending_closers[-1]:
            pending_closers.pop()
        elif key in _matching_pairs_left:
            pending_closers.append(_matching_pairs_left[key])
        elif key in _matching_pairs_right:
            pending_openers.append(_matching_pairs_right[key])

    # Extend the end of the range over any closers that are still owed.
    for closer in reversed(pending_closers):
        candidate = self._code.next_token(last_token)
        # Trailing commas or colons (allowed in subscripts) may precede the closing delimiter.
        while (util.match_token(candidate, token.OP, ',') or
               util.match_token(candidate, token.OP, ':')):
            candidate = self._code.next_token(candidate)
        if util.match_token(candidate, *closer):
            last_token = candidate

    # Extend the start of the range over any openers that are still owed.
    for opener in pending_openers:
        candidate = self._code.prev_token(first_token)
        if util.match_token(candidate, *opener):
            first_token = candidate

    return (first_token, last_token)
|
||||
|
||||
#----------------------------------------------------------------------
|
||||
# Node visitors. Each takes a preliminary first and last tokens, and returns the adjusted pair
|
||||
# that will actually be assigned.
|
||||
|
||||
def visit_default(self, node, first_token, last_token):
    # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
    # pylint: disable=no-self-use
    """Fallback visitor: hands back the preliminary token pair without any adjustment."""
    unchanged = (first_token, last_token)
    return unchanged
|
||||
|
||||
def handle_comp(self, open_brace, node, first_token, last_token):
    # type: (str, AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
    """
    Moves the start of a list/set/dict comprehension back by one token so that it covers the
    opening brace; the first token we are given is only that of the first child. The token
    before ``first_token`` is checked with ``util.expect_token`` to be the OP ``open_brace``.
    The closing brace is left to the bracket-matching pass.
    """
    opener = self._code.prev_token(first_token)
    util.expect_token(opener, token.OP, open_brace)
    return (opener, last_token)
|
||||
|
||||
# List comprehensions only need their start corrected before Python 3.8, which fixed
# the reported position: https://bugs.python.org/issue31241
if sys.version_info[:2] < (3, 8):
    def visit_listcomp(self, node, first_token, last_token):
        # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
        """Extends the range of a list comprehension to include its opening '['."""
        return self.handle_comp('[', node, first_token, last_token)
|
||||
|
||||
if six.PY2:
    # Only needed on Python 2: on Python 3 SetComp/DictComp already start at the right place.
    def visit_setcomp(self, node, first_token, last_token):
        # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
        """Extends the range of a set comprehension to include its opening '{'."""
        return self.handle_comp('{', node, first_token, last_token)

    def visit_dictcomp(self, node, first_token, last_token):
        # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
        """Extends the range of a dict comprehension to include its opening '{'."""
        return self.handle_comp('{', node, first_token, last_token)
|
||||
|
||||
def visit_comprehension(self,
                        node,  # type: AstNode
                        first_token,  # type: util.Token
                        last_token,  # type: util.Token
                        ):
    # type: (...) -> Tuple[util.Token, util.Token]
    """
    A 'comprehension' node begins at the 'for' keyword, but the preliminary first token is
    that of its first child. Searches backwards from it for the NAME token 'for' and uses
    that as the start.
    """
    for_keyword = self._code.find_token(first_token, token.NAME, 'for', reverse=True)
    return (for_keyword, last_token)
|
||||
|
||||
def visit_if(self, node, first_token, last_token):
    # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
    """
    Steps backwards from ``first_token`` until reaching a token whose text is 'if' or
    'elif', and uses that token as the start of the node.
    """
    keywords = ('if', 'elif')
    start = first_token
    while start.string not in keywords:
        start = self._code.prev_token(start)
    return start, last_token
|
||||
|
||||
def handle_attr(self, node, first_token, last_token):
    # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
    """
    An attribute access ends with ".attr" (two tokens) after its last child. Finds the '.'
    operator starting from ``last_token``, checks that a NAME token follows it, and makes
    that NAME the end of the node.
    """
    dot_token = self._code.find_token(last_token, token.OP, '.')
    attr_name = self._code.next_token(dot_token)
    util.expect_token(attr_name, token.NAME)
    return (first_token, attr_name)

# The same adjustment serves all three attribute node kinds.
visit_attribute = handle_attr
visit_assignattr = handle_attr
visit_delattr = handle_attr
|
||||
|
||||
def handle_def(self, node, first_token, last_token):
    # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
    """
    Adjusts the range of a class or function definition: pulls in a doc string that is not
    part of the body, and a '@' token sitting directly before the first token.
    """
    # With astroid, a node that starts with a doc string can have an empty body; in that
    # case the end must be moved forward to the doc string's STRING token.
    if not node.body:  # type: ignore[union-attr]
        has_doc = getattr(node, 'doc_node', None) or getattr(node, 'doc', None)
        if has_doc:
            last_token = self._code.find_token(last_token, token.STRING)

    # Include the '@' of a decorator when it immediately precedes the first token.
    if first_token.index > 0:
        before = self._code.prev_token(first_token)
        if util.match_token(before, token.OP, '@'):
            first_token = before
    return (first_token, last_token)

visit_classdef = handle_def
visit_functiondef = handle_def
|
||||
|
||||
def handle_following_brackets(self, node, last_token, opening_bracket):
    # type: (AstNode, util.Token, str) -> util.Token
    """
    For calls and subscripts, whose trailing bracket pair may contain no nodes at all
    (e.g. ``foo()`` or ``bar[:]``). Looks for ``opening_bracket`` after the first child and
    returns it when it lies beyond ``last_token``; otherwise returns ``last_token`` as is.
    The matching closing bracket is left to the bracket-matching pass.
    """
    # last_token is at the end of all children, so a bracket found past it cannot belong
    # to a child.
    children = cast(Callable, self._iter_children)(node)
    first_child = next(children)
    opener = self._code.find_token(first_child.last_token, token.OP, opening_bracket)
    return opener if opener.index > last_token.index else last_token
|
||||
|
||||
def visit_call(self, node, first_token, last_token):
    # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
    """Extends a call over its '(' and drops a leading decorator '@' from its range."""
    end = self.handle_following_brackets(node, last_token, '(')

    # Works around a python bug with decorators that have empty parens, e.g.
    #   @deco()
    #   def ...
    # where the call's first token is the '@' itself.
    if util.match_token(first_token, token.OP, '@'):
        start = self._code.next_token(first_token)
    else:
        start = first_token
    return (start, end)
|
||||
|
||||
def visit_matchclass(self, node, first_token, last_token):
    # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
    """Extends a class pattern's range over the '(' that follows its first child."""
    end = self.handle_following_brackets(node, last_token, '(')
    return (first_token, end)
|
||||
|
||||
def visit_subscript(self,
                    node,  # type: AstNode
                    first_token,  # type: util.Token
                    last_token,  # type: util.Token
                    ):
    # type: (...) -> Tuple[util.Token, util.Token]
    """Extends a subscript's range over the '[' that follows its first child."""
    end = self.handle_following_brackets(node, last_token, '[')
    return (first_token, end)
|
||||
|
||||
def visit_slice(self, node, first_token, last_token):
    # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
    """
    Widens a slice's range over every ':' token directly to its left and to its right.

    In Python 3.9, Slice nodes carry a col_offset (and end_col_offset), so the preliminary
    range always starts inside the slice, even for the empty slice. In 3.8 and below the
    range only grows to the full slice when the slice contains a node with a col_offset:
    x[:] gets the correct tokens only on 3.9, while x[1:] and x[:1] work on earlier
    versions too.
    """
    before = self._code.prev_token(first_token)
    while before.string == ':':
        first_token = before
        before = self._code.prev_token(first_token)

    after = self._code.next_token(last_token)
    while after.string == ':':
        last_token = after
        after = self._code.next_token(last_token)

    return (first_token, last_token)
|
||||
|
||||
def handle_bare_tuple(self, node, first_token, last_token):
    # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
    """
    A bare tuple has no parentheses of its own; when a ',' operator directly follows
    ``last_token``, that trailing comma is made part of the tuple.
    """
    following = self._code.next_token(last_token)
    end = following if util.match_token(following, token.OP, ',') else last_token
    return (first_token, end)
|
||||
|
||||
if sys.version_info >= (3, 8):
    # From Python 3.8 on, parsed tuples include their parentheses when present.
    def handle_tuple_nonempty(self, node, first_token, last_token):
        # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
        """
        Decides whether a non-empty tuple is bare (no parentheses of its own) and, if so,
        lets ``handle_bare_tuple`` pick up a trailing comma; otherwise the range is kept.
        """
        assert isinstance(node, ast.Tuple) or isinstance(node, AstroidBaseContainer)
        # The tuple is bare when its first token belongs to its first child. That child may
        # be wrapped in extra parentheses (which create no nodes), so include all of those
        # before comparing.
        first_elt = node.elts[0]
        if TYPE_CHECKING:
            first_elt = cast(AstNode, first_elt)
        elt_first, _elt_last = self._gobble_parens(first_elt.first_token, first_elt.last_token, True)
        if first_token != elt_first:
            return (first_token, last_token)
        return self.handle_bare_tuple(node, first_token, last_token)
else:
    # Before Python 3.8, parsed tuples do not include parentheses.
    def handle_tuple_nonempty(self, node, first_token, last_token):
        # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
        """
        Treats the tuple as bare (picking up a trailing comma), then expands the range by
        one pair of surrounding parentheses if present.
        """
        bare_first, bare_last = self.handle_bare_tuple(node, first_token, last_token)
        return self._gobble_parens(bare_first, bare_last, False)
|
||||
|
||||
def visit_tuple(self, node, first_token, last_token):
    # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
    """
    Adjusts the range of a tuple. An empty tuple is just "()" and needs no further work;
    anything else is delegated to ``handle_tuple_nonempty``.
    """
    assert isinstance(node, ast.Tuple) or isinstance(node, AstroidBaseContainer)
    if node.elts:
        return self.handle_tuple_nonempty(node, first_token, last_token)
    return (first_token, last_token)
|
||||
|
||||
def _gobble_parens(self, first_token, last_token, include_all=False):
    # type: (util.Token, util.Token, bool) -> Tuple[util.Token, util.Token]
    """
    Expands a token range to include surrounding parentheses and returns the resulting
    ``(first, last)`` pair. With ``include_all`` false at most one enclosing pair is added;
    with it true, pairs keep being added for as long as the range is directly wrapped in
    '(' ... ')'.
    """
    while first_token.index > 0:
        left = self._code.prev_token(first_token)
        right = self._code.next_token(last_token)
        wrapped = (util.match_token(left, token.OP, '(') and
                   util.match_token(right, token.OP, ')'))
        if not wrapped:
            break
        first_token, last_token = left, right
        if not include_all:
            break
    return (first_token, last_token)
|
||||
|
||||
def visit_str(self, node, first_token, last_token):
    # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
    """String literals get the adjacent-STRING handling of ``handle_str``."""
    adjusted = self.handle_str(first_token, last_token)
    return adjusted
|
||||
|
||||
def visit_joinedstr(self,
                    node,  # type: AstNode
                    first_token,  # type: util.Token
                    last_token,  # type: util.Token
                    ):
    # type: (...) -> Tuple[util.Token, util.Token]
    """
    Finds the last token of an f-string, including any string literals (plain or
    f-strings) that directly follow it and are therefore concatenated with it.
    """
    if sys.version_info < (3, 12):
        # Older versions don't tokenize the contents of f-strings.
        return self.handle_str(first_token, last_token)

    # Looked up via getattr because mypy complains about token.FSTRING_START and
    # token.FSTRING_END.
    fstring_start = getattr(token, "FSTRING_START")
    fstring_end = getattr(token, "FSTRING_END")

    cursor = first_token
    while True:
        if util.match_token(cursor, fstring_start):
            # Python 3.12+ has tokens for the start (e.g. `f"`) and end (`"`) of the
            # f-string. f-strings can be nested, e.g. f"{f'{x}'}", so the next FSTRING_END
            # is not necessarily ours: count starts and ends like balanced parentheses.
            depth = 1
            while depth > 0:
                cursor = self._code.next_token(cursor)
                if util.match_token(cursor, fstring_start):
                    depth += 1
                elif util.match_token(cursor, fstring_end):
                    depth -= 1
        elif not util.match_token(cursor, token.STRING):
            # Neither an f-string nor an adjacent plain string: the literal has ended.
            break
        # cursor is now the closing token of an f-string, or an adjacent STRING token.
        last_token = cursor
        cursor = self._code.next_token(last_token)
    return (first_token, last_token)
|
||||
|
||||
def visit_bytes(self, node, first_token, last_token):
    # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
    """Bytes literals get the adjacent-STRING handling of ``handle_str``."""
    adjusted = self.handle_str(first_token, last_token)
    return adjusted
|
||||
|
||||
def handle_str(self, first_token, last_token):
    # type: (util.Token, util.Token) -> Tuple[util.Token, util.Token]
    """
    Several adjacent STRING tokens form a single string: moves the end of the range
    forward over every STRING token that directly follows ``last_token``.
    """
    while True:
        following = self._code.next_token(last_token)
        if not util.match_token(following, token.STRING):
            return (first_token, last_token)
        last_token = following
|
||||
|
||||
def handle_num(self,
               node,  # type: AstNode
               value,  # type: Union[complex, int, numbers.Number]
               first_token,  # type: util.Token
               last_token,  # type: util.Token
               ):
    # type: (...) -> Tuple[util.Token, util.Token]
    """
    Adjusts the range of a numeric literal so that a leading sign is handled consistently:
    the end skips past operator tokens, and for a negative value whose first token is the
    NUMBER itself, the start is moved back one token to include the '-'.
    """
    # A constant like '-1' gets turned into two tokens; this skips the '-'.
    while util.match_token(last_token, token.OP):
        last_token = self._code.next_token(last_token)

    # A complex number like -2j cannot be compared directly to 0, so test its imaginary
    # part instead. A complex number like 1-2j is expressed as a binary operation and
    # never reaches here as one value.
    sign_source = value.imag if isinstance(value, complex) else value

    # Make sure the '-' is included.
    if sign_source < 0 and first_token.type == token.NUMBER:  # type: ignore[operator]
        first_token = self._code.prev_token(first_token)
    return (first_token, last_token)
|
||||
|
||||
def visit_num(self, node, first_token, last_token):
    # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
    """
    Adjusts the range of a ``Num`` node by passing its value (``node.n``) to ``handle_num``.
    """
    # The cast target is given as a string so that `ast.Num` is never evaluated at runtime:
    # that attribute is deprecated (accessing it warns on recent Pythons) and is slated for
    # removal, while the cast itself only matters to the type checker.
    return self.handle_num(node, cast('ast.Num', node).n, first_token, last_token)
|
||||
|
||||
# In Astroid, the Num and Str nodes are replaced by Const.
|
||||
def visit_const(self, node, first_token, last_token):
    # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
    """
    Adjusts the range of a constant according to the type of its value: numbers go through
    ``handle_num``, text and bytes through ``visit_str``, anything else is left unchanged.
    """
    assert isinstance(node, AstConstant) or isinstance(node, nc.Const)
    literal = node.value
    if isinstance(literal, numbers.Number):
        return self.handle_num(node, literal, first_token, last_token)
    if isinstance(literal, (six.text_type, six.binary_type)):
        return self.visit_str(node, first_token, last_token)
    return (first_token, last_token)

# In Python >= 3.6, there is a similar class 'Constant' for literals.
# In 3.8 it became the type produced by ast.parse:
# https://bugs.python.org/issue32892
visit_constant = visit_const
|
||||
|
||||
def visit_keyword(self, node, first_token, last_token):
    # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
    """
    Makes a keyword argument start at its name when the node carries no line information.

    Until python 3.9 (https://bugs.python.org/issue40141), ast.keyword nodes didn't have
    line info, and astroid sets lineno to None.
    """
    assert isinstance(node, ast.keyword) or isinstance(node, nc.Keyword)
    lacks_position = node.arg is not None and getattr(node, 'lineno', None) is None
    if not lacks_position:
        return (first_token, last_token)

    # Search back for the '='; the token before it must be the argument name.
    equals = self._code.find_token(first_token, token.OP, '=', reverse=True)
    arg_name = self._code.prev_token(equals)
    util.expect_token(arg_name, token.NAME, node.arg)
    return (arg_name, last_token)
|
||||
|
||||
def visit_starred(self, node, first_token, last_token):
    # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
    """
    Astroid has 'Starred' nodes (for "foo(*bar)" type args) whose range may not start at
    the '*'. If the range does not already begin with '*' but the previous token is one,
    the start is moved back onto it.
    """
    if util.match_token(first_token, token.OP, '*'):
        return (first_token, last_token)
    before = self._code.prev_token(first_token)
    if util.match_token(before, token.OP, '*'):
        first_token = before
    return (first_token, last_token)
|
||||
|
||||
def visit_assignname(self, node, first_token, last_token):
    # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
    """
    Astroid may turn an 'except' clause into an AssignName whose range starts at the
    'except' keyword. In that case the range is collapsed onto the single token that
    precedes the ':' found after ``last_token``.
    """
    if not util.match_token(first_token, token.NAME, 'except'):
        return (first_token, last_token)
    colon = self._code.find_token(last_token, token.OP, ':')
    target = self._code.prev_token(colon)
    return (target, target)
|
||||
|
||||
if six.PY2:
    # Python 3 already positions 'with' nodes correctly, so this is Python 2 only.
    def visit_with(self, node, first_token, last_token):
        # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
        """Moves the start of a 'with' node back to its 'with' keyword."""
        with_keyword = self._code.find_token(first_token, token.NAME, 'with', reverse=True)
        return (with_keyword, last_token)
|
||||
|
||||
# Async nodes should typically start with the word 'async'
|
||||
# but Python < 3.7 doesn't put the col_offset there
|
||||
# AsyncFunctionDef is slightly different because it might have
|
||||
# decorators before that, which visit_functiondef handles
|
||||
def handle_async(self, node, first_token, last_token):
    # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
    """
    Moves the start of the node back by one token unless the first token's text is
    already 'async'.
    """
    if first_token.string != 'async':
        first_token = self._code.prev_token(first_token)
    return (first_token, last_token)

visit_asyncfor = handle_async
visit_asyncwith = handle_async
|
||||
|
||||
def visit_asyncfunctiondef(self,
                           node,  # type: AstNode
                           first_token,  # type: util.Token
                           last_token,  # type: util.Token
                           ):
    # type: (...) -> Tuple[util.Token, util.Token]
    """
    When the range starts at the 'def' keyword, steps back one token to take in 'async',
    then applies the regular function-definition handling (doc string, decorator '@').
    """
    starts_at_def = util.match_token(first_token, token.NAME, 'def')
    if starts_at_def:
        # Include the 'async' token.
        first_token = self._code.prev_token(first_token)
    return self.visit_functiondef(node, first_token, last_token)
|
||||
@@ -0,0 +1,484 @@
|
||||
# Copyright 2016 Grist Labs, Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import ast
|
||||
import collections
|
||||
import io
|
||||
import sys
|
||||
import token
|
||||
import tokenize
|
||||
from abc import ABCMeta
|
||||
from ast import Module, expr, AST
|
||||
from typing import Callable, Dict, Iterable, Iterator, List, Optional, Tuple, Union, cast, Any, TYPE_CHECKING
|
||||
|
||||
from six import iteritems
|
||||
|
||||
|
||||
if TYPE_CHECKING:  # pragma: no cover
    from .astroid_compat import NodeNG

    # Widens the definition of AST with the fields this library adds to nodes.
    # It exists purely for the type checker and is not used for anything else.
    class EnhancedAST(AST):
        # Additional attributes set by mark_tokens
        first_token = None  # type: Token
        last_token = None  # type: Token
        lineno = 0  # type: int

    # A node from either supported tree flavour: stdlib ast or astroid.
    AstNode = Union[EnhancedAST, NodeNG]

    # Python 2's tokenizer yields plain 5-tuples; Python 3 has a named TokenInfo type.
    if sys.version_info[0] == 2:
        TokenInfo = Tuple[int, str, Tuple[int, int], Tuple[int, int], str]
    else:
        TokenInfo = tokenize.TokenInfo
|
||||
|
||||
|
||||
def token_repr(tok_type, string):
    # type: (int, Optional[str]) -> str
    """Builds a human-friendly ``NAME:'text'`` description from a token type and string."""
    type_name = token.tok_name[tok_type]
    # Python 2 renders unicode as u'...' while Python 3 does not; drop the prefix so both
    # produce the same text.
    quoted = repr(string).lstrip('u')
    return '%s:%s' % (type_name, quoted)
|
||||
|
||||
|
||||
class Token(collections.namedtuple(
        'Token',
        ['type', 'string', 'start', 'end', 'line', 'index', 'startpos', 'endpos'])):
    """
    Token is an 8-tuple containing the same 5 fields as the tokens produced by the tokenize
    module, and 3 additional ones useful for this module:

    - [0] .type     Token type (see token.py)
    - [1] .string   Token (a string)
    - [2] .start    Starting (row, column) indices of the token (a 2-tuple of ints)
    - [3] .end      Ending (row, column) indices of the token (a 2-tuple of ints)
    - [4] .line     Original line (string)
    - [5] .index    Index of the token in the list of tokens that it belongs to.
    - [6] .startpos Starting character offset into the input text.
    - [7] .endpos   Ending character offset into the input text.
    """
    def __str__(self):
        # type: () -> str
        # Same friendly form that token_repr() gives for this token's type and text.
        return token_repr(self.type, self.string)
|
||||
|
||||
|
||||
# `ast.Constant` exists from Python 3.6; older interpreters get a stand-in class so that
# isinstance() checks against AstConstant remain valid.
if sys.version_info < (3, 6):
    class AstConstant:
        value = object()
else:
    AstConstant = ast.Constant
|
||||
|
||||
|
||||
def match_token(token, tok_type, tok_str=None):
    # type: (Token, int, Optional[str]) -> bool
    """
    Tells whether ``token`` has type ``tok_type`` and, when ``tok_str`` is given, also has
    exactly that string.
    """
    if token.type != tok_type:
        return False
    return tok_str is None or token.string == tok_str
|
||||
|
||||
|
||||
def expect_token(token, tok_type, tok_str=None):
    # type: (Token, int, Optional[str]) -> None
    """
    Checks that ``token`` has the expected type and, when ``tok_str`` is given, the expected
    string as well. Returns nothing on success; on a mismatch raises a ValueError naming the
    expected token, the actual token, and the actual token's line and 1-based column.
    """
    if match_token(token, tok_type, tok_str):
        return
    expected = token_repr(tok_type, tok_str)
    actual = str(token)
    line_no = token.start[0]
    col_no = token.start[1] + 1
    raise ValueError("Expected token %s, got %s on line %s col %s" % (
        expected, actual, line_no, col_no))
|
||||
|
||||
# These were previously defined in tokenize.py and distinguishable by being greater than
|
||||
# token.N_TOKEN. As of python3.7, they are in token.py, and we check for them explicitly.
|
||||
if sys.version_info >= (3, 7):
    def is_non_coding_token(token_type):
        # type: (int) -> bool
        """
        Tells whether ``token_type`` is NL, COMMENT or ENCODING. These count as non-coding
        tokens because they don't affect the syntax tree.
        """
        return (token_type == token.NL or
                token_type == token.COMMENT or
                token_type == token.ENCODING)
else:
    def is_non_coding_token(token_type):
        # type: (int) -> bool
        """
        Tells whether ``token_type`` lies at or above ``token.N_TOKENS``. These count as
        non-coding tokens because they don't affect the syntax tree.
        """
        return token_type >= token.N_TOKENS
|
||||
|
||||
|
||||
def generate_tokens(text):
    # type: (str) -> Iterator[TokenInfo]
    """
    Produces the standard library's tokens for the source code in ``text``.
    """
    # tokenize.generate_tokens is technically an undocumented API for Python3, but it lets us
    # use the same API as for Python2. See http://stackoverflow.com/a/4952291/328565.
    readline = io.StringIO(text).readline
    # FIXME: Remove cast once https://github.com/python/typeshed/issues/7003 gets fixed
    return tokenize.generate_tokens(cast(Callable[[], str], readline))
|
||||
|
||||
|
||||
def iter_children_func(node):
    # type: (AST) -> Callable
    """
    Picks the function to use for yielding the direct children of an AST node (skipping
    children that are singleton nodes). A node that has a ``get_children`` attribute is
    taken to come from ``astroid``; any other node is handled as an ``ast`` node.
    """
    if hasattr(node, 'get_children'):
        return iter_children_astroid
    return iter_children_ast
|
||||
|
||||
|
||||
def iter_children_astroid(node, include_joined_str=False):
    # type: (NodeNG, bool) -> Union[Iterator, List]
    """
    Returns the children of an astroid node as given by ``node.get_children()``. A JoinedStr
    node yields an empty list instead unless ``include_joined_str`` is true.
    """
    skip_children = not include_joined_str and is_joined_str(node)
    if skip_children:
        return []
    return node.get_children()
|
||||
|
||||
|
||||
# Every class in the ast module that derives from one of the context/operator bases.
# iter_children_ast() skips children whose class is in this set.
SINGLETONS = {
    c for n, c in iteritems(ast.__dict__)
    if isinstance(c, type)
    and issubclass(c, (ast.expr_context, ast.boolop, ast.operator, ast.unaryop, ast.cmpop))
}
|
||||
|
||||
|
||||
def iter_children_ast(node, include_joined_str=False):
    # type: (AST, bool) -> Iterator[Union[AST, expr]]
    """
    Yields the direct children of an ``ast`` node, leaving out singleton nodes. A JoinedStr
    node yields nothing unless ``include_joined_str`` is true.
    """
    if not include_joined_str and is_joined_str(node):
        return

    if isinstance(node, ast.Dict):
        # Override the iteration order: instead of <all keys>, <all values>, yield keys and
        # values in source order (key1, value1, key2, value2, ...). A key of None has no
        # node of its own, so only its value is yielded.
        for key, value in zip(node.keys, node.values):
            if key is not None:
                yield key
            yield value
    else:
        for child in ast.iter_child_nodes(node):
            # Singleton children don't reflect particular positions in the code and break
            # the assumption that the tree consists of distinct nodes. The classes are
            # collected beforehand into a set so that each check is a set lookup.
            if child.__class__ in SINGLETONS:
                continue
            yield child
|
||||
|
||||
|
||||
# Names of all classes in the ast module that derive from ast.stmt.
stmt_class_names = {
    n for n, c in iteritems(ast.__dict__)
    if isinstance(c, type) and issubclass(c, ast.stmt)
}
# Names of all classes in the ast module that derive from ast.expr, plus a fixed set of
# additional class names that are also treated as expressions.
expr_class_names = (
    {
        n for n, c in iteritems(ast.__dict__)
        if isinstance(c, type) and issubclass(c, ast.expr)
    } |
    {'AssignName', 'DelName', 'Const', 'AssignAttr', 'DelAttr'}
)
|
||||
|
||||
# These feel hacky compared to isinstance() but allow us to work with both ast and astroid nodes
|
||||
# in the same way, and without even importing astroid.
|
||||
def is_expr(node):
    # type: (AstNode) -> bool
    """Tells whether the node's class name is one of the known expression class names."""
    class_name = node.__class__.__name__
    return class_name in expr_class_names
|
||||
|
||||
def is_stmt(node):
    # type: (AstNode) -> bool
    """Tells whether the node's class name is one of the known statement class names."""
    class_name = node.__class__.__name__
    return class_name in stmt_class_names
|
||||
|
||||
def is_module(node):
    # type: (AstNode) -> bool
    """Tells whether the node is a module node, judged by its class being named 'Module'."""
    class_name = node.__class__.__name__
    return class_name == 'Module'
|
||||
|
||||
def is_joined_str(node):
    # type: (AstNode) -> bool
    """
    Tells whether the node is a JoinedStr (the node used for f-strings), judged by its
    class name.
    """
    # At the moment, nodes below JoinedStr have wrong line/col info, and trying to process
    # them only leads to errors, so callers use this check to stay out of them.
    class_name = node.__class__.__name__
    return class_name == 'JoinedStr'
|
||||
|
||||
|
||||
def is_starred(node):
    # type: (AstNode) -> bool
    """Tells whether the node is a starred expression, judged by its class being 'Starred'."""
    class_name = node.__class__.__name__
    return class_name == 'Starred'
|
||||
|
||||
|
||||
def is_slice(node):
    # type: (AstNode) -> bool
    """Tells whether the node represents a slice, e.g. `1:2` in `x[1:2]`"""
    class_name = node.__class__.__name__
    # Before 3.9, a tuple containing a slice is an ExtSlice,
    # but this was removed in https://bugs.python.org/issue34822
    if class_name in ('Slice', 'ExtSlice'):
        return True
    # Otherwise only a Tuple with at least one slice among its elements qualifies.
    if class_name != 'Tuple':
        return False
    return any(map(is_slice, cast(ast.Tuple, node).elts))
|
||||
|
||||
|
||||
def is_empty_astroid_slice(node):
    # type: (AstNode) -> bool
    """
    Tells whether the node is a non-``ast`` node whose class is named "Slice" and whose
    ``lower``, ``upper`` and ``step`` are all None.
    """
    if node.__class__.__name__ != "Slice":
        return False
    if isinstance(node, ast.AST):
        # A stdlib ast Slice is never reported as an empty astroid slice.
        return False
    return node.lower is node.upper is node.step is None
|
||||
|
||||
|
||||
# Sentinel value used by visit_tree() to mark stack entries that have not been pre-visited.
_PREVISIT = object()

def visit_tree(node, previsit, postvisit):
    # type: (Module, Callable[[AstNode, Optional[Token]], Tuple[Optional[Token], Optional[Token]]], Optional[Callable[[AstNode, Optional[Token], Optional[Token]], None]]) -> None
    """
    Scans the tree under the node depth-first using an explicit stack. It avoids implicit
    recursion via the function call stack to avoid hitting 'maximum recursion depth exceeded'
    error.

    It calls ``previsit()`` and ``postvisit()`` as follows:

    * ``previsit(node, par_value)`` - should return ``(par_value, value)``
          ``par_value`` is as returned from ``previsit()`` of the parent.

    * ``postvisit(node, par_value, value)`` - should return ``value``
          ``par_value`` is as returned from ``previsit()`` of the parent, and ``value`` is as
          returned from ``previsit()`` of this node itself. The return ``value`` is ignored
          except the one for the root node, which is returned from the overall
          ``visit_tree()`` call.

    For the initial node, ``par_value`` is None. ``postvisit`` may be None.
    """
    if not postvisit:
        postvisit = lambda node, pvalue, value: None

    iter_children = iter_children_func(node)
    seen = set()
    result = None
    pending = [(node, None, _PREVISIT)]  # type: List[Tuple[AstNode, Optional[Token], Union[Optional[Token], object]]]
    while pending:
        current, par_value, value = pending.pop()
        if value is not _PREVISIT:
            # Second encounter: all children are done, so run the post-visit step.
            result = postvisit(current, par_value, cast(Optional[Token], value))
            continue

        assert current not in seen  # protect against infinite loop in case of a bad tree.
        seen.add(current)

        pvalue, post_value = previsit(current, par_value)
        # Re-queue the node itself so that it is post-visited after its children.
        pending.append((current, par_value, post_value))

        # Insert all children in reverse order (so that first child ends up on top of the
        # stack): inserting each one at the same position pushes earlier ones upward.
        insert_at = len(pending)
        for child in iter_children(current):
            pending.insert(insert_at, (child, pvalue, _PREVISIT))
    return result
|
||||
|
||||
|
||||
def walk(node, include_joined_str=False):
    # type: (AST, bool) -> Iterator[Union[Module, AstNode]]
    """
    Recursively yield all descendant nodes in the tree starting at ``node`` (including
    ``node`` itself), using depth-first pre-order traversal (yielding parents before their
    children).

    This is similar to ``ast.walk()``, but with a different order, and it works for both
    ``ast`` and ``astroid`` trees. Also, as ``iter_children()``, it skips singleton nodes
    generated by ``ast``.

    By default, ``JoinedStr`` (f-string) nodes and their contents are skipped
    because they previously couldn't be handled. Set ``include_joined_str`` to True to
    include them.
    """
    iter_children = iter_children_func(node)
    seen = set()
    pending = [node]
    while pending:
        current = pending.pop()
        assert current not in seen  # protect against infinite loop in case of a bad tree.
        seen.add(current)

        yield current

        # Insert all children in reverse order (so that first child ends up on top of the
        # stack): inserting each one at the same position pushes earlier ones upward.
        insert_at = len(pending)
        for child in iter_children(current, include_joined_str):
            pending.insert(insert_at, child)
|
||||
|
||||
|
||||
def replace(text, replacements):
    # type: (str, List[Tuple[int, int, str]]) -> str
    """
    Return ``text`` with several slices swapped for new values in one pass. Handy for applying
    code modifications to ranges such as those reported by ``ASTTokens.get_text_range(node)``.

    ``replacements`` is an iterable of ``(start, end, new_text)`` tuples; they are applied in
    sorted order, so the caller may pass them in any order.

    For example, ``replace("this is a test", [(0, 4, "X"), (8, 9, "THE")])`` produces
    ``"X is THE test"``.
    """
    pieces = []
    cursor = 0
    for begin, stop, substitute in sorted(replacements):
        # Keep the untouched text before this slice, then emit the substitute.
        pieces += [text[cursor:begin], substitute]
        cursor = stop
    # Whatever follows the last replaced slice is copied verbatim.
    pieces.append(text[cursor:])
    return ''.join(pieces)
|
||||
|
||||
|
||||
class NodeMethods(object):
    """
    Looks up `visit_{node_type}` methods by node class and remembers each lookup.
    """

    def __init__(self):
        # type: () -> None
        # Maps a node class to the method that was resolved for it on first lookup.
        self._cache = {}  # type: Dict[Union[ABCMeta, type], Callable[[AstNode, Token, Token], Tuple[Token, Token]]]

    def get(self, obj, cls):
        # type: (Any, Union[ABCMeta, type]) -> Callable
        """
        Return `obj.visit_{node_type}`, where node_type is the lowercased name of `cls`;
        `obj.visit_default` is returned when no type-specific method exists.

        The result is cached per class, so later calls with the same `cls` return the method
        resolved on the first call.
        """
        cached = self._cache.get(cls)
        if cached:
            return cached
        resolved = getattr(obj, "visit_" + cls.__name__.lower(), obj.visit_default)
        self._cache[cls] = resolved
        return resolved
|
||||
|
||||
|
||||
if sys.version_info[0] == 2:
    # Python 2 doesn't support non-ASCII identifiers, and making the real patched_generate_tokens support Python 2
    # means working with raw tuples instead of tokenize.TokenInfo namedtuples.
    def patched_generate_tokens(original_tokens):
        # type: (Iterable[TokenInfo]) -> Iterator[TokenInfo]
        """
        Return an iterator over ``original_tokens`` unchanged; no patching is done on Python 2.
        """
        return iter(original_tokens)
else:
    def patched_generate_tokens(original_tokens):
        # type: (Iterable[TokenInfo]) -> Iterator[TokenInfo]
        """
        Fixes tokens yielded by `tokenize.generate_tokens` to handle more non-ASCII characters in identifiers.
        Workaround for https://github.com/python/cpython/issues/68382.
        Should only be used when tokenizing a string that is known to be valid syntax,
        because it assumes that error tokens are not actually errors.
        Combines groups of consecutive NAME, NUMBER, and/or ERRORTOKEN tokens into a single NAME token.

        Tokens are only grouped when there is no whitespace between them. A groupable token that
        is separated from the pending group by whitespace closes that group and starts a new one,
        so it can still be combined with the tokens that immediately follow it.
        """
        group = []  # type: List[tokenize.TokenInfo]
        for tok in original_tokens:
            if tok.type in (tokenize.NAME, tokenize.ERRORTOKEN, tokenize.NUMBER):
                if group and group[-1].end != tok.start:
                    # There is a gap before this token: emit the pending group, then let this
                    # token open a new group instead of being emitted on its own.
                    for combined_token in combine_tokens(group):
                        yield combined_token
                    group = []
                group.append(tok)
            else:
                # Any other token ends the pending group and is passed through untouched.
                for combined_token in combine_tokens(group):
                    yield combined_token
                group = []
                yield tok
        # Emit whatever group is still pending once the input is exhausted.
        for combined_token in combine_tokens(group):
            yield combined_token

    def combine_tokens(group):
        # type: (List[tokenize.TokenInfo]) -> List[tokenize.TokenInfo]
        """
        Merge ``group`` into a single NAME token if it contains at least one ERRORTOKEN and all
        of its tokens report the same ``line``; otherwise return ``group`` unchanged.

        The merged token concatenates the strings of all tokens and spans from the start of the
        first token to the end of the last one.
        """
        if not any(tok.type == tokenize.ERRORTOKEN for tok in group) or len({tok.line for tok in group}) != 1:
            return group
        return [
            tokenize.TokenInfo(
                type=tokenize.NAME,
                string="".join(t.string for t in group),
                start=group[0].start,
                end=group[-1].end,
                line=group[0].line,
            )
        ]
|
||||
|
||||
|
||||
def last_stmt(node):
    # type: (ast.AST) -> ast.AST
    """
    Descend into the last statement-like child of ``node`` until a node with no such children
    is reached, and return that node.

    If ``node`` contains no statements, ``node`` itself is returned.
    """
    # Node type names treated like statements here even though ``is_stmt`` may not accept them.
    container_names = (
        "excepthandler",
        "ExceptHandler",
        "match_case",
        "MatchCase",
        "TryExcept",
        "TryFinally",
    )
    current = node
    while True:
        nested = [
            child for child in iter_children_func(current)(current)
            if is_stmt(child) or type(child).__name__ in container_names
        ]
        if not nested:
            return current
        # Continue from the last statement-like child.
        current = nested[-1]
|
||||
|
||||
|
||||
if sys.version_info[:2] >= (3, 8):
    from functools import lru_cache

    # The probe takes no arguments and parses a fixed string, so lru_cache makes every call
    # after the first return the stored result.
    @lru_cache(maxsize=None)
    def fstring_positions_work():
        # type: () -> bool
        """
        The positions attached to nodes inside f-string FormattedValues have some bugs
        that were fixed in Python 3.9.7 in https://github.com/python/cpython/pull/27729.
        This checks for those bugs more concretely without relying on the Python version.
        Specifically this checks:
         - Values with a format spec or conversion
         - Repeated (i.e. identical-looking) expressions
         - f-strings implicitly concatenated over multiple lines.
         - Multiline, triple-quoted f-strings.

        Returns True only if every ``ast.Name`` node in the probe has a distinct
        ``(lineno, col_offset)`` and ``ast.get_source_segment`` returns exactly its identifier.
        """
        # Probe expression exercising each of the cases listed in the docstring.
        source = """(
          f"a {b}{b} c {d!r} e {f:g} h {i:{j}} k {l:{m:n}}"
          f"a {b}{b} c {d!r} e {f:g} h {i:{j}} k {l:{m:n}}"
          f"{x + y + z} {x} {y} {z} {z} {z!a} {z:z}"
          f'''
          {s} {t}
          {u} {v}
          '''
        )"""
        tree = ast.parse(source)
        name_nodes = [node for node in ast.walk(tree) if isinstance(node, ast.Name)]
        name_positions = [(node.lineno, node.col_offset) for node in name_nodes]
        # Two names reported at the same position means at least one position is wrong.
        positions_are_unique = len(set(name_positions)) == len(name_positions)
        # Each name's reported range must slice out exactly that identifier from the source.
        correct_source_segments = all(
            ast.get_source_segment(source, node) == node.id
            for node in name_nodes
        )
        return positions_are_unique and correct_source_segments

    def annotate_fstring_nodes(tree):
        # type: (ast.AST) -> None
        """
        Add a special attribute `_broken_positions` to nodes inside f-strings
        if the lineno/col_offset cannot be trusted.

        Does nothing on Python 3.12 and later. Otherwise every part of every ``JoinedStr`` in
        ``tree`` is marked, as is the format spec of each ``FormattedValue``. The nodes yielded
        by ``walk(part.value)`` are marked only when ``fstring_positions_work()`` is false.
        """
        if sys.version_info >= (3, 12):
            # f-strings were weirdly implemented until https://peps.python.org/pep-0701/
            # In Python 3.12, inner nodes have sensible positions.
            return
        # include_joined_str=True is required here: walk() skips JoinedStr nodes by default.
        for joinedstr in walk(tree, include_joined_str=True):
            if not isinstance(joinedstr, ast.JoinedStr):
                continue
            for part in joinedstr.values:
                # The ast positions of the FormattedValues/Constant nodes span the full f-string, which is weird.
                setattr(part, '_broken_positions', True)  # use setattr for mypy

                if isinstance(part, ast.FormattedValue):
                    # The embedded expression is only distrusted when the probe reports bugs.
                    if not fstring_positions_work():
                        for child in walk(part.value):
                            setattr(child, '_broken_positions', True)

                    if part.format_spec:  # this is another JoinedStr
                        # Again, the standard positions span the full f-string.
                        setattr(part.format_spec, '_broken_positions', True)

else:
    def fstring_positions_work():
        # type: () -> bool
        """
        Fallback for Python older than 3.8: always reports that f-string positions do not work.
        """
        return False

    def annotate_fstring_nodes(_tree):
        # type: (ast.AST) -> None
        """
        Fallback for Python older than 3.8: does nothing and leaves the tree unmodified.
        """
        pass
|
||||
@@ -0,0 +1 @@
|
||||
__version__ = "2.4.1"
|
||||
Reference in New Issue
Block a user