#!/bin/python3
r"""
The main module. Contains Pythonic wrappers for much of [TeX]'s API.
Refer to :mod:`~.simple` for the "simple" API -- which allows users to avoid the need to
know [TeX] internals such as category codes.
The fundamental data of [TeX] is a token, which is represented by Python's :class:`Token` object.
A list of tokens is represented by :class:`TokenList` object. If it's balanced,
:class:`BalancedTokenList` should be used.
With that, you can manipulate the [TeX] input stream with :meth:`BalancedTokenList.get_next`,
:meth:`BalancedTokenList.get_until`, :meth:`TokenList.put_next`.
Furthermore, executing [TeX] code is possible using :func:`continue_until_passed_back`.
For example, the following code::
TokenList(r"\typeout{123}\pythonimmediatecontinuenoarg").put_next()
continue_until_passed_back()
will just use [TeX] to execute the code ``\typeout{123}``.
With the 3 functions above, you can do *everything* that can be done in [TeX]
(although maybe not very conveniently or quickly). Some other functions are provided,
and for educational purposes, the way to implement them using the primitive functions is discussed.
* :func:`expand_once`: ``TokenList(r"\expandafter\pythonimmediatecontinuenoarg").put_next(); continue_until_passed_back()``
* :meth:`BalancedTokenList.expand_o`: ``TokenList(r"\expandafter\pythonimmediatecontinuenoarg\expandafter", self).put_next(); continue_until_passed_back(); return BalancedTokenList.get_next()``
For example, if the current token list is `\test`, the lines above will:
* put ``\expandafter\pythonimmediatecontinuenoarg\expandafter{\test}`` following in the input stream,
* pass control to [TeX],
* after one expansion step, the input stream becomes ``\pythonimmediatecontinuenoarg{⟨content of \test⟩}``,
* ``\pythonimmediatecontinuenoarg`` is executed, and execution is returned to Python,
* finally :func:`BalancedTokenList.get_next` gets the content of ``\test``, as desired.
* :meth:`TokenList.execute`: ``(self+TokenList(r"\pythonimmediatecontinuenoarg")).put_next(); continue_until_passed_back()``
* :func:`NToken.put_next`: ``TokenList("\expandafter\pythonimmediatecontinuenoarg\noexpand\abc").put_next(); continue_until_passed_back()`` (as an example of putting a blue ``\abc`` token following in the input stream)
* etc.
This is a table of [TeX] primitives, and their Python wrapper:
.. list-table::
:header-rows: 1
* - :math:`TeX`
- Python
* - ``\let``
- :meth:`Token.set_eq`
* - ``\ifx``
- :meth:`NToken.meaning_eq`
* - ``\meaning``
- :meth:`NToken.meaning_str`
* - ``\futurelet``
- :meth:`Token.set_future`, :meth:`Token.set_future2`
* - ``\def``
- :meth:`Token.tl` (no parameter),
:meth:`Token.set_func` (define function to do some task)
* - ``\edef``
- :meth:`BalancedTokenList.expand_x`
* - Get undelimited argument
- :meth:`BalancedTokenList.get_next`
* - Get delimited argument
- :meth:`BalancedTokenList.get_until`, :meth:`BalancedTokenList.get_until_brace`
* - ``\catcode``
- :const:`catcode`
* - ``\count``
- :const:`count`, :meth:`Token.int`
* - ``\Umathcode``
- :const:`umathcode`
* - ``\detokenize``
- :meth:`BalancedTokenList.detokenize`
* - ``\begingroup``, ``\endgroup``
- :const:`group`
In order to get a "value" stored in a "variable"
(using expl3 terminology, this has various meanings e.g. a ``\countdef`` token, or a typical macro storing a token list),
use a property on the token object itself:
* :meth:`Token.int` for ``\int_use:N \int_set:Nn``,
* :meth:`Token.tl` for ``\tl_use:N \tl_set:Nn``,
* :meth:`Token.str` for ``\str_use:N \str_set:Nn``,
* :meth:`Token.bool`,
* etc.
A token list can be:
* interpreted as a string (provided it is already a string) using :meth:`TokenList.str`,
* converted from a Python string (opposite of the operation above) using :meth:`TokenList.fstr`,
* interpreted as an integer using :meth:`TokenList.int`,
* detokenized using :meth:`BalancedTokenList.detokenize`,
* expanded with :meth:`BalancedTokenList.expand_x` or :meth:`BalancedTokenList.expand_o`,
* etc.
Some debug functionalities are provided and can be specified on the command-line, refer to :mod:`~.pytotex` documentation.
"""
from __future__ import annotations
import sys
import os
import inspect
import threading
import contextlib
import io
import functools
from fractions import Fraction
from typing import Optional, Union, Callable, Any, Iterator, Protocol, Iterable, Sequence, Type, Tuple, List, Dict, IO, Set, Literal, Generator
import typing
from abc import ABC, abstractmethod
from pathlib import Path
from dataclasses import dataclass
import tempfile
import signal
import traceback
import re
import collections
from collections import defaultdict
import enum
from weakref import WeakKeyDictionary
import weakref
import itertools
import string
import numbers
import random
import linecache
from .engine import Engine, default_engine, default_engine as engine, ParentProcessEngine, EngineStatus, TeXProcessError, TeXProcessExited, ChildProcessEngine
from .lowlevel import debugging, is_sphinx_build, _handlers, _per_engine_handlers, run_none_finish, PTTInt, PTTBalancedTokenList, PTTBlock, mark_bootstrap, _run_block_finish, get_random_Python_identifier, run_main_loop, TTPRawLine, TeXToPyData, expansion_only_can_call_Python, _format, _readline, get_random_TeX_identifier, scan_Python_call_TeX_module, Python_call_TeX_local, PTTVerbatimLine, TTPEmbeddedLine
T1 = typing.TypeVar("T1")

DimensionUnit = Literal["pt", "in", "pc", "cm", "mm", "bp", "dd", "cc", "sp"]
"""
[TeX] dimension units. ``ex`` and ``em`` are font-dependent, so excluded.
"""

# Size of one of each unit measured in TeX points (``pt``), kept as exact fractions
# so that conversions between units never accumulate floating-point error.
unit_per_pt: Dict[DimensionUnit, Fraction]={
    "pt": Fraction(1, 1),
    "in": Fraction(7227, 100),
    "pc": Fraction(12, 1),
    "cm": Fraction(7227, 254),
    "mm": Fraction(7227, 2540),
    "bp": Fraction(7227, 7200),
    "dd": Fraction(1238, 1157),
    "cc": Fraction(14856, 1157),
    "sp": Fraction(1, 65536),
}
# The table above and the ``DimensionUnit`` literal must list exactly the same units.
assert {*unit_per_pt}=={*typing.get_args(DimensionUnit)}


@typing.overload
def convert_unit(val: Fraction, from_: DimensionUnit, *, to: DimensionUnit)->Fraction: ...
@typing.overload
def convert_unit(val: float, from_: DimensionUnit, *, to: DimensionUnit)->float: ...

def convert_unit(val: Fraction|float, from_: DimensionUnit, *, to: DimensionUnit)->Fraction|float:
    """
    Convert between units.

    >>> convert_unit(1, "in", to="cm")
    Fraction(127, 50)
    >>> convert_unit(1., "in", to="cm")
    2.54

    Note that in ``inkex``, then the argument order is reversed,
    i.e. ``convert_unit(1, "cm", "in")`` returns ``2.54``.
    That's why ``to`` is made into a keyword argument.

    :param val: the quantity, measured in ``from_``.
        A ``float`` input gives a ``float`` result; anything else is multiplied
        by an exact :class:`~fractions.Fraction` factor.
    :param from_: the unit ``val`` is expressed in.
    :param to: the unit to convert to (keyword-only).
    :raises ValueError: if either unit is not a key of :data:`unit_per_pt`.
    """
    for unit in from_, to:
        if unit not in unit_per_pt:
            raise ValueError(f'Unknown unit "{unit}"')
    # exact ratio "how many `to` units make up one `from_` unit"
    factor=unit_per_pt[from_]/unit_per_pt[to]
    if isinstance(val, float):
        # do the arithmetic exactly, round to float only once at the end
        return float(Fraction(val)*factor)
    return val*factor
def add_handler_async(f: Callable[[], None], *, all_engines: bool=False)->str:
    r"""
    This function is for micro-optimization. Usage is not really recommended.

    Similar to :func:`add_handler`, however, the function has these additional restrictions:

    * Within the function, **it must not send anything to [TeX].**
    * It **must not cause a Python error**, otherwise the error reporting facility
      may not work properly (does not print the correct [TeX] traceback).

    Also, on the [TeX] side you need ``\pythonimmediatecallhandlerasync``.

    Example::

        def myfunction():
            print(1)
        identifier = add_handler_async(myfunction)
        execute(r"\def\test{\pythonimmediatecallhandlerasync{" + identifier + "}}")

    Note that in order to allow the Python function to call [TeX], it's necessary to
    "listen" for callbacks on [TeX] side as well -- as [TeX] does not have the capability
    to execute multiple threads, it's necessary to explicitly listen for instructions from the Python side,
    which is what the command ``\pythonimmediatelisten`` in ``\pythonimmediatecallhandler``'s implementation does.

    .. note::
        Internally, ``\pythonimmediatecallhandlerasync{abc}``
        sends ``i⟨string⟩`` from [TeX] to Python
        (optionally flushes the output),
        and the function with index ``⟨string⟩`` in this dict is called.

    :param f: the function to register; it is stored as-is under a freshly generated identifier.
    :param all_engines: if true, store the handler in the registry shared by all engines;
        otherwise store it in the registry of the current default engine only.
    :return: the identifier under which ``f`` was registered.
    """
    identifier=get_random_Python_identifier()
    assert identifier not in _handlers
    if all_engines:
        _handlers[identifier]=f
    else:
        # per-engine registry; created on first use and dropped when the engine closes
        per_engine=_defaultget_with_cleanup(_per_engine_handlers, dict)
        assert identifier not in per_engine
        per_engine[identifier]=f
    return identifier
def add_handler(f: Callable[[], None], *, all_engines: bool=False)->str:
    r"""
    This function provides the facility to efficiently call Python code from [TeX]
    and without polluting the global namespace.

    First, note that with :func:`.pyc` you can do the following:

    >>> a=get_user_scope()["a"]=[]
    >>> execute(r"\def\test{\pyc{a.append(1)}}")

    Then every time ``\test`` is executed on [TeX] side the corresponding Python code will be executed:

    >>> a
    []
    >>> execute(r"\test")
    >>> a
    [1]

    However, this pollutes the Python global namespace as well as having to parse the string
    ``a.append(1)`` into Python code every time it's called.

    With this function, you can do the following::

        >>> def myfunction(): execute(r"\advance\count0 by 1 ")  # it's possible to execute TeX code here
        >>> identifier = add_handler(myfunction)
        >>> execute(r"\def\test{\pythonimmediatecallhandler{" + identifier + r"}}")
        >>> count[0]=5
        >>> execute(r"\test")
        >>> count[0]
        6

    The returned value, `identifier`, is a string consist of only English alphabetical letters,
    which should be used to pass into ``\pythonimmediatecallhandler`` [TeX] command
    and :func:`remove_handler`.

    The handler is called with no arguments, and its return value is ignored.

    :param f: the function to call whenever the handler is triggered from [TeX].
    :param all_engines: forwarded to :func:`add_handler_async`.
    :return: the identifier of the newly registered handler.

    .. seealso::
        :func:`add_handler_async`, :func:`remove_handler`.
    """
    def g()->None:
        # While ``f`` runs, the engine is marked as waiting
        # (this is what lets ``f`` execute TeX code, as shown in the example above).
        assert engine.status==EngineStatus.running
        engine.status=EngineStatus.waiting
        f()
        # If the status is still "waiting" once ``f`` returns, finish the exchange explicitly.
        if engine.status==EngineStatus.waiting:
            run_none_finish()
        assert engine.status==EngineStatus.running
    return add_handler_async(g, all_engines=all_engines)
def remove_handler(identifier: str, *, all_engines: bool=False)->None:
    """
    Remove a handler with the given `identifier`.

    Note that even if the corresponding [TeX] command is deleted, the command might have been
    copied to another command, so use this function with care.

    :param identifier: the value returned by :func:`add_handler` or :func:`add_handler_async`.
    :param all_engines: must match the value used when the handler was registered:
        if true the handler is removed from the registry shared by all engines,
        otherwise from the registry of the current default engine.
    :raises KeyError: if no such handler is found in the selected registry.

    .. seealso::
        :func:`add_handler`.
    """
    if all_engines:
        del _handlers[identifier]
    else:
        del _per_engine_handlers[default_engine.get_engine()][identifier]
# Per-engine namespace dictionaries, see :func:`get_user_scope`.
_user_scope: WeakKeyDictionary[Engine, Dict[str, Any]]=WeakKeyDictionary()


def _defaultget_with_cleanup(d: WeakKeyDictionary[Engine, T1], default: Callable[[], T1])->T1:
    """
    Return the entry of ``d`` belonging to the current default engine.

    If there is no entry yet, one is created by calling ``default()``, and a callback
    is registered on the engine (``add_on_close``) that removes the entry again.
    """
    current=default_engine.get_engine()
    if current in d:
        return d[current]

    d[current]=default()

    def forget(closed: Engine)->None:
        # the entry may already be gone; that's fine
        with contextlib.suppress(KeyError):
            del d[closed]

    current.add_on_close(forget)
    return d[current]
def get_user_scope()->Dict[str, Any]:
    r"""
    This is the global namespace where codes in :func:`.py`, :func:`.pyc`, :func:`.pycode` etc. runs in.
    Mainly useful for :class:`.ChildProcessEngine` or cases when the scope is not the global scope (e.g. :func:`.pyfilekpse`) only.

    The dictionary is specific to the current default engine; it is created (empty) on first access.

    >>> aaa=1
    >>> execute(r'\pyc{aaa}')
    Traceback (most recent call last):
        ...
    NameError: name 'aaa' is not defined
    >>> get_user_scope()["aaa"]=1
    >>> execute(r'\pyc{aaa}')

    ..
        Internally this must be cleaned up properly.

        >>> n=len(_user_scope)
        >>> from pythonimmediate.engine import ChildProcessEngine
        >>> with ChildProcessEngine("pdftex") as e, default_engine.set_engine(e):
        ...     assert n==len(_user_scope)
        ...     execute(r'\pyc{a=1}')
        ...     assert n+1==len(_user_scope)
        >>> assert n==len(_user_scope), (n, len(_user_scope))
    """
    return _defaultget_with_cleanup(_user_scope, dict)
class NToken(ABC):
    """
    Represent a possibly-notexpanded token.
    For convenience, a notexpanded token is called a blue token.
    It's not always possible to determine the notexpanded status of a following token in the input stream.

    Implementation note: Token objects must be frozen.
    """

    @abstractmethod
    def __str__(self)->str: ...

    @abstractmethod
    def repr1(self)->str: ...

    def meaning_str(self, escapechar: Optional[int|str]=None)->str:
        r"""
        Get the meaning of this token as a string.

        >>> C.other("-").meaning_str()
        'the character -'
        >>> T.relax.meaning_str(escapechar="?")
        '?relax'
        >>> T.relax.meaning_str()
        '\\relax'

        Note that all blue tokens have the meaning equal to ``\relax``
        (or ``[unknown command code! (0, 1)]`` in a buggy LuaTeX implementation)
        with the backslash replaced
        by the current ``escapechar``.

        :param escapechar: if given, ``\escapechar`` is temporarily set to this character
            while the meaning is computed, and restored afterwards.
        """
        if escapechar is not None:
            saved_escapechar=count["escapechar"]
            count["escapechar"]=_get_charcode(escapechar)
        try:
            if self.degree()==0 and isinstance(self, Token):
                return BalancedTokenList([T.meaning, self]).expand_o().str()
            return NTokenList([T.meaning, self]).expand_x().str()
        finally:
            # restore even when the expansion above raises,
            # so a failure does not leave \escapechar modified
            if escapechar is not None:
                count["escapechar"]=saved_escapechar

    @property
    @abstractmethod
    def noexpand(self)->"NToken":
        r"""
        Return the result of ``\noexpand`` applied on this token.
        """
        ...

    @property
    @abstractmethod
    def no_blue(self)->"Token":
        r"""
        Return the result of this token after being "touched", which drops its blue status if any.
        """
        ...

    @abstractmethod
    def put_next(self)->None:
        """
        Put this token forward in the input stream.
        """
        ...

    def meaning_eq(self, other: "NToken")->bool:
        r"""
        Whether this token is the same in meaning as the token specified in the parameter *other*.
        Equivalent to [TeX]'s ``\ifx``.
        Note that two tokens might have different meaning despite having equal :meth:`meaning_str`.
        """
        # \ifx self other 1 \fi  --  x-expands to "1" if equal, to nothing otherwise
        return bool(NTokenList([T.ifx, self, other, Catcode.other("1"), T.fi]).expand_x())

    def is_str(self)->bool:
        """
        Default implementation: always returns ``False``.
        """
        return False

    def str_code(self)->int:
        """
        ``self`` must represent a character of a [TeX] string. (i.e. equal to itself when detokenized)

        :return: the character code.
        :raises ValueError: in this default implementation, unconditionally.

        .. note::
            See :meth:`TokenList.str_codes`.
        """
        # default implementation, might not be correct. Subclass overrides as needed.
        raise ValueError("Token does not represent a string!")

    def degree(self)->int:
        """
        return the imbalance degree for this token (``{`` -> 1, ``}`` -> -1, everything else -> 0)
        """
        # default implementation, might not be correct. Subclass overrides as needed.
        return 0
@mark_bootstrap
def _helper_put_next_brace(engine: Engine)->str:
    r"""
    Return the [TeX] definition of ``\__put_next_unbalanced:n`` suitable for ``engine``.

    LuaTeX gets a variant with a longer ``\expandafter`` chain and a trailing ``\empty``
    (see the issue linked below); every other engine gets the plain variant.
    """
    # TODO https://github.com/latex3/latex3/issues/1540
    needs_luatex_variant = engine.name=="luatex"
    if not needs_luatex_variant:
        return r"""
\cs_new_protected:Npn \__put_next_unbalanced:n #1 {
\expandafter \expandafter \expandafter \pythonimmediatecontinuenoarg
\char_generate:nn {\__index} {#1}
}
"""
    return r"""
\cs_new_protected:Npn \__put_next_unbalanced:n #1 {
\expandafter \expandafter \expandafter \expandafter \expandafter \expandafter \expandafter \pythonimmediatecontinuenoarg
\expandafter \expandafter \expandafter \expandafter \char_generate:nn {\__index} {#1} \empty
}
"""
class Token(NToken):
    """
    Represent a [TeX] token, excluding the notexpanded possibility.
    See also documentation of :class:`NToken`.
    """

    @property
    @abstractmethod
    def can_blue(self)->bool:
        """
        Return whether this token can possibly be blue i.e. expandable.
        """
        ...

    @property
    def blue(self)->"BlueToken":
        r"""
        Return a :class:`BlueToken` containing ``self``. :attr:`can_blue` must be true.

        :raises ValueError: if :attr:`can_blue` is false.
        """
        if not self.can_blue:
            raise ValueError("Token cannot be blue!")
        return BlueToken(self)

    @property
    def noexpand(self)->"NToken":
        r"""
        Return a :class:`BlueToken` wrapping ``self`` if :attr:`can_blue`, otherwise ``self`` unchanged.
        """
        if not self.can_blue:
            return self
        return BlueToken(self)

    @abstractmethod
    def serialize(self)->str:
        """
        Internal function, serialize this token to be able to pass to [TeX].
        """
        ...

    @abstractmethod
    def simple_detokenize(self, get_catcode: Callable[[int], Catcode])->str:
        """
        Simple approximate detokenizer, implemented in Python.
        """
        ...

    @property
    @abstractmethod
    def assignable(self)->bool:
        """
        Whether this token can be assigned to i.e. it's control sequence or active character.
        """
        ...

    def is_expandable(self)->bool:
        r"""
        Return whether this token is expandable.

        >>> T.relax.is_expandable()
        False
        >>> T.expandafter.is_expandable()
        True
        >>> T.undefined.is_expandable()
        True
        >>> BalancedTokenList([r'\protected\def\__protected_empty{}']).execute()
        >>> T.__protected_empty.is_expandable()
        True
        >>> C.active("a").set_eq(T.empty)
        >>> C.active("a").is_expandable()
        True
        >>> C.other("a").is_expandable()
        False
        """
        # \expandafter \ifx \noexpand <self> <self> 1 \fi
        # x-expands to nothing exactly when the token is reported as expandable
        return TokenList([
            T.expandafter, T.ifx, T.noexpand, self, self, C.other("1"), T.fi
            ]).expand_x().str() == ""

    def set_eq(self, other: "NToken", global_: bool=False)->None:
        """
        Assign the meaning of this token to be equivalent to that of the other token.

        :param other: the token whose meaning is copied.
        :param global_: if true, the assignment is prefixed with ``\\global``.
        """
        assert self.assignable
        NTokenList([r"\global" if global_ else "", T.let, self, C.other("="), C.space(' '), other]).execute()

    def set_future(self)->None:
        r"""
        Assign the meaning of this token to be equivalent to that of the following token in the input stream.

        For example if this token is ``\a``, and the input stream starts with ``bcde``, then ``\a``'s meaning
        will be assigned to that of the explicit character ``b``.

        .. note::
            Tokenizes one more token in the input stream, and remove its blue status if any.
        """
        assert self.assignable
        typing.cast(Callable[[PTTBalancedTokenList], None], Python_call_TeX_local(
r"""
\cs_new_protected:Npn %name% {
%read_arg0(\__data)%
\expandafter \futurelet \__data \pythonimmediatecontinuenoarg
}
""" , sync=True))(PTTBalancedTokenList(BalancedTokenList([self])))

    def set_future2(self)->None:
        r"""
        Assign the meaning of this token to be equivalent to that of the second-next token in the input stream.

        For example if this token is ``\a``, and the input stream starts with ``bcde``, then ``\a``'s meaning
        will be assigned to that of the explicit character ``c``.

        .. note::
            Tokenizes two more tokens in the input stream, and remove their blue status if any.
        """
        assert self.assignable
        typing.cast(Callable[[PTTBalancedTokenList], None], Python_call_TeX_local(
r"""
\cs_new_protected:Npn %name% {
%read_arg0(\__data)%
\afterassignment \pythonimmediatecontinuenoarg \expandafter \futurelet \__data
}
""" , sync=True))(PTTBalancedTokenList(BalancedTokenList([self])))

    def is_defined(self)->bool:
        """
        Return whether this token is defined.

        >>> T.relax.is_defined()
        True
        >>> T.undefined.is_defined()
        False
        >>> C.active("~").is_defined()
        True
        >>> C.other("a").is_defined()
        True
        """
        # use \ifdefined
        return TokenList([T.ifdefined, self, C.other("1"), T.fi]).expand_x().str() == "1"

    def set_func(self, f: Callable[[], None], global_: bool=False)->str:
        """
        Assign this token to call the Python function `f` when executed.

        Returns an identifier, as described in :func:`add_handler`.

        :param f: the function, registered through :func:`add_handler`.
        :param global_: if true the definition uses ``\\gdef``, otherwise ``\\def``.
        """
        identifier = add_handler(f)
        TokenList([T.gdef if global_ else r"\def", self, r"{"
                r"\pythonimmediatecallhandler{"+identifier+r"}"
                r"}"]).execute()
        return identifier

    @property
    def no_blue(self)->"Token":
        """
        A :class:`Token` is never blue, so this is ``self``.
        """
        return self

    def __repr__(self)->str:
        return f"<Token: {self.repr1()}>"

    @staticmethod
    def deserialize(s: str|bytes)->"Token":
        """
        See documentation of :meth:`TokenList.deserialize`.

        Always return a single token.
        """
        t=TokenList.deserialize(s)
        assert len(t)==1
        return t[0]

    @staticmethod
    def deserialize_bytes(data: bytes)->"Token":
        """
        See documentation of :meth:`TokenList.deserialize_bytes`.

        Always return a single token.
        """
        if engine.is_unicode:
            return Token.deserialize(data.decode('u8'))
        else:
            return Token.deserialize(data)

    @typing.overload
    @staticmethod
    def get_next()->Token: ...
    @typing.overload
    @staticmethod
    def get_next(count: int)->TokenList: ...

    @staticmethod
    def get_next(count: Optional[int]=None)->Token|TokenList:
        r"""
        Get the following token.

        :param count: if given, get that many tokens (one at a time) and return them
            as a :class:`TokenList` instead of a single :class:`Token`.

        .. note::
            in LaTeX3 versions without the commit https://github.com/latex3/latex3/commit/24f7188904d6
            sometimes this may error out.

        .. note::
            because of the internal implementation of ``\peek_analysis_map_inline:n``, this may
            tokenize up to 2 tokens ahead (including the returned token),
            as well as occasionally return the wrong token in unavoidable cases.
        """
        if count is None: return Token.deserialize_bytes(
            typing.cast(Callable[[], TTPRawLine], Python_call_TeX_local(
r"""
\cs_new_protected:Npn \__get_next_callback #1 {
\peek_analysis_map_break:n { \pythonimmediatecontinue {^^J#1} }
}
\cs_new_protected:Npn %name% {
\peek_analysis_map_inline:n {
\__tlserialize_char_unchecked:nNnN {##2}##3{##1} \__get_next_callback
}
}
""", recursive=False))())
        assert count>=0
        return TokenList([Token.get_next() for __ in range(count)])

    @staticmethod
    def peek_next()->"Token":
        """
        Get the following token without removing it from the input stream.

        Equivalent to :meth:`get_next` then :meth:`put_next` immediately. See documentation of :meth:`get_next` for some notes.
        """
        t=Token.get_next()
        t.put_next()
        return t

    def defined(self)->bool:
        """
        Return whether this token is defined, that is, its meaning is not ``undefined``.

        Unlike :meth:`is_defined`, this requires the token to be :attr:`assignable`.
        """
        assert self.assignable
        # \ifx self \@undefined 1 \fi  --  expands to nothing when the token *is* defined
        return not BalancedTokenList([T.ifx, self, T["@undefined"], Catcode.other("1"), T.fi]).expand_x()

    def put_next(self)->None:
        """
        Put this token forward in the input stream.

        A token of degree 0 is put back as a one-element balanced token list;
        an unbalanced one (``{`` or ``}``) has to be regenerated on the [TeX] side from its character code.

        :raises ValueError: if the token is unbalanced, has character code >= 256
            and the engine is not a Unicode engine.
        """
        d=self.degree()
        if d==0:
            BalancedTokenList([self]).put_next()
        else:
            assert isinstance(self, CharacterToken)
            if not engine.is_unicode and self.index>=256:
                raise ValueError("Cannot put this token for non-Unicode engine!")
            if d==1:
                typing.cast(Callable[[PTTInt], None], Python_call_TeX_local(
r"""
\cs_new_protected:Npn %name% {
%read_arg0(\__index)%
\__put_next_unbalanced:n 1
}
""", recursive=False, sync=True))(PTTInt(self.index))
            else:
                assert d==-1
                typing.cast(Callable[[PTTInt], None], Python_call_TeX_local(
r"""
\cs_new_protected:Npn %name% {
%read_arg0(\__index)%
\__put_next_unbalanced:n 2
}
""", recursive=False, sync=True))(PTTInt(self.index))

    def tl(self, content: Optional[BalancedTokenList]=None, *, global_: bool=False)->BalancedTokenList:
        r"""
        Manipulate an expl3 tl variable.

        >>> BalancedTokenList(r'\tl_set:Nn \l_tmpa_tl {1{2}}').execute()
        >>> T.l_tmpa_tl.tl()
        <BalancedTokenList: 1₁₂ {₁ 2₁₂ }₂>
        >>> T.l_tmpa_tl.tl(BalancedTokenList('3+4'))
        <BalancedTokenList: 3₁₂ +₁₂ 4₁₂>
        >>> T.l_tmpa_tl.tl()
        <BalancedTokenList: 3₁₂ +₁₂ 4₁₂>

        :param content: if given, the variable is set to this token list (which is also returned);
            otherwise the current content is returned.
        :param global_: if true the assignment uses ``\xdef``, otherwise ``\edef``.
        """
        if content is not None:
            # \edef self {\unexpanded{content}}
            TokenList([T.xdef if global_ else T.edef, self, [T.unexpanded, content]]).execute()
            return content
        return BalancedTokenList([self]).expand_o()

    def estr(self)->str:
        r"""
        Expand this token according to :ref:`estr-expansion`.

        It's undefined behavior if the expansion result is unbalanced.

        >>> T.l_tmpa_tl.tl(BalancedTokenList(r'ab\l_tmpb_tl'))
        <BalancedTokenList: a₁₁ b₁₁ \l_tmpb_tl>
        >>> T.l_tmpb_tl.tl(BalancedTokenList(r'cd123+$'))
        <BalancedTokenList: c₁₁ d₁₁ 1₁₂ 2₁₂ 3₁₂ +₁₂ $₃>
        >>> T.l_tmpa_tl.estr()
        'abcd123+$'

        .. seealso::
            :meth:`BalancedTokenList.expand_estr`
        """
        BalancedTokenList([self]).put_next()
        return get_arg_estr()

    @typing.overload
    def dim(self, unit: DimensionUnit, val: int)->int: ...
    @typing.overload
    def dim(self, unit: DimensionUnit, val: float)->float: ...
    @typing.overload
    def dim(self, unit: DimensionUnit, val: Fraction)->Fraction: ...
    @typing.overload
    def dim(self, unit: DimensionUnit)->Fraction: ...
    @typing.overload
    def dim(self, unit: str)->Any: ...
    @typing.overload
    def dim(self, val: float|Fraction, unit: DimensionUnit)->Fraction: ...
    @typing.overload
    def dim(self)->str: ...

    def dim(self, *args: Any, **kwargs: Any)->Any:
        r"""
        Manipulate an expl3 dimension variable.

        >>> T.l_tmpa_dim.dim("100.5pt")
        >>> T.l_tmpa_dim.dim()
        '100.5pt'
        >>> T.l_tmpa_dim.dim(100.5, "pt")
        100.5
        >>> T.l_tmpa_dim.dim("pt")
        Fraction(201, 2)
        >>> T.l_tmpa_dim.dim("1em")
        >>> T.l_tmpa_dim.dim(1, "em")
        1
        >>> T.l_tmpa_dim.dim("em")
        Traceback (most recent call last):
            ...
        ValueError: Unknown unit "em"
        >>> T.l_tmpa_dim.dim(100.5)
        Traceback (most recent call last):
            ...
        ValueError: Explicit unit is required (e.g. "cm")
        >>> T.l_tmpa_dim.dim("6586368sp")
        >>> T.l_tmpa_dim.dim("sp")
        Fraction(6586368, 1)
        """
        assert {*kwargs.keys()}<={"val", "unit"}
        # take the first two supplied arguments (positional first), padding with None
        val, unit=[*args, *kwargs.values(), None, None][:2]
        if val is None and unit is None:
            return BalancedTokenList([T.the, self]).expand_estr()  # dim() -> "100.5pt"
        # normalize the single-argument forms so that the argument ends up in `unit`
        if unit is None and val is not None:
            val, unit=unit, val
        if val is None and unit is not None:
            if isinstance(unit, numbers.Number):
                raise ValueError('Explicit unit is required (e.g. "cm")')
            assert isinstance(unit, str), unit
            if {*string.digits}&{*unit}:
                # dim("1pt") -> None
                (BalancedTokenList([self])+BalancedTokenList.fstr(f"={unit}")).execute()
                return None
            else:
                # dim("pt") -> 201/2
                result_sp=BalancedTokenList([T.number, self]).expand_o().int()
                return convert_unit(result_sp, "sp", to=typing.cast(DimensionUnit, unit))
        # dim(1.3, "pt") -> 1.3
        assert unit is not None
        assert val is not None
        # accept both argument orders: dim(1.3, "pt") and dim("pt", 1.3)
        if isinstance(val, str): val, unit=unit, val
        (BalancedTokenList([self])+BalancedTokenList.fstr(f"={float(val):.6f}{unit}")).execute()
        # let TeX do the conversion (this will allow em and ex etc.)
        return val

    def str(self, val: Optional[str]=None)->str:
        r"""
        Manipulate an expl3 str variable.

        >>> BalancedTokenList(r'\str_set:Nn \l_tmpa_str {a+b}').execute()
        >>> T.l_tmpa_str.str()
        'a+b'
        >>> T.l_tmpa_str.str('e+f')
        'e+f'
        >>> T.l_tmpa_str.str()
        'e+f'
        >>> T.l_tmpa_str.str('e+f\ng')
        'e+f\ng'
        >>> T.l_tmpa_str.str()
        'e+f\ng'

        :param val: if given, the variable is set to this string (which is also returned);
            otherwise the current value is returned.
        :raises ValueError: when reading, if the content of the variable is not a string.
        """
        if val is not None:
            # try the transfer formats in turn, falling back to a generic token list assignment
            if PTTVerbatimLine(val).valid():
                typing.cast(Callable[[PTTBalancedTokenList, PTTVerbatimLine], None], Python_call_TeX_local(
r"""
\cs_new_protected:Npn %name% {
%read_arg0(\__container)%
%read_arg1(\__value)%
\expandafter \let \__container \__value
\pythonimmediatecontinuenoarg
}
""", recursive=False, sync=True))(PTTBalancedTokenList(BalancedTokenList([self])), PTTVerbatimLine(val))
            elif PTTBlock(val).valid():
                typing.cast(Callable[[PTTBalancedTokenList, PTTBlock], None], Python_call_TeX_local(
r"""
\cs_new:Npn \__remove_nl_relax #1 ^^J \relax {#1}
\cs_new_protected:Npn %name% {
%read_arg0(\__container)%
%read_arg1(\__value)%
\expandafter \__str_continue \expandafter {
\exp:w \expandafter \expandafter \expandafter \exp_end: \expandafter
\__remove_nl_relax \__value \relax }
\pythonimmediatecontinuenoarg
}
\cs_new_protected:Npn \__str_continue { \expandafter \def \__container }
""", recursive=False, sync=True))(PTTBalancedTokenList(BalancedTokenList([self])), PTTBlock(val))
            else:
                self.tl(BalancedTokenList.fstr(val))
            return val
        t=self.tl()
        try:
            return t.str()
        except ValueError as err:
            raise ValueError(f"Token contains {t} which is not a string!") from err

    def int(self, val: Optional[int]=None)->int:
        r"""
        Manipulate an expl3 int variable.

        >>> BalancedTokenList(r'\int_set:Nn \l_tmpa_int {5+6}').execute()
        >>> T.l_tmpa_int.int()
        11

        :param val: if given, the variable is set to this value (which is also returned);
            otherwise the current value is returned.

        .. seealso:: :data:`count`.
        """
        if val is not None:
            (BalancedTokenList([self])+BalancedTokenList.fstr('=' + str(val))).execute()
            return val
        return BalancedTokenList([T.the, self]).expand_o().int()

    def bool(self)->bool:
        r"""
        Read an expl3 bool variable.

        >>> BalancedTokenList(r'\bool_set_true:N \l_tmpa_bool').execute()
        >>> T.l_tmpa_bool.bool()
        True
        """
        # \bool_if:NT self {1}  --  expands to one token if true, to nothing otherwise
        return bool(len(BalancedTokenList([r"\bool_if:NT", self, "1"]).expand_x()))
# TeX code for serializing and deserializing a token list,
# i.e. converting a token list from/to a string.
#
# The block registered here is the deserializing half: it defines the
# ``\__tldeserialize_...`` helpers and the two entry points
# ``\__tldeserialize_dot:Nn`` (input string ends with '.') and
# ``\__tldeserialize_nodot:Nn`` (input string has no final '.', one is appended).
# Inside ``\__tldeserialize_dot:Nn`` the short control sequences ``\^ \> \* \\ \1``...``\8 \A``...``\D \R \.``
# are ``\let`` to those helpers within a group, and the result is built with ``\xdef``.
#
# History: the functions were moved outside in commit 37888c65ecd96f636ea41cf5cacd1763258eff4c.
# Probably a better idea but doesn't seem to be much faster.
mark_bootstrap(
r"""
\precattl_exec:n {
\def \__tldeserialize_start #1 { \csname #1 \endcsname }
\def \cC{__tldeserialize_\^} #1 #2 { \csname #1 \expandafter \expandafter \expandafter \endcsname \char_generate:nn {`#2-64} {12} }
\def \cC{__tldeserialize_\>} #1 #2 \cO\ { \csname #1 \endcsname #2 \cU\ }
\def \cC{__tldeserialize_\*} #1 #2 \cO\ #3 { \csname #1 \endcsname #2 \char_generate:nn {`#3-64} {12} }
\def \cC{__tldeserialize_\\} #1 \cO\ #2 { \unexpanded \expandafter \expandafter \expandafter { \expandafter \noexpand \csname #1 \endcsname } \csname #2 \endcsname }
\def \cC{__tldeserialize_1} #1 #2 { \char_generate:nn {`#1} {1} \csname #2 \endcsname }
\def \cC{__tldeserialize_2} #1 #2 { \char_generate:nn {`#1} {2} \csname #2 \endcsname }
\def \cC{__tldeserialize_3} #1 #2 { \char_generate:nn {`#1} {3} \csname #2 \endcsname }
\def \cC{__tldeserialize_4} #1 #2 { \char_generate:nn {`#1} {4} \csname #2 \endcsname }
\def \cC{__tldeserialize_6} #1 #2 { ## \char_generate:nn {`#1} {6} \csname #2 \endcsname }
\def \cC{__tldeserialize_7} #1 #2 { \char_generate:nn {`#1} {7} \csname #2 \endcsname }
\def \cC{__tldeserialize_8} #1 #2 { \char_generate:nn {`#1} {8} \csname #2 \endcsname }
\def \__tldeserialize_A #1 #2 { \char_generate:nn {`#1} {10} \csname #2 \endcsname }
\def \__tldeserialize_B #1 #2 { \char_generate:nn {`#1} {11} \csname #2 \endcsname }
\def \__tldeserialize_C #1 #2 { #1 \csname #2 \endcsname }
\def \__tldeserialize_helper { \expandafter \exp_end: \noexpand }
\def \__tldeserialize_D #1 #2 { \unexpanded \expandafter { \exp:w \expandafter \expandafter \expandafter \__tldeserialize_helper \char_generate:nn {`#1} {13} } \csname #2 \endcsname }
\def \__tldeserialize_R #1 { \cFrozenRelax \csname #1 \endcsname }
% here #1 is the target token list to store the result to, #2 is a string with the final '.'.
% normally LaTeX3 token list cannot hold outer tokens, so we use \xdef.
\cs_new_protected:Npn \__tldeserialize_dot:Nn #1 #2 {
\begingroup
%\tl_gset:Nn \__gtmp {#2}
%\tl_greplace_all:Nnn \__gtmp {~} {\cO\ }
\tl_gset:Nx \__gtmp {\cC{_ _kernel_str_to_other_fast:n}{#2}}
\let \^ \cC{__tldeserialize_\^}
\let \> \cC{__tldeserialize_\>}
\let \* \cC{__tldeserialize_\*}
\let \\ \cC{__tldeserialize_\\}
\let \1 \cC{__tldeserialize_1}
\let \2 \cC{__tldeserialize_2}
\let \3 \cC{__tldeserialize_3}
\let \4 \cC{__tldeserialize_4}
\let \6 \cC{__tldeserialize_6}
\let \7 \cC{__tldeserialize_7}
\let \8 \cC{__tldeserialize_8}
\let \A \__tldeserialize_A
\let \B \__tldeserialize_B
\let \C \__tldeserialize_C
\let \D \__tldeserialize_D
\let \R \__tldeserialize_R
\let \. \empty
\xdef \__gtmp {\expandafter \__tldeserialize_start \__gtmp}
\endgroup
\tl_set_eq:NN #1 \__gtmp
}
}
% deserialize as above but #2 does not end with '.'.
\cs_new_protected:Npn \__tldeserialize_nodot:Nn #1 #2 {
\__tldeserialize_dot:Nn #1 {#2 .}
}
""")
# Contract of the TeX-side per-token serializer defined below:
# callback will be called exactly once with the serialized result (either other or space catcode)
# and, as usual, with nothing leftover following in the input stream
# the token itself can be gobbled or \edef-ed to discard it.
# if it's active outer or control sequence outer then gobble fails.
# if it's { or } then edef fails.
@mark_bootstrap
def _tlserialize(engine: Engine)->str:
    r"""
    Return the [TeX] code implementing token list serialization.

    The code is assembled from several raw-string pieces:

    * In the first two pieces the readable placeholders ``#char``, ``#cat``, ``#token``
      and ``#callback`` are purely textual; they are turned into the positional
      parameters ``#1``... by the ``str.replace`` calls that follow each piece.
    * The piece defining ``\__nodot_unchecked:Nn`` is left out when ``engine.name`` is ``"luatex"``.
    * Finally every ``__`` in the assembled text is replaced with ``__tlserialize_``,
      so e.g. ``\__char_unchecked:nNnN`` is defined as ``\__tlserialize_char_unchecked:nNnN``.
    """
    #if engine.name=="luatex": return ""
    return (
r"""
\cs_new_protected:Npn \__char_unchecked:nNnN #char #cat {
\int_compare:nNnTF {
\if #cat 1 1 \fi
\if #cat 2 1 \fi
0
} = {0} {
% it's neither 1 nor 2, can edef
\tl_set:Nn \__process_after_edef { \__continue_after_edef {#char} #cat }
\afterassignment \__process_after_edef
\edef \__the_token
} {
% it's either 1 or 2
% might not be able to edef, but can gobble
\__process_gobble {#char} #cat
}
}
\precattl_exec:n {
\def \__frozen_relax_container { \cFrozenRelax }
\def \__null_cs_container { \cC{} }
%\edef \__endwrite_container { \noexpand \cEndwrite }
%\tl_if_eq:NnT \__endwrite_container { \cC{cEndwrite} } {
% \errmessage { endwrite~token~not~supported }
%}
\cs_new:Npn \__prefix_escaper #1 {
\ifnum 0<\__if_weird_charcode_or_space:n {`#1} ~
*
\fi
}
\cs_new:Npn \__content_escaper #1 {
\ifnum 0<\__if_weird_charcode_or_space:n {`#1} ~
\cO\ \char_generate:nn {`#1+64} {12}
\else
#1
\fi
}
% fully expand to zero if #1 is not weird, otherwise expand to nonzero
% weird means as can be seen below <32 or =127 (those that will be ^^-escaped without -8bit)
% XeLaTeX also make 80..9f weird
\cs_new:Npn \__if_weird_charcode:n #1 {
\ifnum #1 < 32 ~ 1 \fi
\ifnum #1 > 126 ~ \ifnum #1 < 160 ~ 1 \fi \fi
0
}
\cs_new:Npn \__if_weird_charcode_or_space:n #1 {
\ifnum #1 < 33 ~ 1 \fi
\ifnum #1 > 126 ~ \ifnum #1 < 160 ~ 1 \fi \fi
0
}
\cs_new_protected:Npn \__continue_after_edef #char #cat #callback {
\token_if_eq_charcode:NNTF #cat 0 {
\tl_if_eq:NNTF \__the_token \__frozen_relax_container {
#callback {\cO{ R }}
} {
\tl_if_eq:NNTF \__the_token \__null_cs_container {
#callback {\cO{ \\\ }}
} {
\tl_set:Nx \__name { \expandafter \cs_to_str:N \__the_token }
\exp_args:Nx #callback {
\str_map_function:NN \__name \__prefix_escaper
\cO\\
\str_map_function:NN \__name \__content_escaper
\cO\ }
}
}
} {
\exp_args:Nx #callback {
\ifnum 0<\__if_weird_charcode:n {#char} ~
\cO{^} #cat \char_generate:nn {#char+64} {12}
\else
#cat \expandafter \string \__the_token
\fi
}
}
}
"""
        # placeholders of the piece above -> positional TeX parameters
        .replace("#char", "#1")
        .replace("#cat", "#2")
        .replace("#callback", "#3")
        +
r"""
\cs_new_protected:Npn \__process_gobble #char #cat #token #callback {
\exp_args:Nx #callback {
\ifnum 0<\__if_weird_charcode:n {#char} ~
\cO{^} #cat \char_generate:nn {#char+64} {12}
\else
#cat \expandafter \string #token
\fi
}
}
"""
        # this piece takes four parameters, so #callback is #4 here
        .replace("#char", "#1")
        .replace("#cat", "#2")
        .replace("#token", "#3")
        .replace("#callback", "#4")
        +
        # closes the \precattl_exec:n { opened in the first piece
r"""
}
"""
        +
        (
            # \__nodot_unchecked:Nn is not defined for LuaTeX
            "" if engine.name=="luatex" else
r"""
% serialize token list in #2 store to #1.
\cs_new_protected:Npn \__nodot_unchecked:Nn #1 #2 {
\tl_build_begin:N #1
\tl_set:Nn \__callback { \tl_build_put_right:Nn #1 }
\tl_analysis_map_inline:nn {#2} {
\__char_unchecked:nNnN {##2}##3{##1} \__callback
}
\tl_build_end:N #1
}
""")
        +
r"""
% serialize token list in #2 store to #1. Call T or F branch depends on whether serialize is successful.
% #1 must be different from \__tmp.
\cs_new_protected:Npn \__nodot:NnTF #1 #2 {
\tl_if_eq:onTF {\detokenize{#2}} {#2} \__nodot_string:NnTF \__nodot_general:NnTF
#1 {#2}
}
\cs_generate_variant:Nn \tl_if_eq:nnTF {o}
% same as above but #1 is guaranteed to be string
\precattl_exec:n{
\cs_new_protected:Npn \__nodot_string:NnTF #1 #2 #3 #4 {
%\tl_set:Nx #1 { \cC{_ _kernel_str_to_other_fast:n}{#2} }
%\tl_set:Nx #1 { \cO\s \tl_map_function:NN #1 \__process_string }
\tl_set:Nx #1 { \cO\s \str_map_function:nN {#2} \__process_string }
#3
}
}
% <string> serialize to 's<the string itself>' with weird characters become \xa0 + (weird character + 64)
% note that TeX-side deserialization does not handle this but it's not needed
% refer to __if_weird_charcode for detail
\cs_new:Npn \__if_weird_charcode_or_esc:n #1 {
\ifnum #1 < 32 ~ 1 \fi
\ifnum #1 > 126 ~ \ifnum #1 < 161 ~ 1 \fi \fi
0
}
\precattl_exec:n{
\cs_new:Npn \__process_string #1 { % similar to \__content_escaper
\ifnum 0<\__if_weird_charcode_or_esc:n {`#1} ~
\cO\^^a0 \char_generate:nn {`#1+64} {12}
\else
#1
\fi
}
}
"""
    # namespace every internal name: "__" -> "__tlserialize_"
    ).replace("__", "__tlserialize_")
# TeX-side wrappers around the serializer, passed to ``mark_bootstrap`` as a literal string:
#   * ``\__tlserialize_nodot_general:NnTF`` serializes #2 into #1, deserializes the result
#     into ``\__tlserialize_nodot_tmp`` and compares it with #2 (a round-trip check); the
#     T/F branches are picked up by the trailing ``\tl_if_eq:NnTF`` ("dangling").
#   * ``...:NnF`` / ``...:NnT`` are the one-branch forms of ``\__tlserialize_nodot:NnTF``.
#   * ``\__tlserialize_nodot:Nn`` issues the ``cannot-serialize`` error message on failure.
#   * ``NV`` variants are generated for the deserializers and for ``\__tlserialize_nodot:Nn``.
mark_bootstrap(
r"""
% same as above but #1 is guaranteed to be not-string
\cs_new_protected:Npn \__tlserialize_nodot_general:NnTF #1 #2 {
\__tlserialize_nodot_unchecked:Nn #1 {#2}
\__tldeserialize_nodot:NV \__tlserialize_nodot_tmp #1
\tl_if_eq:NnTF \__tlserialize_nodot_tmp {#2} % dangling
}
\cs_new_protected:Npn \__tlserialize_nodot:NnF #1 #2 {
\__tlserialize_nodot:NnTF #1 {#2} {} % dangling
}
\cs_new_protected:Npn \__tlserialize_nodot:NnT #1 #2 #3 { \__tlserialize_nodot:NnTF #1 {#2} {#3} {} }
\msg_new:nnn {pythonimmediate} {cannot-serialize} {Token~list~cannot~be~serialized~<#1>}
\cs_new_protected:Npn \__tlserialize_nodot:Nn #1 #2{
\__tlserialize_nodot:NnF #1 {#2} {
\msg_error:nnx {pythonimmediate} {cannot-serialize} {\detokenize{#2} -> #1}
}
}
\cs_generate_variant:Nn \__tldeserialize_dot:Nn {NV}
\cs_generate_variant:Nn \__tldeserialize_nodot:Nn {NV}
\cs_generate_variant:Nn \__tlserialize_nodot:Nn {NV}
""")
# Attribute-style token creation is switched off during a Sphinx build,
# otherwise it conflicts with sphinx-autodoc's mechanism to inspect the objects.
enable_get_attribute = not is_sphinx_build
class ControlSequenceTokenMaker:
    r"""
    Factory that builds :class:`ControlSequenceToken` objects through attribute
    or subscript syntax. Every produced token name starts with the maker's *prefix*.

    >>> from pythonimmediate import T
    >>> assert T is ControlSequenceToken.make
    >>> T.hello
    <Token: \hello>
    >>> T["a@b"] # for the "harder to construct" tokens
    <Token: \a@b>
    >>> P=ControlSequenceTokenMaker("__mymodule_")
    >>> P.a
    <Token: \__mymodule_a>
    """

    def __init__(self, prefix: str)->None:
        highest_code = max(map(ord, prefix), default=0)
        assert highest_code <= 0x7f, "Prefix containing non-ASCII characters is not supported because of complexities with is_unicode (see documentation of ControlSequenceToken)"
        self.prefix = prefix

    # Exactly one of the two attribute hooks is installed, chosen when the class is created.
    if enable_get_attribute:
        def __getattribute__(self, a: str)->"ControlSequenceToken":
            # Intercepts *every* attribute read, so the prefix itself must be
            # fetched through ``object.__getattribute__``.
            stored_prefix = object.__getattribute__(self, "prefix")
            return ControlSequenceToken(stored_prefix + a)
    else:
        def __getattr__(self, a: str)->"ControlSequenceToken":
            stored_prefix = object.__getattribute__(self, "prefix")
            return ControlSequenceToken(stored_prefix + a)

    def __getitem__(self, a: str|bytes)->"ControlSequenceToken":
        stored_prefix = object.__getattribute__(self, "prefix")
        if isinstance(a, bytes):
            # bytes names get the UTF-8 encoded prefix so both operands are bytes
            stored_prefix = stored_prefix.encode('u8')
        return ControlSequenceToken(stored_prefix + a)
[docs]class ControlSequenceToken(Token):
r"""
Represents a control sequence::
>>> ControlSequenceToken("abc")
<Token: \abc>
>>> ControlSequenceToken([97, 98, 99])
<Token: \abc>
The preferred way to construct a control sequence is :data:`T`.
Some care is needed to construct control sequence tokens whose name contains Unicode characters,
as the exact token created depends on whether the engine is Unicode-based:
>>> with default_engine.set_engine(None): # if there's no default_engine...
... ControlSequenceToken("×") # this will raise an error
Traceback (most recent call last):
...
AssertionError: Cannot construct a control sequence with non-ASCII characters without specifying is_unicode
The same control sequences may appear differently on Unicode and non-Unicode engines, and conversely,
different control sequences may appear the same between Unicode and non-Unicode engines::
>>> a = ControlSequenceToken("u8:×", is_unicode=False)
>>> a
<Token: \u8:×>
>>> a == ControlSequenceToken(b"u8:\xc3\x97", is_unicode=False)
True
>>> a.codes
(117, 56, 58, 195, 151)
>>> b = ControlSequenceToken("u8:×", is_unicode=True)
>>> b
<Token: \u8:×>
>>> b.codes
(117, 56, 58, 215)
>>> a == b
False
>>> a == ControlSequenceToken("u8:\xc3\x97", is_unicode=True)
True
Generally, the default way to construct the control sequence will give you what you want.
>>> with ChildProcessEngine("pdftex") as engine, default_engine.set_engine(engine):
... print(T["u8:×"].meaning_str())
... print(T["u8:×".encode('u8')].meaning_str())
macro:->\IeC {\texttimes }
macro:->\IeC {\texttimes }
>>> with ChildProcessEngine("luatex") as engine, default_engine.set_engine(engine):
... print(C.active("\xAD").meaning_str()) # discretionary hyphen
... BalancedTokenList([r"\expandafter\def\csname\string", C.active("\xAD"), r"\endcsname{123}"]).execute()
... print(T["\xAD"].meaning_str()) # just a convoluted test since no control sequence with non-ASCII name is defined by default in LuaTeX (that I know of)
macro:->\-
macro:->123
*is_unicode* will be fetched from :const:`~engine.default_engine`
if not explicitly specified.
"""
_codes: Tuple[int, ...] # this is the only thing that is guaranteed to be defined.
_csname: Optional[str] # defined if csname is representable as a str. The same control sequence may be represented differently depends on is_unicode.
_csname_bytes: Optional[bytes] # defined if csname is representable as a bytes.
def __init__(self, csname: Union[str, bytes, list[int], tuple[int, ...]], is_unicode: Optional[bool]=None)->None:
    r"""
    Create a control sequence token from its name.

    :param csname: the name, given as a ``str``, as ``bytes``, or as a list/tuple of character codes.
    :param is_unicode: whether the character codes of the token are Unicode code points (``True``)
        or the bytes of the UTF-8 encoding of the name (``False``). When ``None``, the value is
        taken from ``default_engine`` if it has an engine set.
    :raises AssertionError: if a ``str``/``bytes`` name is non-ASCII while *is_unicode* is unknown,
        or if ``bytes`` are passed together with ``is_unicode=True``.
    """
    if is_unicode is None and default_engine.engine is not None:
        is_unicode = default_engine.is_unicode

    if isinstance(csname, (list, tuple)):
        self._codes = tuple(csname)
        self._csname = "".join(chr(c) for c in csname)
        # ``_csname_bytes`` used to be left unset on this path, which made
        # ``csname_bytes`` fail with AttributeError. Derive it the same way the
        # str/bytes paths below would; store None when no byte form is well-defined.
        try:
            if is_unicode:
                self._csname_bytes = self._csname.encode('u8')
            elif is_unicode is None and any(c > 0x7f for c in self._codes):
                self._csname_bytes = None  # ambiguous without knowing the engine type
            else:
                self._csname_bytes = bytes(self._codes)
        except ValueError:
            # code above 255 on a byte-based name, or a code point UTF-8 cannot encode
            self._csname_bytes = None
        return

    if is_unicode is None:
        # check csname can only be interpreted as one way (i.e. all codes ≤ 0x7f)
        name_codes = map(ord, csname) if isinstance(csname, str) else csname
        assert all(c <= 0x7f for c in name_codes), "Cannot construct a control sequence with non-ASCII characters without specifying is_unicode"

    if isinstance(csname, str):
        self._csname = csname
        self._csname_bytes = csname.encode("u8")
        if is_unicode:
            self._codes = tuple(ord(c) for c in csname)
        else:
            self._codes = tuple(self._csname_bytes)
    else:
        assert is_unicode in (None, False), "Cannot construct control sequence from bytes if is_unicode"
        self._csname_bytes = csname
        try:
            self._csname = csname.decode('u8')
        except UnicodeDecodeError:
            # the name has no str representation
            self._csname = None
        self._codes = tuple(self._csname_bytes)
def __eq__(self, other: Any)->bool:
if not isinstance(other, ControlSequenceToken): return False
return self._codes == other._codes
def __hash__(self)->int:
return hash(self._codes)
@property
def codes(self)->Tuple[int, ...]:
r"""
Return the codes of this control sequence -- that is, if ``\detokenize{...}`` is applied on this token,
the tokens with the specified character codes (plus ``\escapechar``) will result.
"""
return self._codes
@property
def csname(self)->str:
r"""
Return some readable name of the control sequence.

:raises AssertionError: if no ``str`` form of the name is stored (``_csname`` is ``None``),
    which happens when the token was constructed from bytes that are not valid UTF-8.
"""
assert self._csname is not None
return self._csname
@property
def csname_bytes(self)->bytes:
assert self._csname_bytes is not None
return self._csname_bytes
make=typing.cast(ControlSequenceTokenMaker, None) # some interference makes this incorrect. Manually assign below
"""
Refer to the documentation of :class:`ControlSequenceTokenMaker`.
"""
can_blue=True
@property
def assignable(self)->bool:
return True
def __str__(self)->str:
if not self._codes: return r"\csname\endcsname"
if self._csname is not None:
return "\\"+self._csname
return "\\"+repr(self._csname_bytes)
def serialize(self)->str:
    r"""
    Return the wire form of this control sequence:
    one ``*`` per escaped character, a backslash, the name, then a terminating space.

    Characters with code below 33 are escaped inside the name as a space
    followed by the character with code + 64.
    """
    escaped_count = 0
    pieces = []
    for code in self._codes:
        if code < 33:
            escaped_count += 1
            pieces.append(' ' + chr(code + 64))
        else:
            pieces.append(chr(code))
    return "*" * escaped_count + "\\" + "".join(pieces) + " "
def repr1(self)->str:
if self._csname is not None:
return f"\\" + repr(self._csname.replace(' ', "␣"))[1:-1]
return f"\\" + repr(self._csname_bytes).replace(' ', "␣")
def simple_detokenize(self, get_catcode: Callable[[int], Catcode])->str:
    r"""
    Detokenize this control sequence under the catcode regime *get_catcode*.

    A name made of letters is followed by a space; a single non-letter
    character name is not.

    :raises NotImplementedError: if the name is empty, or has more than one
        character and contains a non-letter.
    """
    name = self.csname
    if not name:
        raise NotImplementedError("This isn't simple!")
    looks_like_word = len(name) > 1 or get_catcode(ord(name)) == Catcode.letter
    if not looks_like_word:
        return "\\" + name
    if any(get_catcode(ord(ch)) != Catcode.letter for ch in name):
        raise NotImplementedError("This isn't simple!")
    return "\\" + name + " "
ControlSequenceToken.make=ControlSequenceTokenMaker("")
T=ControlSequenceToken.make
"""
See :class:`ControlSequenceTokenMaker`.
"""
P=ControlSequenceTokenMaker("_pythonimmediate_") # create private tokens
if enable_get_attribute:
assert isinstance(T.testa, ControlSequenceToken)
class Catcode(enum.Enum):
    """
    Enum of category codes, with members ``begin_group``, ``end_group``, etc.

    The value of each member is the [TeX] code for the catcode:

    >>> Catcode.letter.value
    11

    Members double as token factories. Calling a :class:`Catcode` member with either
    a character or a character code creates the corresponding token::

        >>> C.letter("a")  # creates a token with category code letter and character code "a"=chr(97)
        <Token: a₁₁>
        >>> C.letter(97)  # same as above
        <Token: a₁₁>

    Both of the above forms are equivalent to ``CharacterToken(index=97, catcode=Catcode.letter)``.

    Members also support ``in`` to check if a token has a particular catcode.
    Note that it is not safe to access :attr:`CharacterToken.catcode` directly, as it is
    not available for all tokens.

    >>> C.letter("a") in C.letter
    True
    >>> C.letter("a") in C.space
    False
    >>> T.a in C.letter
    False
    >>> C.letter("a").catcode==C.letter
    True
    >>> T.a.catcode==C.letter
    Traceback (most recent call last):
        ...
    AttributeError: 'ControlSequenceToken' object has no attribute 'catcode'

    The behavior with blue tokens might be unexpected, be careful::

        >>> C.active("a").blue in C.active
        True
        >>> T.a.blue in C.letter
        False
        >>> T.a.blue in C.active
        False

    See also :ref:`token-list-construction` for more ways of constructing token lists.
    """

    begin_group = bgroup = 1
    end_group = egroup = 2
    math_toggle = math = 3
    alignment = 4
    parameter = param = 6
    math_superscript = superscript = 7
    math_subscript = subscript = 8
    space = 10
    letter = 11
    other = 12
    active = 13

    escape = 0
    end_of_line = paragraph = line = 5
    ignored = 9
    comment = 14
    invalid = 15

    @property
    def for_token(self)->bool:
        """
        Return whether a :class:`CharacterToken` may have this catcode.

        >>> Catcode.escape.for_token
        False
        >>> Catcode.letter.for_token
        True
        """
        never_on_a_token = {
            Catcode.escape,
            Catcode.end_of_line,
            Catcode.ignored,
            Catcode.comment,
            Catcode.invalid,
        }
        return self not in never_on_a_token

    def __call__(self, ch: Union[str, bytes, int])->"CharacterToken":
        if isinstance(ch, bytes):
            if len(ch) != 1:
                raise ValueError("bytes must have length 1, received "+repr(ch))
            code = ch[0]
        elif isinstance(ch, str):
            code = ord(ch)
        else:
            code = ch
        return CharacterToken(code, self)

    def __contains__(self, t: NToken)->bool:
        # blue-ness is ignored for the purpose of the catcode check
        plain = t.no_blue
        if not isinstance(plain, CharacterToken):
            return False
        return plain.catcode == self

    @staticmethod
    def lookup(x: int)->Catcode:
        """
        Construct from [TeX] code.

        >>> C.lookup(11)
        <Catcode.letter: 11>
        """
        return _catcode_value_to_member[x]


# maps each [TeX] catcode number to its (non-alias) enum member
_catcode_value_to_member = {member.value: member for member in Catcode}

C = Catcode
@dataclass(repr=False, frozen=True) # must be frozen because bgroup and egroup below are reused
class CharacterToken(Token):
    """
    A character token: a character code paired with a category code.

    The preferred way to construct a character token is using :data:`C`.
    """

    index: int
    """
    The character code of this token.

    >>> C.letter("a").index
    97
    """
    catcode: Catcode
    """
    >>> C.letter("a").catcode
    <Catcode.letter: 11>

    Note that it is recommended to use the shorthand documented in :class:`Catcode` to
    check the catcode of a token instead:

    >>> C.letter("a") in C.letter
    True
    """

    @property
    def can_blue(self)->bool:
        return self.catcode == Catcode.active

    @property
    def chr(self)->str:
        """
        The character of this token.

        >>> C.letter("a").chr
        'a'
        """
        return chr(self.index)

    def __post_init__(self)->None:
        # validate the fields right after the dataclass-generated __init__ ran
        assert isinstance(self.index, int)
        assert self.index >= 0
        assert self.catcode.for_token

    def __str__(self)->str:
        return self.chr

    def serialize(self)->str:
        """
        Return the wire form: the catcode as one hex digit followed by the character.
        Characters with code below 0x10 are written as ``^``, the catcode digit, then
        the character with code + 0x40.
        """
        cat_digit = f"{self.catcode.value:X}"
        if self.index < 0x10:
            return "^" + cat_digit + chr(self.index + 0x40)
        return cat_digit + self.chr

    def repr1(self)->str:
        # the catcode is shown as a subscript number after the character
        subscript_digits = "₀₁₂₃₄₅₆₇₈₉"
        suffix = "".join(subscript_digits[int(digit)] for digit in str(self.catcode.value))
        return repr(self.chr)[1:-1] + suffix

    @property
    def assignable(self)->bool:
        return self.catcode == Catcode.active

    def degree(self)->int:
        """
        Return +1 for a begin-group character, -1 for an end-group character, 0 otherwise.
        """
        if self.catcode == Catcode.egroup:
            return -1
        return 1 if self.catcode == Catcode.bgroup else 0

    def is_str(self)->bool:
        # in a detokenized string a space has catcode space, everything else catcode other
        expected = Catcode.space if self.index == 32 else Catcode.other
        return expected == self.catcode

    def str_code(self)->int:
        """
        Return the character code, provided this token could be part of a [TeX] string.

        :raises ValueError: if :meth:`is_str` is false for this token.
        """
        if self.is_str():
            return self.index
        raise ValueError("this CharacterToken does not represent a string!")

    def simple_detokenize(self, get_catcode: Callable[[int], Catcode])->str:
        return self.chr
[docs]class _FrozenRelaxToken(Token):
r"""
>>> frozen_relax_token
<Token: [frozen]\relax>
>>> BalancedTokenList(r'\ifnum 0=0\fi').expand_x()
<BalancedTokenList: [frozen]\relax>
:meta public:
"""
can_blue=False
assignable=False
def __str__(self)->str:
return r"\relax"
[docs] def serialize(self)->str:
return "R"
def repr1(self)->str:
return r"[frozen]\relax"
[docs] def simple_detokenize(self, get_catcode: Callable[[int], Catcode])->str:
raise NotImplementedError("This isn't simple!")
frozen_relax_token=_FrozenRelaxToken()
r"""
Constant representing the frozen ``\relax`` token. See :class:`_FrozenRelaxToken`.
"""
# other special tokens later...
bgroup=Catcode.bgroup("{")
egroup=Catcode.egroup("}")
space=Catcode.space(" ")
[docs]@dataclass(frozen=True)
class BlueToken(NToken):
"""
Represents a blue token (see documentation of :class:`NToken`).
"""
# the underlying (non-blue) token being wrapped
token: Token
# a blue token is already "noexpand"-ed, so this returns the object itself
@property
def noexpand(self)->"BlueToken": return self
# strip the blue marker and return the wrapped token
@property
def no_blue(self)->"Token": return self.token
def __str__(self)->str: return str(self.token)
def repr1(self)->str: return "notexpanded:"+self.token.repr1()
[docs] def put_next(self)->None:
# Sends the wrapped token to TeX as a one-element balanced token list.
# The TeX handler below reads it into \__target, then expands to
#   \expandafter \pythonimmediatelisten \noexpand <token>
# i.e. \noexpand is applied to the token before \pythonimmediatelisten runs.
# NOTE(review): %name%, %read_arg0(...)% and %optional_sync% look like placeholders
# filled in by Python_call_TeX_local -- confirm against its implementation.
typing.cast(Callable[[PTTBalancedTokenList], None], Python_call_TeX_local(
r"""
\cs_new_protected:Npn \__put_next_blue_tmp {
%optional_sync%
\expandafter \pythonimmediatelisten \noexpand
}
\cs_new_protected:Npn %name% {
%read_arg0(\__target)%
\expandafter \__put_next_blue_tmp \__target
}
""", recursive=False))(PTTBalancedTokenList(BalancedTokenList([self.token])))
# Category codes of the normal ("document") regime. Characters that are not listed
# are looked up with a fallback by the code that consumes this table.
doc_catcode_table: Dict[int, Catcode] = {
    ord("{"): Catcode.begin_group,
    ord("}"): Catcode.end_group,
    ord("$"): Catcode.math_toggle,
    ord("&"): Catcode.alignment,
    ord("#"): Catcode.parameter,
    ord("^"): Catcode.math_superscript,
    ord("_"): Catcode.math_subscript,
    ord(" "): Catcode.space,
    ord("~"): Catcode.active,
}
# ASCII letters, lowercase first and then uppercase
for ch in (*range(ord('a'), ord('z')+1), *range(ord('A'), ord('Z')+1)):
    doc_catcode_table[ch] = Catcode.letter
doc_catcode_table[ord("\\")] = Catcode.escape
doc_catcode_table[ord("%")] = Catcode.comment

# The expl3 regime is the document regime with these overrides.
e3_catcode_table = {
    **doc_catcode_table,
    ord("_"): Catcode.letter,
    ord(":"): Catcode.letter,
    ord(" "): Catcode.ignored,
    ord("\t"): Catcode.ignored,
    ord("~"): Catcode.space,
}
TokenListType = typing.TypeVar("TokenListType", bound="TokenList")
if typing.TYPE_CHECKING:
TokenListBaseClass = collections.UserList[Token]
else: # Python 3.8 compatibility
TokenListBaseClass = collections.UserList
def TokenList_e3(s: str)->TokenList: return TokenList.e3(s)
[docs]class UnbalancedTokenListError(ValueError):
"""
Exception raised when a token list is unbalanced.
"""
[docs]class TokenList(TokenListBaseClass):
r"""
Represent a [TeX] token list, none of which can contain a blue token.
The class can be used identical to a Python list consist of :class:`Token` objects,
plus some additional methods to operate on token lists.
The list of tokens represented by this class does not need to be balanced.
Usually you would want to use :class:`BalancedTokenList` instead.
.. _token-list-construction:
Token list construction
-----------------------
The constructor of this class accepts parameters in various different forms to allow convenient
construction of token lists.
Most generally, you can construct a token list from any iterable consist of (recursively) iterables,
or tokens, or strings. For example::
>>> TokenList([Catcode.letter("a"), "bc", [r"def\gh"]])
<TokenList: a₁₁ b₁₁ c₁₁ {₁ d₁₁ e₁₁ f₁₁ \gh }₂>
This will make `a` be the token list with value ``abc{def\gh }``.
Note that the list that is recursively nested inside is used to represent a nesting level.
A string will be "flattened" into the closest level, but a token list will not be flattened --
they can be manually flattened with Python ``*`` syntax.
As a special case, you can construct from a string::
>>> TokenList(r"\let \a \b")
<TokenList: \let \a \b>
The constructor of other classes such as :class:`BalancedTokenList` and :class:`NTokenList`
works the same way.
The above working implies that:
- If you construct a token list from an existing token list, it will be copied (because a :class:`TokenList`
is a ``UserList`` of tokens, and iterating over it gives :class:`Token` objects),
similar to how you can copy a list with the ``list`` constructor::
>>> a = TokenList(["hello world"])
>>> b = TokenList(a)
>>> b
<TokenList: h₁₁ e₁₁ l₁₁ l₁₁ o₁₁ w₁₁ o₁₁ r₁₁ l₁₁ d₁₁>
>>> a==b
True
>>> a is b
False
- Construct a token list from a list of tokens::
>>> TokenList([Catcode.letter("a"), Catcode.other("b"), T.test])
<TokenList: a₁₁ b₁₂ \test>
The above will define ``a`` to be ``ab\test``, provided ``T`` is
the object referred to in :class:`ControlSequenceTokenMaker`.
See also :class:`Catcode` for the explanation of the ``Catcode.letter("a")`` form.
By default, strings will be converted to token lists using :meth:`TokenList.e3`, although you can customize it by:
- Passing the second argument to the constructor.
- Manually specify the type:
>>> TokenList([T.directlua, [*TokenList.fstr(r"hello%world\?")]])
<TokenList: \directlua {₁ h₁₂ e₁₂ l₁₂ l₁₂ o₁₂ %₁₂ w₁₂ o₁₂ r₁₂ l₁₂ d₁₂ \\₁₂ ?₁₂ }₂>
"""
@staticmethod
def force_token_list(a: Iterable, string_tokenizer: Callable[[str], TokenList])->Iterable[Token]:
for x in NTokenList.force_token_list(a, string_tokenizer):
if not isinstance(x, Token):
raise RuntimeError(f"Cannot make TokenList from object {x} of type {type(x)}")
yield x
def is_balanced(self)->bool:
    """
    See :meth:`NTokenList.is_balanced`.

    The running sum of the tokens' ``degree()`` must never drop below zero
    and must end at zero.
    """
    depth = 0
    for token in self:
        depth += token.degree()
        if depth < 0:
            # a closing token appeared before its matching opening token
            break
    else:
        return depth == 0
    return False
def check_balanced(self)->None:
    """
    Verify that this token list is balanced; do nothing if it is.

    :raises UnbalancedTokenListError: if this is not balanced.
    """
    if self.is_balanced():
        return
    raise UnbalancedTokenListError(f"Token list {self} is not balanced")
def balanced_parts(self)->"List[Union[BalancedTokenList, Token]]":
    """
    Internal function, used for serialization and sending to [TeX].

    Split this :class:`TokenList` into a list of balanced parts and unbalanced ``{``/``}`` tokens.
    """
    # Find the earliest position at which the running nesting depth is lowest.
    # Tokens before it can only contribute unmatched closing tokens,
    # tokens after it can only contribute unmatched opening tokens.
    split = 0
    lowest = 0
    depth = 0
    for end, token in enumerate(self, start=1):
        depth += token.degree()
        if depth < lowest:
            lowest, split = depth, end

    # Forward scan of the part before the split: every time the depth goes
    # negative the current token is an unmatched closing token.
    head: List[Union[BalancedTokenList, Token]] = []
    depth = 0
    run_start = 0
    for i, token in zip(range(split), self):
        depth += token.degree()
        if depth >= 0:
            continue
        depth = 0
        if run_start != i:
            head.append(BalancedTokenList(self[run_start:i]))
        head.append(token)
        run_start = i + 1
    if run_start != split:
        head.append(BalancedTokenList(self[run_start:split]))

    # Backward scan of the part after the split, mirrored: every time the
    # (negated) depth goes negative the current token is an unmatched opening token.
    tail: List[Union[BalancedTokenList, Token]] = []
    depth = 0
    run_end = len(self)
    for i in reversed(range(split, len(self))):
        token = self[i]
        depth -= token.degree()
        if depth >= 0:
            continue
        depth = 0
        if i + 1 != run_end:
            tail.append(BalancedTokenList(self[i+1:run_end]))
        tail.append(token)
        run_end = i
    if run_end != split:
        tail.append(BalancedTokenList(self[split:run_end]))

    # the tail was collected right-to-left
    tail.reverse()
    return head + tail
[docs] def put_next(self)->None:
"""
Put this token list forward in the input stream.
"""
for part in reversed(self.balanced_parts()): part.put_next()
@property
def balanced(self)->"BalancedTokenList":
"""
``self`` must be balanced.
:return: a :class:`BalancedTokenList` containing the content of this object.
"""
return BalancedTokenList(self)
@staticmethod
def _iterable_from_string(s: str, get_catcode: Callable[[int], Catcode])->Iterable[Token]:
    """
    Lazily tokenize *s*, yielding one token at a time.

    Refer to documentation of :meth:`from_string` for details.

    :param s: the text to tokenize.
    :param get_catcode: maps a character code to its category code.
    :raises AssertionError: if *s* contains a character whose category code can neither
        be carried by a token nor is ignored or escape.
    """
    length = len(s)
    i = 0
    while i < length:
        ch = s[i]
        i += 1
        cat = get_catcode(ord(ch))
        if cat == Catcode.space:
            yield space
            # special case: collapse multiple spaces into one but only if character code is space
            if get_catcode(32) in (Catcode.space, Catcode.ignored):
                while i < length and s[i] == ' ':
                    i += 1
        elif cat.for_token:
            yield cat(ch)  # type: ignore
            # temporary, see https://github.com/python/mypy/issues/17222
        elif cat == Catcode.ignored:
            continue
        else:
            assert cat==Catcode.escape, f"cannot create TokenList from string containing catcode {cat}"
            if i >= length:
                # Escape character with nothing after it: as in [TeX], this is the
                # control sequence with empty name (previously an IndexError).
                yield ControlSequenceToken("")
                return
            first = s[i]
            i += 1
            if get_catcode(ord(first)) != Catcode.letter:
                # control symbol: exactly one non-letter character
                yield ControlSequenceToken(first)
                continue
            # control word: the longest run of letters
            csname = first
            while i < length and get_catcode(ord(s[i])) == Catcode.letter:
                csname += s[i]
                i += 1
            yield ControlSequenceToken(csname)
            # special case: remove spaces after control sequence but only if character code is space
            if get_catcode(32) in (Catcode.space, Catcode.ignored):
                while i < length and s[i] == ' ':
                    i += 1
[docs] @classmethod
def from_string(cls: Type[TokenListType], s: str, get_catcode: Callable[[int], Catcode], endlinechar: str)->TokenListType:
"""
Approximate tokenizer implemented in Python.
Convert a string to a :class:`TokenList` (or some subclass of it such as :class:`BalancedTokenList`) approximately.
This is an internal function and should not be used directly. Use one of :meth:`e3` or :meth:`doc` instead.
These are used to allow constructing a :class:`TokenList` object in Python without being too verbose.
Refer to :ref:`token-list-construction` for alternatives.
The tokenization algorithm is slightly different from [TeX]'s in the following respect:
* multiple spaces are collapsed to one space, but only if it has character code space (32).
i.e. in expl3 catcode, ``~~`` get tokenized to two spaces.
* spaces with character code different from space (32) after a control sequence is not ignored.
i.e. in expl3 catcode, ``~`` always become a space.
* ``^^`` syntax are not supported. Use Python's escape syntax (e.g. ``\x01``) as usual
(of course that does not work in raw Python strings ``r"..."``).
:param s: The string to tokenize.
:param get_catcode: A function that given a character code, return its desired category code.
:param endlinechar: The string substituted for every newline character in *s* before
tokenizing. Must have length at most 1 (the empty string removes newlines).
"""
# endlinechar stands in for a single character, or for none at all
assert len(endlinechar)<=1
return cls(TokenList._iterable_from_string(s.replace('\n', endlinechar), get_catcode))
[docs] @classmethod
def e3(cls: Type[TokenListType], s: str)->TokenListType:
r"""
Approximate tokenizer in expl3 (``\ExplSyntaxOn``) catcode regime.
Refer to documentation of :meth:`from_string` for details.
Usage example::
>>> BalancedTokenList.e3(r'\cs_new_protected:Npn \__mymodule_myfunction:n #1 { #1 #1 }')
<BalancedTokenList: \cs_new_protected:Npn \__mymodule_myfunction:n #₆ 1₁₂ {₁ #₆ 1₁₂ #₆ 1₁₂ }₂>
>>> BalancedTokenList.e3('a\tb\n\nc')
<BalancedTokenList: a₁₁ b₁₁ c₁₁>
"""
return cls.from_string(s, lambda x: e3_catcode_table.get(x, Catcode.other), ' ')
@classmethod
def fstr_if_unicode(cls: Type[TokenListType], s: str|bytes, is_unicode: bool)->TokenListType:
    """
    Build a detokenized ("string") token list from *s*: every character becomes a
    token with catcode other, except the space character which becomes a space token.

    :param s: the content. ``bytes`` are only accepted for non-Unicode engines.
    :param is_unicode: if false, a ``str`` is first encoded to UTF-8 and one token
        is produced per byte; if true, one token is produced per character.
    :raises AssertionError: if *s* is ``bytes`` while *is_unicode* is true.
    """
    # The check used to read ``isinstance(str, bytes)``, which tests the builtin
    # type ``str`` itself and is therefore never true.
    if isinstance(s, bytes):
        assert not is_unicode, "Cannot use bytes if is_unicode"
    if not is_unicode and isinstance(s, str):
        s = s.encode('u8')
    # iterating bytes gives ints, iterating str gives 1-character strings
    return cls(space if ch in (32, ' ') else C.other(ch) for ch in s)
[docs] @classmethod
def fstr(cls: Type[TokenListType], s: str, is_unicode: Optional[bool]=None)->TokenListType:
r"""
Approximate tokenizer in detokenized catcode regime.
Refer to documentation of :meth:`from_string` for details.
``^^J`` (or ``\n``) is used to denote newlines.
>>> BalancedTokenList.fstr('hello world')
<BalancedTokenList: h₁₂ e₁₂ l₁₂ l₁₂ o₁₂ ₁₀ w₁₂ o₁₂ r₁₂ l₁₂ d₁₂>
>>> BalancedTokenList.fstr('ab\\c d\n \t')
<BalancedTokenList: a₁₂ b₁₂ \\₁₂ c₁₂ ₁₀ ₁₀ d₁₂ \n₁₂ ₁₀ \t₁₂>
Some care need to be taken for Unicode strings.
>>> with default_engine.set_engine(None): BalancedTokenList.fstr('α')
Traceback (most recent call last):
...
RuntimeError: Default engine not set for this thread!
>>> with default_engine.set_engine(luatex_engine): BalancedTokenList.fstr('α')
<BalancedTokenList: α₁₂>
>>> BalancedTokenList.fstr('α')
<BalancedTokenList: Î₁₂ ±₁₂>
"""
if is_unicode is None: is_unicode=engine.is_unicode
return cls.fstr_if_unicode(s, is_unicode=is_unicode)
[docs] @classmethod
def doc(cls: Type[TokenListType], s: str)->TokenListType:
r"""
Approximate tokenizer in document (normal) catcode regime.
Refer to documentation of :meth:`from_string` for details.
Usage example::
>>> BalancedTokenList.doc(r'\def\a{b}')
<BalancedTokenList: \def \a {₁ b₁₁ }₂>
>>> BalancedTokenList.doc('}')
Traceback (most recent call last):
...
pythonimmediate.UnbalancedTokenListError: Token list <BalancedTokenList: }₂> is not balanced
>>> BalancedTokenList.doc('\n\n')
Traceback (most recent call last):
...
NotImplementedError: Double-newline to \par not implemented yet!
>>> TokenList.doc('}')
<TokenList: }₂>
"""
if "\n\n" in s:
raise NotImplementedError(r"Double-newline to \par not implemented yet!")
return cls.from_string(s, lambda x: doc_catcode_table.get(x, Catcode.other), ' ')
def __init__(self, a: Iterable=(), string_tokenizer: Callable[[str], TokenList]=TokenList_e3)->None:
"""
Refer to :class:`TokenList` on how to use this function.
"""
super().__init__(TokenList.force_token_list(a, string_tokenizer))
def serialize(self)->str:
return "".join(t.serialize() for t in self)
def serialize_bytes(self)->bytes:
    """
    Internal function.

    Serialize this token list in a form that is suitable for writing directly to the engine:
    UTF-8 for a Unicode engine, otherwise one byte per character of the serialized text.

    :raises ValueError: on a non-Unicode engine, if the serialized text contains
        a character that does not fit in one byte.
    """
    if not engine.is_unicode:
        text = self.serialize()
        try:
            return bytes(map(ord, text))
        except ValueError:
            raise ValueError("Cannot serialize TokenList for non-Unicode engine!")
    return self.serialize().encode('u8')
[docs] @classmethod
def deserialize(cls: Type[TokenListType], data: str|bytes)->TokenListType:
"""
Internal function. Parse the serialized form of a token list.

Formats recognized, as implemented below:

* empty input gives an empty token list;
* input starting with ``s`` is a string: every following character becomes a token
  (space gets catcode space, anything else catcode other), where ``\\xA0`` followed by
  a character ``c`` stands for the character with code ``ord(c)-0x40``;
* otherwise the input is a sequence of tokens:

  - ``R`` is the frozen relax token,
  - ``^``, a hex catcode digit, and a character ``c``: character token with code ``ord(c)-0x40``,
  - a hex catcode digit and a character: character token with that character,
  - zero or more ``*``, a backslash, the name, and a space: control sequence. Each ``*``
    announces one escaped character inside the name, written as a space followed by
    the character with code + 64.

:param data: the serialized form. If ``bytes``, control sequences are constructed
    from ``bytes`` names with ``is_unicode=False``; if ``str``, with ``is_unicode=True``.
"""
result: List[Token]=[]
i=0
# remember the input type, then work on a str in which each character stands for one byte
data_was_bytes=isinstance(data, bytes)
if isinstance(data, bytes):
data="".join(chr(i) for i in data)
if not data: return cls()
if data[0]=="s":
# string form: undo the \xA0 escapes, then map each character to a token
return cls([
CharacterToken(ord(ch), Catcode.space if ch==' ' else Catcode.other)
for ch in re.sub("\xA0(.)", lambda match: chr(ord(match[1])-0x40), data[1:])
])
while i<len(data):
if data[i] in "\\>*":
# control sequence: the markers before the backslash describe the escapes in the name
start=data.find("\\", i)
pos=start+1
csname=""
for op in data[i:start]:
if op==">":
# this marker is not handled by this implementation
assert False
elif op=="*":
# literal run up to the next space, then one escaped character after that space
n=data.find(' ', pos)+2
csname+=data[pos:n-2]+chr(ord(data[n-1])-64)
pos=n
else:
assert False
# the remainder of the name runs up to the terminating space
i=data.find(' ', pos)+1
csname+=data[pos:i-1]
result.append(ControlSequenceToken(
bytes(map(ord, csname)) if data_was_bytes else csname,
is_unicode=not data_was_bytes))
elif data[i]=="R":
result.append(frozen_relax_token)
i+=1
elif data[i]=="^":
# escaped character token: ^ <catcode hex digit> <character code + 0x40>
result.append(CharacterToken(index=ord(data[i+2])-0x40, catcode=Catcode(int(data[i+1], 16))))
i+=3
else:
# plain character token: <catcode hex digit> <character>
result.append(CharacterToken(index=ord(data[i+1]), catcode=Catcode(int(data[i], 16))))
i+=2
return cls(result)
[docs] @classmethod
def deserialize_bytes(cls: Type[TokenListType], data: bytes)->TokenListType:
"""
Internal function.
Given a bytes object read directly from the engine, deserialize it.
"""
if engine.is_unicode:
return cls.deserialize(data.decode('u8'))
else:
return cls.deserialize(data)
def __repr__(self)->str:
return '<' + type(self).__name__ + ': ' + ' '.join(t.repr1() for t in self) + '>'
[docs] def execute(self)->None:
r"""
Execute this token list. It must not "peek ahead" in the input stream.
For example the token list ``\catcode1=2\relax`` can be executed safely
(and sets the corresponding category code),
but there's no guarantee what will be assigned to ``\tmp`` when ``\futurelet\tmp`` is executed.
"""
NTokenList(self).execute()
[docs] def expand_x(self)->"BalancedTokenList":
"""
Return the ``x``-expansion of this token list.
The result must be balanced, otherwise the behavior is undefined.
"""
return NTokenList(self).expand_x()
def is_str(self)->bool:
return all(t.is_str() for t in self)
def simple_detokenize(self, get_catcode: Callable[[int], Catcode])->str:
return "".join(token.simple_detokenize(get_catcode) for token in self)
[docs] def str_codes(self)->list[int]:
"""
``self`` must represent a [TeX] string. (i.e. equal to itself when detokenized)
:return: the string content.
>>> BalancedTokenList("abc").str_codes()
Traceback (most recent call last):
...
ValueError: this CharacterToken does not represent a string!
>>> BalancedTokenList("+-=").str_codes()
[43, 45, 61]
.. note::
In non-Unicode engines, each token will be replaced with a character
with character code equal to the character code of that token.
UTF-8 characters with character code ``>=0x80`` will be represented by multiple
characters in the returned string.
"""
return [t.str_code() for t in self]
def str_if_unicode(self, unicode: bool=True)->str:
    """
    Assume this token list represents a string in a (Unicode/non-Unicode) engine, return the string content.

    :param unicode: if true, each character code is one character of the result;
        if false, the character codes are taken as bytes and decoded as UTF-8.
    """
    codes = self.str_codes()
    return "".join(map(chr, codes)) if unicode else bytes(codes).decode('u8')
[docs] def str(self)->str:
"""
``self`` must represent a [TeX] string. (i.e. equal to itself when detokenized)
:return: the string content.
>>> BalancedTokenList([C.other(0xce), C.other(0xb1)]).str()
'α'
>>> with default_engine.set_engine(luatex_engine): BalancedTokenList([C.other('α')]).str()
'α'
"""
return self.str_if_unicode(engine.is_unicode)
[docs] def int(self)->int:
r"""
Assume this token list contains an integer (as valid result of ``\number ...``),
returns the integer value.
At the moment, not much error checking is done.
"""
return int(self.str_if_unicode())
class ImmutableBalancedTokenList(collections.abc.Sequence, collections.abc.Hashable):
    r"""
    An immutable, hashable snapshot of a balanced token list.

    Note that this class is not a subclass of :class:`TokenList`, and is not mutable.
    Not many operations are supported. Convert to :class:`BalancedTokenList` to perform more operations.

    Its main use is to be used as a key in a dictionary.

    >>> a=ImmutableBalancedTokenList(BalancedTokenList.e3(r'\def\a{b}'))
    >>> b=ImmutableBalancedTokenList(BalancedTokenList.e3(r'\def\a{b}'))
    >>> c=ImmutableBalancedTokenList(BalancedTokenList.e3(r'\def\a{c}'))
    >>> hash(a)==hash(b)
    True
    >>> a==b
    True
    >>> a!=b
    False
    >>> a==c
    False
    >>> a!=c
    True
    """

    def __init__(self, a: BalancedTokenList)->None:
        # the tokens are kept in a tuple, which provides hashing and equality
        self._data: Tuple[Token, ...] = tuple(a)

    @typing.overload
    def __getitem__(self, i: int)->Token: ...
    @typing.overload
    def __getitem__(self, i: slice)->ImmutableBalancedTokenList: ...
    def __getitem__(self, i: int|slice)->Token|ImmutableBalancedTokenList:
        picked = self._data[i]
        if isinstance(i, slice):
            # a slice is re-wrapped, going through BalancedTokenList
            return ImmutableBalancedTokenList(BalancedTokenList(picked))
        return picked

    def __len__(self)->int:
        return len(self._data)

    def __repr__(self)->str:
        # reuse the formatting of TokenList
        return TokenList.__repr__(self)  # type: ignore

    def __str__(self)->str:
        return repr(self)

    def __hash__(self)->int:
        return hash(self._data)

    def __eq__(self, other: object)->bool:
        if isinstance(other, ImmutableBalancedTokenList):
            return self._data == other._data
        return NotImplemented
[docs]class BalancedTokenList(TokenList):
"""
Represents a balanced token list.
Some useful methods to interact with [TeX]
include :meth:`expand_o`, :meth:`expand_x`, :meth:`get_next` and :meth:`put_next`.
See the corresponding methods' documentation for usage examples.
See also :ref:`token-list-construction` for shorthands to construct token lists in Python code.
.. note::
Runtime checking is not strictly enforced,
use :meth:`~TokenList.is_balanced()` method explicitly if you need to check.
"""
def __init__(self, a: Iterable=(), string_tokenizer: Callable[[str], TokenList]=TokenList.e3)->None:
    """
    Build the token list exactly like the parent class does, then verify that it is balanced.

    :param a: the content, forwarded unchanged to the parent constructor.
    :param string_tokenizer: forwarded unchanged to the parent constructor.
    :raises UnbalancedTokenListError: if the token list is not balanced.

    >>> BalancedTokenList("{")
    Traceback (most recent call last):
        ...
    pythonimmediate.UnbalancedTokenListError: Token list <BalancedTokenList: {₁> is not balanced
    """
    super().__init__(a, string_tokenizer)
    # The check runs once, at construction time only.
    self.check_balanced()
def expand_o(self)->"BalancedTokenList":
    """
    Return the ``o``-expansion of this token list.

    The token list is sent to [TeX], stored in a scratch variable, replaced by its own
    ``o``-type expansion and sent back.
    The result must be balanced, otherwise the behavior is undefined.
    """
    expander = typing.cast(Callable[[PTTBalancedTokenList], TTPBalancedTokenList], Python_call_TeX_local(
        r"""
\cs_new_protected:Npn %name% {
%read_arg0(\__data)%
\exp_args:NNV \tl_set:No \__data \__data
%sync%
%send_arg0_var(\__data)%
\pythonimmediatelisten
}
""", recursive=expansion_only_can_call_Python))
    return expander(PTTBalancedTokenList(self))
def expand_x(self)->"BalancedTokenList":
    """
    Return the ``x``-expansion of this token list.

    The token list is sent to [TeX], stored in a scratch variable, replaced by the result of
    ``\\tl_set:Nx`` applied to its own content and sent back.
    """
    expander = typing.cast(Callable[[PTTBalancedTokenList], TTPBalancedTokenList], Python_call_TeX_local(
        r"""
\cs_new_protected:Npn %name% {
%read_arg0(\__data)%
\tl_set:Nx \__data {\__data}
%sync%
%send_arg0_var(\__data)%
\pythonimmediatelisten
}
""", recursive=expansion_only_can_call_Python))
    return expander(PTTBalancedTokenList(self))
def expand_estr(self)->str:
    """
    Expand this token list according to :ref:`estr-expansion` and return the resulting string.

    The token list is placed in the input stream as a single element of an outer list,
    then read back with ``get_arg_estr()``.
    It's undefined behavior if the expansion result is unbalanced.
    """
    wrapped = BalancedTokenList([self])
    wrapped.put_next()
    return get_arg_estr()
def execute(self)->None:
    """
    Execute this token list in [TeX].

    The token list is sent to [TeX], stored in a scratch variable and then run;
    afterwards [TeX] goes back to listening for the next command from Python.
    """
    runner = typing.cast(Callable[[PTTBalancedTokenList], None], Python_call_TeX_local(
        r"""
\cs_new_protected:Npn %name% {
%read_arg0(\__data)%
\__data
%optional_sync%
\pythonimmediatelisten
}
"""))
    runner(PTTBalancedTokenList(self))
def put_next(self)->None:
    """
    Put this token list at the front of the [TeX] input stream.

    On the [TeX] side the content is expanded out of its scratch variable *after* the helper
    that returns control to Python, so the tokens are left behind in the input stream
    instead of being executed.
    """
    pusher = typing.cast(Callable[[PTTBalancedTokenList], None], Python_call_TeX_local(
        r"""
\cs_new_protected:Npn \__put_next_tmp {
%optional_sync%
\pythonimmediatelisten
}
\cs_new_protected:Npn %name% {
%read_arg0(\__target)%
\expandafter \__put_next_tmp \__target
}
""", recursive=False))
    pusher(PTTBalancedTokenList(self))
@staticmethod
def get_next()->"BalancedTokenList":
    """
    Get an (undelimited) argument from the [TeX] input stream.

    :return: the tokens that [TeX] grabbed as the macro argument ``#1``.
    """
    grabber = typing.cast(Callable[[], TTPBalancedTokenList], Python_call_TeX_local(
        r"""
\cs_new_protected:Npn %name% #1 {
%sync%
%send_arg0(#1)%
\pythonimmediatelisten
}
""", recursive=False))
    return grabber()
@staticmethod
def _get_until_raw(delimiter: BalancedTokenList, long: bool)->"BalancedTokenList":
    """
    Internal function.

    Get a delimited argument from the [TeX] input stream, delimited by `delimiter`.
    This works the same way as a delimited macro argument, so in particular the argument
    must be balanced, and the delimiter must not contain any ``#`` or braces.
    No error-checking is done beyond rejecting an empty delimiter.
    The delimiter itself will also be removed.

    As a special case, the delimiter can be a token list consisting of a single ``#``,
    in which case the corresponding [TeX] behavior is used: tokens are taken from the
    input stream until a ``{``, and the ``{`` itself is not removed.

    :param delimiter: the delimiter, must not be empty.
    :param long: whether the helper macro is defined with the ``\\long`` prefix.
    :return: the tokens in front of the delimiter.
    """
    assert delimiter, "Delimiter cannot be empty!"
    try:
        reader = typing.cast(Callable[[PTTBalancedTokenList], TTPBalancedTokenList], Python_call_TeX_local(
            # '#1' is either \long or [], '#2' is the delimiter
            r"""
\cs_new_protected:Npn \__get_until_tmp #1 #2 {
#1 \def \__delimit_tmpii ##1 #2 {
%sync%
%send_arg0(##1)%
\pythonimmediatelisten
}
\__delimit_tmpii
}
\cs_new_protected:Npn %name% {
%read_arg0(\__arg)%
\expandafter \__get_until_tmp \__arg
}
""", recursive=False))
        # Two items are sent: the optional prefix and the delimiter.
        prefix_and_delimiter = BalancedTokenList([r"\long" if long else [], delimiter])
        return reader(PTTBalancedTokenList(prefix_and_delimiter))
    except BaseException:
        # Report which delimiter was involved, then let the error continue unchanged.
        print(f"Error in _get_until_raw with delimiter = {delimiter}")
        raise
@staticmethod
def get_until(delimiter: BalancedTokenList, remove_braces: bool=True, long: bool=True)->"BalancedTokenList":
    r"""
    Get a delimited argument from the [TeX] input stream, delimited by `delimiter`.

    The delimiter itself will also be removed from the input stream.

    :param delimiter: the delimiter; must be non-empty and must not contain a character
        token with begin-group, end-group or parameter category code.
    :param remove_braces: if ``False``, an auxiliary token is put in front of the input
        stream before the argument is read and removed from the result afterwards.
    :param long: Works the same as ``\long`` primitive in [TeX] -- if this is ``False``
        then [TeX] fatal error ``Runaway argument`` will be raised if there's a ``\par`` token in the argument.
    """
    assert delimiter, "Delimiter cannot be empty!"
    forbidden_catcodes = [Catcode.bgroup, Catcode.egroup, Catcode.param]
    for t in delimiter:
        if isinstance(t, CharacterToken):
            assert t.catcode not in forbidden_catcodes, f"A token with catcode {t.catcode} cannot be a delimiter!"

    if remove_braces:
        return BalancedTokenList._get_until_raw(delimiter, long=long)

    # The auxiliary token must differ from the first delimiter token.
    auxiliary_token = T.relax if delimiter[0]==T.empty else T.empty
    auxiliary_token.put_next()
    result = BalancedTokenList._get_until_raw(delimiter, long=long)
    assert result[0]==auxiliary_token
    del result[0]
    return result
@staticmethod
def get_until_brace(long: bool=True)->"BalancedTokenList":
    r"""
    Get a token list from the input stream delimited by ``{``.

    The brace is not removed from the input stream.

    :param long: forwarded to the underlying delimited-argument reader.
    """
    # A delimiter consisting of a single ``#`` selects the "read until brace" behavior.
    hash_delimiter = BalancedTokenList("#")
    return BalancedTokenList._get_until_raw(hash_delimiter, long=long)
def detokenize(self)->str:
    r"""
    Detokenize this token list with the help of [TeX].

    :return: a string, equal to the result of ``\detokenize`` applied to this token list.
    """
    request = BalancedTokenList([T.detokenize, self])
    return request.expand_x().str()
def strip_optional_braces(self)->"BalancedTokenList":
    """
    Strip the optional braces from the given token list, if the whole token list is wrapped in braces.

    For example::

        >>> BalancedTokenList("{a}").strip_optional_braces()
        <BalancedTokenList: a₁₁>
        >>> BalancedTokenList("a").strip_optional_braces()
        <BalancedTokenList: a₁₁>
        >>> BalancedTokenList("{a},{b}").strip_optional_braces()
        <BalancedTokenList: {₁ a₁₁ }₂ ,₁₂ {₁ b₁₁ }₂>
        >>> BalancedTokenList([C.begin_group("X"), C.other("a"), C.end_group("Y")]).strip_optional_braces()
        <BalancedTokenList: a₁₂>

    Note that :class:`BalancedTokenList` is mutable. A copy is returned in any case::

        >>> x=BalancedTokenList("a")
        >>> y=x.strip_optional_braces()
        >>> x is y
        False
        >>> x.append(C.letter("b"))
        >>> x
        <BalancedTokenList: a₁₁ b₁₁>
        >>> y
        <BalancedTokenList: a₁₁>
    """
    starts_and_ends_with_brace = bool(self) and self[0] in C.begin_group and self[-1] in C.end_group
    # The interior is examined as a plain TokenList: the outer braces only belong together
    # when what lies between them is balanced on its own (compare ``{a},{b}``).
    if starts_and_ends_with_brace and TokenList(self)[1:-1].is_balanced():
        return self[1:-1]
    return self[:]
def split_balanced(self, /, sep: "BalancedTokenList", maxsplit: int=-1, do_strip_braces_in_result: bool=True)->List["BalancedTokenList"]:
    r"""
    Split the given token list at the given delimiter, but only if the parts are balanced.

    A separator is only recognised at brace depth zero, so separators inside a brace group
    never cause a split.

    :param sep: the delimiter.
    :param maxsplit: the maximum number of splits, or ``-1`` for no limit.
    :param do_strip_braces_in_result: if ``True``, each element of the result will have the braces stripped, if any.
        It is recommended to set this to ``True`` (the default),
        otherwise the user will not have any way to "quote" the separator in each entry.
    :raises ValueError: if ``sep`` is empty.
    :raises AssertionError: if ``maxsplit`` is below ``-1``, or ``self`` or ``sep`` is not balanced.

    For example::

        >>> BalancedTokenList("a{b,c},c{d}").split_balanced(BalancedTokenList(","))
        [<BalancedTokenList: a₁₁ {₁ b₁₁ ,₁₂ c₁₁ }₂>, <BalancedTokenList: c₁₁ {₁ d₁₁ }₂>]
        >>> BalancedTokenList("a{b,c},{d,d},e").split_balanced(BalancedTokenList(","), do_strip_braces_in_result=False)
        [<BalancedTokenList: a₁₁ {₁ b₁₁ ,₁₂ c₁₁ }₂>, <BalancedTokenList: {₁ d₁₁ ,₁₂ d₁₁ }₂>, <BalancedTokenList: e₁₁>]
        >>> BalancedTokenList("a{b,c},{d,d},e").split_balanced(BalancedTokenList(","))
        [<BalancedTokenList: a₁₁ {₁ b₁₁ ,₁₂ c₁₁ }₂>, <BalancedTokenList: d₁₁ ,₁₂ d₁₁>, <BalancedTokenList: e₁₁>]
        >>> BalancedTokenList.doc(" a = b = c ").split_balanced(BalancedTokenList("="), maxsplit=1)
        [<BalancedTokenList:  ₁₀ a₁₁  ₁₀>, <BalancedTokenList:  ₁₀ b₁₁  ₁₀ =₁₂  ₁₀ c₁₁  ₁₀>]
        >>> BalancedTokenList(r"\{,\}").split_balanced(BalancedTokenList(","))
        [<BalancedTokenList: \{>, <BalancedTokenList: \}>]
    """
    assert maxsplit>=-1, "maxsplit should be either -1 (unbounded) or the maximum number of splits"
    assert self.is_balanced(), "Content is not balanced!"
    assert sep.is_balanced(), "Separator is not balanced!"
    if not sep:
        raise ValueError("Empty separator")

    pieces: List[BalancedTokenList]=[]
    current=TokenList()  # tokens collected since the last split
    depth=0  # brace depth of ``current``
    tokens=TokenList(self)  # plain copy used for slicing
    total=len(self)
    width=len(sep)
    pos=0
    while pos<total:
        may_split=len(pieces)!=maxsplit and pos+width<=total
        if may_split and tokens[pos:pos+width]==sep and depth==0:
            pieces.append(BalancedTokenList(current))
            current=TokenList()
            pos+=width
            continue
        token=self[pos]
        current.append(token)
        depth+=token.degree()
        assert depth>=0, "This cannot happen, the input is balanced"
        pos+=1
    pieces.append(BalancedTokenList(current))

    if not do_strip_braces_in_result:
        return pieces
    return [piece.strip_optional_braces() for piece in pieces]
def strip_spaces(self)->"BalancedTokenList":
    r"""
    Strip spaces from the beginning and end of the token list.

    For example::

        >>> BalancedTokenList.doc(" a ").strip_spaces()
        <BalancedTokenList: a₁₁>
        >>> BalancedTokenList([C.space(' '), C.space(' '), " a b "], BalancedTokenList.doc).strip_spaces()
        <BalancedTokenList: a₁₁  ₁₀ b₁₁>
        >>> BalancedTokenList().strip_spaces()
        <BalancedTokenList: >

    Note that only spaces with charcode 32 are stripped::

        >>> BalancedTokenList([C.space('X'), C.space(' '), "a", C.space(' ')]).strip_spaces()
        <BalancedTokenList: X₁₀  ₁₀ a₁₁>

    Similar to :meth:`strip_optional_braces`, a copy is returned in any case::

        >>> x=BalancedTokenList("a")
        >>> y=x.strip_spaces()
        >>> x is y
        False
    """
    def is_plain_space(token: Token)->bool:
        # Only the space token with character code 32 counts.
        return token==C.space(' ')

    start=0
    end=len(self)
    while start<end and is_plain_space(self[start]):
        start+=1
    while end>start and is_plain_space(self[end-1]):
        end-=1
    return self[start:end]
def parse_keyval_items(self)->list[tuple[BalancedTokenList, Optional[BalancedTokenList]]]:
    r"""
    Parse a key-value token list into a list of pairs.

    The list is split at top-level commas; each entry is split at its first top-level ``=``.
    Keys and values have surrounding spaces removed, and a value additionally loses one pair
    of enclosing braces. An entry without ``=`` gets ``None`` as its value.

    >>> BalancedTokenList("a=b,c=d").parse_keyval_items()
    [(<BalancedTokenList: a₁₁>, <BalancedTokenList: b₁₁>), (<BalancedTokenList: c₁₁>, <BalancedTokenList: d₁₁>)]
    >>> BalancedTokenList("a,c=d").parse_keyval_items()
    [(<BalancedTokenList: a₁₁>, None), (<BalancedTokenList: c₁₁>, <BalancedTokenList: d₁₁>)]
    >>> BalancedTokenList.doc("a = b , c = d").parse_keyval_items()
    [(<BalancedTokenList: a₁₁>, <BalancedTokenList: b₁₁>), (<BalancedTokenList: c₁₁>, <BalancedTokenList: d₁₁>)]
    >>> BalancedTokenList.doc("a ={ b,c }, c = { d}").parse_keyval_items()
    [(<BalancedTokenList: a₁₁>, <BalancedTokenList:  ₁₀ b₁₁ ,₁₂ c₁₁  ₁₀>), (<BalancedTokenList: c₁₁>, <BalancedTokenList:  ₁₀ d₁₁>)]
    >>> BalancedTokenList.doc("{a=b},c=d").parse_keyval_items()
    [(<BalancedTokenList: {₁ a₁₁ =₁₂ b₁₁ }₂>, None), (<BalancedTokenList: c₁₁>, <BalancedTokenList: d₁₁>)]
    """
    comma=BalancedTokenList(",")
    equals=BalancedTokenList("=")
    items: list[tuple[BalancedTokenList, Optional[BalancedTokenList]]]=[]
    # Braces are kept while splitting so that they can protect separators.
    for entry in self.split_balanced(comma, do_strip_braces_in_result=False):
        raw_key, *rest=entry.split_balanced(equals, maxsplit=1, do_strip_braces_in_result=False)
        key=raw_key.strip_spaces()
        if not rest:
            items.append((key, None))
            continue
        assert len(rest)==1
        value=rest[0].strip_spaces().strip_optional_braces()
        items.append((key, value))
    return items
def parse_keyval(self, allow_duplicate: bool=False)->dict[ImmutableBalancedTokenList, Optional[BalancedTokenList]]:
    r"""
    Parse a key-value token list into a dictionary.

    The pairs come from :meth:`parse_keyval_items`; each key is frozen into an
    :class:`ImmutableBalancedTokenList` so that it can be used as a dictionary key.

    :param allow_duplicate: if ``True``, a repeated key keeps the value given last;
        otherwise a repeated key is an error.
    :raises ValueError: if a key occurs more than once and ``allow_duplicate`` is ``False``.

    >>> BalancedTokenList("a=b,c=d").parse_keyval()
    {<ImmutableBalancedTokenList: a₁₁>: <BalancedTokenList: b₁₁>, <ImmutableBalancedTokenList: c₁₁>: <BalancedTokenList: d₁₁>}
    >>> BalancedTokenList("a,c=d").parse_keyval()
    {<ImmutableBalancedTokenList: a₁₁>: None, <ImmutableBalancedTokenList: c₁₁>: <BalancedTokenList: d₁₁>}
    >>> BalancedTokenList.doc("a = b , c = d").parse_keyval()
    {<ImmutableBalancedTokenList: a₁₁>: <BalancedTokenList: b₁₁>, <ImmutableBalancedTokenList: c₁₁>: <BalancedTokenList: d₁₁>}
    >>> BalancedTokenList.doc("a ={ b,c }, c = { d}").parse_keyval()
    {<ImmutableBalancedTokenList: a₁₁>: <BalancedTokenList:  ₁₀ b₁₁ ,₁₂ c₁₁  ₁₀>, <ImmutableBalancedTokenList: c₁₁>: <BalancedTokenList:  ₁₀ d₁₁>}
    >>> BalancedTokenList("a=b,a=c").parse_keyval()
    Traceback (most recent call last):
        ...
    ValueError: Duplicate key: <ImmutableBalancedTokenList: a₁₁>
    >>> BalancedTokenList("a=b,a=c").parse_keyval(allow_duplicate=True)
    {<ImmutableBalancedTokenList: a₁₁>: <BalancedTokenList: c₁₁>}
    """
    pairs=[(ImmutableBalancedTokenList(key), value) for key, value in self.parse_keyval_items()]
    mapping={}
    for key, value in pairs:
        if not allow_duplicate and key in mapping:
            raise ValueError(f"Duplicate key: {key!r}")
        mapping[key]=value
    return mapping
class TTPBalancedTokenList(TeXToPyData, BalancedTokenList):
    """
    A :class:`BalancedTokenList` received from [TeX].

    ``send_code`` and ``send_code_var`` are the [TeX] code templates used to send a literal
    token list and the content of a variable, respectively.
    """
    # the whole reason why this class is here is because of abuse of inheritance. Will refactor some day probably.
    send_code=_format(r"\__send_balanced_tl:n {{ {} }}%naive_ignore%")
    send_code_var=_format(r"\exp_args:NV \__send_balanced_tl:n {}%naive_ignore%")

    def __repr__(self)->str:
        # Shown exactly like the equivalent plain BalancedTokenList.
        return repr(BalancedTokenList(self))

    @staticmethod
    def read()->"TTPBalancedTokenList":
        """Read one serialized token list coming from [TeX] and deserialize it."""
        # The raw data is fetched differently for Unicode and non-Unicode engines.
        serialized=_readline() if engine.is_unicode else engine.read()
        return TTPBalancedTokenList(BalancedTokenList.deserialize(serialized))
# Choose the base class of NTokenList: the subscripted form is what static type checkers see,
# the plain class is what is used when the program actually runs.
if typing.TYPE_CHECKING:
NTokenListBaseClass = collections.UserList[NToken]
else: # Python 3.8 compatibility
NTokenListBaseClass = collections.UserList
[docs]class NTokenList(NTokenListBaseClass):
"""
Similar to :class:`TokenList`, but can contain blue tokens.
The class can be used identical to a Python list consist of :class:`NToken` objects,
plus some additional methods to operate on token lists.
Refer to the documentation of :class:`TokenList` for some usage example.
"""
@staticmethod
def force_token_list(a: Iterable, string_tokenizer: Callable[[str], TokenList])->Iterable[NToken]:
    """
    Flatten ``a`` into a stream of :class:`NToken` objects.

    :param a: either a single string, or an iterable whose items are tokens, strings or
        nested sequences.
    :param string_tokenizer: turns a string into tokens. It is applied to ``a`` itself when
        ``a`` is a string, to every string item, and to strings inside nested sequences.
    :return: a generator yielding the tokens in order. A nested sequence is yielded as a
        begin-group token, its (balanced) content, and an end-group token.
    :raises RuntimeError: if an item is neither a token, a string nor a sequence.
    """
    if isinstance(a, str):
        yield from string_tokenizer(a)
        return
    for x in a:
        if isinstance(x, NToken):
            yield x
        elif isinstance(x, str):
            yield from string_tokenizer(x)
        elif isinstance(x, Sequence):
            yield bgroup
            # Forward the tokenizer, otherwise strings inside the nested sequence would be
            # tokenized with the default one instead of the one the caller asked for.
            child=BalancedTokenList(x, string_tokenizer)
            assert child.is_balanced()
            yield from child
            yield egroup
        else:
            raise RuntimeError(f"Cannot make TokenList from object {x} of type {type(x)}")
def __init__(self, a: Iterable=(), string_tokenizer: Callable[[str], TokenList]=TokenList.e3)->None:
    """
    Build the list from ``a``.

    :param a: the content, in any form accepted by :meth:`force_token_list`.
    :param string_tokenizer: used by :meth:`force_token_list` to turn strings into tokens.
    """
    tokens=NTokenList.force_token_list(a, string_tokenizer)
    super().__init__(tokens)
def is_balanced(self)->bool:
    """
    Check if this is balanced.

    The check is delegated to :class:`TokenList`, which requires building a temporary
    :class:`TokenList` from ``self`` -- a bit inefficient but good enough.
    """
    as_token_list=TokenList(self)
    return as_token_list.is_balanced()
def simple_parts(self)->List[Union[BalancedTokenList, Token, BlueToken]]:
    """
    Internal function.

    Split this :class:`NTokenList` into a list of balanced non-blue parts,
    unbalanced ``{``/``}`` tokens, and blue tokens.
    """
    # First pass: cut the list at every blue token. ``chunks`` alternates between runs of
    # ordinary tokens (possibly empty) and the blue tokens separating them.
    chunks: List[Union[TokenList, BlueToken]]=[TokenList()]
    for item in self:
        if isinstance(item, BlueToken):
            chunks.extend((item, TokenList()))
            continue
        assert isinstance(item, Token)
        tail=chunks[-1]
        assert isinstance(tail, TokenList)
        tail.append(item)

    # Second pass: break every run of ordinary tokens into its balanced parts.
    result: List[Union[BalancedTokenList, Token, BlueToken]]=[]
    for chunk in chunks:
        if isinstance(chunk, BlueToken):
            result.append(chunk)
        else:
            result.extend(chunk.balanced_parts())
    return result
def put_next(self)->None:
    """
    See :meth:`BalancedTokenList.put_next`.

    The parts returned by :meth:`simple_parts` are put one at a time, last part first,
    so that they end up in their original order in the input stream.
    """
    for piece in reversed(self.simple_parts()):
        piece.put_next()
def execute(self)->None:
    """
    See :meth:`BalancedTokenList.execute`.

    When the whole list is one balanced part, that part is executed directly.
    Otherwise the tokens are put in the input stream followed by a command that passes
    control back to Python, and [TeX] is resumed until that command is reached.
    """
    parts=self.simple_parts()
    if len(parts)==1 and isinstance(parts[0], BalancedTokenList):
        parts[0].execute()
        return
    NTokenList([*self, T.pythonimmediatecontinue, []]).put_next()
    continue_until_passed_back()
def expand_x(self)->BalancedTokenList:
    r"""
    See :meth:`BalancedTokenList.expand_x`.

    Implemented by executing ``\edef`` on a temporary control sequence with this list as
    the replacement text, then reading the content of that control sequence back.
    """
    definition=NTokenList([T.edef, P.tmp, bgroup, *self, egroup])
    definition.execute()
    return P.tmp.tl()
# Sentinel type; its only instance is a value for ``file`` that is distinct from ``None``.
# NOTE(review): presumably it means "no redirection has been set up" -- confirm against print_TeX.
class _NoFile: pass
_no_file=_NoFile()
# Module-level redirection target; RedirectPrintTeX swaps it while its context is active.
file: Union[_NoFile, None, IO]=_no_file
class RedirectPrintTeX:
    """
    A context manager. Use like this, where ``t`` is some file object::

        with RedirectPrintTeX(t):
            pass  # some code

    Then all :func:`.print_TeX` function calls will be redirected to ``t``.

    The module-level ``file`` variable is set to ``t`` on entry and set back to its
    previous value on exit.
    """

    def __init__(self, t: Optional[IO])->None:
        # The target is only remembered here; nothing is redirected before ``__enter__``.
        self.t=t

    def __enter__(self)->None:
        global file
        # Save the current target so that ``__exit__`` can put it back.
        self.old=file
        file=self.t

    def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any)->None:
        global file
        file=self.old
def run_code_redirect_print_TeX(f: Callable[[], Any])->None:
"""
Extension of :class:`RedirectPrintTeX`: run ``f`` while output is redirected to an
in-memory buffer, then interpret the collected text as [TeX] code to be executed
when the function returns.
Also, any return value of function ``f`` that is not ``None`` will be appended to the result.

:param f: the function to run; it is called without arguments.

:meta private:
"""
with io.StringIO() as t:
with RedirectPrintTeX(t):
result=f()
if result is not None:
# the return value is written as text, followed by a ``%``
t.write(str(result)+"%")
content=t.getvalue()
if content.endswith("\n"):
# exactly one trailing newline is dropped
content=content[:-1]
elif not content:
# nothing at all was collected
# NOTE(review): the nesting of the ``return`` below is not visible in this listing -- confirm
if engine.status!=EngineStatus.running:
run_none_finish()
return
else:
# the text does not end with a newline: terminate it with ``%`` instead
#content+=r"\empty" # this works too
content+="%"
_run_block_finish(content)
def _make_param_spec(x: int)->BalancedTokenList:
    r"""
    Internal function. Build the parameter text ``#1#2...#x`` of a macro with ``x`` parameters.

    :param x: the number of parameters, from 0 to 9 inclusive.

    >>> _make_param_spec(0)
    <BalancedTokenList: >
    >>> _make_param_spec(1)
    <BalancedTokenList: #₆ 1₁₂>
    >>> _make_param_spec(9)
    <BalancedTokenList: #₆ 1₁₂ #₆ 2₁₂ #₆ 3₁₂ #₆ 4₁₂ #₆ 5₁₂ #₆ 6₁₂ #₆ 7₁₂ #₆ 8₁₂ #₆ 9₁₂>
    >>> _make_param_spec(10)
    Traceback (most recent call last):
        ...
    AssertionError
    """
    assert 0<=x<=9
    tokens=[]
    for index in range(1, x+1):
        # one parameter = a parameter character followed by its digit
        tokens.append(C.param("#"))
        tokens.append(C.other(str(index)))
    return BalancedTokenList(tokens)
def add_TeX_handler_param(t: BalancedTokenList, param: int|BalancedTokenList, *, continue_included: bool=False)->str:
    r"""
    Similar to :func:`add_TeX_handler`, however it will take parameters following in the input stream.

    :param t: the body of the handler; it may refer to its parameters as ``#1``, ``#2``...
    :param param: either the number of parameters, or the parameter text itself.
    :param continue_included: See :func:`add_TeX_handler`.
    :return: the identifier of the new handler.

    >>> identifier=add_TeX_handler_param(BalancedTokenList(r"\def\l_tmpa_tl{#2,#1}"), 2)
    >>> BalancedTokenList(r'{123}{456}').put_next()
    >>> call_TeX_handler(identifier)
    >>> T.l_tmpa_tl.tl()
    <BalancedTokenList: 4₁₂ 5₁₂ 6₁₂ ,₁₂ 1₁₂ 2₁₂ 3₁₂>
    >>> remove_TeX_handler(identifier)
    """
    body=t if continue_included else t+[T.pythonimmediatecontinuenoarg]
    identifier=get_random_TeX_identifier()
    spec=_make_param_spec(param) if isinstance(param, int) else param
    # The handler is defined globally, as a macro whose name is derived from the identifier.
    BalancedTokenList([T.gdef, P["run_"+identifier+":"], *spec, body]).execute()
    return identifier
def add_TeX_handler(t: BalancedTokenList, *, continue_included: bool=False)->str:
    r"""
    Register ``t`` as a handler on the [TeX] side. See :func:`call_TeX_handler`.

    :param t: the code of the handler.
    :param continue_included: If this is set to True, ``\pythonimmediatecontinuenoarg`` token should be put when you want to return control to Python.
        Otherwise that token is appended to ``t`` automatically.
    :return: the identifier of the new handler.

    >>> with group: identifier=add_TeX_handler(BalancedTokenList(
    ...     r"\afterassignment\pythonimmediatecontinuenoarg \toks0="), continue_included=True)
    >>> BalancedTokenList([["abc"]]).put_next()
    >>> call_TeX_handler(identifier)  # this will assign \toks0 to be the following braced group
    >>> toks[0]
    <BalancedTokenList: a₁₁ b₁₁ c₁₁>
    """
    body=t if continue_included else t+[T.pythonimmediatecontinuenoarg]
    identifier=get_random_TeX_identifier()
    handler_name=P["run_"+identifier+":"]
    handler_name.tl(body, global_=True)
    return identifier
def call_TeX_handler_returns(identifier: str)->str:
    """
    Run the [TeX] handler ``identifier`` and return what the main loop hands back.

    :param identifier: an identifier returned by :func:`add_TeX_handler` or :func:`add_TeX_handler_param`.
    :return: the result of ``run_main_loop()``.
    :raises TeXProcessError: if the engine is already in the error state.
    """
    status=engine.status
    if status==EngineStatus.error:
        raise TeXProcessError("error already happened")
    assert status==EngineStatus.waiting, status

    # Send the identifier on a line of its own, then let the engine run until it reports back.
    engine.write((identifier+"\n").encode('u8'))
    engine.status=EngineStatus.running
    outcome=run_main_loop()
    assert outcome is not None
    engine.status=EngineStatus.waiting
    return outcome
def call_TeX_handler(identifier: str)->None:
    r"""
    Define some "handlers" in [TeX] that can be called quickly without re-sending the code every time it's called.

    Analog for :func:`add_handler`, :func:`remove_handler`, but on the [TeX] side.

    The advantage is that it's much faster than using :meth:`BalancedTokenList.execute` every time.
    Otherwise the effect is identical.

    Of course this is only for the current engine, and is global.

    :param identifier: the identifier of the handler to run; the handler must not send any data back.

    >>> identifier=add_TeX_handler(BalancedTokenList(r"\advance\count0 by 1"))
    >>> count[0]=5
    >>> count[0]
    5
    >>> call_TeX_handler(identifier)
    >>> count[0]
    6
    >>> remove_TeX_handler(identifier)
    """
    returned=call_TeX_handler_returns(identifier)
    assert returned==""
def remove_TeX_handler(identifier: str)->None:
    r"""
    Remove a handler on the [TeX] side by globally making its macro equal to ``\relax``.

    See :func:`call_TeX_handler`.
    """
    handler_name=P["run_"+identifier+":"]
    handler_name.set_eq(T.relax, global_=True)
# Per-engine cache: (continue_included, tokens) -> identifier of the handler defined for them.
_execute_cache: WeakKeyDictionary[Engine, Dict[tuple[bool, tuple[Token, ...]], str]]=WeakKeyDictionary()
def _execute_cached0(e: BalancedTokenList, *, continue_included: bool=False)->None:
    r"""
    Internal function, identical to :meth:`BalancedTokenList.execute` but cache the value of ``e``
    such that re-execution of the same token list will be faster.

    The first execution registers ``e`` as a [TeX] handler; later executions only call it.
    ``continue_included`` is part of the cache key, because the handler defined for a token
    list differs depending on that flag.

    :param e: the token list to execute, must be balanced.
    :param continue_included: See :func:`add_TeX_handler`.

    >>> count[0]=5
    >>> _execute_cached0(BalancedTokenList(r'\advance\count0 by 1'))
    >>> count[0]
    6
    >>> _execute_cached0(BalancedTokenList(r'\advance\count0 by 1'))
    >>> count[0]
    7
    """
    assert e.is_balanced()
    handlers=_defaultget_with_cleanup(_execute_cache, dict)
    key=(continue_included, tuple(e))
    identifier=handlers.get(key)
    if identifier is None:
        identifier=handlers[key]=add_TeX_handler(e, continue_included=continue_included)
    call_TeX_handler(identifier)
# Per-engine set of the token lists that _execute_once has already executed.
_execute_once_cache: WeakKeyDictionary[Engine, Set[tuple[Token, ...]]]=WeakKeyDictionary()
def _execute_once(e: BalancedTokenList)->bool:
    r"""
    Execute the token list, but only the first time for each engine.

    :param e: the token list to execute, must be balanced.
    :return: ``True`` if the token list was executed by this call, ``False`` if it was skipped.

    >>> count[0]=5
    >>> _execute_once(BalancedTokenList(r'\advance\count0 by 1'))
    True
    >>> count[0]
    6
    >>> _execute_once(BalancedTokenList(r'\advance\count0 by 1'))
    False
    >>> count[0]
    6
    >>> with default_engine.set_engine(luatex_engine):
    ...     count[0]=7
    ...     _execute_once(BalancedTokenList(r'\advance\count0 by 1')) # still executed because new engine
    ...     count[0]
    ...     _execute_once(BalancedTokenList(r'\advance\count0 by 1')) # not executed
    ...     count[0]
    True
    8
    False
    8
    >>> count[0] # old engine
    6
    """
    assert e.is_balanced()
    already_run=_defaultget_with_cleanup(_execute_once_cache, set)
    key=tuple(e)
    if key in already_run:
        return False
    # Recorded before executing, as in the original order of operations.
    already_run.add(key)
    e.execute()
    return True
# Per-engine cache: (number of parameters, tokens) -> identifier of the handler defined for them.
_execute_arg_cache: WeakKeyDictionary[Engine, Dict[tuple[int, tuple[Token, ...]], str]]=WeakKeyDictionary()
def _execute_cached0_arg(e: BalancedTokenList, count: int)->None:
    """
    Internal function. Like :func:`_execute_cached0`, but the cached handler takes ``count``
    parameters from the input stream, see :func:`add_TeX_handler_param`.

    :param e: the body of the handler, must be balanced.
    :param count: the number of parameters of the handler.
    """
    assert e.is_balanced()
    handlers=_defaultget_with_cleanup(_execute_arg_cache, dict)
    key=(count, tuple(e))
    identifier=handlers.get(key)
    if identifier is None:
        identifier=handlers[key]=add_TeX_handler_param(e, count)
    call_TeX_handler(identifier)
# Control sequences used to pass arguments to cached code; the first one doubles as a staging slot.
_arg_tokens=[P.arga, P.argb, P.argc]
_arg1=_arg_tokens[0]
def _store_to_arg1(e: BalancedTokenList)->None:
    r"""
    Internal function for a few things...

    Store ``e`` in the first argument control sequence. The value is stored as a string
    when ``e`` represents one, and as a token list otherwise.

    ..
        >>> def test(t): _store_to_arg1(t); assert _arg1.tl()==t, (_arg1.tl(), t)
        >>> for i in range(700): test(BalancedTokenList.fstr(chr(i)))
        >>> with default_engine.set_engine(luatex_engine):
        ...     for i in range(700): test(BalancedTokenList.fstr(chr(i)))
    """
    if not e.is_str():
        _arg1.tl(e)
        return
    _arg1.str(e.str())
def _putnext_braced_arg1()->None:
    """
    Put the content of the first argument control sequence, wrapped in braces, in the input stream.

    >>> _store_to_arg1(BalancedTokenList('ab'))
    >>> _putnext_braced_arg1()
    >>> Token.get_next(4)
    <TokenList: {₁ a₁₁ b₁₁ }₂>
    """
    pusher=typing.cast(Callable[[], None], Python_call_TeX_local(
        r"""
\cs_new_protected:Npn %name% {
%optional_sync%
\expandafter \pythonimmediatelisten \expandafter { \__arga }
}
""", recursive=False))
    pusher()
def _copy_arg1_to(e: Token)->None:
    r"""
    Internal function. Copy the first argument control sequence into ``e`` on the [TeX] side.

    Dedicated [TeX] helpers (a plain ``\let``) exist for the second and third argument
    control sequences; any other target is rejected by an assertion.

    :param e: the destination, expected to be ``P.argb`` or ``P.argc``.
    """
    if e==P.argb:
        # The helper has to be *called*; building it without calling it does not run any TeX code.
        typing.cast(Callable[[], None], Python_call_TeX_local(
            r"\cs_new_protected:Npn %name% { \let \__argb \__arga %optional_sync% \pythonimmediatelisten }", recursive=False))()
        return
    if e==P.argc:
        typing.cast(Callable[[], None], Python_call_TeX_local(
            r"\cs_new_protected:Npn %name% { \let \__argc \__arga %optional_sync% \pythonimmediatelisten }", recursive=False))()
        return
    assert False
    # Generic fallback, only reached when assertions are disabled.
    e.set_eq(_arg1)
def _execute_cached(e: BalancedTokenList|str, *args: BalancedTokenList|str)->None:
    r"""
    Internal function, identical to :func:`_execute_cached0`, only *e* is cached, the rest are
    passed in every time and accessible as ``_arg_tokens[0]`` etc.

    :param e: the code to execute; it is cached.
    :param args: at most ``len(_arg_tokens)`` values. A string is converted with
        ``BalancedTokenList.fstr``.

    >>> group.begin()
    >>> _execute_cached(r'\catcode \_pythonimmediate_arga', '15=7')
    >>> catcode[15].value
    7
    >>> group.end()
    """
    assert len(args)<=len(_arg_tokens)
    # Every value is first stored in the first slot and then copied to its own slot.
    # Going backwards makes sure the first slot receives its own value last.
    assignments=list(zip(args, _arg_tokens))
    for value, slot in reversed(assignments):
        tokens=BalancedTokenList.fstr(value) if isinstance(value, str) else value
        _store_to_arg1(tokens)
        if slot!=_arg1:
            _copy_arg1_to(slot)
    _execute_cached0(BalancedTokenList(e))
def _execute_cached_arg(e: BalancedTokenList|str, *args: BalancedTokenList|str)->None:
    r"""
    Internal function. Like :func:`_execute_cached`, but the arguments are handed over as
    braced groups in the input stream, which the cached handler takes as its macro
    parameters ``#1``, ``#2``...

    :param e: the code to execute; it is cached together with the number of arguments.
    :param args: at most 9 values. A string is converted with ``BalancedTokenList.fstr``.
    """
    assert len(args)<=9
    # Each argument is pushed to the *front* of the input stream, so the last one goes first.
    # All arguments are pushed: the handler is defined with ``len(args)`` parameters and would
    # otherwise grab unrelated tokens from the input stream.
    for a in reversed(args):
        _store_to_arg1(BalancedTokenList.fstr(a) if isinstance(a, str) else a)
        _putnext_braced_arg1()
    _execute_cached0_arg(BalancedTokenList(e), len(args))
def continue_until_passed_back_str()->str:
    r"""
    Resume [TeX] until it passes control back to Python, and return the string it sends along.

    Usage:

    First put some tokens in the input stream that includes ``\pythonimmediatecontinue{...}``
    (or ``%sync% \pythonimmediatelisten``), then call ``continue_until_passed_back()``.

    The function will only return when the ``\pythonimmediatecontinue`` is called.
    """
    resume=typing.cast(Callable[[], TTPEmbeddedLine], Python_call_TeX_local(
        r"""
\cs_new_eq:NN %name% \relax
"""))
    return resume()
def continue_until_passed_back()->None:
    r"""
    Same as ``continue_until_passed_back_str()`` but nothing can be returned from [TeX] to Python.

    So, this resumes the execution of [TeX] code until ``\pythonimmediatecontinuenoarg`` is executed.

    See :mod:`pythonimmediate` for some usage examples.
    """
    passed_back=continue_until_passed_back_str()
    assert not passed_back
def expand_once()->None:
    r"""
    Expand the following content in the input stream once.

    >>> BalancedTokenList(r'\iffalse 1 \else 2 \fi').put_next()  # now following tokens in the input stream is '\iffalse 1 \else 2 \fi'
    >>> expand_once()  # now following tokens in the input stream is '2 \fi'
    >>> Token.get_next()
    <Token: 2₁₂>
    >>> Token.get_next()
    <Token: \fi>
    >>> BalancedTokenList(r'\fi').execute()
    """
    expander=typing.cast(Callable[[], None], Python_call_TeX_local(
        r"""
\cs_new_protected:Npn %name% { \expandafter \pythonimmediatecontinuenoarg }
""", recursive=False, sync=True))
    expander()
def _get_charcode(x: str|int)->int:
    """
    Return the character code denoted by ``x``.

    :param x: either the character code itself, or a string of exactly one character.
    """
    if not isinstance(x, int):
        assert len(x)==1
        return ord(x)
    return x
"""
we need to put the docstring in the class instead of member
because although Sphinx supports docstring after member
https://stackoverflow.com/a/20230473
pytest doctest doesn't
https://github.com/pytest-dev/pytest/issues/6996
so we use :meta public: to force include docstring of private member in documentation
"""
class _GroupManagerStorage(threading.local):
    """
    Thread-local storage for :class:`_GroupManager`.

    The storage is separated out so that mypy can type check the parent class _GroupManager.
    """

    def __init__(self)->None:
        # Starts out empty.
        self.running_instances: list=list()
[docs]class _GroupManager:
"""
Create a semi-simple group.
Use as ``group.begin()`` and ``group.end()``, or as a context manager::
>>> count[0]=5
>>> with group:
... count[0]=6
... count[0]
6
>>> count[0]
5
Note that the user must not manually change the group level in a context::
>>> with group:
... group.begin()
Traceback (most recent call last):
...
ValueError: Group level changed during group
They must not change the engine either::
>>> tmp_engine=ChildProcessEngine("pdftex")
>>> with group:
... c=default_engine.set_engine(tmp_engine)
Traceback (most recent call last):
...
ValueError: Engine changed during group
>>> tmp_engine.close()
>>> c.restore()
>>> group.end()
:meta public:
"""
def __init__(self)->None:
# thread-local stack of the context managers opened through ``with group``
self._storage=_GroupManagerStorage()
@contextlib.contextmanager
def _run(self)->Generator[None, None, None]:
# Remember the engine, open the group, and record the group level inside it;
# both are compared again when the context is left.
engine=default_engine.engine
self.begin()
level=T.currentgrouplevel.int()
try: yield
finally:
if engine is not default_engine.engine:
raise ValueError("Engine changed during group")
if T.currentgrouplevel.int()!=level:
raise ValueError("Group level changed during group")
# NOTE(review): whether this call belongs to the ``finally`` suite is not visible in this listing -- confirm
self.end()
def begin(self)->None:
# open the group by executing ``\begingroup``
TokenList(r"\begingroup").execute()
def __enter__(self)->None:
# A fresh ``_run()`` context manager is entered and pushed, so that ``__exit__`` can find it again.
instance: Any=self._run()
instance.__enter__()
self._storage.running_instances.append(instance)
def end(self)->None:
# close the group by executing ``\endgroup``
TokenList(r"\endgroup").execute()
def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any)->None:
# leave the most recently entered context manager
instance=self._storage.running_instances.pop()
instance.__exit__(exc_type, exc_value, traceback)
group=_GroupManager()
r"""
See :class:`_GroupManager`.
"""
class _CatcodeManager:
    """
    Python interface to manage the category code. Example usage::

        >>> catcode[97]
        <Catcode.letter: 11>
        >>> catcode["a"] = C.letter

    The index is either a character code or a string of exactly one character.

    :meta public:
    """

    def __getitem__(self, x: str|int)->Catcode:
        # Expanding ``\\the\\catcode<charcode>`` yields the category code as a number.
        query=BalancedTokenList([r"\the\catcode" + str(_get_charcode(x))])
        return Catcode.lookup(query.expand_o().int())

    def __setitem__(self, x: str|int, catcode: Catcode)->None:
        # Alternative formulation, kept for reference:
        #BalancedTokenList([r"\catcode" + str(_get_charcode(x)) + "=" + str(catcode.value)]).execute(); return
        setter=typing.cast(Callable[[PTTVerbatimLine], None], Python_call_TeX_local(
            r"""
\cs_new_protected:Npn %name% {
%read_arg0(\__data)%
\catcode \__data \pythonimmediatecontinuenoarg
}
""" , sync=True))
        # The assignment is sent as the text ``<charcode>=<catcode>``.
        assignment=str(_get_charcode(x)) + "=" + str(catcode.value)
        setter(PTTVerbatimLine(assignment))

catcode=_CatcodeManager()
r"""
See :class:`_CatcodeManager`.
"""
class MathClass(enum.Enum):
    """
    The math classes, numbered 0 to 7.

    ``varfam`` is an alias of ``variable_family``.
    """
    ord = 0
    op = 1
    bin = 2
    rel = 3
    open = 4
    close = 5
    punct = 6
    variable_family = varfam = 7

    @staticmethod
    def lookup(x: int)->MathClass:
        """Return the member whose value is ``x``; an unknown value raises ``KeyError``."""
        return _mathclass_value_to_member[x]

# Lookup table from value to member, filled once after the class exists.
_mathclass_value_to_member = dict((member.value, member) for member in MathClass)
@dataclass(frozen=True)
class Umathcode:
    r"""
    A math code, made of a family, a math class and a character position.

    As a number, the position occupies the lowest 21 bits, the class the next 3 bits and the
    family the 8 bits above them; the number is read as a signed 32-bit integer.
    :meth:`parse` and :attr:`value` convert between the two representations.

    Example of using *active*::

        >>> Umathcode.parse(0x1000000)
        Umathcode.active
        >>> Umathcode.active.family
        1

    A family of 128 or more gives a negative number::

        >>> Umathcode(family=255, cls=MathClass.variable_family, position=0x1fffff).value
        -1

    :meta public:
    """
    family: int
    cls: MathClass
    position: int

    active=typing.cast("Umathcode", None)  # class member

    @staticmethod
    def parse(x: int)->Umathcode:
        """
        Build the :class:`Umathcode` encoded by the number ``x``.

        :param x: a number in the signed 32-bit range. ``0x1000000`` stands for ``Umathcode.active``.
        """
        if x==0x1000000: return Umathcode.active
        assert -0x80000000 <= x <= 0x7fffffff
        position = x&((1<<21)-1)
        x>>=21
        cls = MathClass.lookup(x&((1<<3)-1))
        x>>=3
        assert -0x80 <= x <= 0x7f
        # The remaining bits may be negative; masking gives the family as 0..255.
        family = x&((1<<8)-1)
        return Umathcode(family, cls, position)

    @property
    def value(self)->int:
        """
        The number encoding this math code, the inverse of :meth:`parse`.

        The result lies in the signed 32-bit range, so a family of 128 or more gives a
        negative number.
        """
        packed = (self.family<<3|self.cls.value)<<21|self.position
        if 0x80000000 <= packed <= 0xffffffff:
            # Top bit set: fold into the negative half of the signed 32-bit range,
            # which is the range parse() accepts.
            packed -= 0x100000000
        return packed

    def __repr__(self)->str:
        if self==Umathcode.active: return "Umathcode.active"
        try:
            c = chr(self.position)
            return f'Umathcode(family={self.family}, cls={self.cls!r}, position={self.position} {c!r})'
        except ValueError:
            # The position is not a valid code point; leave the character out.
            return f'Umathcode(family={self.family}, cls={self.cls!r}, position={self.position})'
# The placeholder ``active`` declared in the class body is replaced by a real instance here,
# because an instance can only be created once the class itself exists.
Umathcode.active = Umathcode(family=1, cls=MathClass.ord, position=0)
class _UmathcodeManager:
    r"""
    Read and write math codes through ``\Umathcodenum``.
    Interface is similar to :const:`catcode`.

    For example::

        >>> umathcode[0]
        Traceback (most recent call last):
            ...
        RuntimeError: umathcode is not available for non-Unicode engines!
        >>> from pythonimmediate.engine import ChildProcessEngine
        >>> with default_engine.set_engine(luatex_engine): umathcode["A"]
        Umathcode(family=1, cls=<MathClass.variable_family: 7>, position=65 'A')

    :meta public:
    """

    def _ensure_unicode(self)->None:
        """Raise ``RuntimeError`` unless the current engine is a Unicode engine."""
        if engine.is_unicode:
            return
        raise RuntimeError("umathcode is not available for non-Unicode engines!")

    def __getitem__(self, x: str|int)->Umathcode:
        self._ensure_unicode()
        query=BalancedTokenList([r"\the\Umathcodenum" + str(_get_charcode(x))])
        return Umathcode.parse(query.expand_o().int())

    def __setitem__(self, x: str|int, code: Umathcode)->None:
        self._ensure_unicode()
        assignment=r"\Umathcodenum" + str(_get_charcode(x)) + "=" + str(code.value)
        BalancedTokenList([assignment]).execute()

umathcode=_UmathcodeManager()
r"""
See :class:`_UmathcodeManager`.
"""
class _CountManager:
    r"""
    Manipulate count registers. Interface is similar to :const:`catcode`.

    An ``int`` key addresses the numbered register ``\count<n>``; a ``str``
    key addresses the named integer parameter/register through ``T[name]``.

    For example::

        >>> count[5]=6 # equivalent to `\count5=6`
        >>> count[5]
        6
        >>> count["endlinechar"]=10 # equivalent to `\endlinechar=10`
        >>> T.endlinechar.int() # can also be accessed this way
        10
        >>> count["endlinechar"]=13

    As shown in the last example, accessing named count registers can also be done through :meth:`Token.int`.

    :meta public:
    """

    def __getitem__(self, x: str|int)->int:
        if isinstance(x, int):
            # The key is a register number, not a character code, so it is
            # formatted directly -- the same way ``__setitem__`` does it.
            return BalancedTokenList([r"\the\count" + str(x)]).expand_o().int()
        assert isinstance(x, str)
        return T[x].int()

    def __setitem__(self, x: str|int, val: int)->None:
        if isinstance(x, int):
            BalancedTokenList([r"\count" + str(x) + "=" + str(val)]).execute()
            return
        assert isinstance(x, str)
        T[x].int(val)
# Module-level accessor instance; used by indexing, e.g. ``count[5]`` or ``count["endlinechar"]``.
count=_CountManager()
"""
See :class:`_CountManager`.
"""
class _ToksManager:
    r"""
    Manipulate tok registers. Interface is similar to :const:`catcode`.

    Reading ``toks[n]`` expands ``\the\toks<n>`` once and returns the result;
    assigning ``toks[n]=value`` executes ``\toks<n>`` followed by the value.

    For example::

        >>> toks[0]=BalancedTokenList('abc')
        >>> toks[0]
        <BalancedTokenList: a₁₁ b₁₁ c₁₁>

    :meta public:
    """

    def __getitem__(self, x: int)->BalancedTokenList:
        query = BalancedTokenList([r"\the\toks" + str(x)])
        return query.expand_o()

    def __setitem__(self, x: int, val: BalancedTokenList)->None:
        assignment = BalancedTokenList([r"\toks" + str(x), val])
        assignment.execute()
# Module-level accessor instance; used by indexing, e.g. ``toks[0]``.
toks=_ToksManager()
"""
See :class:`_ToksManager`.
"""
def wlog(s: str)->None:
    r"""
    Pass ``s`` to LaTeX's ``\wlog``.

    The text is supplied as the argument that the TeX snippet refers to
    as ``\_pythonimmediate_arga``.
    """
    tex_snippet = r'\wlog{\_pythonimmediate_arga}'
    _execute_cached(tex_snippet, s)
def typeout(s: str)->None:
    r"""
    Pass ``s`` to LaTeX's ``\typeout``.

    The text is supplied as the argument that the TeX snippet refers to
    as ``\_pythonimmediate_arga``.
    """
    tex_snippet = r'\typeout{\_pythonimmediate_arga}'
    _execute_cached(tex_snippet, s)
def _ensure_lua_engine()->None:
    """
    Assert that there is a current engine and that its name is ``"luatex"``.

    Raises :class:`AssertionError` with an explanatory message otherwise.
    """
    assert default_engine.engine, "No current engine!"
    engine_name = default_engine.name
    assert engine_name=="luatex", f"Current engine is {engine_name}, not LuaTeX!"
def _lua_exec_cached(s: str)->None:
    r"""
    Run the Lua code ``s`` by executing ``\directlua{<s>}`` through ``_execute_cached``.

    Requires the current engine to be LuaTeX (checked with ``_ensure_lua_engine``).
    """
    _ensure_lua_engine()
    lua_body = BalancedTokenList.fstr(s)
    _execute_cached(BalancedTokenList([r'\directlua', lua_body]))
def lua_try_eval(s: str)->Optional[str]:
    r"""
    Evaluate some Lua code, if fail then execute it.

    Works like an interactive shell: ``s`` is first loaded as the expression
    ``return <s>;``; if that does not load, ``s`` is loaded as a chunk of
    statements instead.

    :param s: the Lua code.
    :return: ``tostring`` of the resulting value, or ``None`` when the value is ``nil``.
    :raises RuntimeError: when the code cannot be loaded or raises an error while running;
        the message is the Lua error text.

    If you use IPython shell/Jupyter notebook, it may be desired to add a magic command to execute Lua code.
    For example in IPython: Create a file ``.ipython/profile_default/startup/lua_magic.py``::

        # Support %l <code> and %%l <newline> <line(s) of code> to execute Lua code in the LuaTeX engine.
        from typing import Optional
        from pythonimmediate import lua_try_eval
        from IPython.core.magic import register_line_magic, register_cell_magic
        register_line_magic("l")(lambda line: lua_try_eval(line))
        @register_cell_magic("l")
        def _cell_magic(line: str, cell: str)->Optional[str]:
            assert not line.strip(), "first line after %%l must be empty!"
            return lua_try_eval(cell)

    >>> c=default_engine.set_engine(ChildProcessEngine("luatex"))
    >>> lua_try_eval("2+3")
    '5'
    >>> lua_try_eval("do local a=2; return a+4 end")
    '6'
    >>> lua_try_eval("do local a=2 end")
    >>> c.restore()
    """
    _ensure_lua_engine()
    _store_to_arg1(BalancedTokenList.fstr(s))
    # The Lua side reads the code from the ``_pythonimmediate_arga`` macro and
    # prints one tagged line: "+<value>" on success, "-" for a nil result,
    # "!<message>" on a load or runtime error.
    lua_chunk = r'''
do
local result
local s=token.get_macro"_pythonimmediate_arga"
local function try_call_print(f)
local success, f_result=pcall(f)
if success then
if f_result==nil then
result="-"
else
result="+"..tostring(f_result)
end
else
result="!"..tostring(f_result)
end
end
local f, err=load("return "..s..";", "=stdin", "t")
if f~=nil then
try_call_print(f)
else
f, err=load(s)
if f~=nil then
try_call_print(f)
else
result="!"..tostring(err)
end
end
tex.sprint(-2, result)
end
'''.strip()
    _execute_cached0(BalancedTokenList([
        r'\edef\_pythonimmediate_arga{\directlua',
        BalancedTokenList.fstr(lua_chunk),
        r'}',
    ]))
    reply = P.arga.str()
    assert reply
    marker, payload = reply[0], reply[1:]
    if marker == "!":
        raise RuntimeError(payload)
    if marker == "+":
        return payload
    assert reply == "-", reply
    return None
[docs]def peek_next_meaning()->str:
r"""
Get the meaning of the following token, as a string, using the current ``\escapechar``.
The token is only inspected: as the example below shows, it can still be fetched afterwards.
This is recommended over :meth:`Token.peek_next` as it will not tokenize an extra token.
It's undefined behavior if there's a newline (``\newlinechar`` or ``^^J``, the latter is OS-specific)
in the meaning string.
>>> BalancedTokenList("2").put_next()
>>> peek_next_meaning()
'the character 2'
>>> Token.get_next()
<Token: 2₁₂>
"""
# How the TeX side below works: ``%name%`` uses ``\futurelet`` to make ``\__tmp`` an alias of the
# upcoming token, then the callback redefines ``\__tmp`` as the text of its ``\meaning``, sends that
# back and resumes listening.
# NOTE(review): this module is later passed to ``scan_Python_call_TeX_module(__name__)``; presumably
# that scan relies on this ``Python_call_TeX_local(...)`` call keeping its literal arguments in this
# exact form -- confirm before refactoring the expression.
return typing.cast(Callable[[], TTPEmbeddedLine], Python_call_TeX_local(
r"""
\cs_new_protected:Npn \__peek_next_meaning_callback: {
\edef \__tmp {\meaning \__tmp} % just in case ``\__tmp`` is outer, ``\write`` will not be able to handle it
\__send_content%naive_send%:e { r \__tmp }
\pythonimmediatelisten
}
\cs_new_protected:Npn %name% {
\futurelet \__tmp \__peek_next_meaning_callback:
}
""", recursive=False))()
# Lookup table used by ``parse_meaning_str``: maps everything in a meaning string except its final
# character (note the trailing space in every key) to the corresponding category code.
meaning_str_to_catcode: Dict[str, Catcode]={
"begin-group character ": Catcode.bgroup,
"end-group character ": Catcode.egroup,
"math shift character ": Catcode.math,
"alignment tab character ": Catcode.alignment,
"macro parameter character ": Catcode.parameter,
"superscript character ": Catcode.superscript,
"subscript character ": Catcode.subscript,
"blank space ": Catcode.space,
"the letter ": Catcode.letter,
"the character ": Catcode.other,
}
def parse_meaning_str(s: str)->Optional[Tuple[Catcode, str]]:
    """
    Split a meaning string such as ``"the character 2"`` into category code and character.

    The last character of ``s`` is taken as the character and everything
    before it is looked up in ``meaning_str_to_catcode``.

    :return: ``(catcode, character)``, or ``None`` if ``s`` is empty or its prefix is not in the table.
    """
    if not s:
        return None
    description, char = s[:-1], s[-1]
    if description not in meaning_str_to_catcode:
        return None
    return meaning_str_to_catcode[description], char
# Scan this module and ``pythonimmediate.lowlevel``. This runs at the end of the module,
# after the definitions above.
scan_Python_call_TeX_module(__name__)
scan_Python_call_TeX_module("pythonimmediate.lowlevel")
from . import simple # importing this also scans its source code and populates bootstrap_code, because of the scan_Python_call_TeX_module(__name__) call inside it
from .simple import get_arg_estr # needed a few times above
# re-exported for backwards compatibility
from .simple import execute, print_TeX
from . import texcmds