"""
Work with org file abstract syntax trees.
See https://orgmode.org/worg/dev/org-syntax.html for a description of the org
syntax.
"""
import re
from collections.abc import Iterable
from typing import NamedTuple
from collections import ChainMap
from copy import copy, deepcopy
from .util import SingleDispatchBase, parse_iso_date
_OrgNodeTypeBase = NamedTuple(
    'OrgNodeType',
    [
        ('name', str),
        ('is_element', bool),
        ('is_greater_element', bool),
        ('is_recursive', bool),
        ('is_object_container', bool),
    ],
)


class OrgNodeType(_OrgNodeTypeBase):
    """Immutable record describing one kind of org AST node.

    Attributes
    ----------
    name : str
        The unique name of this node type.
    is_element : bool
        Whether this node type is an element. "An element defines syntactical
        parts that are at the same level as a paragraph, i.e. which cannot
        contain or be included in a paragraph."
    is_object : bool
        Whether this node type is an object. Derived: every node type that is
        not an element is an object. "An object is a part that could be
        included in an element."
    is_greater_element : bool
        Whether this node type is a greater element. "Greater elements are all
        parts that can contain an element."
    is_recursive : bool
        Whether this node type is a recursive object.
    is_object_container : bool
        Whether this node type is an object container, i.e. can directly
        contain objects.

    References
    ----------
    `Org Syntax <https://orgmode.org/worg/dev/org-syntax.html>`_
    """

    def __repr__(self):
        # Only the name is shown; the boolean flags are omitted for brevity.
        cls_name = type(self).__name__
        return '{}({!r})'.format(cls_name, self.name)

    @property
    def is_object(self):
        # Objects are defined purely as "not an element".
        if self.is_element:
            return False
        return True
#: Mapping from names of all AST node types to :class:`.OrgNodeType` instances.
ORG_NODE_TYPES = {
    name: OrgNodeType(name, element, greater, recursive, container)
    for name, element, greater, recursive, container in [
        # Name                   Element Greater Recursive Container
        # --- elements ---
        ('org-data',             True,   True,   False,    False),
        ('babel-call',           True,   False,  False,    False),
        ('center-block',         True,   True,   False,    False),
        ('clock',                True,   False,  False,    False),
        ('comment',              True,   False,  False,    False),
        ('comment-block',        True,   False,  False,    False),
        ('diary-sexp',           True,   False,  False,    False),
        ('drawer',               True,   True,   False,    False),
        ('dynamic-block',        True,   True,   False,    False),
        ('example-block',        True,   False,  False,    False),
        ('export-block',         True,   False,  False,    False),
        ('fixed-width',          True,   False,  False,    False),
        ('footnote-definition',  True,   True,   False,    False),
        ('headline',             True,   True,   False,    False),
        ('horizontal-rule',      True,   False,  False,    False),
        ('inlinetask',           True,   True,   False,    False),
        ('item',                 True,   True,   False,    False),
        ('keyword',              True,   False,  False,    False),
        ('latex-environment',    True,   False,  False,    False),
        ('node-property',        True,   False,  False,    False),
        ('paragraph',            True,   False,  False,    True),
        ('plain-list',           True,   True,   False,    False),
        ('planning',             True,   False,  False,    False),
        ('property-drawer',      True,   True,   False,    False),
        ('quote-block',          True,   True,   False,    False),
        ('section',              True,   True,   False,    False),
        ('special-block',        True,   True,   False,    False),
        ('src-block',            True,   False,  False,    False),
        ('table',                True,   True,   False,    False),
        ('table-row',            True,   False,  False,    True),
        ('verse-block',          True,   False,  False,    True),
        # --- objects ---
        ('bold',                 False,  False,  True,     True),
        ('code',                 False,  False,  False,    False),
        ('entity',               False,  False,  False,    False),
        ('export-snippet',       False,  False,  False,    False),
        ('footnote-reference',   False,  False,  True,     True),
        ('inline-babel-call',    False,  False,  False,    False),
        ('inline-src-block',     False,  False,  False,    False),
        ('italic',               False,  False,  True,     True),
        ('latex-fragment',       False,  False,  False,    False),
        ('line-break',           False,  False,  False,    False),
        ('link',                 False,  False,  True,     True),
        ('macro',                False,  False,  False,    False),
        ('radio-target',         False,  False,  True,     True),
        ('statistics-cookie',    False,  False,  False,    False),
        ('strike-through',       False,  False,  True,     True),
        ('subscript',            False,  False,  True,     True),
        ('superscript',          False,  False,  True,     True),
        ('table-cell',           False,  False,  True,     True),
        ('target',               False,  False,  False,    False),
        ('timestamp',            False,  False,  False,    False),
        ('underline',            False,  False,  True,     True),
        ('verbatim',             False,  False,  False,    False),
    ]
}
#: Mapping from org element/node types to their Python class
NODE_CLASSES = {}


def node_cls(type_):
    """Build a class decorator that records a class in :data:`.NODE_CLASSES`.

    Parameters
    ----------
    type_
        Key to file the decorated class under (this module uses node type
        names such as ``'headline'``).

    Returns
    -------
    callable
        Decorator that stores the class under ``type_``, replacing any class
        previously registered for that key, and returns the class unchanged.
    """
    def register(cls):
        NODE_CLASSES.update({type_: cls})
        return cls

    return register
def dump_ast(value, properties=False, indent=' ', _level=0):
    """Print a debug representation of an org AST node and its descendants.

    Nodes print their type name followed by their (optionally shown)
    properties and numbered contents, each one level deeper. Lists that
    contain at least one node (secondary strings) are printed bracketed, one
    item per line. Anything else is printed with ``repr``.

    Parameters
    ----------
    value : .OrgNode
        Node (or, when recursing, any property/content value) to print.
    properties : bool
        Also print node properties.
    indent : str
        Characters to indent with.
    _level : int
        Current nesting depth; used internally by the recursion.
    """
    deeper = _level + 1

    if isinstance(value, OrgNode):
        print(value.type.name)
        prefix = indent * deeper
        if properties:
            # Sorted so the output is stable regardless of dict ordering
            for prop_name in sorted(value.properties):
                print('%s:%-15s = ' % (prefix, prop_name), end='')
                dump_ast(value.properties[prop_name], properties, indent, deeper)
        for position, child in enumerate(value.contents):
            print('%s%d ' % (prefix, position), end='')
            dump_ast(child, properties, indent, deeper)
        return

    # Special printing for secondary strings, which are lists containing more nodes
    holds_nodes = isinstance(value, list) and any(isinstance(item, OrgNode) for item in value)
    if not holds_nodes:
        print(repr(value))
        return

    print('[')
    for item in value:
        print(indent * deeper, end='')
        dump_ast(item, properties, indent, deeper)
    print((indent * _level) + ']')
class OrgTimestampInterval:
    """An interval of time stored in an Org mode time stamp's repeater or warning.

    Attributes
    ----------
    type : str
    unit : str
    value : float
    """

    def __init__(self, type_, unit, value):
        self.type = type_
        self.unit = unit
        self.value = value

    def __repr__(self):
        # Show all three fields so intervals are readable when debugging
        return '%s(type=%r, unit=%r, value=%r)' % (
            type(self).__name__, self.type, self.unit, self.value,
        )
class OrgTimestamp:
    """Stores Org mode timestamp data, without the whole AST node.

    Attributes
    ----------
    tstype : str
    start : datetime.datetime
    end : datetime.datetime
        Falls back to ``start`` when no end is given, so a timestamp that is
        not a range has ``start == end``.
    repeater : .OrgTimestampInterval
    warning : .OrgTimestampInterval
    is_range : bool
        True when both ``start`` and ``end`` are set and differ.
    interval : datetime.timedelta
        ``end - start``, or None if either bound is missing.
    """

    def __init__(self, tstype, start, end=None, repeater=None, warning=None):
        self.tstype = tstype
        self.start = start
        self.end = start if end is None else end
        self.repeater = repeater
        self.warning = warning

    @property
    def is_range(self):
        # Coerce to a real bool: a bare ``and`` chain would hand back None
        # (or a datetime) when one of the bounds is missing.
        return bool(self.start and self.end and (self.start != self.end))

    @property
    def interval(self):
        return self.end - self.start if self.start and self.end else None

    def __repr__(self):
        if self.is_range:
            return '<%s %s %s to %s>' % (type(self).__name__, self.tstype, self.start, self.end)
        else:
            return '<%s %s %s>' % (type(self).__name__, self.tstype, self.start or self.end)
class OrgNode:
    """A node in an org file abstract syntax tree.

    Implements the sequence protocol as a sequence containing its child nodes
    (identically to :attr:`contents`). Also allows accessing property values by
    indexing with a string key.

    Attributes
    ----------
    type: .OrgNodeType
        Node type, obtained from `org-element-type`.
    properties : dict
        Dictionary of property values, obtained from `org-element-property`.
    contents : list
        List of contents (org nodes or strings), obtained from
        `org-element-contents`.
    ref : str
        A unique ID assigned to the node during the export process.
    keywords : dict
        Dictionary of keyword values.
    meta : dict
        A dictionary containing arbitrary application-specific metadata.
    is_outline : bool
        Whether this node is an outline node.
    """

    is_outline = False

    def __init__(self, type_, properties=None, contents=None, keywords=None, ref=None, meta=None):
        # Node types may be given by name and are resolved through the registry
        if isinstance(type_, str):
            type_ = ORG_NODE_TYPES[type_]
        if not isinstance(type_, OrgNodeType):
            raise TypeError(type(type_))
        self.type = type_
        # Containers are copied so the node never aliases the caller's objects
        self.properties = dict(properties or {})
        self.keywords = dict(keywords or {})
        self.ref = ref
        self.contents = list(contents or [])
        self.meta = dict(meta or {})

    def __copy__(self, deep=False, memo=None):
        """Create a copy of the node by re-invoking its class constructor.

        Only the generic node state (type, properties, contents, keywords, ref
        and meta) is passed on; any extra constructor arguments of a subclass
        take their defaults again.

        Parameters
        ----------
        deep : bool
            Deep-copy the containers instead of shallow-copying them.
        memo : dict
            Memo dictionary forwarded to :func:`copy.deepcopy` when ``deep``.
        """
        if deep:
            def dup(value):
                return deepcopy(value, memo)
        else:
            dup = copy
        return type(self)(
            self.type,
            properties=dup(self.properties),
            contents=dup(self.contents),
            keywords=dup(self.keywords),
            ref=self.ref,
            meta=dup(self.meta),
        )

    def __deepcopy__(self, memo=None):
        # copy.deepcopy() always passes the memo dict as an argument
        return self.__copy__(deep=True, memo=memo)

    @staticmethod
    def _iter_children_recursive(obj):
        """
        Iterate through child nodes through recursive data structures (e.g.
        property values that are lists that contain nodes) but don't recurse
        into the children themselves.
        """
        # Return nodes directly
        if isinstance(obj, OrgNode):
            yield obj
        # Skip strings - otherwise we get infinite recursion trying to iterate
        elif isinstance(obj, str):
            return
        # Iterate through lists and other collections
        elif isinstance(obj, Iterable):
            for item in obj:
                yield from OrgNode._iter_children_recursive(item)
        # Ignore everything else

    @property
    def children(self):
        """Iterator over all child AST nodes (in contents or keyword/property values)."""
        for collection in (self.properties.values(), self.keywords.values(), self.contents):
            yield from self._iter_children_recursive(collection)

    def descendants(self, incself=False, properties=False):
        """Recursively iterate over all of the node's descendants.

        Parameters
        ----------
        incself : bool
            Include self.
        properties : bool
            Include children in the node's properties, not just :attr:`contents`
            (see :attr:`children`).

        Yields
        -------
        .OrgNode
        """
        if incself:
            yield self
        for item in (self.children if properties else self.contents):
            # Contents may hold plain strings, which have no descendants
            if isinstance(item, OrgNode):
                yield from item.descendants(incself=True, properties=properties)

    def __repr__(self):
        return '%s(type=%r)' % (type(self).__name__, self.type.name)

    def __len__(self):
        return len(self.contents)

    def __iter__(self):
        return iter(self.contents)

    def __getitem__(self, key):
        # Integers and slices index the contents, strings look up properties
        if isinstance(key, (int, slice)):
            return self.contents[key]
        elif isinstance(key, str):
            return self.properties[key]
        else:
            raise TypeError('Expected str, int or slice, got %r' % type(key))

    def dump(self, properties=False, indent=' '):
        """Print a debug representation of the node and its descendants.

        Parameters
        ----------
        properties : bool
            Also print node properties.
        indent : str
            Characters to indent with.
        """
        dump_ast(self, properties, indent)
class OrgOutlineNode(OrgNode):
    """Abstract base class for org node that is a component of the outline tree.

    Corresponds to the root org-data node or a headline node. Instantiating
    this class directly raises :class:`TypeError`; subclasses must implement
    :meth:`_dump_name`.

    Attributes
    ----------
    level : int
        Outline level. 0 corresponds to the root node of the file. Not set by
        this class itself; expected to be provided by subclasses.
    section : OrgNode
        Org node with type `"section"` that contains the outline node's direct
        content (not part of any nested outline nodes), or None if the first
        content item is not a section.
    subheadings : list
        List of nested headings.
    """

    is_outline = True

    def __new__(cls, *args, **kwargs):
        if cls is OrgOutlineNode:
            raise TypeError("Can't instantiate abstract base class directly")
        return object.__new__(cls)

    @property
    def section(self):
        # The section, when present, is always the first content item
        if self.contents and self.contents[0].type.name == 'section':
            return self.contents[0]
        return None

    @property
    def subheadings(self):
        # Everything after the optional leading section; always a new list
        first = 0 if self.section is None else 1
        return self.contents[first:]

    def outline_tree(self):
        """Create a list of ``(child, child_tree)`` pairs."""
        return [(child, child.outline_tree()) for child in self.subheadings]

    def dump_outline(self, depth=None, indent=' '):
        """Print representation of node's outline subtree.

        Parameters
        ----------
        depth : int
            Maximum depth to print. None prints the whole subtree.
        indent : str
            String to indent with.
        """
        self._dump_outline(None, 0, depth, indent)

    def _dump_outline(self, n, depth, maxdepth, indent):
        """Print this node's line, then recurse into its subheadings.

        ``n`` is the node's index among its siblings (None for the node the
        dump started from) and ``depth`` its distance from that node.
        """
        print(indent * depth, end='')
        if n is not None:
            print('%d. ' % n, end='')
        print(self._dump_name())
        if maxdepth is None or depth < maxdepth:
            for (i, child) in enumerate(self.subheadings):
                child._dump_outline(i, depth + 1, maxdepth, indent)

    def _dump_name(self):
        """Get the name to show for this node when dumping outline."""
        raise NotImplementedError()
@node_cls('headline')
class OrgHeadlineNode(OrgOutlineNode):
    """Org header element.

    Parameters
    ----------
    type_
        Node type; must resolve to the ``headline`` type.
    title : str
        Plain-text title. When omitted it is derived from the ``title``
        property.
    id : str
        ID for the headline. When omitted the node's ``ref`` is used.

    Remaining positional and keyword arguments are passed on to
    :class:`OrgNode`. The ``title`` (unless given explicitly) and ``level``
    properties must be present.

    Attributes
    ----------
    title : str
        Title of headline as plain text.
    id : str
        Unique ID for TOC tree.
    level : int
        Outline level, taken from the ``level`` property.
    has_todo : bool
        Whether this outline has a TODO keyword.
    priority_chr : str
        Priority character if headline with priority, otherwise None.
    scheduled : OrgTimestamp
        The timestamp in the "scheduled" property of the headline, if present.
    deadline : OrgTimestamp
        The timestamp in the "deadline" property of the headline, if present.
    closed : OrgTimestamp
        The timestamp in the "closed" property of the headline, if present.
    """

    def __init__(self, type_, *args, title=None, id=None, **kw):
        super().__init__(type_, *args, **kw)
        assert self.type.name == 'headline'
        # Default title
        if title is None:
            # Imported here rather than at module level (presumably to avoid
            # a circular import -- confirm before moving it)
            from pyorg.convert.plaintext import to_plaintext
            title = to_plaintext(self['title'], blanks=True)
        self.title = title
        self.id = self.ref if id is None else id
        self.level = self['level']

    @property
    def has_todo(self):
        # A missing property is treated like an explicit None
        return self.properties.get('todo-type') is not None

    @property
    def priority_chr(self):
        # The priority property is a character code; missing means no priority
        priority = self.properties.get('priority')
        return None if priority is None else chr(priority)

    @property
    def deadline(self):
        return self.properties.get('deadline')

    @property
    def scheduled(self):
        return self.properties.get('scheduled')

    @property
    def closed(self):
        return self.properties.get('closed')

    def _dump_name(self):
        return self.title
@node_cls('org-data')
class OrgDataNode(OrgOutlineNode):
    """Root node for an org mode parse tree.

    Doesn't do anything special, aside from being the outline node at level 0.

    Attributes
    ----------
    level : int
        Outline level of the root; always 0.
    """

    # The root has no "level" property to read, so the level is fixed here
    level = 0

    def __init__(self, type_, *args, **kw):
        super().__init__(type_, *args, **kw)
        assert self.type.name == 'org-data'

    def _dump_name(self):
        return 'Root'
@node_cls('timestamp')
class OrgTimestampNode(OrgNode, OrgTimestamp):
    """AST node of type "timestamp" that also exposes the timestamp data.

    After the node itself is set up, the :class:`OrgTimestamp` part is
    populated from the node's ``type``, ``start`` and ``end`` properties.
    ``start`` and ``end`` are passed through ``parse_iso_date`` when they are
    present and non-empty, otherwise they are None.
    """

    def __init__(self, type_, *args, **kwargs):
        OrgNode.__init__(self, type_, *args, **kwargs)
        assert self.type.name == 'timestamp'

        tstype = self['type']
        bounds = []
        for prop_name in ('start', 'end'):
            # Missing and empty values both count as "no date"
            raw = self.properties.get(prop_name)
            bounds.append(parse_iso_date(raw) if raw else None)
        begin, finish = bounds

        OrgTimestamp.__init__(self, tstype, start=begin, end=finish)
@node_cls('table')
class OrgTableNode(OrgNode):
    """An org node with type "table".

    Attributes
    ----------
    rows : list of OrgNode
        List of standard rows.
    nrows : int
        Number of (non-rule) rows in table. This includes the header.
    ncols: int
        Number of columns in table, measured on the first standard row
        (0 if the table has no standard rows).
    """

    def blocks(self):
        """Standard rows divided into "blocks", which were separated by rule rows.

        Each row is represented by its list of contents (cells). A block is
        started at the beginning of the table and after every rule row, so
        blocks may be empty.

        Returns
        -------
        list of list of list
            Blocks, each a list of rows, each a list of that row's contents.

        Raises
        ------
        ValueError
            If a row's ``type`` property is neither ``rule`` nor ``standard``.
        """
        current_block = []
        blocks = [current_block]
        for row in self.contents:
            assert row.type.name == 'table-row'
            row_type = row['type']
            if row_type == 'rule':
                # New block
                current_block = []
                blocks.append(current_block)
            elif row_type == 'standard':
                current_block.append(row.contents)
            else:
                raise ValueError('Unexpected table row type: %r' % (row_type,))
        return blocks

    @property
    def rows(self):
        return [row for row in self.contents if row['type'] == 'standard']

    @property
    def nrows(self):
        return sum(row['type'] == 'standard' for row in self.contents)

    @property
    def ncols(self):
        # Rule rows have no cells, so measure the first standard row rather
        # than whichever row happens to come first.
        for row in self.contents:
            if row['type'] == 'standard':
                return len(row)
        return 0

    def cells(self):
        """Contents of every standard row, as a list of lists.

        Returns
        -------
        list of list
            One new list per standard row holding that row's contents.
        """
        return [list(row.contents) for row in self.rows]
def get_node_type(obj, name=False):
    """Look up the node type of an AST node.

    Parameters
    ----------
    obj
        Any object.
    name : bool
        Return the type's name instead of the type object itself.

    Returns
    -------
    .OrgNodeType or str or None
        None when ``obj`` is not an :class:`.OrgNode`.
    """
    if not isinstance(obj, OrgNode):
        return None
    node_type = obj.type
    if name:
        return node_type.name
    return node_type
def as_node_type(t):
    """Coerce a node type or node type name into a node type object.

    Parameters
    ----------
    t : .OrgNodeType or str
        Node type, or a name to look up in :data:`.ORG_NODE_TYPES`.

    Returns
    -------
    .OrgNodeType

    Raises
    ------
    KeyError : if ``t`` is a string that names no known node type.
    TypeError : if ``t`` is neither a string nor a node type.
    """
    # Already the right kind of object: hand it back untouched
    if isinstance(t, OrgNodeType):
        return t
    if isinstance(t, str):
        return ORG_NODE_TYPES[t]
    raise TypeError(type(t))
def as_secondary_string(obj):
    """Convert argument to a "secondary string" (list of nodes or strings).

    A single string or node is wrapped in a one-element list; any other
    iterable is materialised into a new list and its items are validated.

    Parameters
    ----------
    obj : .OrgNode or str or list

    Returns
    -------
    list

    Raises
    ------
    TypeError : if ``obj`` is not a str or :class:`.OrgNode` or iterable of these.
    """
    allowed = (str, OrgNode)
    if isinstance(obj, allowed):
        return [obj]

    items = list(obj)
    offenders = [entry for entry in items if not isinstance(entry, allowed)]
    if offenders:
        # Report the type of the first offending item
        raise TypeError('Items must be OrgNode or str, got %r' % type(offenders[0]))
    return items
class OrgDocument:
    """Represents an entire Org mode document.

    Attributes
    ----------
    root : OrgOutlineNode
        The root of the document's Abstract Syntax Tree.
    properties : dict
        Additional file-level properties attached to the document, such as the
        author or date. Values may be strings or secondary strings.
    meta : dict
        A dictionary containing arbitrary application-specific metadata.
    """

    def __init__(self, root, properties=None, meta=None):
        self.root = root
        # Both mappings are copied into fresh dicts; None/empty becomes {}
        self.properties = dict(properties) if properties else {}
        self.meta = dict(meta) if meta else {}

    def _assign_header_ids(self, header, assigned, depth):
        """Give ``header`` a fresh ID, then do the same for nested headings.

        ``assigned`` maps IDs already handed out to their headers and is
        updated in place. ``depth`` is the number of outline levels to
        process, counting ``header`` itself.
        """
        new_id = self._make_header_id(header, assigned)
        header.id = new_id
        assigned[new_id] = header
        if not depth > 1:
            return
        for nested in header.subheadings:
            self._assign_header_ids(nested, assigned, depth - 1)

    def _make_header_id(self, header, assigned=None):
        """Derive an ID from the header's title that is not in ``assigned``.

        Runs of characters other than word characters and hyphens are
        collapsed to a single hyphen and leading/trailing hyphens dropped.
        If the result is taken, ``-2``, ``-3``, ... is appended until it is
        unique.
        """
        taken = [] if assigned is None else assigned
        slug = re.sub(r'[^\w_-]+', '-', header.title).strip('-')
        candidate = slug
        suffix = 2
        while candidate in taken:
            candidate = '%s-%d' % (slug, suffix)
            suffix += 1
        return candidate
class DispatchNodeType(SingleDispatchBase):
    """Generic function which dispatches on the node type of its first argument.
    """

    def get_key(self, node):
        """Return the name of ``node``'s type.

        Presumably used by ``SingleDispatchBase`` as the key for selecting an
        implementation -- confirm against that class.
        """
        node_type = node.type
        return node_type.name
def dispatch_node_type(parent=None):
    """Decorator to create DispatchNodeType instance from default implementation.

    Parameters
    ----------
    parent
        Optional mapping of existing implementations to fall back on. It is
        layered underneath a fresh, empty mapping. NOTE(review): this assumes
        ``parent`` is a mapping and that the registry is written through its
        mapping interface -- confirm against ``SingleDispatchBase``.

    Returns
    -------
    callable
        Decorator that wraps a default implementation in a
        :class:`DispatchNodeType` using the registry built here.
    """
    # ChainMap sends writes to its first mapping only, so the new empty dict
    # must come first; otherwise additions would land in ``parent`` itself.
    registry = {} if parent is None else ChainMap({}, parent)

    def decorator(default):
        return DispatchNodeType(default, registry)

    return decorator