Source code for pyorg.parse

"""(Partially) parse org files."""

import re


def read_file_keywords(file):
	"""Read file-level keywords from an .org file (without using Emacs).

	Lines are scanned from the top until the first headline or the first
	non-keyword, non-comment element. Keywords are lines of the form
	``#+KEY: VALUE`` (whitespace after the colon is optional).

	Limitations: only reads up to the first element in the initial section
	(excluding comments). If the initial section does contain such an element,
	any keywords directly preceding it (not separated with a blank line or a
	comment line) will be considered affiliated keywords of that element and
	ignored. Block delimiters such as ``#+BEGIN_SRC`` count as elements, not
	as comments.

	Will not parse org markup in keyword values.

	All keys are converted to uppercase.

	Keys which appear more than once will have values in a list.

	Parameters
	----------
	file
		String or open file object or stream in text mode. Any iterable of
		lines works; a string is split into lines first.

	Returns
	-------
	dict
		Maps each uppercased key to its value (``str``), or to a list of
		values in order of appearance if the key occurred more than once.
	"""
	if isinstance(file, str):
		file = file.splitlines()

	# Compiled once here rather than looked up on every line.
	keyword_re = re.compile(r'#\+(\w+):(.*)')
	# A comment is "#" followed by whitespace or end of line. Anything else
	# beginning with "#" (e.g. "#+BEGIN_SRC", "#foo") is a real element.
	comment_re = re.compile(r'\s*#(\s|$)')
	headline_re = re.compile(r'\*+\s')

	# The keywords we're sure of
	keywords = {}

	# Current set of keywords on consecutive lines. These might be affiliated
	# keywords for the following element instead of file-level keywords so we
	# can't be sure until we get to the next non-keyword line.
	current = {}

	# Merge current into keywords and clear it.
	def usecurrent():
		for key, values in current.items():
			keywords.setdefault(key, []).extend(values)
		current.clear()

	for line in file:
		line = line.rstrip('\n')

		# Keyword line: hold it as pending until we know what follows.
		match = keyword_re.fullmatch(line)
		if match:
			key, value = match.groups()
			current.setdefault(key.upper(), []).append(value.strip())
			continue

		# A blank line or a comment separates the pending keywords from
		# whatever comes next, so they are file-level keywords.
		if not line.strip() or comment_re.match(line):
			usecurrent()
			continue

		# Headline ends the initial section. Pending keywords are kept (they
		# are merged after the loop).
		if headline_re.match(line):
			break

		# Otherwise, it's some element. Don't use keywords directly preceding it
		# because they may be affiliated keywords not belonging to the file.
		current.clear()
		break

	usecurrent()

	# Collapse single-occurrence keys to a bare string.
	return {
		key: values[0] if len(values) == 1 else values
		for key, values in keywords.items()
	}


def parse_tags(string):
	"""Parse tags from string.

	Surrounding whitespace and leading/trailing colons are ignored, and empty
	segments (e.g. from a doubled colon) are dropped, so ``":a:b:"``,
	``"a:b"`` and ``" :a::b: "`` all give ``['a', 'b']``.

	Parameters
	----------
	string : str
		Tags separated by colons.

	Returns
	-------
	list[str]
		List of tags, in order of appearance. Empty if there are none.
	"""
	# Filtering out empty pieces covers the leading/trailing colons, the
	# empty-string case and accidental "::" in one step.
	return [tag for tag in string.strip().split(':') if tag]