Source code for ooodev.io.xml.xml

# region Imports
from __future__ import annotations
from typing import Any, cast, TYPE_CHECKING, overload, Tuple, List, Sequence
import uno
from com.sun.star.xml.dom import XNode
from com.sun.star.xml.dom import XNodeList
from ooo.dyn.xml.dom.node_type import NodeType

from ooodev.utils.table_helper import TableHelper
from ooodev.adapter.xml.dom.document_builder_comp import DocumentBuilderComp
from ooodev.adapter.io.pipe_comp import PipeComp
from ooodev.adapter.io.text_input_stream_comp import TextInputStreamComp
from ooodev.adapter.ucb.simple_file_access_comp import SimpleFileAccessComp

import urllib.request
from ooodev.loader import lo as mLo
from ooodev.utils import file_io as mFileIO
from ooodev.utils.type_var import PathOrStr
from ooodev.adapter.xml.dom.node_list_comp import NodeListComp
from ooodev.utils.string.text_stream import TextStream

if TYPE_CHECKING:
    from com.sun.star.xml.dom import XDocument
    from com.sun.star.xml.dom import XNode
# endregion Imports


[docs]class XML: # region Load / Save
[docs] @classmethod def load_doc(cls, fnm: PathOrStr) -> XDocument: """ Gets a document from a file Args: fnm (PathOrStr): XML file to load. Raises: Exception: if unable to open document. Returns: XDocument: XML Document. """ # sourcery skip: raise-specific-error try: pth = mFileIO.FileIO.get_absolute_path(fnm) uri = uno.systemPathToFileUrl(str(pth)) builder = DocumentBuilderComp.from_lo() doc = builder.parse_uri(uri) cls._remove_whitespace(doc.getFirstChild()) doc.normalize() return doc except Exception as e: print(e) raise Exception(f"Opening of document failed: '{fnm}'") from e
[docs] @classmethod def url_2_doc(cls, url: str) -> XDocument: """ Gets a XML Document from remote source. Args: url (str): URL for a remote XML Document Raises: Exception: if unable to open document. Returns: XDocument: XML Document """ # sourcery skip: raise-specific-error try: builder = DocumentBuilderComp.from_lo() with urllib.request.urlopen(url) as url_data: doc = builder.parse(url_data.read().decode()) cls._remove_whitespace(doc.getFirstChild()) doc.normalize() return doc except Exception as e: print(e) raise Exception(f"Opening of document failed: '{url}'") from e
[docs] @classmethod def str_to_doc(cls, xml_str: str) -> XDocument: """ Gets a XML document from xml string. Args: xml_str (str): XML string. Raises: Exception: if unable to create document from xml. Returns: XDocument: XML Document on successful load; Otherwise, None. """ # sourcery skip: raise-specific-error try: builder = DocumentBuilderComp.from_lo() stream = TextStream.get_text_input_stream_from_str(xml_str) doc = builder.parse(stream.component) cls._remove_whitespace(doc.getFirstChild()) doc.normalize() return doc except Exception as e: print(e) raise Exception("Error get xml document from xml string") from e
@classmethod def _remove_whitespace(cls, node: XNode): """ Removes whites from xml node Args: node (node): xml node, or xml document Note: it is necessary .normalize() the document to combine adjacent text nodes. Otherwise, you could end up with a bunch of redundant XML elements with just whitespace. Again, recursion is the only way to visit tree elements since you can’t iterate over the document and its elements with a loop. Finally, this should give you the expected result: """ # https://realpython.com/python-xml-parser/ # e.g. # document = parse("smiley.svg") # cls._remove_whitespace(document) # document.normalize() if node.getNodeType() == NodeType.TEXT_NODE and node.getNodeValue().strip() == "": node.setNodeValue("") for child in NodeListComp(node.getChildNodes()): cls._remove_whitespace(child)
[docs] @staticmethod def get_xml_string(xml_element: Any) -> str: builder = DocumentBuilderComp.from_lo() doc = cast(Any, builder.new_document()) el_new = cast(XNode, doc.importNode(xml_element, True)) doc.appendChild(el_new) pipe = PipeComp.from_lo() txt_stream = TextInputStreamComp.from_lo() txt_stream.set_input_stream(pipe.component) doc.setOutputStream(pipe.component) doc.start() pipe.close_output() return txt_stream.read_string(True)
[docs] @staticmethod def save_doc(doc: XDocument, xml_fnm: PathOrStr) -> None: """ Save doc to xml file. Args: doc (Document): doc to save. xml_fnm (PathOrStr): Output file path. Raises: Exception: If unable to save document """ # sourcery skip: raise-specific-error try: pth = mFileIO.FileIO.get_absolute_path(xml_fnm) file_access = SimpleFileAccessComp.from_lo() stream = file_access.open_file_write(pth.as_uri()) doc.setOutputStream(stream) # type: ignore doc.start() # type: ignore stream.closeOutput() except Exception as e: raise Exception(f"Unable to save document to {xml_fnm}") from e
# endregion Load / Save # region DOM data extraction
[docs] @staticmethod def get_node(tag_name: str, nodes: XNodeList) -> XNode | None: """ Gets the fist tag_name found in nodes. Args: tag_name (str): tag name to find in nodes. nodes (XNodeList): Nodes to search Returns: XNode | None: First found node; Otherwise, None """ name = tag_name.casefold() for node in NodeListComp(nodes): if node.getNodeType() == NodeType.ELEMENT_NODE and node.getNodeName().casefold() == name: return node return None
# region get_node_value() @overload @classmethod def get_node_value(cls, node: XNode) -> str: """ Get the text stored in the node Args: node (XNode): Node to get value of. Returns: str: Node value. """ ... @overload @classmethod def get_node_value(cls, tag_name: str, nodes: XNodeList) -> str: """ Gets first tag_name node in the list and returns it text. Args: tag_name (str): tag_name to search for. nodes (XNodeList): List of nodes to search. Returns: str: Node value if found; Otherwise empty str. """ ...
[docs] @classmethod def get_node_value(cls, *args, **kwargs) -> str: """ Gets first ``tag_name`` node in the list and returns it text. Args: node (XNode): Node to get value of. tag_name (str): ``tag_name`` to search for. nodes (XNodeList): List of nodes to search. Returns: str: Node value if found; Otherwise empty str. """ ordered_keys = (1, 2) kargs_len = len(kwargs) count = len(args) + kargs_len def get_kwargs() -> dict: ka = {} if kargs_len == 0: return ka valid_keys = ("tag_name", "nodes", "node") check = all(key in valid_keys for key in kwargs) if not check: raise TypeError("get_node_value() got an unexpected keyword argument") keys = ("tag_name", "node") for key in keys: if key in kwargs: ka[1] = kwargs[key] break if count == 1: return ka ka[2] = kwargs.get("nodes", None) return ka if count not in (1, 2): raise TypeError("get_node_value() got an invalid number of arguments") kargs = get_kwargs() for i, arg in enumerate(args): kargs[ordered_keys[i]] = arg if count == 1: return cls._get_node_val(kargs[1]) return cls._get_node_val2(kargs[1], kargs[2])
@staticmethod def _get_node_val(node: XNode) -> str: if node is None: return "" if not node.hasChildNodes(): return "" child_nodes = NodeListComp(node.getChildNodes()) if len(child_nodes) == 0: return "" for child in child_nodes: if child.getNodeType() == NodeType.TEXT_NODE: return child.getNodeValue().strip() return "" @classmethod def _get_node_val2(cls, tag_name: str, nodes: XNodeList) -> str: if nodes is None: return "" name = tag_name.casefold() node_list = NodeListComp(nodes) for node in node_list: if node.getNodeType() == NodeType.ELEMENT_NODE and node.getNodeName().casefold() == name: return cls._get_node_val(node) return "" # endregion get_node_value()
[docs] @classmethod def get_node_values(cls, nodes: XNodeList) -> Tuple[str, ...]: """ Gets all the node values Args: nodes (XNodeList): Nodes to get values of. Returns: Tuple[str, ...]: Node Values """ vals = [] node_list = NodeListComp(nodes) for node in node_list: val = cls._get_node_val(node) if val != "": vals.append(val) return tuple(vals) if vals else ()
[docs] @staticmethod def get_node_attr(attr_name: str, node: XNode) -> str: """ Get the named attribute value from node Args: attr_name (str): Attribute Name node (XNode): Node to get attribute of. Returns: str: Attribute value if found; Otherwise empty str. """ if not attr_name: raise ValueError("Attribute name is empty") node_map = node.getAttributes() map_len = node_map.getLength() if map_len == 0: return "" atc = attr_name.casefold() for i in range(map_len): attr = node_map.item(i) if attr.getNodeName().casefold() == atc: return attr.getNodeValue() return ""
[docs] @classmethod def get_all_node_values(cls, row_nodes: XNodeList, col_ids: Sequence[str]) -> List[list] | None: """ Gets all node values. The data from a sequence of <col> becomes one row in the generated 2D array. The first row of the 2D array contains the col ID strings. Args: row_nodes (NodeList): rows col_ids (Sequence[str]): Column ids Returns: List[list] | None: 2D-list of values on success; Otherwise, None Note: col_ids must match the column names: ``col_ids = ("purpose", "amount", "tax", "maturity")`` """ node_list = NodeListComp(row_nodes) num_rows = len(node_list) + 1 num_cols = len(col_ids) if num_cols == 0 or num_rows == 0: return None data = TableHelper.make_2d_array(num_rows=num_rows, num_cols=num_cols) # data = [[1] * num_cols for _ in range(num_rows + 1)] # put column strings in first row of list for col, _ in enumerate(col_ids): data[0][col] = mLo.Lo.capitalize(col_ids[col]) for i, node in enumerate(node_list): # extract all the column strings for ith row col_nodes = NodeListComp(node.getChildNodes()) for col in range(num_cols): data[i + 1][col] = cls.get_node_value(col_ids[col], col_nodes.component) return data
# endregion DOM data extraction