#
# SPDX-FileCopyrightText: <text>Copyright 2025 Arm Limited and/or its
# affiliates <open-source-office@arm.com></text>
#
# SPDX-License-Identifier: MIT
"""
Module to remove all extra and unnecessary characters from log files.
"""
import re
from typing import TextIO
# Matches ESC-introduced sequences: ESC 7 / ESC 8, two-byte C1 escapes,
# CSI sequences and OSC sequences.
ANSI_ESCAPE = re.compile(
    r"""
    \x1B                          # every sequence starts with ESC
    (?:
        # OSC must be tried first: the two-byte class below also contains
        # "]" (its range runs from backslash to underscore), so listing it
        # earlier would consume only "ESC ]" and leave the OSC payload
        # behind in the text.
        \] .*? (?:\x07|\x1B\\)    # OSC: ESC ] payload, ended by BEL or ESC + backslash
      | [78]                      # ESC 7 / ESC 8
      | [@-Z\\-_]                 # remaining two-byte escapes (also unterminated ESC ])
      | \[ [0-?]* [ -/]* [@-~]    # CSI: parameter bytes, intermediate bytes, final byte
    )
    """,
    # VERBOSE allows the layout/comments above; DOTALL lets an OSC payload
    # span line breaks.
    re.VERBOSE | re.DOTALL,
)
# Matches orphaned CSI fragments (e.g. "[0m" or "[1;32m]") whose ESC byte
# is missing.
ORPHAN_CSI = re.compile(
    # "[" + one or more numeric parameters separated by ";" + a single
    # ASCII letter + an optional closing "]".
    r"\[[0-9]+(?:;[0-9]+)*[A-Za-z]\]?"
)
# Matches C0 control characters and DEL, except tab and newline.
CONTROL_CHARS = re.compile(
    # The two ranges skip only \x09 (tab) and \x0A (newline); carriage
    # return (\x0D) and ESC (\x1B) are inside them, and \x7F is DEL.
    r"[\x00-\x08\x0B-\x1F\x7F]"
)
# Combined pattern so a single substitution pass removes escape sequences,
# orphan CSI fragments and stray control characters. Alternatives are tried
# in this order at each position.
_CLEANER = re.compile(
    "|".join(
        (
            f"(?:{ANSI_ESCAPE.pattern})",
            f"(?:{ORPHAN_CSI.pattern})",
            CONTROL_CHARS.pattern,
        )
    ),
    # ANSI_ESCAPE's pattern text is written in verbose layout, so the
    # combined pattern has to be compiled with the same flags.
    re.VERBOSE | re.DOTALL,
)
def strip_ansi_and_controls(text: str) -> str:
    """
    Return `text` with terminal formatting noise removed.

    Every match of the combined cleaner pattern (ANSI escape sequences,
    orphan CSI fragments and control characters) is deleted, then any
    carriage returns still present are dropped.

    :param text: Raw string to clean.
    :returns: Cleaned string without ANSI/control sequences.
    """
    return _CLEANER.sub("", text).replace("\r", "")
class AnsiStrippingStream:
    """
    Write-through wrapper that sanitizes text before it reaches a stream.

    Intended to wrap a file handle used for logging so that everything
    written through it has ANSI/control sequences stripped automatically.
    """

    def __init__(self, underlying: TextIO):
        """
        Store the stream that will receive the cleaned output.

        :param underlying: The original stream to write cleaned output to.
        :type underlying: TextIO
        """
        self._stream = underlying

    def write(self, data: str) -> int:
        """
        Sanitize `data`, write it to the underlying stream and flush.

        :param data: Raw string data to sanitize and write.
        :returns: The value returned by the underlying stream's ``write``
            for the cleaned text, which may be shorter than `data`.
        """
        sink = self._stream
        count = sink.write(strip_ansi_and_controls(data))
        # Flush after every write so the output reaches the target
        # immediately.
        sink.flush()
        return count

    def flush(self) -> None:
        """
        Flush the underlying stream.
        """
        self._stream.flush()