terminal code eaters: implement OSC

This is a useful piece of functionality: it makes it possible to eat URL
hyperlinks, for instance, which Lix currently fails to do when dealing
with terminal output.

Change-Id: I77b2de107b2525cad7ea5dea28bfba2cc78b9e6d
This commit is contained in:
jade 2024-12-05 18:12:39 -08:00
parent faf00ad022
commit 5530de4673
4 changed files with 74 additions and 3 deletions

View file

@ -9,6 +9,8 @@ class State(enum.Enum):
ExpectESCSeq = 2
InCSIParams = 3
InCSIIntermediates = 4
InOSCParams = 5
InOSCST = 6
@dataclasses.dataclass
@ -33,10 +35,14 @@ class TerminalCodeEater:
ret.append(c)
case State.ExpectESCSeq:
match c:
# CSI ('[')
case 0x5b:
# CSI ('[')
self._transition(State.InCSIParams)
continue
case 0x5d:
# OSC (']')
self._transition(State.InOSCParams)
continue
# FIXME(jade): whatever this was, we do not know how to
# delimit it, so we just eat the next character and
# keep going. Should we actually eat it?
@ -64,7 +70,34 @@ class TerminalCodeEater:
elif is_intermediate_char(c):
continue
else:
raise ValueError(f'Corrupt escape sequence in intermediates, at {c:x}')
raise ValueError(
f'Corrupt escape sequence in intermediates, at {c:x}'
)
# An OSC is OSC [\x20-\x7e]* ST per ECMA-48
# where OSC is \x1b ] and ST is \x1b \.
case State.InOSCParams:
# first part of ST
if c == 0x1b:
self._transition(State.InOSCST)
continue
# OSC sequences can be ended by BEL on old xterms
elif c == 0x07:
self._transition(State.ExpectESC)
continue
elif c < 0x20 or c == 0x7f:
raise ValueError(f'Corrupt OSC sequence, at {c:x}')
# either way, eat it
continue
case State.InOSCST:
# ST ends by \
if c == 0x5c: # \
self._transition(State.ExpectESC)
elif c < 0x20 or c > 0x7e:
raise ValueError(
f'Corrupt OSC sequence in ST, at {c:x}')
else:
self._transition(State.InOSCParams)
continue
return bytes(ret)

View file

@ -0,0 +1,8 @@
from functional2.testlib.terminal_code_eater import eat_terminal_codes
def test_eats_color():
    """CSI sequences wrapped around plain text are removed from the output."""
    decorated = b'\x1b[7mfoo blah bar\x1b[0m'
    stripped = eat_terminal_codes(decorated)
    assert stripped == b'foo blah bar'
def test_eats_osc():
    """OSC sequences terminated by ST (ESC backslash) are removed from the output."""
    link_open = b'\x1b]8;;http://example.com\x1b\\'
    link_close = b'\x1b]8;;\x1b\\'
    label = b'This is a link'
    assert eat_terminal_codes(link_open + label + link_close) == label

View file

@ -27,7 +27,8 @@ void TerminalCodeEater::feed(char c, std::function<void(char)> on_char)
// Just eat \r, since it is part of clearing a line
case '\r':
return;
default: break;
default:
break;
}
if constexpr (DEBUG_EATER) {
std::cerr << "eater uneat" << MaybeHexEscapedChar{c} << "\n";
@ -40,6 +41,9 @@ void TerminalCodeEater::feed(char c, std::function<void(char)> on_char)
case '[':
transition(State::InCSIParams);
return;
case ']':
transition(State::InOSCParams);
return;
// FIXME(jade): whatever this was, we do not know how to delimit it, so
// we just eat the next character and keep going
default:
@ -79,6 +83,30 @@ void TerminalCodeEater::feed(char c, std::function<void(char)> on_char)
return;
}
break;
// An OSC is OSC [\x20-\x7e]* ST
// where OSC is \x1b ] and ST is \x1b \.
case State::InOSCParams:
    // Inside the body of an OSC: every byte is eaten; only the state changes.
    switch (c) {
    case '\e':
        // first part of ST
        transition(State::InOSCST);
        break;
    case '\a':
        // OSC sequences can be ended by BEL on old xterms
        transition(State::ExpectESC);
        break;
    default:
        // Bytes outside [\x20-\x7e] are not valid OSC content.
        if (c < 0x20 or c > 0x7e) {
            assert(false && "Corrupt OSC sequence");
        }
        break;
    }
    // either way, eat it
    return;
case State::InOSCST:
    // An ESC was seen inside an OSC; ST is completed by a backslash.
    if (c == '\\') {
        transition(State::ExpectESC);
    } else if (c < 0x20 or c > 0x7e) {
        // Same range check as State::InOSCParams. Testing only for 0x7f
        // would let bytes >= 0x80 through wherever plain char is unsigned.
        assert(false && "Corrupt OSC sequence, in ST");
    } else {
        // ESC followed by some other printable byte: this was not ST, so
        // go back to eating OSC content.
        transition(State::InOSCParams);
    }
    // The byte is eaten in every case.
    return;
}
}

View file

@ -20,6 +20,8 @@ private:
ExpectESCSeq,
InCSIParams,
InCSIIntermediates,
InOSCParams,
InOSCST,
};
State state = State::ExpectESC;