diff options
author | Kae <80987908+Novaenia@users.noreply.github.com> | 2023-06-20 14:33:09 +1000 |
---|---|---|
committer | Kae <80987908+Novaenia@users.noreply.github.com> | 2023-06-20 14:33:09 +1000 |
commit | 6352e8e3196f78388b6c771073f9e03eaa612673 (patch) | |
tree | e23772f79a7fbc41bc9108951e9e136857484bf4 /source/core/StarShellParser.cpp | |
parent | 6741a057e5639280d85d0f88ba26f000baa58f61 (diff) |
everything everywhere
all at once
Diffstat (limited to 'source/core/StarShellParser.cpp')
-rw-r--r-- | source/core/StarShellParser.cpp | 208 |
1 files changed, 208 insertions, 0 deletions
diff --git a/source/core/StarShellParser.cpp b/source/core/StarShellParser.cpp new file mode 100644 index 0000000..31a927e --- /dev/null +++ b/source/core/StarShellParser.cpp @@ -0,0 +1,208 @@ +#include "StarShellParser.hpp" + +namespace Star { + +ShellParser::ShellParser() + : m_current(), m_end(), m_quotedType('\0') {} + +auto ShellParser::tokenize(String const& command) -> List<Token> { + List<Token> res; + + init(command); + + while (notDone()) { + res.append(Token{TokenType::Word, word()}); + } + + return res; +} + +StringList ShellParser::tokenizeToStringList(String const& command) { + StringList res; + for (auto token : tokenize(command)) { + if (token.type == TokenType::Word) { + res.append(move(token.token)); + } + } + + return res; +} + +void ShellParser::init(String const& string) { + m_begin = string.begin(); + m_current = m_begin; + m_end = string.end(); + m_quotedType = '\0'; +} + +String ShellParser::word() { + String res; + + while (notDone()) { + auto letter = *current(); + bool escapedLetter = false; + + if (letter == '\\') { + escapedLetter = true; + letter = parseBackslash(); + } + + if (!escapedLetter) { + if (isSpace(letter) && !inQuotedString()) { + next(); + if (res.size()) { + return res; + } + continue; + } + + if (isQuote(letter)) { + if (inQuotedString() && letter == m_quotedType) { + m_quotedType = '\0'; + next(); + continue; + } + + if (!inQuotedString()) { + m_quotedType = letter; + next(); + continue; + } + } + } + + res.append(letter); + next(); + } + + return res; +} + +bool ShellParser::isSpace(Char letter) const { + return String::isSpace(letter); +} + +bool ShellParser::isQuote(Char letter) const { + return letter == '\'' || letter == '"'; +} + +bool ShellParser::inQuotedString() const { + return m_quotedType != '\0'; +} + +auto ShellParser::current() const -> Maybe<Char> { + if (m_current == m_end) { + return {}; + } + + return *m_current; +} + +auto ShellParser::next() -> Maybe<Char> { + if (m_current != m_end) { + ++m_current; + } + + return current(); +} + +auto ShellParser::previous() -> Maybe<Char> { + if (m_current != m_begin) { + --m_current; + } + + return current(); +} + +auto ShellParser::parseBackslash() -> Char { + auto letter = next(); + + if (!letter) { + return '\\'; + } + + switch (*letter) { + case ' ': + return ' '; + case 'n': + return '\n'; + case 't': + return '\t'; + case 'r': + return '\r'; + case 'b': + return '\b'; + case 'v': + return '\v'; + case 'f': + return '\f'; + case 'a': + return '\a'; + case '\'': + return '\''; + case '"': + return '"'; + case '\\': + return '\\'; + case '0': + return '\0'; + case 'u': { + auto letter = parseUnicodeEscapeSequence(); + if (isUtf16LeadSurrogate(letter)) { + auto shouldBeSlash = next(); + if (shouldBeSlash && shouldBeSlash == '\\') { + auto shouldBeU = next(); + if (shouldBeU && shouldBeU == 'u') { + return parseUnicodeEscapeSequence(letter); + } else { + previous(); + } + } + previous(); + return STAR_UTF32_REPLACEMENT_CHAR; + } else { + return letter; + } + } + default: + return *letter; + } +} + +auto ShellParser::parseUnicodeEscapeSequence(Maybe<Char> previousCodepoint) -> Char { + String codepoint; + + auto letter = current(); + + while (!isSpace(*letter) && codepoint.size() < 4) { + auto letter = next(); + if (!letter) { + break; + } + + if (!isxdigit(*letter)) { + return STAR_UTF32_REPLACEMENT_CHAR; + } + + codepoint.append(*letter); + } + + if (!codepoint.size()) { + return 'u'; + } + + if (codepoint.size() != 4) // exactly 4 digits are required by \u + return STAR_UTF32_REPLACEMENT_CHAR; + + try { + return hexStringToUtf32(codepoint.utf8(), previousCodepoint); + } catch (UnicodeException const&) { + return STAR_UTF32_REPLACEMENT_CHAR; + } +} + +bool ShellParser::notDone() const { + return m_current != m_end; +} + +} |