Веб-сайт самохостера Lotigara

summary refs log tree commit diff
path: root/source/core/StarShellParser.cpp
diff options
context:
space:
mode:
author Kae <80987908+Novaenia@users.noreply.github.com> 2023-06-20 14:33:09 +1000
committer Kae <80987908+Novaenia@users.noreply.github.com> 2023-06-20 14:33:09 +1000
commit 6352e8e3196f78388b6c771073f9e03eaa612673 (patch)
tree e23772f79a7fbc41bc9108951e9e136857484bf4 /source/core/StarShellParser.cpp
parent 6741a057e5639280d85d0f88ba26f000baa58f61 (diff)
everything everywhere
all at once
Diffstat (limited to 'source/core/StarShellParser.cpp')
-rw-r--r-- source/core/StarShellParser.cpp 208
1 files changed, 208 insertions, 0 deletions
diff --git a/source/core/StarShellParser.cpp b/source/core/StarShellParser.cpp
new file mode 100644
index 0000000..31a927e
--- /dev/null
+++ b/source/core/StarShellParser.cpp
@@ -0,0 +1,208 @@
+#include "StarShellParser.hpp"
+
+namespace Star {
+
+// Constructs an idle parser: all three iterators are value-initialized and no
+// quote is open. tokenize() calls init() to point them at real input before
+// any parsing happens.
+ShellParser::ShellParser()
+  : m_begin(), m_current(), m_end(), m_quotedType('\0') {}
+
+// Splits `command` into shell-style words and returns one Word token per word.
+// Unquoted whitespace separates words; quoting and backslash escapes are
+// resolved by word().
+auto ShellParser::tokenize(String const& command) -> List<Token> {
+  List<Token> res;
+
+  init(command);
+
+  while (notDone()) {
+    // Drop separator whitespace up front. word() consumes only the single
+    // whitespace character that terminates a word, so without this any extra
+    // trailing whitespace (or whitespace-only input) would come back from
+    // word() as a spurious empty token. No quote can be open here: init()
+    // resets the quote state and word() only returns early on unquoted
+    // whitespace. An escaped space starts with a backslash, so it is not
+    // affected by this check.
+    if (isSpace(*current())) {
+      next();
+      continue;
+    }
+
+    res.append(Token{TokenType::Word, word()});
+  }
+
+  return res;
+}
+
+// Convenience wrapper around tokenize(): returns just the text of every Word
+// token, in order.
+StringList ShellParser::tokenizeToStringList(String const& command) {
+  StringList res;
+  // Bind by reference: the temporary list returned by tokenize() lives for the
+  // whole loop, so each token's text can be moved out directly instead of
+  // first copying the token and then moving from the copy.
+  for (auto& token : tokenize(command)) {
+    if (token.type == TokenType::Word) {
+      res.append(move(token.token));
+    }
+  }
+
+  return res;
+}
+
+// Points the parser at `string` and clears any quote left open by an earlier
+// parse. Only iterators into `string` are stored, so it has to stay alive and
+// unmodified while the parser is reading from it.
+void ShellParser::init(String const& string) {
+  m_quotedType = '\0';
+  m_end = string.end();
+  m_begin = string.begin();
+  m_current = m_begin;
+}
+
+// Extracts the next word starting at the cursor and leaves the cursor just
+// past it.
+//
+// Unquoted whitespace ahead of the word is skipped, and the first unquoted
+// whitespace after at least one character has been collected ends the word.
+// ' and " open a quoted section that lasts until the same quote character
+// appears again; the delimiters are dropped, while the other kind of quote is
+// kept literally inside the section. A backslash escape is decoded by
+// parseBackslash() and its result is always taken literally. Reaching the end
+// of the input returns whatever was collected, which may be empty.
+String ShellParser::word() {
+  String collected;
+
+  while (notDone()) {
+    auto ch = *current();
+
+    if (ch == '\\') {
+      // An escaped character never acts as a separator or quote delimiter.
+      ch = parseBackslash();
+    } else if (isSpace(ch) && !inQuotedString()) {
+      next();
+      if (collected.size()) {
+        return collected;
+      }
+      continue;
+    } else if (isQuote(ch)) {
+      if (!inQuotedString()) {
+        // Opening quote: remember which character has to close it.
+        m_quotedType = ch;
+        next();
+        continue;
+      }
+      if (ch == m_quotedType) {
+        // Matching closing quote.
+        m_quotedType = '\0';
+        next();
+        continue;
+      }
+      // The other quote character inside a quoted section is ordinary text.
+    }
+
+    collected.append(ch);
+    next();
+  }
+
+  return collected;
+}
+
+bool ShellParser::isSpace(Char letter) const { // Whitespace test for `letter`; simply forwards to String::isSpace.
+ return String::isSpace(letter);
+}
+
+// True when `letter` is one of the two quote characters the parser
+// recognises: the single quote or the double quote.
+bool ShellParser::isQuote(Char letter) const {
+  switch (letter) {
+    case '"':
+    case '\'':
+      return true;
+    default:
+      return false;
+  }
+}
+
+// True while a quote is open. m_quotedType holds the opening quote character
+// and is reset to '\0' when no quote is open.
+bool ShellParser::inQuotedString() const {
+  bool const noQuoteOpen = (m_quotedType == '\0');
+  return !noQuoteOpen;
+}
+
+// Returns the character under the cursor, or an empty Maybe once the cursor
+// has reached the end of the input.
+auto ShellParser::current() const -> Maybe<Char> {
+  if (m_current != m_end) {
+    return *m_current;
+  }
+
+  return {};
+}
+
+// Moves the cursor one character forward, unless it is already at the end,
+// and returns the character it then rests on (empty at the end of the input).
+auto ShellParser::next() -> Maybe<Char> {
+  if (m_current == m_end) {
+    // Already at the end: nothing to advance over and nothing to return.
+    return {};
+  }
+
+  ++m_current;
+  return current();
+}
+
+// Moves the cursor one character back, unless it is already at the start of
+// the input, and returns the character it then rests on.
+auto ShellParser::previous() -> Maybe<Char> {
+  bool const atStart = (m_current == m_begin);
+  if (!atStart) {
+    --m_current;
+  }
+
+  return current();
+}
+
+// Decodes the escape sequence that starts at the backslash under the cursor
+// and returns the character it stands for. On return the cursor rests on the
+// last character that belongs to the escape.
+//
+// A backslash at the very end of the input yields a literal backslash. The
+// letters n t r b v f a and the digit 0 map to the matching control
+// character; `\u` starts a four-digit hex escape; any other character is
+// returned unchanged.
+auto ShellParser::parseBackslash() -> Char {
+  auto escaped = next();
+  if (!escaped) {
+    return '\\';
+  }
+
+  switch (*escaped) {
+    // Characters that simply stand for themselves once escaped.
+    case ' ':  return ' ';
+    case '\'': return '\'';
+    case '"':  return '"';
+    case '\\': return '\\';
+
+    // Control characters.
+    case 'n': return '\n';
+    case 't': return '\t';
+    case 'r': return '\r';
+    case 'b': return '\b';
+    case 'v': return '\v';
+    case 'f': return '\f';
+    case 'a': return '\a';
+    case '0': return '\0';
+
+    case 'u': {
+      auto firstUnit = parseUnicodeEscapeSequence();
+      if (!isUtf16LeadSurrogate(firstUnit)) {
+        return firstUnit;
+      }
+
+      // A lead surrogate is only usable when a second `\u` escape follows
+      // immediately; that second escape is parsed with the lead surrogate
+      // passed along.
+      auto maybeSlash = next();
+      if (maybeSlash && maybeSlash == '\\') {
+        auto maybeU = next();
+        if (maybeU && maybeU == 'u') {
+          return parseUnicodeEscapeSequence(firstUnit);
+        }
+        // Not a `\u`: step back over the character after the backslash.
+        previous();
+      }
+      // Step back over the character that followed the first escape so the
+      // caller sees it again, and substitute the unpaired surrogate.
+      previous();
+      return STAR_UTF32_REPLACEMENT_CHAR;
+    }
+
+    default:
+      return *escaped;
+  }
+}
+
+// Reads the four hex digits that follow a `\u` and converts them to a code
+// point. On entry the cursor is on the `u`; after four digits have been read
+// it rests on the last of them.
+//
+// previousCodepoint: a UTF-16 lead surrogate from a directly preceding `\u`
+// escape, if any; it is forwarded to hexStringToUtf32 together with the
+// digits.
+//
+// Returns 'u' when the input ends right after the `u`,
+// STAR_UTF32_REPLACEMENT_CHAR when a non-hex character is met, when fewer than
+// four digits are available, or when hexStringToUtf32 throws
+// UnicodeException, and the converted code point otherwise.
+auto ShellParser::parseUnicodeEscapeSequence(Maybe<Char> previousCodepoint) -> Char {
+  // Range-check by hand instead of calling isxdigit(): the <cctype> functions
+  // have undefined behaviour for values that are not representable as
+  // unsigned char, and Char carries whole code points here (this function
+  // returns STAR_UTF32_REPLACEMENT_CHAR as a Char), so `\u` followed by a
+  // non-Latin character must not be handed to isxdigit().
+  auto isHexDigit = [](Char c) {
+    return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
+  };
+
+  String codepoint;
+
+  while (codepoint.size() < 4) {
+    auto letter = next();
+    if (!letter) {
+      break;
+    }
+
+    // NOTE: the cursor is left on the offending character, so it is skipped
+    // when the caller advances past the escape.
+    if (!isHexDigit(*letter)) {
+      return STAR_UTF32_REPLACEMENT_CHAR;
+    }
+
+    codepoint.append(*letter);
+  }
+
+  if (!codepoint.size()) {
+    return 'u';
+  }
+
+  if (codepoint.size() != 4) { // exactly 4 digits are required by \u
+    return STAR_UTF32_REPLACEMENT_CHAR;
+  }
+
+  try {
+    return hexStringToUtf32(codepoint.utf8(), previousCodepoint);
+  } catch (UnicodeException const&) {
+    return STAR_UTF32_REPLACEMENT_CHAR;
+  }
+}
+
+// True while there is still unread input, i.e. the cursor has not reached the
+// end iterator.
+bool ShellParser::notDone() const {
+  bool const exhausted = (m_current == m_end);
+  return !exhausted;
+}
+
+}