Веб-сайт самохостера Lotigara

summary | refs | log | tree | commit | diff
path: root/source/core/StarJsonParser.hpp
diff options
context:
space:
mode:
Diffstat (limited to 'source/core/StarJsonParser.hpp')
-rw-r--r-- source/core/StarJsonParser.hpp 733
1 files changed, 733 insertions, 0 deletions
diff --git a/source/core/StarJsonParser.hpp b/source/core/StarJsonParser.hpp
new file mode 100644
index 0000000..87a4bd9
--- /dev/null
+++ b/source/core/StarJsonParser.hpp
@@ -0,0 +1,733 @@
+#ifndef STAR_JSON_PARSER_HPP
+#define STAR_JSON_PARSER_HPP
+
+#include <cstddef>
+#include <exception>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "StarUnicode.hpp"
+
+namespace Star {
+
+// Abstract sink for a sequence of JSON tokens. Each member corresponds to one
+// kind of token; implementations decide what to do with it. Text is handed
+// over as a pointer to char32_t characters together with a character count.
+struct JsonStream {
+  virtual ~JsonStream() = default;
+
+  // --- Objects -------------------------------------------------------------
+  // An object is opened, receives one objectKey() per member (followed by the
+  // member's value), and is then closed.
+  virtual void beginObject() = 0;
+  virtual void objectKey(char32_t const* key, size_t length) = 0;
+  virtual void endObject() = 0;
+
+  // --- Arrays --------------------------------------------------------------
+  virtual void beginArray() = 0;
+  virtual void endArray() = 0;
+
+  // --- Scalar values -------------------------------------------------------
+  // Numbers are passed in their textual form; converting them is up to the
+  // implementation.
+  virtual void putString(char32_t const* text, size_t length) = 0;
+  virtual void putDouble(char32_t const* text, size_t length) = 0;
+  virtual void putInteger(char32_t const* text, size_t length) = 0;
+  virtual void putBoolean(bool value) = 0;
+  virtual void putNull() = 0;
+
+  // --- Formatting tokens ---------------------------------------------------
+  // Whitespace runs and the ':' / ',' separators found between values.
+  virtual void putWhitespace(char32_t const* text, size_t length) = 0;
+  virtual void putColon() = 0;
+  virtual void putComma() = 0;
+};
+
+// Streaming parser that reads JSON text from an iterator range and reports
+// every token it recognizes to a JsonStream. Parses an *extension* to the
+// JSON format that also accepts '//' and '/* */' comments wherever whitespace
+// is allowed; comments are forwarded to the stream as part of the whitespace.
+//
+// A character value of 0 is used internally as the end-of-input marker.
+template <typename InputIterator>
+class JsonParser {
+public:
+  JsonParser(JsonStream& stream)
+    : m_char(0), m_current(), m_end(), m_line(0), m_column(0), m_stream(stream) {}
+  virtual ~JsonParser() {}
+
+  // Parses [begin, end) and feeds the tokens to the stream. Parse errors are
+  // not thrown: on error the returned iterator is not equal to end and
+  // error() is non-null. Set fragment to true to accept any JSON value at the
+  // top level rather than just an object or array.
+  //
+  // The parser may be reused; every call starts with a clean error state.
+  InputIterator parse(InputIterator begin, InputIterator end, bool fragment = false) {
+    init(begin, end);
+
+    try {
+      white();
+      if (fragment)
+        value();
+      else
+        top();
+      white();
+    } catch (ParsingException const&) {
+      // The message was already stored by error(); callers detect failure
+      // through error() and the returned iterator.
+    }
+
+    return m_current;
+  }
+
+  // Human readable parsing error, does not include line or column info.
+  // Returns nullptr when the most recent parse() succeeded.
+  char const* error() const {
+    return m_error.empty() ? nullptr : m_error.c_str();
+  }
+
+  // One-based line of the position where parsing stopped.
+  size_t line() const {
+    return m_line + 1;
+  }
+
+  // One-based column of the position where parsing stopped.
+  size_t column() const {
+    return m_column + 1;
+  }
+
+private:
+  using CharArray = std::basic_string<char32_t>;
+
+  // Thrown internally to abort parsing; never leaves parse().
+  class ParsingException {};
+
+  // Top level of a non-fragment document: object or array only.
+  void top() {
+    if (m_char == '{')
+      object();
+    else if (m_char == '[')
+      array();
+    else
+      error("expected JSON object or array at top level");
+  }
+
+  // Dispatches on the first character of a value.
+  void value() {
+    switch (m_char) {
+      case '{':
+        object();
+        break;
+      case '[':
+        array();
+        break;
+      case '"':
+        string();
+        break;
+      case '-':
+        number();
+        break;
+      case 0:
+        error("unexpected end of stream parsing value");
+        break;
+      default:
+        if (isDigit(m_char))
+          number();
+        else
+          word();
+        break;
+    }
+  }
+
+  void object() {
+    if (m_char != '{')
+      error("bad object, should be '{'");
+
+    next();
+    m_stream.beginObject();
+    white();
+
+    if (m_char == '}') {
+      next();
+      m_stream.endObject();
+      return;
+    }
+
+    while (true) {
+      CharArray key = parseString();
+      m_stream.objectKey(key.c_str(), key.length());
+
+      white();
+      if (m_char != ':')
+        error("bad object, should be ':'");
+      next();
+      m_stream.putColon();
+      white();
+
+      value();
+      white();
+
+      if (m_char == '}') {
+        next();
+        m_stream.endObject();
+        return;
+      }
+      if (m_char == ',') {
+        next();
+        m_stream.putComma();
+        white();
+        continue;
+      }
+      if (m_char == 0)
+        error("unexpected end of stream parsing object.");
+      error("bad object, should be '}' or ','");
+    }
+  }
+
+  void array() {
+    if (m_char != '[')
+      error("bad array");
+
+    next();
+    m_stream.beginArray();
+    white();
+
+    if (m_char == ']') {
+      next();
+      m_stream.endArray();
+      return;
+    }
+
+    while (true) {
+      value();
+      white();
+
+      if (m_char == ']') {
+        next();
+        m_stream.endArray();
+        return;
+      }
+      if (m_char == ',') {
+        next();
+        m_stream.putComma();
+        white();
+        continue;
+      }
+      if (m_char == 0)
+        error("unexpected end of stream parsing array.");
+      error("bad array, should be ',' or ']'");
+    }
+  }
+
+  void string() {
+    CharArray s = parseString();
+    m_stream.putString(s.c_str(), s.length());
+  }
+
+  // Collects the text of a number and hands it to the stream unconverted.
+  // A number is reported through putDouble() when it has a fraction OR an
+  // exponent ("1e5" is not integer syntax), otherwise through putInteger().
+  // Exceptions derived from std::exception that the stream throws while
+  // accepting the number are turned into parse errors.
+  void number() {
+    CharArray buffer;
+    bool isDouble = false;
+
+    if (m_char == '-') {
+      buffer += m_char;
+      next();
+    }
+
+    if (m_char == '0') {
+      // A leading zero stands alone; further digits are not consumed here.
+      buffer += m_char;
+      next();
+    } else if (m_char > '0' && m_char <= '9') {
+      takeDigits(buffer);
+    } else {
+      error("bad number, must start with digit");
+    }
+
+    if (m_char == '.') {
+      isDouble = true;
+      buffer += m_char;
+      next();
+      takeDigits(buffer);
+    }
+
+    if (m_char == 'e' || m_char == 'E') {
+      isDouble = true;
+      buffer += m_char;
+      next();
+      if (m_char == '-' || m_char == '+') {
+        buffer += m_char;
+        next();
+      }
+      takeDigits(buffer);
+    }
+
+    try {
+      if (isDouble)
+        m_stream.putDouble(buffer.c_str(), buffer.length());
+      else
+        m_stream.putInteger(buffer.c_str(), buffer.length());
+    } catch (std::exception const& e) {
+      error(std::string(isDouble ? "Bad double: " : "Bad integer: ") + e.what());
+    }
+  }
+
+  // true, false, or null
+  void word() {
+    switch (m_char) {
+      case 't':
+        next();
+        check('r');
+        check('u');
+        check('e');
+        m_stream.putBoolean(true);
+        break;
+      case 'f':
+        next();
+        check('a');
+        check('l');
+        check('s');
+        check('e');
+        m_stream.putBoolean(false);
+        break;
+      case 'n':
+        next();
+        check('u');
+        check('l');
+        check('l');
+        m_stream.putNull();
+        break;
+      default:
+        error("unexpected character parsing word");
+        break;
+    }
+  }
+
+  // Parses a double-quoted string starting at the opening quote and returns
+  // its decoded contents (escape sequences resolved).
+  CharArray parseString() {
+    if (m_char != '"')
+      error("bad string, should be '\"'");
+    next();
+
+    CharArray str;
+
+    while (true) {
+      if (m_char == '"') {
+        next();
+        return str;
+      }
+      if (m_char == 0)
+        error("unexpected end of stream reading string!");
+      if (m_char != '\\') {
+        str += m_char;
+        next();
+        continue;
+      }
+
+      // Escape sequence: step over the backslash and decode what follows.
+      next();
+      if (m_char == 'u') {
+        str += unicodeEscape();
+        continue;
+      }
+
+      switch (m_char) {
+        case '"':
+          str += '"';
+          break;
+        case '\\':
+          str += '\\';
+          break;
+        case '/':
+          str += '/';
+          break;
+        case 'b':
+          str += '\b';
+          break;
+        case 'f':
+          str += '\f';
+          break;
+        case 'n':
+          str += '\n';
+          break;
+        case 'r':
+          str += '\r';
+          break;
+        case 't':
+          str += '\t';
+          break;
+        default:
+          error("bad string escape character");
+          break;
+      }
+      next();
+    }
+  }
+
+  // Decodes a \uXXXX escape, with the current character being the 'u'. When
+  // the first unit is a UTF-16 lead surrogate, a second \uXXXX escape must
+  // follow and both are combined by hexStringToUtf32.
+  //
+  // NOTE(review): conversion failures are assumed to be reported through
+  // exceptions derived from std::exception; those become parse errors so that
+  // parse() keeps its no-throw contract. Confirm against StarUnicode.
+  char32_t unicodeEscape() {
+    next(); // step over the 'u'
+
+    char32_t codepoint = 0;
+    try {
+      std::string hexString = hexQuad();
+      codepoint = hexStringToUtf32(hexString);
+      if (isUtf16LeadSurrogate(codepoint)) {
+        check('\\');
+        check('u');
+        hexString = hexQuad();
+        codepoint = hexStringToUtf32(hexString, codepoint);
+      }
+    } catch (std::exception const& e) {
+      error(std::string("bad unicode escape in string: ") + e.what());
+    }
+    return codepoint;
+  }
+
+  // Consumes exactly four hexadecimal digits and returns them as text. Every
+  // character is validated first, so end of input or a non-hex character is
+  // reported as a parse error instead of being narrowed into the result.
+  std::string hexQuad() {
+    std::string digits;
+    for (int i = 0; i < 4; ++i) {
+      if (m_char == 0)
+        error("unexpected end of stream reading string!");
+      if (!isHexDigit(m_char))
+        error("bad unicode escape in string, expected four hex digits");
+      digits.push_back(static_cast<char>(m_char));
+      next();
+    }
+    return digits;
+  }
+
+  // Appends the run of decimal digits at the current position to buffer.
+  void takeDigits(CharArray& buffer) {
+    while (isDigit(m_char)) {
+      buffer += m_char;
+      next();
+    }
+  }
+
+  // Checks current char then moves on to the next one
+  void check(char32_t c) {
+    if (m_char == 0)
+      error("unexpected end of stream parsing word");
+    if (m_char != c)
+      error("unexpected character in word");
+    next();
+  }
+
+  // Resets all per-parse state, including the error left by a previous run,
+  // and loads the first character.
+  void init(InputIterator begin, InputIterator end) {
+    m_current = begin;
+    m_end = end;
+    m_line = 0;
+    m_column = 0;
+    m_error.clear();
+
+    if (m_current != m_end)
+      m_char = *m_current;
+    else
+      m_char = 0;
+  }
+
+  // Consumes next character, keeping the line / column counters up to date.
+  // Does nothing once the end of input has been reached.
+  void next() {
+    if (m_current == m_end)
+      return;
+
+    if (m_char == '\n') {
+      ++m_line;
+      m_column = 0;
+    } else {
+      ++m_column;
+    }
+    ++m_current;
+
+    if (m_current != m_end)
+      m_char = *m_current;
+    else
+      m_char = 0;
+  }
+
+  // Will skip whitespace and comments between tokens. Everything skipped is
+  // reported to the stream as one putWhitespace() call (none if nothing was
+  // skipped).
+  void white() {
+    CharArray buffer;
+
+    while (m_current != m_end) {
+      if (m_char == '/') {
+        // Always consume '/' found in whitespace, because that is never valid
+        // JSON (other than comments)
+        buffer += m_char;
+        next();
+
+        if (m_current != m_end && m_char == '/') {
+          buffer += m_char;
+          next();
+
+          // Read '//' style comments up until eol/eof.
+          while (m_current != m_end && m_char != '\n') {
+            buffer += m_char;
+            next();
+          }
+        } else if (m_current != m_end && m_char == '*') {
+          buffer += m_char;
+          next();
+
+          // Read '/*' style comments up until '*/'. Running out of input
+          // anywhere before the closing '*/' is an error.
+          bool closed = false;
+          while (m_current != m_end) {
+            bool const wasStar = m_char == '*';
+            buffer += m_char;
+            next();
+            if (wasStar && m_current != m_end && m_char == '/') {
+              buffer += m_char;
+              next();
+              closed = true;
+              break;
+            }
+          }
+          if (!closed)
+            error("/* comment has no matching */");
+        } else {
+          // The only allowed characters following / in whitespace are / and *
+          error("/ character in whitespace is not follwed by '/' or '*', invalid comment");
+        }
+      } else if (isSpace(m_char)) {
+        buffer += m_char;
+        next();
+      } else {
+        break;
+      }
+    }
+
+    if (!buffer.empty())
+      m_stream.putWhitespace(buffer.c_str(), buffer.length());
+  }
+
+  // Records the message and aborts the current parse.
+  [[noreturn]] void error(std::string msg) {
+    m_error = std::move(msg);
+    throw ParsingException();
+  }
+
+  static bool isDigit(char32_t c) {
+    return c >= '0' && c <= '9';
+  }
+
+  static bool isHexDigit(char32_t c) {
+    return isDigit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
+  }
+
+  bool isSpace(char32_t c) {
+    // Only whitespace allowed by JSON
+    return c == 0x20 || // space
+        c == 0x09 || // horizontal tab
+        c == 0x0a || // newline
+        c == 0x0d || // carriage return
+        c == 0xfeff; // BOM or ZWNBSP
+  }
+
+  char32_t m_char;
+  InputIterator m_current;
+  InputIterator m_end;
+  size_t m_line;
+  size_t m_column;
+  std::string m_error;
+  JsonStream& m_stream;
+};
+
+// Write JSON through JsonStream interface. Every token received is written,
+// one character at a time, to the given output iterator.
+//
+// pretty is the number of spaces per indentation level; 0 disables all extra
+// formatting. Object keys and string values are both escaped, and the length
+// passed with a string is authoritative: exactly that many characters are
+// written (an embedded U+0000 is emitted as \u0000 rather than ending the
+// string).
+template <typename OutputIterator>
+class JsonWriter : public JsonStream {
+public:
+  JsonWriter(OutputIterator out, unsigned pretty = 0)
+    : m_out(out), m_pretty(pretty) {}
+
+  void beginObject() override {
+    startValue();
+    pushState(Object);
+    write('{');
+  }
+
+  // Starts a new object member: puts the key on its own indented line when
+  // pretty printing, then writes the quoted, escaped key.
+  void objectKey(char32_t const* s, size_t len) override {
+    if (currentState() != ObjectElement)
+      pushState(ObjectElement);
+
+    if (m_pretty)
+      write('\n');
+    indent();
+
+    writeQuoted(s, len);
+    if (m_pretty)
+      write(' ');
+  }
+
+  void endObject() override {
+    popState(Object);
+
+    if (m_pretty)
+      write('\n');
+    indent();
+    write('}');
+  }
+
+  void beginArray() override {
+    startValue();
+    pushState(Array);
+    write('[');
+  }
+
+  void endArray() override {
+    popState(Array);
+    write(']');
+  }
+
+  void putString(char32_t const* s, size_t len) override {
+    startValue();
+    writeQuoted(s, len);
+  }
+
+  // Numbers arrive as text and are copied through unchanged.
+  void putDouble(char32_t const* s, size_t len) override {
+    startValue();
+    writeRaw(s, len);
+  }
+
+  void putInteger(char32_t const* s, size_t len) override {
+    startValue();
+    writeRaw(s, len);
+  }
+
+  void putBoolean(bool b) override {
+    startValue();
+    char const* word = b ? "true" : "false";
+    for (; *word; ++word)
+      write(*word);
+  }
+
+  void putNull() override {
+    startValue();
+    for (char const* word = "null"; *word; ++word)
+      write(*word);
+  }
+
+  void putWhitespace(char32_t const* s, size_t len) override {
+    // If m_pretty is true, extra spurious whitespace will be inserted.
+    writeRaw(s, len);
+  }
+
+  void putColon() override {
+    write(':');
+    if (m_pretty)
+      write(' ');
+  }
+
+  void putComma() override {
+    write(',');
+  }
+
+private:
+  enum State {
+    Top,
+    Object,
+    ObjectElement,
+    Array,
+    ArrayElement
+  };
+
+  // Handles separating array elements if currently adding to an array
+  void startValue() {
+    State const state = currentState();
+    if (state == ArrayElement) {
+      if (m_pretty)
+        write(' ');
+    } else if (state == Array) {
+      pushState(ArrayElement);
+    }
+  }
+
+  // Writes m_pretty spaces for every two entries on the state stack.
+  void indent() {
+    for (unsigned i = 0; i < m_state.size() / 2; ++i) {
+      for (unsigned j = 0; j < m_pretty; ++j)
+        write(' ');
+    }
+  }
+
+  // Push state onto stack.
+  void pushState(State state) {
+    m_state.push_back(state);
+  }
+
+  // Pop state stack down to given state (the matching entry is removed too).
+  void popState(State state) {
+    while (!m_state.empty()) {
+      State const last = m_state.back();
+      m_state.pop_back();
+      if (last == state)
+        return;
+    }
+  }
+
+  State currentState() {
+    return m_state.empty() ? Top : m_state.back();
+  }
+
+  void write(char32_t c) {
+    *m_out = c;
+    ++m_out;
+  }
+
+  // Copies len characters to the output without any escaping.
+  void writeRaw(char32_t const* s, size_t len) {
+    for (size_t i = 0; i < len; ++i)
+      write(s[i]);
+  }
+
+  // Writes len characters of s as a double-quoted JSON string, escaping every
+  // character that may not appear literally. s is never dereferenced when
+  // len is 0.
+  void writeQuoted(char32_t const* s, size_t len) {
+    write('"');
+    for (size_t i = 0; i < len; ++i) {
+      char32_t const c = s[i];
+      if (isPrintable(c)) {
+        write(c);
+        continue;
+      }
+
+      switch (c) {
+        case '"':
+          writeEscape('"');
+          break;
+        case '\\':
+          writeEscape('\\');
+          break;
+        case '\b':
+          writeEscape('b');
+          break;
+        case '\f':
+          writeEscape('f');
+          break;
+        case '\n':
+          writeEscape('n');
+          break;
+        case '\r':
+          writeEscape('r');
+          break;
+        case '\t':
+          writeEscape('t');
+          break;
+        case 0:
+          // Written out directly so an embedded NUL never depends on how the
+          // unicode helper treats code point zero.
+          writeEscape('u');
+          for (int digit = 0; digit < 4; ++digit)
+            write('0');
+          break;
+        default:
+          writeUnicodeEscape(c);
+          break;
+      }
+    }
+    write('"');
+  }
+
+  // Writes a backslash followed by the given escape letter.
+  void writeEscape(char32_t letter) {
+    write('\\');
+    write(letter);
+  }
+
+  // Writes c as one \uXXXX escape, or as two when hexStringFromUtf32 returns
+  // eight hex digits. Any other length is rejected before anything is
+  // written.
+  void writeUnicodeEscape(char32_t c) {
+    auto const hex = hexStringFromUtf32(c);
+    if (hex.size() != 4 && hex.size() != 8)
+      throw UnicodeException("Internal Error: Received invalid unicode hex from hexStringFromUtf32.");
+
+    size_t written = 0;
+    for (auto digit : hex) {
+      if (written % 4 == 0)
+        writeEscape('u');
+      write(digit);
+      ++written;
+    }
+  }
+
+  // Only chars that are unescaped according to JSON spec.
+  bool isPrintable(char32_t c) {
+    return (c >= 0x20 && c <= 0x21) || (c >= 0x23 && c <= 0x5b) || (c >= 0x5d && c <= 0x10ffff);
+  }
+
+  OutputIterator m_out;
+  unsigned m_pretty;
+  std::vector<State> m_state;
+};
+
+}
+
+#endif