Веб-сайт самохостера Lotigara

summary | refs | log | tree | commit | diff
path: root/source/utility/word_count.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'source/utility/word_count.cpp')
-rw-r--r-- source/utility/word_count.cpp 191
1 files changed, 191 insertions, 0 deletions
diff --git a/source/utility/word_count.cpp b/source/utility/word_count.cpp
new file mode 100644
index 0000000..683a3a9
--- /dev/null
+++ b/source/utility/word_count.cpp
@@ -0,0 +1,191 @@
+#include "StarFile.hpp"
+#include "StarLexicalCast.hpp"
+#include "StarImage.hpp"
+#include "StarRootLoader.hpp"
+#include "StarAssets.hpp"
+#include "StarItemDatabase.hpp"
+#include "StarJson.hpp"
+
+using namespace Star;
+
+// Entry point: loads the asset root, counts space-separated tokens in selected text fields
+// per asset type and prints per-category totals; returns 1 after reporting an exception.
+int main(int argc, char** argv) {
+  try {
+    RootLoader rootLoader({{}, {}, {}, LogLevel::Error, false, {}});
+    rootLoader.setSummary("Calculate a (very approximate) word count of user-facing text in assets");
+
+    RootUPtr root;
+    OptionParser::Options options;
+    tie(root, options) = rootLoader.commandInitOrDie(argc, argv);
+
+    StringMap<int> wordCounts;
+    auto assets = Root::singleton().assets();
+
+    // Adds countFunction(json) for each non-null asset with extension `type` (after applying
+    // the optional filterFunction) to wordCounts[wordCountKey]; key defaults to ".<type> files".
+    auto countWordsInType = [&](String const& type, function<int(Json const&)> countFunction, Maybe<function<bool(String const&)>> filterFunction = {}, Maybe<String> wordCountKey = {}) {
+      auto files = assets->scanExtension(type);
+      if (filterFunction)
+        files.filter(*filterFunction);
+      assets->queueJsons(files);
+      String const countKey = wordCountKey ? *wordCountKey : strf(".%s files", type);
+      for (auto const& path : files) {
+        auto json = assets->json(path);
+        if (json.isNull())
+          continue;
+        wordCounts[countKey] += countFunction(json);
+      }
+    };
+
+    StringList itemFileTypes = {
+      "tech",
+      "item",
+      "liqitem",
+      "matitem",
+      "miningtool",
+      "flashlight",
+      "wiretool",
+      "beamaxe",
+      "tillingtool",
+      "painttool",
+      "harvestingtool",
+      "head",
+      "chest",
+      "legs",
+      "back",
+      "currencyitem",
+      "consumable",
+      "blueprint",
+      "inspectiontool",
+      "instrument",
+      "thrownitem",
+      "unlock",
+      "activeitem",
+      "augment" };
+
+    // NOTE(review): split(" ") on "" presumably yields one token, so absent fields may each add 1 - confirm.
+    for (auto const& itemFileType : itemFileTypes) {
+      countWordsInType(itemFileType, [](Json const& json) {
+        int wordCount = 0;
+        wordCount += json.getString("shortdescription", "").split(" ").count();
+        wordCount += json.getString("description", "").split(" ").count();
+        return wordCount;
+      });
+    }
+
+    countWordsInType("object", [](Json const& json) {
+      int wordCount = 0;
+      wordCount += json.getString("shortdescription", "").split(" ").count();
+      wordCount += json.getString("description", "").split(" ").count();
+      wordCount += json.getString("apexDescription", "").split(" ").count();
+      wordCount += json.getString("avianDescription", "").split(" ").count();
+      wordCount += json.getString("glitchDescription", "").split(" ").count();
+      wordCount += json.getString("floranDescription", "").split(" ").count();
+      wordCount += json.getString("humanDescription", "").split(" ").count();
+      wordCount += json.getString("hylotlDescription", "").split(" ").count();
+      wordCount += json.getString("novakidDescription", "").split(" ").count();
+      return wordCount;
+    });
+
+    countWordsInType("codex", [](Json const& json) {
+      int wordCount = 0;
+      wordCount += json.getString("title", "").split(" ").count();
+      wordCount += json.getString("description", "").split(" ").count();
+      for (auto const& contentPage : json.getArray("contentPages", JsonArray()))
+        wordCount += contentPage.toString().split(" ").count();
+      return wordCount;
+    });
+
+    countWordsInType("monstertype", [](Json const& json) {
+      return json.getString("description", "").split(" ").count();
+    });
+
+    countWordsInType("radiomessages", [](Json const& json) {
+      int wordCount = 0;
+      for (auto const& messageConfigPair : json.iterateObject())
+        wordCount += messageConfigPair.second.getString("text", "").split(" ").count();
+      return wordCount;
+    });
+
+    // Recursively sums the token counts of every string nested in objects and arrays, skipping
+    // strings that begin with "/" (NOTE(review): presumably asset paths - confirm).
+    function<int(Json const& json)> countOnlyStrings;
+    countOnlyStrings = [&](Json const& json) {
+      int wordCount = 0;
+      if (json.isType(Json::Type::Object)) {
+        for (auto const& entry : json.iterateObject())
+          wordCount += countOnlyStrings(entry.second);
+      } else if (json.isType(Json::Type::Array)) {
+        for (auto const& entry : json.iterateArray())
+          wordCount += countOnlyStrings(entry);
+      } else if (json.isType(Json::Type::String) && !json.toString().beginsWith("/")) {
+        wordCount += json.toString().split(" ").count();
+      }
+      return wordCount;
+    };
+
+    function<bool(String const&)> dialogFilter = [](String const& filePath) { return filePath.beginsWith("/dialog/"); };
+    countWordsInType("config", countOnlyStrings, dialogFilter, String("NPC dialog (.config files)"));
+    countWordsInType("npctype", [&](Json const& json) {
+      if (auto scriptConfig = json.get("scriptConfig", Json()))
+        return countOnlyStrings(scriptConfig.get("dialog", Json()));
+      return 0;
+    }, {}, String("NPC dialog (.npctype files)"));
+
+    countWordsInType("questtemplate", [&](Json const& json) {
+      int wordCount = 0;
+      wordCount += json.getString("title", "").split(" ").count();
+      wordCount += json.getString("text", "").split(" ").count();
+      wordCount += json.getString("completionText", "").split(" ").count();
+      if (auto scriptConfig = json.get("scriptConfig", Json()))
+        wordCount += countOnlyStrings(scriptConfig.get("generatedText", Json()));
+      return wordCount;
+    });
+    // Default to an empty object (as codex does for contentPages) so a missing key yields zero.
+    countWordsInType("collection", [&](Json const& json) {
+      int wordCount = 0;
+      for (auto const& entry : json.getObject("collectables", JsonObject()))
+        wordCount += entry.second.getString("description", "").split(" ").count();
+      return wordCount;
+    });
+    // Default to an empty array so cinematics without "panels" contribute nothing.
+    countWordsInType("cinematic", [&](Json const& json) {
+      int wordCount = 0;
+      for (auto const& panel : json.getArray("panels", JsonArray())) {
+        auto panelText = panel.optString("text");
+        // filter on pipes to ignore those long lists of backer names in the credits
+        if (panelText && !panelText->contains("|"))
+          wordCount += panelText->split(" ").count();
+      }
+      return wordCount;
+    });
+    // Both optional lookups carry an empty/null default so a missing key is simply skipped.
+    countWordsInType("aimission", [&](Json const& json) {
+      int wordCount = 0;
+      for (auto const& entry : json.getObject("speciesText", JsonObject())) {
+        wordCount += entry.second.getString("buttonText", "").split(" ").count();
+        wordCount += entry.second.getString("repeatButtonText", "").split(" ").count();
+        if (auto selectSpeech = entry.second.get("selectSpeech", Json()))
+          wordCount += selectSpeech.getString("text", "").split(" ").count();
+      }
+      return wordCount;
+    });
+    // cockpit.config is read directly by its fixed path instead of being scanned by extension.
+    auto cockpitConfig = assets->json("/interface/cockpit/cockpit.config");
+    int cockpitWordCount = countOnlyStrings(cockpitConfig.get("visitableTypeDescription"));
+    cockpitWordCount += countOnlyStrings(cockpitConfig.get("worldTypeDescription"));
+    wordCounts["planet descriptions (cockpit.config)"] = cockpitWordCount;
+
+    int totalWordCount = 0;
+    for (auto const& countPair : wordCounts) {
+      coutf("%d words in %s\n", countPair.second, countPair.first);
+      totalWordCount += countPair.second;
+    }
+    coutf("approximately %s words total\n", totalWordCount);
+    return 0;
+  } catch (std::exception const& e) {
+    cerrf("exception caught: %s\n", outputException(e, true));
+    return 1;
+  }
+}