summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRémi Verschelde <remi@verschelde.fr>2022-03-22 12:48:14 +0100
committerGitHub <noreply@github.com>2022-03-22 12:48:14 +0100
commit44267327a9632cc7b55d2bafb4097fce4753941d (patch)
treea405f030f24ead8b53539312a43e5c308613c524
parent2a116f601b839df9e4a00cb6c4242fcc24b4853e (diff)
parent35e8fd4d047748acf98ddb5a7319579853d333ab (diff)
Merge pull request #59276 from bruvzg/mo_trans
-rw-r--r--core/io/translation_loader_po.cpp439
1 files changed, 273 insertions, 166 deletions
diff --git a/core/io/translation_loader_po.cpp b/core/io/translation_loader_po.cpp
index 8d3e58cad1..801bd8b0bf 100644
--- a/core/io/translation_loader_po.cpp
+++ b/core/io/translation_loader_po.cpp
@@ -35,98 +35,160 @@
#include "core/string/translation_po.h"
RES TranslationLoaderPO::load_translation(FileAccess *f, Error *r_error) {
- enum Status {
- STATUS_NONE,
- STATUS_READING_ID,
- STATUS_READING_STRING,
- STATUS_READING_CONTEXT,
- STATUS_READING_PLURAL,
- };
-
- Status status = STATUS_NONE;
-
- String msg_id;
- String msg_str;
- String msg_context;
- Vector<String> msgs_plural;
- String config;
-
if (r_error) {
*r_error = ERR_FILE_CORRUPT;
}
- Ref<TranslationPO> translation = Ref<TranslationPO>(memnew(TranslationPO));
- int line = 1;
- int plural_forms = 0;
- int plural_index = -1;
- bool entered_context = false;
- bool skip_this = false;
- bool skip_next = false;
- bool is_eof = false;
const String path = f->get_path();
+ Ref<TranslationPO> translation = Ref<TranslationPO>(memnew(TranslationPO));
+ String config;
- while (!is_eof) {
- String l = f->get_line().strip_edges();
- is_eof = f->eof_reached();
+ uint32_t magic = f->get_32();
+ if (magic == 0x950412de) {
+ // Load binary MO file.
- // If we reached last line and it's not a content line, break, otherwise let processing that last loop
- if (is_eof && l.is_empty()) {
- if (status == STATUS_READING_ID || status == STATUS_READING_CONTEXT || (status == STATUS_READING_PLURAL && plural_index != plural_forms - 1)) {
- memdelete(f);
- ERR_FAIL_V_MSG(RES(), "Unexpected EOF while reading PO file at: " + path + ":" + itos(line));
- } else {
- break;
- }
+ uint16_t version_maj = f->get_16();
+ uint16_t version_min = f->get_16();
+ if (version_maj > 1) {
+ ERR_FAIL_V_MSG(RES(), vformat("Unsupported MO file %s, version %d.%d.", path, version_maj, version_min));
}
- if (l.begins_with("msgctxt")) {
- if (status != STATUS_READING_STRING && status != STATUS_READING_PLURAL) {
- memdelete(f);
- ERR_FAIL_V_MSG(RES(), "Unexpected 'msgctxt', was expecting 'msgid_plural' or 'msgstr' before 'msgctxt' while parsing: " + path + ":" + itos(line));
+ uint32_t num_strings = f->get_32();
+ uint32_t id_table_offset = f->get_32();
+ uint32_t trans_table_offset = f->get_32();
+
+ // Read string tables.
+ for (uint32_t i = 0; i < num_strings; i++) {
+ String msg_id;
+ String msg_id_plural;
+ String msg_context;
+
+ // Read id strings and context.
+ {
+ Vector<uint8_t> data;
+ f->seek(id_table_offset + i * 8);
+ uint32_t str_start = 0;
+ uint32_t str_len = f->get_32();
+ uint32_t str_offset = f->get_32();
+
+ data.resize(str_len + 1);
+ f->seek(str_offset);
+ f->get_buffer(data.ptrw(), str_len);
+ data.write[str_len] = 0;
+
+ bool is_plural = false;
+ for (uint32_t j = 0; j < str_len + 1; j++) {
+ if (data[j] == 0x04) {
+ msg_context.parse_utf8((const char *)data.ptr(), j);
+ str_start = j + 1;
+ }
+ if (data[j] == 0x00) {
+ if (is_plural) {
+ msg_id_plural.parse_utf8((const char *)(data.ptr() + str_start), j - str_start);
+ } else {
+ msg_id.parse_utf8((const char *)(data.ptr() + str_start), j - str_start);
+ is_plural = true;
+ }
+ str_start = j + 1;
+ }
+ }
}
- // In PO file, "msgctxt" appears before "msgid". If we encounter a "msgctxt", we add what we have read
- // and set "entered_context" to true to prevent adding twice.
- if (!skip_this && !msg_id.is_empty()) {
- if (status == STATUS_READING_STRING) {
- translation->add_message(msg_id, msg_str, msg_context);
- } else if (status == STATUS_READING_PLURAL) {
- if (plural_index != plural_forms - 1) {
- memdelete(f);
- ERR_FAIL_V_MSG(RES(), "Number of 'msgstr[]' doesn't match with number of plural forms: " + path + ":" + itos(line));
+ // Read translated strings.
+ {
+ Vector<uint8_t> data;
+ f->seek(trans_table_offset + i * 8);
+ uint32_t str_start = 0;
+ uint32_t str_len = f->get_32();
+ uint32_t str_offset = f->get_32();
+
+ data.resize(str_len + 1);
+ f->seek(str_offset);
+ f->get_buffer(data.ptrw(), str_len);
+ data.write[str_len] = 0;
+
+ if (msg_id.is_empty()) {
+ config = String::utf8((const char *)data.ptr(), str_len);
+ // Record plural rule.
+ int p_start = config.find("Plural-Forms");
+ if (p_start != -1) {
+ int p_end = config.find("\n", p_start);
+ translation->set_plural_rule(config.substr(p_start, p_end - p_start));
+ }
+ } else {
+ Vector<String> plural_msg;
+ for (uint32_t j = 0; j < str_len + 1; j++) {
+ if (data[j] == 0x00) {
+ if (msg_id_plural.is_empty()) {
+ translation->add_message(msg_id, String::utf8((const char *)(data.ptr() + str_start), j - str_start), msg_context);
+ } else {
+ plural_msg.push_back(String::utf8((const char *)(data.ptr() + str_start), j - str_start));
+ }
+ str_start = j + 1;
+ }
+ }
+ if (!plural_msg.is_empty()) {
+ translation->add_plural_message(msg_id, plural_msg, msg_context);
}
- translation->add_plural_message(msg_id, msgs_plural, msg_context);
}
}
- msg_context = "";
- l = l.substr(7, l.length()).strip_edges();
- status = STATUS_READING_CONTEXT;
- entered_context = true;
}
- if (l.begins_with("msgid_plural")) {
- if (plural_forms == 0) {
- memdelete(f);
- ERR_FAIL_V_MSG(RES(), "PO file uses 'msgid_plural' but 'Plural-Forms' is invalid or missing in header: " + path + ":" + itos(line));
- } else if (status != STATUS_READING_ID) {
- memdelete(f);
- ERR_FAIL_V_MSG(RES(), "Unexpected 'msgid_plural', was expecting 'msgid' before 'msgid_plural' while parsing: " + path + ":" + itos(line));
- }
- // We don't record the message in "msgid_plural" itself as tr_n(), TTRN(), RTRN() interfaces provide the plural string already.
- // We just have to reset variables related to plurals for "msgstr[]" later on.
- l = l.substr(12, l.length()).strip_edges();
- plural_index = -1;
- msgs_plural.clear();
- msgs_plural.resize(plural_forms);
- status = STATUS_READING_PLURAL;
- } else if (l.begins_with("msgid")) {
- if (status == STATUS_READING_ID) {
- memdelete(f);
- ERR_FAIL_V_MSG(RES(), "Unexpected 'msgid', was expecting 'msgstr' while parsing: " + path + ":" + itos(line));
+ memdelete(f);
+ } else {
+ // Try to load as text PO file.
+ f->seek(0);
+
+ enum Status {
+ STATUS_NONE,
+ STATUS_READING_ID,
+ STATUS_READING_STRING,
+ STATUS_READING_CONTEXT,
+ STATUS_READING_PLURAL,
+ };
+
+ Status status = STATUS_NONE;
+
+ String msg_id;
+ String msg_str;
+ String msg_context;
+ Vector<String> msgs_plural;
+
+ if (r_error) {
+ *r_error = ERR_FILE_CORRUPT;
+ }
+
+ int line = 1;
+ int plural_forms = 0;
+ int plural_index = -1;
+ bool entered_context = false;
+ bool skip_this = false;
+ bool skip_next = false;
+ bool is_eof = false;
+
+ while (!is_eof) {
+ String l = f->get_line().strip_edges();
+ is_eof = f->eof_reached();
+
+ // If we reached last line and it's not a content line, break, otherwise let processing that last loop
+ if (is_eof && l.is_empty()) {
+ if (status == STATUS_READING_ID || status == STATUS_READING_CONTEXT || (status == STATUS_READING_PLURAL && plural_index != plural_forms - 1)) {
+ memdelete(f);
+ ERR_FAIL_V_MSG(RES(), "Unexpected EOF while reading PO file at: " + path + ":" + itos(line));
+ } else {
+ break;
+ }
}
- if (!msg_id.is_empty()) {
- if (!skip_this && !entered_context) {
+ if (l.begins_with("msgctxt")) {
+ if (status != STATUS_READING_STRING && status != STATUS_READING_PLURAL) {
+ memdelete(f);
+ ERR_FAIL_V_MSG(RES(), "Unexpected 'msgctxt', was expecting 'msgid_plural' or 'msgstr' before 'msgctxt' while parsing: " + path + ":" + itos(line));
+ }
+
+ // In PO file, "msgctxt" appears before "msgid". If we encounter a "msgctxt", we add what we have read
+ // and set "entered_context" to true to prevent adding twice.
+ if (!skip_this && !msg_id.is_empty()) {
if (status == STATUS_READING_STRING) {
translation->add_message(msg_id, msg_str, msg_context);
} else if (status == STATUS_READING_PLURAL) {
@@ -137,119 +199,163 @@ RES TranslationLoaderPO::load_translation(FileAccess *f, Error *r_error) {
translation->add_plural_message(msg_id, msgs_plural, msg_context);
}
}
- } else if (config.is_empty()) {
- config = msg_str;
- // Record plural rule.
- int p_start = config.find("Plural-Forms");
- if (p_start != -1) {
- int p_end = config.find("\n", p_start);
- translation->set_plural_rule(config.substr(p_start, p_end - p_start));
- plural_forms = translation->get_plural_forms();
+ msg_context = "";
+ l = l.substr(7, l.length()).strip_edges();
+ status = STATUS_READING_CONTEXT;
+ entered_context = true;
+ }
+
+ if (l.begins_with("msgid_plural")) {
+ if (plural_forms == 0) {
+ memdelete(f);
+ ERR_FAIL_V_MSG(RES(), "PO file uses 'msgid_plural' but 'Plural-Forms' is invalid or missing in header: " + path + ":" + itos(line));
+ } else if (status != STATUS_READING_ID) {
+ memdelete(f);
+ ERR_FAIL_V_MSG(RES(), "Unexpected 'msgid_plural', was expecting 'msgid' before 'msgid_plural' while parsing: " + path + ":" + itos(line));
+ }
+ // We don't record the message in "msgid_plural" itself as tr_n(), TTRN(), RTRN() interfaces provide the plural string already.
+ // We just have to reset variables related to plurals for "msgstr[]" later on.
+ l = l.substr(12, l.length()).strip_edges();
+ plural_index = -1;
+ msgs_plural.clear();
+ msgs_plural.resize(plural_forms);
+ status = STATUS_READING_PLURAL;
+ } else if (l.begins_with("msgid")) {
+ if (status == STATUS_READING_ID) {
+ memdelete(f);
+ ERR_FAIL_V_MSG(RES(), "Unexpected 'msgid', was expecting 'msgstr' while parsing: " + path + ":" + itos(line));
}
+
+ if (!msg_id.is_empty()) {
+ if (!skip_this && !entered_context) {
+ if (status == STATUS_READING_STRING) {
+ translation->add_message(msg_id, msg_str, msg_context);
+ } else if (status == STATUS_READING_PLURAL) {
+ if (plural_index != plural_forms - 1) {
+ memdelete(f);
+ ERR_FAIL_V_MSG(RES(), "Number of 'msgstr[]' doesn't match with number of plural forms: " + path + ":" + itos(line));
+ }
+ translation->add_plural_message(msg_id, msgs_plural, msg_context);
+ }
+ }
+ } else if (config.is_empty()) {
+ config = msg_str;
+ // Record plural rule.
+ int p_start = config.find("Plural-Forms");
+ if (p_start != -1) {
+ int p_end = config.find("\n", p_start);
+ translation->set_plural_rule(config.substr(p_start, p_end - p_start));
+ plural_forms = translation->get_plural_forms();
+ }
+ }
+
+ l = l.substr(5, l.length()).strip_edges();
+ status = STATUS_READING_ID;
+ // If we did not encounter msgctxt, we reset context to empty to reset it.
+ if (!entered_context) {
+ msg_context = "";
+ }
+ msg_id = "";
+ msg_str = "";
+ skip_this = skip_next;
+ skip_next = false;
+ entered_context = false;
}
- l = l.substr(5, l.length()).strip_edges();
- status = STATUS_READING_ID;
- // If we did not encounter msgctxt, we reset context to empty to reset it.
- if (!entered_context) {
- msg_context = "";
+ if (l.begins_with("msgstr[")) {
+ if (status != STATUS_READING_PLURAL) {
+ memdelete(f);
+ ERR_FAIL_V_MSG(RES(), "Unexpected 'msgstr[]', was expecting 'msgid_plural' before 'msgstr[]' while parsing: " + path + ":" + itos(line));
+ }
+ plural_index++; // Increment to add to the next slot in vector msgs_plural.
+ l = l.substr(9, l.length()).strip_edges();
+ } else if (l.begins_with("msgstr")) {
+ if (status != STATUS_READING_ID) {
+ memdelete(f);
+ ERR_FAIL_V_MSG(RES(), "Unexpected 'msgstr', was expecting 'msgid' before 'msgstr' while parsing: " + path + ":" + itos(line));
+ }
+
+ l = l.substr(6, l.length()).strip_edges();
+ status = STATUS_READING_STRING;
}
- msg_id = "";
- msg_str = "";
- skip_this = skip_next;
- skip_next = false;
- entered_context = false;
- }
- if (l.begins_with("msgstr[")) {
- if (status != STATUS_READING_PLURAL) {
- memdelete(f);
- ERR_FAIL_V_MSG(RES(), "Unexpected 'msgstr[]', was expecting 'msgid_plural' before 'msgstr[]' while parsing: " + path + ":" + itos(line));
+ if (l.is_empty() || l.begins_with("#")) {
+ if (l.contains("fuzzy")) {
+ skip_next = true;
+ }
+ line++;
+ continue; // Nothing to read or comment.
}
- plural_index++; // Increment to add to the next slot in vector msgs_plural.
- l = l.substr(9, l.length()).strip_edges();
- } else if (l.begins_with("msgstr")) {
- if (status != STATUS_READING_ID) {
+
+ if (!l.begins_with("\"") || status == STATUS_NONE) {
memdelete(f);
- ERR_FAIL_V_MSG(RES(), "Unexpected 'msgstr', was expecting 'msgid' before 'msgstr' while parsing: " + path + ":" + itos(line));
+ ERR_FAIL_V_MSG(RES(), "Invalid line '" + l + "' while parsing: " + path + ":" + itos(line));
}
- l = l.substr(6, l.length()).strip_edges();
- status = STATUS_READING_STRING;
- }
-
- if (l.is_empty() || l.begins_with("#")) {
- if (l.contains("fuzzy")) {
- skip_next = true;
- }
- line++;
- continue; // Nothing to read or comment.
- }
+ l = l.substr(1, l.length());
+ // Find final quote, ignoring escaped ones (\").
+ // The escape_next logic is necessary to properly parse things like \\"
+ // where the backslash is the one being escaped, not the quote.
+ int end_pos = -1;
+ bool escape_next = false;
+ for (int i = 0; i < l.length(); i++) {
+ if (l[i] == '\\' && !escape_next) {
+ escape_next = true;
+ continue;
+ }
- if (!l.begins_with("\"") || status == STATUS_NONE) {
- memdelete(f);
- ERR_FAIL_V_MSG(RES(), "Invalid line '" + l + "' while parsing: " + path + ":" + itos(line));
- }
+ if (l[i] == '"' && !escape_next) {
+ end_pos = i;
+ break;
+ }
- l = l.substr(1, l.length());
- // Find final quote, ignoring escaped ones (\").
- // The escape_next logic is necessary to properly parse things like \\"
- // where the backslash is the one being escaped, not the quote.
- int end_pos = -1;
- bool escape_next = false;
- for (int i = 0; i < l.length(); i++) {
- if (l[i] == '\\' && !escape_next) {
- escape_next = true;
- continue;
+ escape_next = false;
}
- if (l[i] == '"' && !escape_next) {
- end_pos = i;
- break;
+ if (end_pos == -1) {
+ memdelete(f);
+ ERR_FAIL_V_MSG(RES(), "Expected '\"' at end of message while parsing: " + path + ":" + itos(line));
}
- escape_next = false;
- }
+ l = l.substr(0, end_pos);
+ l = l.c_unescape();
- if (end_pos == -1) {
- memdelete(f);
- ERR_FAIL_V_MSG(RES(), "Expected '\"' at end of message while parsing: " + path + ":" + itos(line));
- }
+ if (status == STATUS_READING_ID) {
+ msg_id += l;
+ } else if (status == STATUS_READING_STRING) {
+ msg_str += l;
+ } else if (status == STATUS_READING_CONTEXT) {
+ msg_context += l;
+ } else if (status == STATUS_READING_PLURAL && plural_index >= 0) {
+ if (plural_index >= plural_forms) {
+ memdelete(f);
+ ERR_FAIL_V_MSG(RES(), "Unexpected plural form while parsing: " + path + ":" + itos(line));
+ }
+ msgs_plural.write[plural_index] = msgs_plural[plural_index] + l;
+ }
- l = l.substr(0, end_pos);
- l = l.c_unescape();
-
- if (status == STATUS_READING_ID) {
- msg_id += l;
- } else if (status == STATUS_READING_STRING) {
- msg_str += l;
- } else if (status == STATUS_READING_CONTEXT) {
- msg_context += l;
- } else if (status == STATUS_READING_PLURAL && plural_index >= 0) {
- msgs_plural.write[plural_index] = msgs_plural[plural_index] + l;
+ line++;
}
- line++;
- }
-
- memdelete(f);
+ memdelete(f);
- // Add the last set of data from last iteration.
- if (status == STATUS_READING_STRING) {
- if (!msg_id.is_empty()) {
- if (!skip_this) {
- translation->add_message(msg_id, msg_str, msg_context);
+ // Add the last set of data from last iteration.
+ if (status == STATUS_READING_STRING) {
+ if (!msg_id.is_empty()) {
+ if (!skip_this) {
+ translation->add_message(msg_id, msg_str, msg_context);
+ }
+ } else if (config.is_empty()) {
+ config = msg_str;
}
- } else if (config.is_empty()) {
- config = msg_str;
- }
- } else if (status == STATUS_READING_PLURAL) {
- if (!skip_this && !msg_id.is_empty()) {
- if (plural_index != plural_forms - 1) {
- memdelete(f);
- ERR_FAIL_V_MSG(RES(), "Number of 'msgstr[]' doesn't match with number of plural forms: " + path + ":" + itos(line));
+ } else if (status == STATUS_READING_PLURAL) {
+ if (!skip_this && !msg_id.is_empty()) {
+ if (plural_index != plural_forms - 1) {
+ memdelete(f);
+ ERR_FAIL_V_MSG(RES(), "Number of 'msgstr[]' doesn't match with number of plural forms: " + path + ":" + itos(line));
+ }
+ translation->add_plural_message(msg_id, msgs_plural, msg_context);
}
- translation->add_plural_message(msg_id, msgs_plural, msg_context);
}
}
@@ -290,6 +396,7 @@ RES TranslationLoaderPO::load(const String &p_path, const String &p_original_pat
void TranslationLoaderPO::get_recognized_extensions(List<String> *p_extensions) const {
p_extensions->push_back("po");
+ p_extensions->push_back("mo");
}
bool TranslationLoaderPO::handles_type(const String &p_type) const {
@@ -297,7 +404,7 @@ bool TranslationLoaderPO::handles_type(const String &p_type) const {
}
String TranslationLoaderPO::get_resource_type(const String &p_path) const {
- if (p_path.get_extension().to_lower() == "po") {
+ if (p_path.get_extension().to_lower() == "po" || p_path.get_extension().to_lower() == "mo") {
return "Translation";
}
return "";