summaryrefslogtreecommitdiff
path: root/doc/translations/extract.py
diff options
context:
space:
mode:
authorRémi Verschelde <rverschelde@gmail.com>2020-03-20 09:03:31 +0100
committerGitHub <noreply@github.com>2020-03-20 09:03:31 +0100
commit63f77efdc6a1e178d356f33509cebb33b0e2c8e2 (patch)
tree5358a07c54e44103a45083ac2e0339747defd1fc /doc/translations/extract.py
parentbec9fe2c2af1027a53ec52479527f974e7382090 (diff)
parente817792c0ad219582a28438452d2123d0c302108 (diff)
Merge pull request #37114 from ThakeeNathees/translation-catalog-maker
Add script to generate translation catalog for the class reference
Diffstat (limited to 'doc/translations/extract.py')
-rw-r--r--doc/translations/extract.py239
1 files changed, 239 insertions, 0 deletions
diff --git a/doc/translations/extract.py b/doc/translations/extract.py
new file mode 100644
index 0000000000..9d7c073b67
--- /dev/null
+++ b/doc/translations/extract.py
@@ -0,0 +1,239 @@
+#!/usr/bin/env python3
+
+import argparse
+import os
+import re
+import shutil
+from collections import OrderedDict
+
## Names of the XML tags whose text content is extracted into the catalog.
EXTRACT_TAGS = [
    "description",
    "brief_description",
    "member",
    "constant",
    "theme_item",
    "link",
]

## Preamble written verbatim at the top of the generated .pot file.
## The doubled backslashes below put a literal "\n" escape sequence (not a
## newline character) inside each quoted header line of the output.
HEADER = '''\
# LANGUAGE translation of the Godot Engine class reference
# Copyright (c) 2007-2020 Juan Linietsky, Ariel Manzur.
# Copyright (c) 2014-2020 Godot Engine contributors (cf. AUTHORS.md).
# This file is distributed under the same license as the Godot source code.
#
# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: Godot Engine class reference\\n"
"Content-Type: text/plain; charset=UTF-8\\n"
"Content-Transfer-Encoding: 8-bit\\n"

'''
+
+## <xml-line-number-hack from="https://stackoverflow.com/a/36430270/10846399">
+import sys
+sys.modules['_elementtree'] = None
+import xml.etree.ElementTree as ET
+
## Override the parser to record where each element starts and ends.
class LineNumberingParser(ET.XMLParser):
    """XMLParser that copies expat position data onto the elements it builds.

    Every element returned by the parent's ``_start``/``_end`` hooks gets
    ``_start_*`` / ``_end_*`` attributes holding the line number, column
    number and byte index reported by the underlying parser at that moment.

    This assumes the default XML parser, which is expat, and depends on the
    ``_start``/``_end`` hooks of the parent class; the module disables the C
    accelerator (``sys.modules['_elementtree'] = None``) before importing
    ElementTree for that reason.
    """

    def _start(self, *args, **kwargs):
        ## Zero-argument super() is bound to this class, not to type(self).
        ## The previous super(self.__class__, self) form recursed forever
        ## as soon as this parser was subclassed.
        element = super()._start(*args, **kwargs)
        ## Copy the parser's current position into the output element.
        element._start_line_number = self.parser.CurrentLineNumber
        element._start_column_number = self.parser.CurrentColumnNumber
        element._start_byte_index = self.parser.CurrentByteIndex
        return element

    def _end(self, *args, **kwargs):
        element = super()._end(*args, **kwargs)
        element._end_line_number = self.parser.CurrentLineNumber
        element._end_column_number = self.parser.CurrentColumnNumber
        element._end_byte_index = self.parser.CurrentByteIndex
        return element
+## </xml-line-number-hack>
+
class Desc:
    """A single extracted description together with where it came from."""

    def __init__(self, line_no, msg, desc_list=None):
        ## desc_list : the DescList it belongs to (None when not attached)
        ## msg       : the description string
        ## line_no   : the line number where the desc is
        self.desc_list = desc_list
        self.msg = msg
        self.line_no = line_no
+
class DescList:
    """The descriptions gathered from one XML document."""

    def __init__(self, doc, path):
        ## list : list of Desc objects for this document, filled in later
        ## path : file path of the xml document
        ## doc  : root xml element of the document
        self.list = []
        self.path = path
        self.doc = doc
+
def print_error(error):
    """Print ``error`` on standard output, prefixed with ``ERROR: ``."""
    message = "ERROR: {}".format(error)
    print(message)
+
+## build classes with xml elements recursively
+def _collect_classes_dir(path, classes):
+ if not os.path.isdir(path):
+ print_error("Invalid directory path: {}".format(path))
+ exit(1)
+ for _dir in map(lambda dir : os.path.join(path, dir), os.listdir(path)):
+ if os.path.isdir(_dir):
+ _collect_classes_dir(_dir, classes)
+ elif os.path.isfile(_dir):
+ if not _dir.endswith(".xml"):
+ #print("Got non-.xml file '{}', skipping.".format(path))
+ continue
+ _collect_classes_file(_dir, classes)
+
## Open a file, parse its XML and add the result to classes.
def _collect_classes_file(path, classes):
    """Parse the XML file at ``path`` and register it in ``classes``.

    path    : path of a ``.xml`` file
    classes : mapping of class name -> DescList, updated in place

    The document is registered under the ``name`` attribute of its root
    element. A root without a ``name`` attribute is reported and skipped.
    Exits with status 1 when the path is not an ``.xml`` file, when the file
    cannot be parsed, or when the class name was already registered.
    """
    if not os.path.isfile(path) or not path.endswith(".xml"):
        print_error("Invalid xml file path: {}".format(path))
        ## sys.exit() rather than the site-provided exit(), which is meant
        ## for interactive use and is not guaranteed to exist.
        sys.exit(1)
    print('Collecting file: {}'.format(os.path.basename(path)))

    try:
        ## The custom parser records line numbers on every element.
        tree = ET.parse(path, parser=LineNumberingParser())
    except ET.ParseError as e:
        print_error("Parse error reading file '{}': {}".format(path, e))
        sys.exit(1)

    doc = tree.getroot()

    if 'name' not in doc.attrib:
        print_error('Unknown XML file {}, skipping'.format(path))
        return

    if 'version' not in doc.attrib:
        ## Reported only: the document is still registered below.
        print_error("Version missing from 'doc', file: {}".format(path))

    name = doc.attrib["name"]
    if name in classes:
        print_error("Duplicate class {} at path {}".format(name, path))
        sys.exit(1)
    classes[name] = DescList(doc, path)
+
+
+## regions are list of tuples with size 3 (start_index, end_index, indent)
+## indication in string where the codeblock starts, ends, and it's indent
+## if i inside the region returns the indent, else returns -1
+def _get_xml_indent(i, regions):
+ for region in regions:
+ if region[0] < i < region[1] :
+ return region[2]
+ return -1
+
+## find and build all regions of codeblock which we need later
+def _make_codeblock_regions(desc, path=''):
+ code_block_end = False
+ code_block_index = 0
+ code_block_regions = []
+ while not code_block_end:
+ code_block_index = desc.find("[codeblock]", code_block_index)
+ if code_block_index < 0: break
+ xml_indent=0
+ while True :
+ ## [codeblock] always have a trailing new line and some tabs
+ ## those tabs are belongs to xml indentations not code indent
+ if desc[code_block_index+len("[codeblock]\n")+xml_indent] == '\t':
+ xml_indent+=1
+ else: break
+ end_index = desc.find("[/codeblock]", code_block_index)
+ if end_index < 0 :
+ print_error('Non terminating codeblock: {}'.format(path))
+ exit(1)
+ code_block_regions.append( (code_block_index, end_index, xml_indent) )
+ code_block_index += 1
+ return code_block_regions
+
+def _strip_and_split_desc(desc, code_block_regions):
+ desc_strip = '' ## a stripped desc msg
+ total_indent = 0 ## code indent = total indent - xml indent
+ for i in range(len(desc)):
+ c = desc[i]
+ if c == '\n' : c = '\\n'
+ if c == '"': c = '\\"'
+ if c == '\\': c = '\\\\' ## <element \> is invalid for msgmerge
+ if c == '\t':
+ xml_indent = _get_xml_indent(i, code_block_regions)
+ if xml_indent >= 0:
+ total_indent += 1
+ if xml_indent < total_indent:
+ c = '\\t'
+ else:
+ continue
+ else:
+ continue
+ desc_strip += c
+ if c == '\\n':
+ total_indent = 0
+ return desc_strip
+
+## make catlog strings from xml elements
+def _make_translation_catalog(classes):
+ unique_msgs = OrderedDict()
+ for class_name in classes:
+ desc_list = classes[class_name]
+ for elem in desc_list.doc.iter():
+ if elem.tag in EXTRACT_TAGS:
+ if not elem.text or len(elem.text) == 0 : continue
+ line_no = elem._start_line_number if elem.text[0]!='\n' else elem._start_line_number+1
+ desc_str = elem.text.strip()
+ code_block_regions = _make_codeblock_regions(desc_str, desc_list.path)
+ desc_msg = _strip_and_split_desc(desc_str, code_block_regions)
+ desc_obj = Desc(line_no, desc_msg, desc_list)
+ desc_list.list.append(desc_obj)
+
+ if desc_msg not in unique_msgs:
+ unique_msgs[desc_msg] = [desc_obj]
+ else:
+ unique_msgs[desc_msg].append(desc_obj)
+ return unique_msgs
+
## Generate the catalog file.
def _generate_translation_catalog_file(unique_msgs, output):
    """Write ``unique_msgs`` to ``output`` as a gettext template.

    unique_msgs : mapping of message -> list of Desc objects sharing it
    output      : path of the file to write

    Each non-empty message gets a ``#:`` reference line listing every
    ``path:line`` it was found at, followed by its ``msgid`` and an empty
    ``msgstr``. On POSIX systems the file is then re-wrapped at 79 columns
    with ``msgmerge``; if that step cannot run or fails, an error is printed
    and the unwrapped file is left in place.
    """
    ## Local import: only the optional wrapping step below needs it.
    import subprocess

    with open(output, 'w', encoding='utf8') as f:
        f.write(HEADER)
        for msg, descs in unique_msgs.items():
            if not msg:
                continue  ## ignore empty messages

            locations = []
            for desc in descs:
                ## Normalize to forward slashes and drop a leading "./".
                path = desc.desc_list.path.replace('\\', '/')
                if path.startswith('./'):
                    path = path[2:]
                locations.append(' {}:{}'.format(path, desc.line_no))
            f.write('#:{}\n'.format(''.join(locations)))

            f.write('msgid "{}"\n'.format(msg))
            f.write('msgstr ""\n\n')

    ## TODO: what if 'nt'?
    if os.name != "posix":
        return

    print("Wrapping template at 79 characters for compatibility with Weblate.")
    wrapped = "{}.wrap".format(output)
    try:
        ## Argument list instead of a shell string, so that paths containing
        ## spaces or shell metacharacters are passed through untouched.
        status = subprocess.call(["msgmerge", "-w79", "-o", wrapped, output, output])
    except OSError as e:
        print_error("Could not run msgmerge, template left unwrapped: {}".format(e))
        return
    if status != 0:
        ## Never replace the fresh template with a failed run's output.
        print_error("msgmerge exited with status {}, template left unwrapped".format(status))
        if os.path.exists(wrapped):
            os.remove(wrapped)
        return
    shutil.move(wrapped, output)
+
def main():
    """Command-line entry point: collect XML files and write the catalog.

    Parses ``--path``/``-p`` (directory to scan, default ``.``) and
    ``--output``/``-o`` (output file, must end in ``.pot``), changes into
    the scan directory, collects every class document below it, sorts the
    classes case-insensitively by name and writes the translation catalog.
    Exits with status 1 when either argument is invalid.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--path", "-p", default=".", help="The directory containing XML files to collect.")
    ## NOTE: the default file name keeps its existing "catlog" spelling so
    ## that callers relying on the default still find the file.
    parser.add_argument("--output", "-o", default="translation_catlog.pot", help="The path to the output file.")
    args = parser.parse_args()

    ## Resolved before the chdir() below changes what a relative path means.
    output = os.path.abspath(args.output)
    if not os.path.isdir(os.path.dirname(output)) or not output.endswith('.pot'):
        print_error("Invalid output path: {}".format(output))
        ## sys.exit() rather than the site-provided exit(), which is meant
        ## for interactive use and is not guaranteed to exist.
        sys.exit(1)
    if not os.path.isdir(args.path):
        print_error("Invalid working directory path: {}".format(args.path))
        sys.exit(1)

    os.chdir(args.path)
    print("Current working dir: {}\n".format(os.getcwd()))

    classes = OrderedDict() ## dictionary of key=class_name, value=DescList objects
    _collect_classes_dir('.', classes)
    classes = OrderedDict(sorted(classes.items(), key=lambda kv: kv[0].lower()))
    unique_msgs = _make_translation_catalog(classes)
    _generate_translation_catalog_file(unique_msgs, output)


if __name__ == '__main__':
    main()