summary refs log tree commit diff stats
path: root/poxml/po2xml.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'poxml/po2xml.cpp')
-rw-r--r-- poxml/po2xml.cpp 261
1 files changed, 261 insertions, 0 deletions
diff --git a/poxml/po2xml.cpp b/poxml/po2xml.cpp
new file mode 100644
index 00000000..9e8bc1a5
--- /dev/null
+++ b/poxml/po2xml.cpp
@@ -0,0 +1,261 @@
+ // #define POXML_DEBUG
+
+#include "parser.h"
+#include <stdlib.h>
+#include <iostream>
+#include <assert.h>
+#include <qregexp.h>
+
+#include <fstream>
+#include "GettextLexer.hpp"
+#include "GettextParser.hpp"
+#include "antlr/AST.hpp"
+#include "antlr/CommonAST.hpp"
+
+using namespace std;
+
+/**
+ * Substitutes the translated text for the original message inside a
+ * fragment of the XML document.
+ *
+ * While @p xml starts with '<' and @p orig does not, the leading tag
+ * (up to and including its '>') and the spaces that follow it are moved
+ * from @p xml into a prefix that is re-attached unchanged in front of
+ * the result.
+ *
+ * @param xml         fragment of the document that contains the message
+ * @param orig        original message text to look for in @p xml
+ * @param translation replacement text; when empty, @p orig is left in place
+ * @return the prefix followed by @p xml with the first occurrence of
+ *         @p orig replaced by @p translation
+ *
+ * Terminates the program with exit status 1 when a leading tag is never
+ * closed or when @p orig does not occur in @p xml.
+ */
+QString translate(QString xml, QString orig, QString translation)
+{
+    QString prefix;
+    while (xml.at(0) == '<' && orig.at(0) != '<') {
+        // a XML tag as prefix
+        int index = xml.find('>');
+        if (index == -1) {
+            // This used to be a bare assert(). With NDEBUG the check
+            // vanished, index became 0 below and the loop never made
+            // progress, so fail explicitly instead.
+            qWarning("unterminated tag in\n%s", xml.latin1());
+            exit(1);
+        }
+        index++;
+        // also swallow the spaces that directly follow the tag
+        while (xml.at(index) == ' ')
+            index++;
+        prefix = prefix + xml.left(index);
+        xml = xml.mid(index);
+    }
+
+    int index = xml.find(orig);
+    if (index == -1) {
+        qWarning("can't find\n%s\nin\n%s", orig.latin1(), xml.latin1());
+        exit(1);
+    }
+    // an empty translation means "keep the original text"
+    if (!translation.isEmpty())
+        xml.replace(index, orig.length(), translation);
+    return prefix + xml;
+}
+
+/**
+ * Entry point: merges the translations of a PO file into an XML document
+ * and writes the translated document to standard output as UTF-8.
+ *
+ * Usage: <program> english-XML translated-PO
+ *
+ * Steps performed:
+ *  1. parse the XML file into a list of messages (parseXML);
+ *  2. parse the PO file and build a msgid -> msgstr map;
+ *  3. re-read the XML file as text and compute the offset of every line;
+ *  4. compute start/end information for every message;
+ *  5. copy the document to the output, replacing each message with its
+ *     translation (see translate());
+ *  6. post-process the output and print it.
+ *
+ * @return 0 on success, 1 when the PO parser throws or the XML file
+ *         cannot be opened. Several error paths call exit(1) directly.
+ */
+int main( int argc, char **argv )
+{
+    if (argc != 3) {
+        qWarning("usage: %s english-XML translated-PO", argv[0]);
+        ::exit(1);
+    }
+
+    MsgList english = parseXML(argv[1]);
+    MsgList translated;
+
+    try {
+        ifstream s(argv[2]);
+        GettextLexer lexer(s);
+        GettextParser parser(lexer);
+        translated = parser.file();
+
+    } catch(exception& e) {
+        cerr << "exception: " << e.what() << endl;
+        return 1;
+    }
+
+    // Build the lookup table msgid -> msgstr. Entries whose comment
+    // contains "fuzzy" are inserted with an empty msgstr, which later
+    // makes translate() keep the original text.
+    QMap<QString, QString> translations;
+    for (MsgList::ConstIterator it = translated.begin();
+         it != translated.end(); ++it)
+    {
+        QString msgstr;
+        QString msgid = escapePO((*it).msgid);
+        if ((*it).comment.find("fuzzy") < 0)
+            msgstr = escapePO((*it).msgstr);
+
+#ifdef POXML_DEBUG
+        qDebug("inserting translations '%s' -> '%s'", msgid.latin1(),msgstr.latin1());
+#endif
+        translations.insert(msgid, msgstr);
+    }
+
+    QFile xml(argv[1]);
+    // The result of open() used to be ignored; an unreadable file would
+    // then be processed as empty text.
+    if (!xml.open(IO_ReadOnly)) {
+        qWarning("can't open %s", argv[1]);
+        return 1;
+    }
+    QTextStream ds(&xml);
+    ds.setEncoding(QTextStream::UnicodeUTF8);
+    QString xml_text = ds.read();
+    xml.close();
+    QString output;
+    QTextStream ts(&output, IO_WriteOnly);
+    StructureParser::cleanupTags(xml_text);
+
+    // line_offsets[n] is the offset in xml_text at which line n starts
+    // (n counted from 0; the BlockInfo line numbers are used with "- 1"
+    // below).
+    QValueList<int> line_offsets;
+    line_offsets.append(0);
+    int index = 0;
+    while (true) {
+        index = xml_text.find('\n', index) + 1;
+        if (index <= 0)
+            break;
+        line_offsets.append(index);
+    }
+
+    int old_start_line = -1, old_start_col = -1;
+    QString old_text;
+    MsgList::Iterator old_it = english.end();
+
+    // First pass: record the start offset of every message. When a
+    // message starts at exactly the same line/column as its predecessor,
+    // the predecessor's end is set to this message's offset and this
+    // message's end to its end position; otherwise offset and end are
+    // reset to 0.
+    for (MsgList::Iterator it = english.begin();
+         it != english.end(); ++it)
+    {
+        BlockInfo bi = (*it).lines.first();
+        int start_pos = line_offsets[bi.start_line - 1] + bi.start_col;
+        if (!bi.end_line)
+            continue;
+        int end_pos = line_offsets[bi.end_line - 1] + bi.end_col - 1;
+
+        (*it).start = start_pos;
+        if (old_start_line == bi.start_line &&
+            old_start_col == bi.start_col)
+        {
+            (*old_it).end = bi.offset;
+            (*it).end = end_pos;
+        } else {
+            (*it).lines.first().offset = 0;
+            (*it).end = 0;
+        }
+
+        old_start_line = bi.start_line;
+        old_start_col = bi.start_col;
+        old_it = it;
+    }
+
+    // Offset in xml_text up to which the document has been written.
+    int old_pos = 0;
+
+    // Second pass: emit the document, replacing every message.
+    for (MsgList::Iterator it = english.begin();
+         it != english.end(); ++it)
+    {
+        BlockInfo bi = (*it).lines.first();
+        int start_pos = line_offsets[bi.start_line - 1] + bi.start_col;
+        if (!bi.end_line)
+            continue;
+        int end_pos = line_offsets[bi.end_line - 1] + bi.end_col - 1;
+
+        QString xml = xml_text.mid(start_pos, end_pos - start_pos);
+
+        // Replace every XML comment in the fragment by a single space.
+        while (true) {
+            int index = xml.find("<!--");
+            if (index == -1)
+                break;
+            // The terminator may start as early as index + 2, exactly as
+            // the previous character-by-character scan accepted it. That
+            // scan had no upper bound and never ended on an unterminated
+            // comment.
+            int close = xml.find("-->", index + 2);
+            if (close == -1) {
+                qWarning("unterminated comment in\n%s", xml.latin1());
+                exit(1);
+            }
+            int end_index = close + 2;
+            xml.replace(index, end_index + 1 - index, " ");
+        }
+        StructureParser::descape(xml);
+
+        QString descaped = StructureParser::descapeLiterals((*it).msgid);
+        if (translations.contains(descaped))
+            descaped = translations[descaped];
+
+#ifdef POXML_DEBUG
+        // assert(!descaped.isEmpty());
+#endif
+
+        if ((*it).msgid.at(0) == '<' && StructureParser::isClosure((*it).msgid)) {
+            // if the id starts with a tag, then we remembered the
+            // correct line information and need to strip the target
+            // now, so it fits
+            const int id_length = (int)(*it).msgid.length();
+            int index = 0;
+            // bounded so that a msgid without '>' cannot loop forever
+            while (index < id_length && (*it).msgid.at(index) != '>')
+                index++;
+            index++;
+            while ((*it).msgid.at(index) == ' ')
+                index++;
+            QString omsgid = (*it).msgid;
+            (*it).msgid = (*it).msgid.mid(index);
+
+            // drop the closing tag: cut at the last '<'
+            index = (*it).msgid.length() - 1;
+            while (index >= 0 && (*it).msgid.at(index) != '<')
+                index--;
+
+            (*it).msgid = (*it).msgid.left(index);
+
+            if (!descaped.isEmpty()) {
+                if (descaped.at(0) != '<') {
+                    qWarning("the translation of '%s' doesn't start with a tag.", omsgid.latin1());
+                    exit(1);
+                }
+                // strip the same wrapping tag from the translation
+                index = 0;
+                while (index < (int)descaped.length() && descaped.at(index) != '>')
+                    index++;
+                index++;
+                while (descaped.at(index) == ' ')
+                    index++;
+                descaped = descaped.mid(index);
+
+                index = descaped.length() - 1;
+                while (index >= 0 && descaped.at(index) != '<')
+                    index--;
+
+                descaped = descaped.left(index);
+            }
+        }
+
+#ifdef POXML_DEBUG
+        qDebug("english \"%s\" ORIG \"%s\" %d(%d-%d) %d(%d-%d) %d %d TRANS \"%s\" %d '%s'", xml.latin1(), (*it).msgid.latin1(),
+               start_pos, bi.start_line, bi.start_col,
+               end_pos, bi.end_line, bi.end_col,
+               (*it).lines.first().offset,
+               (*it).end,
+               translations[(*it).msgid].latin1(), (*it).end,
+               descaped.latin1()
+            );
+#endif
+
+        if ((*it).end) {
+            // The message shares its block with a neighbour (end was set
+            // in the first pass): only the slice [offset, end) of the
+            // fragment is translated here.
+            if (!(*it).lines.first().offset && end_pos != old_pos) {
+                assert(start_pos >= old_pos);
+                ts << xml_text.mid(old_pos, start_pos - old_pos);
+            }
+            assert((*it).end >= bi.offset);
+            ts << translate(xml.mid(bi.offset, (*it).end - bi.offset),
+                            (*it).msgid, descaped);
+            old_pos = end_pos;
+        } else {
+            // Copy the untouched text between the previous message and
+            // this one, then the translated fragment.
+            if (start_pos != old_pos) {
+                if (start_pos < old_pos) {
+                    qDebug("so far: '%s'", output.latin1());
+                }
+                assert(start_pos > old_pos);
+                ts << xml_text.mid(old_pos, start_pos - old_pos);
+            }
+            old_pos = end_pos;
+            ts << translate(xml,
+                            (*it).msgid, descaped);
+        }
+    }
+
+    // remainder of the document after the last message
+    ts << xml_text.mid(old_pos);
+
+    output.replace(QRegExp("<trans_comment\\s*>"), "");
+    output.replace(QRegExp("</trans_comment\\s*>"), "");
+
+    StructureParser::removeEmptyTags(output);
+
+    // Move every '>' that is not followed by a newline onto a line of
+    // its own, except when it is preceded by '/', '-', ']' or '?'.
+    index = 0;
+    while (true) {
+        index = output.find(QRegExp(">[^\n]"), index );
+        if ( index == -1 )
+            break;
+        if ( output.at( index - 1 ) == '/' || output.at( index - 1 ) == '-' ||
+             output.at( index - 1 ) == ']' || output.at( index - 1 ) == '?' )
+            index = index + 1;
+        else {
+            output.replace( index, 1, "\n>" );
+            index = index + 2;
+        }
+    }
+    output = StructureParser::descapeLiterals(output);
+
+    cout << output.utf8().data();
+    return 0;
+}