| OLD | NEW |
| 1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
| 2 # -*- coding: utf-8 -*- | 2 # -*- coding: utf-8 -*- |
| 3 # | 3 # |
| 4 # Copyright 2002-2007 Zuza Software Foundation | 4 # Copyright 2002-2007 Zuza Software Foundation |
| 5 # | 5 # |
| 6 # This file is part of translate. | 6 # This file is part of translate. |
| 7 # | 7 # |
| 8 # translate is free software; you can redistribute it and/or modify | 8 # translate is free software; you can redistribute it and/or modify |
| 9 # it under the terms of the GNU General Public License as published by | 9 # it under the terms of the GNU General Public License as published by |
| 10 # the Free Software Foundation; either version 2 of the License, or | 10 # the Free Software Foundation; either version 2 of the License, or |
| 11 # (at your option) any later version. | 11 # (at your option) any later version. |
| 12 # | 12 # |
| 13 # translate is distributed in the hope that it will be useful, | 13 # translate is distributed in the hope that it will be useful, |
| 14 # but WITHOUT ANY WARRANTY; without even the implied warranty of | 14 # but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 16 # GNU General Public License for more details. | 16 # GNU General Public License for more details. |
| 17 # | 17 # |
| 18 # You should have received a copy of the GNU General Public License | 18 # You should have received a copy of the GNU General Public License |
| 19 # along with translate; if not, write to the Free Software | 19 # along with translate; if not, write to the Free Software |
| 20 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 20 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
| 21 | 21 |
| 22 """classes that hold units of .po files (pounit) or entire files (pofile) | 22 """classes that hold units of .po files (pounit) or entire files (pofile) |
| 23 gettext-style .po (or .pot) files are used in translations for KDE et al (see kb
abel)""" | 23 gettext-style .po (or .pot) files are used in translations for KDE et al (see kb
abel)""" |
| 24 | 24 |
| 25 from __future__ import generators | 25 from __future__ import generators |
| 26 from translate.misc.multistring import multistring | 26 from translate.misc.multistring import multistring |
| 27 from translate.misc import quote | 27 from translate.misc import quote |
| 28 from translate.misc import textwrap | 28 from translate.misc import textwrap |
| 29 from translate.lang import data | 29 from translate.lang import data |
| 30 from translate.storage import pocommon, base | 30 from translate.storage import pocommon, base |
| 31 import re | 31 import re |
| 32 | 32 |
lsep = "\n#: "
"""Separator for #: entries"""

# general functions for quoting / unquoting po strings

# Two-character escape sequence (as written in a PO string) -> the single
# character it represents.
po_unescape_map = {
    "\\r": "\r",
    "\\t": "\t",
    '\\"': '"',
    '\\n': '\n',
    '\\\\': '\\',
}
# Inverse lookup: raw character -> its escape sequence.
po_escape_map = dict(
    [(value, key) for (key, value) in po_unescape_map.items()]
)
| 40 | 40 |
| 41 def escapeforpo(line): | 41 def escapeforpo(line): |
| 42 """Escapes a line for po format. assumes no \n occurs in the line. | 42 """Escapes a line for po format. assumes no \n occurs in the line. |
| 43 | 43 |
| 44 @param line: unescaped text | 44 @param line: unescaped text |
| 45 """ | 45 """ |
| 46 special_locations = [] | 46 special_locations = [] |
| 47 for special_key in po_escape_map: | 47 for special_key in po_escape_map: |
| 48 special_locations.extend(quote.find_all(line, special_key)) | 48 special_locations.extend(quote.find_all(line, special_key)) |
| 49 special_locations = dict.fromkeys(special_locations).keys() | 49 special_locations = dict.fromkeys(special_locations).keys() |
| 50 special_locations.sort() | 50 special_locations.sort() |
| (...skipping 641 matching lines...) Show 10 above Show 10 below |
| 692 return "".join(lines) | 692 return "".join(lines) |
| 693 lines.extend(self.automaticcomments) | 693 lines.extend(self.automaticcomments) |
| 694 lines.extend(self.sourcecomments) | 694 lines.extend(self.sourcecomments) |
| 695 lines.extend(self.typecomments) | 695 lines.extend(self.typecomments) |
| 696 if self.msgctxt: | 696 if self.msgctxt: |
| 697 lines.append(self._getmsgpartstr("msgctxt", self.msgctxt)) | 697 lines.append(self._getmsgpartstr("msgctxt", self.msgctxt)) |
| 698 lines.append(self._getmsgpartstr("msgid", self.msgid, self.msgidcomments
)) | 698 lines.append(self._getmsgpartstr("msgid", self.msgid, self.msgidcomments
)) |
| 699 if self.msgid_plural or self.msgid_pluralcomments: | 699 if self.msgid_plural or self.msgid_pluralcomments: |
| 700 lines.append(self._getmsgpartstr("msgid_plural", self.msgid_plural,
self.msgid_pluralcomments)) | 700 lines.append(self._getmsgpartstr("msgid_plural", self.msgid_plural,
self.msgid_pluralcomments)) |
| 701 lines.append(self._getmsgpartstr("msgstr", self.msgstr)) | 701 lines.append(self._getmsgpartstr("msgstr", self.msgstr)) |
| 702 lines = [self._encodeifneccessary(line) for line in lines] | 702 lines = [self._encodeifneccessary(line) for line in lines] |
| 703 postr = "".join(lines) | 703 postr = "".join(lines) |
| 704 return postr | 704 return postr |
| 705 | 705 |
def getlocations(self):
    """Collect the locations listed in this unit's source comments.

    Every entry of self.sourcecomments has its end-of-line stripped and
    its first three characters (the '#: ' marker) dropped; the remainder
    is split on whitespace and each piece counts as one location.

    @rtype: List
    @return: A list of the locations with '#: ' stripped
    """
    return [location
            for comment in self.sourcecomments
            for location in quote.rstripeol(comment)[3:].split()]
| 717 | 717 |
def addlocation(self, location):
    """Append one location to the unit's source comments.

    The location is stored as a complete comment line: it is prefixed
    with '#: ' and terminated with a newline.

    @param location: Text location e.g. 'file.c:23' does not include #:
    @type location: String
    """
    comment_line = "#: %s\n" % location
    self.sourcecomments.append(comment_line)
| 726 | 726 |
def _extract_msgidcomments(self, text=None):
    """Extract KDE style msgid comments from the unit.

    @param text: text to extract from; when empty or None the unit's own
        msgidcomments (unquoted) are used instead
    @rtype: String
    @return: Returns the extracted msgidcomments found in this unit's msgid.
    """
    text = text or unquotefrompo(self.msgidcomments)
    # Only the first line carries the comment; drop its leading "_: "
    # marker (first occurrence only).
    first_line = text.split('\n', 1)[0]
    return first_line.replace('_: ', '', 1)
| 738 | 738 |
def getcontext(self):
    """Get the message context.

    The result is the unquoted msgctxt followed directly by the KDE
    style comment extracted from the msgid comments.
    """
    msgctxt_text = unquotefrompo(self.msgctxt)
    kde_comment = self._extract_msgidcomments()
    return msgctxt_text + kde_comment
| 742 |
def setcontext(self, context):
    """Sets the message context.

    A non-empty context always replaces the unit's msgctxt, including
    one that was already set; an empty or None context removes it.

    @param context: unescaped context message
    """
    # Branch on the new value only: also testing the current msgctxt
    # would wipe an existing context instead of replacing it.
    if context:
        self.msgctxt = quoteforpo(context)
    else:
        self.msgctxt = []
| 742 | 752 |
def getid(self):
    """Returns a unique identifier for this unit.

    The identifier is the unit's source, prefixed with its context when
    there is one: in the "_: context" + newline form when the unit has
    msgid comments, otherwise joined with an EOT (\\04) character.
    """
    context = self.getcontext()
    # Gettext does not consider the plural to determine duplicates, only
    # the msgid, so the plain source is used here rather than a
    # '\0'-joined list of all source strings (which might be wanted for
    # generating .mo hash table entries).
    source = self.source
    if self.msgidcomments:
        return "_: %s\n%s" % (context, source)
    if context:
        return "%s\04%s" % (context, source)
    return source
| 757 | 767 |
| 758 class pofile(pocommon.pofile): | 768 class pofile(pocommon.pofile): |
| 759 """this represents a .po file containing various units""" | 769 """this represents a .po file containing various units""" |
| 760 UnitClass = pounit | 770 UnitClass = pounit |
def __init__(self, inputfile=None, encoding=None, unitclass=pounit):
    """construct a pofile, optionally reading in from inputfile.
    encoding can be specified but otherwise will be read from the PO header

    @param inputfile: input handed to self.parse(); nothing is parsed
        when it is None
    @param encoding: requested encoding, passed through encodingToUse()
        before being stored in self._encoding
    @param unitclass: class used for this store's units (default pounit)
    """
    self.UnitClass = unitclass
    pocommon.pofile.__init__(self, unitclass=unitclass)
    # These are assigned after the base initializer has run, so the
    # values below are the ones in effect whatever it set up.
    self.units = []
    self.filename = ''
    # NOTE(review): encodingToUse() is defined outside this view;
    # presumably it normalises the encoding name -- confirm.
    self._encoding = encodingToUse(encoding)
    if inputfile is not None:
        self.parse(inputfile)
| 771 | 781 |
| 772 def changeencoding(self, newencoding): | 782 def changeencoding(self, newencoding): |
| 773 """changes the encoding on the file""" | 783 """changes the encoding on the file""" |
| 774 self._encoding = encodingToUse(newencoding) | 784 self._encoding = encodingToUse(newencoding) |
| 775 if not self.units: | 785 if not self.units: |
| 776 return | 786 return |
| 777 header = self.header() | 787 header = self.header() |
| 778 if not header or header.isblank(): | 788 if not header or header.isblank(): |
| 779 return | 789 return |
| 780 charsetline = None | 790 charsetline = None |
| 781 headerstr = unquotefrompo(header.msgstr) | 791 headerstr = unquotefrompo(header.msgstr) |
| 782 for line in headerstr.split("\n"): | 792 for line in headerstr.split("\n"): |
| 783 if not ":" in line: continue | 793 if not ":" in line: continue |
| 784 key, value = line.strip().split(":", 1) | 794 key, value = line.strip().split(":", 1) |
| 785 if key.strip() != "Content-Type": continue | 795 if key.strip() != "Content-Type": continue |
| 786 charsetline = line | 796 charsetline = line |
| 787 if charsetline is None: | 797 if charsetline is None: |
| 788 headerstr += "Content-Type: text/plain; charset=%s" % self._encoding | 798 headerstr += "Content-Type: text/plain; charset=%s" % self._encoding |
| 789 else: | 799 else: |
| 790 charset = re.search("charset=([^ ]*)", charsetline) | 800 charset = re.search("charset=([^ ]*)", charsetline) |
| 791 if charset is None: | 801 if charset is None: |
| (...skipping 109 matching lines...) Show 10 above Show 10 below |
| 901 output = self._getoutput() | 911 output = self._getoutput() |
| 902 if isinstance(output, unicode): | 912 if isinstance(output, unicode): |
| 903 return output.encode(getattr(self, "encoding", "UTF-8")) | 913 return output.encode(getattr(self, "encoding", "UTF-8")) |
| 904 return output | 914 return output |
| 905 | 915 |
def _getoutput(self):
    """Serialise all units into one string.

    Each unit is converted with str() and followed by a newline; the
    pieces go through self.encode() and are concatenated. Trailing
    whitespace is stripped and, unless the result is empty, a single
    final newline is added.
    """
    unit_sources = [str(unit) + "\n" for unit in self.units]
    output = "".join(self.encode(unit_sources)).rstrip()
    # After the last pounit we will have \n\n and we only want to end in \n:
    if not output:
        return output
    return output + "\n"
| 916 | 926 |
def encode(self, lines):
    """Encode any unicode strings in lines using self._encoding.

    UTF-8 is used when self._encoding is None or equals "charset"
    (compared case-insensitively). Items that are not unicode objects
    are passed through unchanged.

    @param lines: iterable of strings
    @return: a new list with the same items, unicode ones encoded
    """
    codec = self._encoding
    if codec is None or codec.lower() == "charset":
        codec = 'UTF-8'
    encoded = []
    for text in lines:
        if not isinstance(text, unicode):
            encoded.append(text)
        else:
            encoded.append(text.encode(codec))
    return encoded
| 928 | 938 |
def decode(self, lines):
    """decode any non-unicode strings in lines with self._encoding

    @param lines: iterable of lines; str items are decoded, anything
        else is passed through untouched
    @return: a new list holding the (possibly decoded) lines
    @raise UnicodeError: when a line cannot be decoded; the message names
        the encoding, the underlying error and the offending line
    """
    newlines = []
    for line in lines:
        # Decoding is skipped when no encoding is set or when it is still
        # "charset" (case-insensitive) -- presumably the unfilled value
        # from a PO template header; confirm.
        if isinstance(line, str) and self._encoding is not None and self._encoding.lower() != "charset":
            try:
                line = line.decode(self._encoding)
            except UnicodeError, e:
                # Re-raise with enough detail to locate the bad input.
                raise UnicodeError("Error decoding line with encoding %r: %s. Line is %r" % (self._encoding, e, line))
        newlines.append(line)
    return newlines
| 940 | 950 |
def unit_iter(self):
    """Iterate over the units that are neither a header nor obsolete."""
    for unit in self.units:
        if unit.isheader() or unit.isobsolete():
            continue
        yield unit
| 945 | 955 |
if __name__ == '__main__':
    # Run as a script: parse a PO file from standard input and write its
    # string form to standard output.
    import sys
    sys.stdout.write(str(pofile(sys.stdin)))
| 950 | 960 |
| OLD | NEW |