| OLD | NEW |
| 1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
| 2 # -*- coding: utf-8 -*- | 2 # -*- coding: utf-8 -*- |
| 3 # | 3 # |
| 4 # Copyright 2002-2007 Zuza Software Foundation | 4 # Copyright 2002-2007 Zuza Software Foundation |
| 5 # | 5 # |
| 6 # This file is part of translate. | 6 # This file is part of translate. |
| 7 # | 7 # |
| 8 # translate is free software; you can redistribute it and/or modify | 8 # translate is free software; you can redistribute it and/or modify |
| 9 # it under the terms of the GNU General Public License as published by | 9 # it under the terms of the GNU General Public License as published by |
| 10 # the Free Software Foundation; either version 2 of the License, or | 10 # the Free Software Foundation; either version 2 of the License, or |
| 11 # (at your option) any later version. | 11 # (at your option) any later version. |
| 12 # | 12 # |
| 13 # translate is distributed in the hope that it will be useful, | 13 # translate is distributed in the hope that it will be useful, |
| 14 # but WITHOUT ANY WARRANTY; without even the implied warranty of | 14 # but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 16 # GNU General Public License for more details. | 16 # GNU General Public License for more details. |
| 17 # | 17 # |
| 18 # You should have received a copy of the GNU General Public License | 18 # You should have received a copy of the GNU General Public License |
| 19 # along with translate; if not, write to the Free Software | 19 # along with translate; if not, write to the Free Software |
| 20 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 20 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
| 21 | 21 |
| 22 """classes that hold units of .po files (pounit) or entire files (pofile) | 22 """classes that hold units of .po files (pounit) or entire files (pofile) |
| 23 gettext-style .po (or .pot) files are used in translations for KDE et al (see kb
abel)""" | 23 gettext-style .po (or .pot) files are used in translations for KDE et al (see kb
abel)""" |
| 24 | 24 |
| 25 from __future__ import generators | 25 from __future__ import generators |
| 26 from translate.misc.multistring import multistring | 26 from translate.misc.multistring import multistring |
| 27 from translate.misc import quote | 27 from translate.misc import quote |
| 28 from translate.misc import textwrap | 28 from translate.misc import textwrap |
| 29 from translate.lang import data | 29 from translate.lang import data |
| 30 from translate.storage import pocommon, base | 30 from translate.storage import pocommon, base |
| 31 import re | 31 import re |
| 32 | 32 |
lsep = "\n#: "
"""Separator for #: entries"""

# General functions for quoting / unquoting po strings.

# Maps each two-character escape sequence, as it is written inside a PO
# string, to the single character it stands for.
po_unescape_map = {"\\r": "\r", "\\t": "\t", '\\"': '"', '\\n': '\n', '\\\\': '\\'}
# Inverse of po_unescape_map: maps each special character to the escape
# sequence that represents it inside a PO string.
po_escape_map = dict([(value, key) for (key, value) in po_unescape_map.items()])
| 40 | 40 |
| 41 def escapeforpo(line): | 41 def escapeforpo(line): |
| 42 """Escapes a line for po format. assumes no \n occurs in the line. | 42 """Escapes a line for po format. assumes no \n occurs in the line. |
| 43 | 43 |
| 44 @param line: unescaped text | 44 @param line: unescaped text |
| 45 """ | 45 """ |
| 46 special_locations = [] | 46 special_locations = [] |
| 47 for special_key in po_escape_map: | 47 for special_key in po_escape_map: |
| 48 special_locations.extend(quote.find_all(line, special_key)) | 48 special_locations.extend(quote.find_all(line, special_key)) |
| 49 special_locations = dict.fromkeys(special_locations).keys() | 49 special_locations = dict.fromkeys(special_locations).keys() |
| 50 special_locations.sort() | 50 special_locations.sort() |
| (...skipping 641 matching lines...) Show 10 above Show 10 below |
| 692 return "".join(lines) | 692 return "".join(lines) |
| 693 lines.extend(self.automaticcomments) | 693 lines.extend(self.automaticcomments) |
| 694 lines.extend(self.sourcecomments) | 694 lines.extend(self.sourcecomments) |
| 695 lines.extend(self.typecomments) | 695 lines.extend(self.typecomments) |
| 696 if self.msgctxt: | 696 if self.msgctxt: |
| 697 lines.append(self._getmsgpartstr("msgctxt", self.msgctxt)) | 697 lines.append(self._getmsgpartstr("msgctxt", self.msgctxt)) |
| 698 lines.append(self._getmsgpartstr("msgid", self.msgid, self.msgidcomments
)) | 698 lines.append(self._getmsgpartstr("msgid", self.msgid, self.msgidcomments
)) |
| 699 if self.msgid_plural or self.msgid_pluralcomments: | 699 if self.msgid_plural or self.msgid_pluralcomments: |
| 700 lines.append(self._getmsgpartstr("msgid_plural", self.msgid_plural,
self.msgid_pluralcomments)) | 700 lines.append(self._getmsgpartstr("msgid_plural", self.msgid_plural,
self.msgid_pluralcomments)) |
| 701 lines.append(self._getmsgpartstr("msgstr", self.msgstr)) | 701 lines.append(self._getmsgpartstr("msgstr", self.msgstr)) |
| 702 lines = [self._encodeifneccessary(line) for line in lines] | 702 lines = [self._encodeifneccessary(line) for line in lines] |
| 703 postr = "".join(lines) | 703 postr = "".join(lines) |
| 704 return postr | 704 return postr |
| 705 | 705 |
def getlocations(self):
    """Collect the locations recorded in this unit's source comments.

    Every entry of C{self.sourcecomments} has its end-of-line removed and
    its first three characters (the '#: ' marker) dropped; the remainder is
    split on whitespace, so one comment line may yield several locations.

    @rtype: List
    @return: The locations, in order, with '#: ' stripped
    """
    found = []
    for comment in self.sourcecomments:
        stripped = quote.rstripeol(comment)
        found.extend(stripped[3:].split())
    return found
| 717 | 717 |
def addlocation(self, location):
    """Append a location to the source comments of this PO unit.

    @param location: Location text, e.g. 'file.c:23', without the '#: ' marker
    @type location: String
    """
    # Stored as a complete comment line, marker and newline included.
    comment = "#: %s\n" % location
    self.sourcecomments.append(comment)
| 726 | 726 |
| 727 def _extract_msgidcomments(self, text=None): | 727 def _extract_msgidcomments(self, text=None): |
| 728 """Extract KDE style msgid comments from the unit. | 728 """Extract KDE style msgid comments from the unit. |
| 729 | 729 |
| 730 @rtype: String | 730 @rtype: String |
| 731 @return: Returns the extracted msgidcomments found in this unit's msgid. | 731 @return: Returns the extracted msgidcomments found in this unit's msgid. |
| 732 | 732 |
| 733 """ | 733 """ |
| 734 | 734 |
| 735 if not text: | 735 if not text: |
| 736 text = unquotefrompo(self.msgidcomments) | 736 text = unquotefrompo(self.msgidcomments) |
| 737 return text.split('\n')[0].replace('_: ', '', 1) | 737 return text.split('\n')[0].replace('_: ', '', 1) |
| 738 | 738 |
def getcontext(self):
    """Get the message context.

    @return: The unquoted msgctxt of the unit followed by the msgid comment
    extracted by L{_extract_msgidcomments}.
    """
    msgctxt = unquotefrompo(self.msgctxt)
    msgidcomment = self._extract_msgidcomments()
    return msgctxt + msgidcomment
| 742 |
def setcontext(self, context):
    """Set the message context.

    @param context: Context text; it is passed through quoteforpo() and the
    result is stored as C{self.msgctxt}.
    """
    quoted = quoteforpo(context)
    self.msgctxt = quoted
| 742 | 746 |
def getid(self):
    """Return a unique identifier for this unit.

    The identifier is built from the unit's source and its context:
      - with msgid comments: '_: ', the context, a newline, then the source;
      - otherwise, with a non-empty context: the context, an EOT character
        (0x04), then the source;
      - otherwise: the source on its own.
    """
    context = self.getcontext()
    # Gettext looks only at the msgid, not at the plural, when deciding
    # whether two entries are duplicates. For generating .mo files the hash
    # table entry might want to be built as in the line below, but it stays
    # disabled for conformance with gettext:
    # id = '\0'.join(self.source.strings)
    unitid = self.source
    if self.msgidcomments:
        return "_: %s\n%s" % (context, unitid)
    if context:
        return "%s\04%s" % (context, unitid)
    return unitid
| 757 | 761 |
| 758 class pofile(pocommon.pofile): | 762 class pofile(pocommon.pofile): |
| 759 """this represents a .po file containing various units""" | 763 """this represents a .po file containing various units""" |
| 760 UnitClass = pounit | 764 UnitClass = pounit |
def __init__(self, inputfile=None, encoding=None, unitclass=pounit):
    """Construct a pofile, optionally reading in from inputfile.

    @param inputfile: When not None, it is handed to self.parse() once the
    object has been set up.
    @param encoding: Can be specified, but otherwise will be read from the
    PO header. It is passed through encodingToUse() to give self._encoding.
    @param unitclass: Class stored as self.UnitClass and passed on to the
    base class constructor.
    """
    self.UnitClass = unitclass
    pocommon.pofile.__init__(self, unitclass=unitclass)
    self.units = []
    self.filename = ''
    self._encoding = encodingToUse(encoding)
    if inputfile is None:
        return
    self.parse(inputfile)
| 771 | 775 |
| 772 def changeencoding(self, newencoding): | 776 def changeencoding(self, newencoding): |
| 773 """changes the encoding on the file""" | 777 """changes the encoding on the file""" |
| 774 self._encoding = encodingToUse(newencoding) | 778 self._encoding = encodingToUse(newencoding) |
| 775 if not self.units: | 779 if not self.units: |
| 776 return | 780 return |
| 777 header = self.header() | 781 header = self.header() |
| 778 if not header or header.isblank(): | 782 if not header or header.isblank(): |
| 779 return | 783 return |
| 780 charsetline = None | 784 charsetline = None |
| 781 headerstr = unquotefrompo(header.msgstr) | 785 headerstr = unquotefrompo(header.msgstr) |
| 782 for line in headerstr.split("\n"): | 786 for line in headerstr.split("\n"): |
| 783 if not ":" in line: continue | 787 if not ":" in line: continue |
| 784 key, value = line.strip().split(":", 1) | 788 key, value = line.strip().split(":", 1) |
| 785 if key.strip() != "Content-Type": continue | 789 if key.strip() != "Content-Type": continue |
| 786 charsetline = line | 790 charsetline = line |
| 787 if charsetline is None: | 791 if charsetline is None: |
| 788 headerstr += "Content-Type: text/plain; charset=%s" % self._encoding | 792 headerstr += "Content-Type: text/plain; charset=%s" % self._encoding |
| 789 else: | 793 else: |
| 790 charset = re.search("charset=([^ ]*)", charsetline) | 794 charset = re.search("charset=([^ ]*)", charsetline) |
| 791 if charset is None: | 795 if charset is None: |
| (...skipping 109 matching lines...) Show 10 above Show 10 below |
| 901 output = self._getoutput() | 905 output = self._getoutput() |
| 902 if isinstance(output, unicode): | 906 if isinstance(output, unicode): |
| 903 return output.encode(getattr(self, "encoding", "UTF-8")) | 907 return output.encode(getattr(self, "encoding", "UTF-8")) |
| 904 return output | 908 return output |
| 905 | 909 |
| 906 def _getoutput(self): | 910 def _getoutput(self): |
| 907 """convert the units back to lines""" | 911 """convert the units back to lines""" |
| 908 lines = [] | 912 lines = [] |
| 909 for unit in self.units: | 913 for unit in self.units: |
| 910 unitsrc = str(unit) + "\n" | 914 unitsrc = str(unit) + "\n" |
| 911 lines.append(unitsrc) | 915 lines.append(unitsrc) |
| 912 lines = "".join(self.encode(lines)).rstrip() | 916 lines = "".join(self.encode(lines)).rstrip() |
| 913 #After the last pounit we will have \n\n and we only want to end in \n: | 917 #After the last pounit we will have \n\n and we only want to end in \n: |
| 914 if lines: lines += "\n" | 918 if lines: lines += "\n" |
| 915 return lines | 919 return lines |
| 916 | 920 |
def encode(self, lines):
    """Encode any unicode strings in lines using self._encoding.

    UTF-8 is used when self._encoding is None or is the placeholder
    "charset" (compared case-insensitively). Items that are not unicode
    are passed through unchanged.

    @param lines: Iterable of lines
    @return: A new list holding the encoded lines
    """
    encoding = self._encoding
    if encoding is None or encoding.lower() == "charset":
        encoding = 'UTF-8'
    encoded = []
    for line in lines:
        if isinstance(line, unicode):
            encoded.append(line.encode(encoding))
        else:
            encoded.append(line)
    return encoded
| 928 | 932 |
def decode(self, lines):
    """Decode any non-unicode strings in lines with self._encoding.

    Items that are not C{str} are passed through unchanged, and so is every
    item when self._encoding is None or is the placeholder "charset"
    (compared case-insensitively).

    @param lines: Iterable of lines
    @return: A new list holding the decoded lines
    @raise UnicodeError: If a line cannot be decoded; the message names the
    encoding, the original error and the offending line.
    """
    # Local import: only needed to fetch the active exception below.
    import sys
    newlines = []
    for line in lines:
        if isinstance(line, str) and self._encoding is not None and self._encoding.lower() != "charset":
            try:
                line = line.decode(self._encoding)
            except UnicodeError:
                # sys.exc_info() is used instead of "except UnicodeError, e"
                # because the comma form does not parse on Python 3, while
                # this spelling parses on every Python version.
                e = sys.exc_info()[1]
                raise UnicodeError("Error decoding line with encoding %r: %s. Line is %r" % (self._encoding, e, line))
        newlines.append(line)
    return newlines
| 940 | 944 |
def unit_iter(self):
    """Iterate over the units that are neither headers nor obsolete."""
    for unit in self.units:
        if unit.isheader() or unit.isobsolete():
            continue
        yield unit
| 945 | 949 |
if __name__ == '__main__':
    # Filter mode: parse a PO file from stdin and write its string form
    # back to stdout.
    import sys
    postore = pofile(sys.stdin)
    sys.stdout.write(str(postore))
| 950 | 954 |
| OLD | NEW |