Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code | Sign in
(63)

Side by Side Diff: translate/storage/pypo.py

Issue 65: xliff2po & po2xliff should handle context SVN Base: https://translate.svn.sourceforge.net/svnroot/translate/src/trunk/
Patch Set: A more complete patch, supporting CPO and pypo Created 1 year, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Please Sign in to add in-line comments.
Jump to:
View unified diff
OLD NEW
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*- 2 # -*- coding: utf-8 -*-
3 # 3 #
4 # Copyright 2002-2007 Zuza Software Foundation 4 # Copyright 2002-2007 Zuza Software Foundation
5 # 5 #
6 # This file is part of translate. 6 # This file is part of translate.
7 # 7 #
8 # translate is free software; you can redistribute it and/or modify 8 # translate is free software; you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by 9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation; either version 2 of the License, or 10 # the Free Software Foundation; either version 2 of the License, or
11 # (at your option) any later version. 11 # (at your option) any later version.
12 # 12 #
13 # translate is distributed in the hope that it will be useful, 13 # translate is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of 14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details. 16 # GNU General Public License for more details.
17 # 17 #
18 # You should have received a copy of the GNU General Public License 18 # You should have received a copy of the GNU General Public License
19 # along with translate; if not, write to the Free Software 19 # along with translate; if not, write to the Free Software
20 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 20 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 21
22 """classes that hold units of .po files (pounit) or entire files (pofile) 22 """classes that hold units of .po files (pounit) or entire files (pofile)
23 gettext-style .po (or .pot) files are used in translations for KDE et al (see kbabel)""" 23 gettext-style .po (or .pot) files are used in translations for KDE et al (see kbabel)"""
24 24
25 from __future__ import generators 25 from __future__ import generators
26 from translate.misc.multistring import multistring 26 from translate.misc.multistring import multistring
27 from translate.misc import quote 27 from translate.misc import quote
28 from translate.misc import textwrap 28 from translate.misc import textwrap
29 from translate.lang import data 29 from translate.lang import data
30 from translate.storage import pocommon, base 30 from translate.storage import pocommon, base
31 import re 31 import re
32 32
33 lsep = "\n#: " 33 lsep = "\n#: "
34 """Seperator for #: entries""" 34 """Seperator for #: entries"""
35 35
36 # general functions for quoting / unquoting po strings 36 # general functions for quoting / unquoting po strings
37 37
38 po_unescape_map = {"\\r": "\r", "\\t": "\t", '\\"': '"', '\\n': '\n', '\\\\': '\\'} 38 po_unescape_map = {"\\r": "\r", "\\t": "\t", '\\"': '"', '\\n': '\n', '\\\\': '\\'}
39 po_escape_map = dict([(value, key) for (key, value) in po_unescape_map.items()]) 39 po_escape_map = dict([(value, key) for (key, value) in po_unescape_map.items()])
40 40
41 def escapeforpo(line): 41 def escapeforpo(line):
42 """Escapes a line for po format. assumes no \n occurs in the line. 42 """Escapes a line for po format. assumes no \n occurs in the line.
43 43
44 @param line: unescaped text 44 @param line: unescaped text
45 """ 45 """
46 special_locations = [] 46 special_locations = []
47 for special_key in po_escape_map: 47 for special_key in po_escape_map:
48 special_locations.extend(quote.find_all(line, special_key)) 48 special_locations.extend(quote.find_all(line, special_key))
49 special_locations = dict.fromkeys(special_locations).keys() 49 special_locations = dict.fromkeys(special_locations).keys()
50 special_locations.sort() 50 special_locations.sort()
(...skipping 641 matching lines...) Show 10 above Show 10 below
692 return "".join(lines) 692 return "".join(lines)
693 lines.extend(self.automaticcomments) 693 lines.extend(self.automaticcomments)
694 lines.extend(self.sourcecomments) 694 lines.extend(self.sourcecomments)
695 lines.extend(self.typecomments) 695 lines.extend(self.typecomments)
696 if self.msgctxt: 696 if self.msgctxt:
697 lines.append(self._getmsgpartstr("msgctxt", self.msgctxt)) 697 lines.append(self._getmsgpartstr("msgctxt", self.msgctxt))
698 lines.append(self._getmsgpartstr("msgid", self.msgid, self.msgidcomments)) 698 lines.append(self._getmsgpartstr("msgid", self.msgid, self.msgidcomments))
699 if self.msgid_plural or self.msgid_pluralcomments: 699 if self.msgid_plural or self.msgid_pluralcomments:
700 lines.append(self._getmsgpartstr("msgid_plural", self.msgid_plural, self.msgid_pluralcomments)) 700 lines.append(self._getmsgpartstr("msgid_plural", self.msgid_plural, self.msgid_pluralcomments))
701 lines.append(self._getmsgpartstr("msgstr", self.msgstr)) 701 lines.append(self._getmsgpartstr("msgstr", self.msgstr))
702 lines = [self._encodeifneccessary(line) for line in lines] 702 lines = [self._encodeifneccessary(line) for line in lines]
703 postr = "".join(lines) 703 postr = "".join(lines)
704 return postr 704 return postr
705 705
706 def getlocations(self): 706 def getlocations(self):
707 """Get a list of locations from sourcecomments in the PO unit 707 """Get a list of locations from sourcecomments in the PO unit
708 708
709 rtype: List 709 rtype: List
710 return: A list of the locations with '#: ' stripped 710 return: A list of the locations with '#: ' stripped
711 711
712 """ 712 """
713 locations = [] 713 locations = []
714 for sourcecomment in self.sourcecomments: 714 for sourcecomment in self.sourcecomments:
715 locations += quote.rstripeol(sourcecomment)[3:].split() 715 locations += quote.rstripeol(sourcecomment)[3:].split()
716 return locations 716 return locations
717 717
718 def addlocation(self, location): 718 def addlocation(self, location):
719 """Add a location to sourcecomments in the PO unit 719 """Add a location to sourcecomments in the PO unit
720 720
721 @param location: Text location e.g. 'file.c:23' does not include #: 721 @param location: Text location e.g. 'file.c:23' does not include #:
722 @type location: String 722 @type location: String
723 723
724 """ 724 """
725 self.sourcecomments.append("#: %s\n" % location) 725 self.sourcecomments.append("#: %s\n" % location)
726 726
727 def _extract_msgidcomments(self, text=None): 727 def _extract_msgidcomments(self, text=None):
728 """Extract KDE style msgid comments from the unit. 728 """Extract KDE style msgid comments from the unit.
729 729
730 @rtype: String 730 @rtype: String
731 @return: Returns the extracted msgidcomments found in this unit's msgid. 731 @return: Returns the extracted msgidcomments found in this unit's msgid.
732 732
733 """ 733 """
734 734
735 if not text: 735 if not text:
736 text = unquotefrompo(self.msgidcomments) 736 text = unquotefrompo(self.msgidcomments)
737 return text.split('\n')[0].replace('_: ', '', 1) 737 return text.split('\n')[0].replace('_: ', '', 1)
738 738
739 def getcontext(self): 739 def getcontext(self):
740 """Get the message context.""" 740 """Get the message context."""
741 return unquotefrompo(self.msgctxt) + self._extract_msgidcomments() 741 return unquotefrompo(self.msgctxt) + self._extract_msgidcomments()
742
743 def setcontext(self, context):
744 """Sets the message context.
745
746 @param context: unescaped context message
747 """
748 if not self.msgctxt and context:
749 self.msgctxt = quoteforpo(context)
750 else:
751 self.msgctxt = []
742 752
743 def getid(self): 753 def getid(self):
744 """Returns a unique identifier for this unit.""" 754 """Returns a unique identifier for this unit."""
745 context = self.getcontext() 755 context = self.getcontext()
746 # Gettext does not consider the plural to determine duplicates, only 756 # Gettext does not consider the plural to determine duplicates, only
747 # the msgid. For generation of .mo files, we might want to use this 757 # the msgid. For generation of .mo files, we might want to use this
748 # code to generate the entry for the hash table, but for now, it is 758 # code to generate the entry for the hash table, but for now, it is
749 # commented out for conformance to gettext. 759 # commented out for conformance to gettext.
750 # id = '\0'.join(self.source.strings) 760 # id = '\0'.join(self.source.strings)
751 id = self.source 761 id = self.source
752 if self.msgidcomments: 762 if self.msgidcomments:
753 id = "_: %s\n%s" % (context, id) 763 id = "_: %s\n%s" % (context, id)
754 elif context: 764 elif context:
755 id = "%s\04%s" % (context, id) 765 id = "%s\04%s" % (context, id)
756 return id 766 return id
757 767
758 class pofile(pocommon.pofile): 768 class pofile(pocommon.pofile):
759 """this represents a .po file containing various units""" 769 """this represents a .po file containing various units"""
760 UnitClass = pounit 770 UnitClass = pounit
761 def __init__(self, inputfile=None, encoding=None, unitclass=pounit): 771 def __init__(self, inputfile=None, encoding=None, unitclass=pounit):
762 """construct a pofile, optionally reading in from inputfile. 772 """construct a pofile, optionally reading in from inputfile.
763 encoding can be specified but otherwise will be read from the PO header""" 773 encoding can be specified but otherwise will be read from the PO header"""
764 self.UnitClass = unitclass 774 self.UnitClass = unitclass
765 pocommon.pofile.__init__(self, unitclass=unitclass) 775 pocommon.pofile.__init__(self, unitclass=unitclass)
766 self.units = [] 776 self.units = []
767 self.filename = '' 777 self.filename = ''
768 self._encoding = encodingToUse(encoding) 778 self._encoding = encodingToUse(encoding)
769 if inputfile is not None: 779 if inputfile is not None:
770 self.parse(inputfile) 780 self.parse(inputfile)
771 781
772 def changeencoding(self, newencoding): 782 def changeencoding(self, newencoding):
773 """changes the encoding on the file""" 783 """changes the encoding on the file"""
774 self._encoding = encodingToUse(newencoding) 784 self._encoding = encodingToUse(newencoding)
775 if not self.units: 785 if not self.units:
776 return 786 return
777 header = self.header() 787 header = self.header()
778 if not header or header.isblank(): 788 if not header or header.isblank():
779 return 789 return
780 charsetline = None 790 charsetline = None
781 headerstr = unquotefrompo(header.msgstr) 791 headerstr = unquotefrompo(header.msgstr)
782 for line in headerstr.split("\n"): 792 for line in headerstr.split("\n"):
783 if not ":" in line: continue 793 if not ":" in line: continue
784 key, value = line.strip().split(":", 1) 794 key, value = line.strip().split(":", 1)
785 if key.strip() != "Content-Type": continue 795 if key.strip() != "Content-Type": continue
786 charsetline = line 796 charsetline = line
787 if charsetline is None: 797 if charsetline is None:
788 headerstr += "Content-Type: text/plain; charset=%s" % self._encoding 798 headerstr += "Content-Type: text/plain; charset=%s" % self._encoding
789 else: 799 else:
790 charset = re.search("charset=([^ ]*)", charsetline) 800 charset = re.search("charset=([^ ]*)", charsetline)
791 if charset is None: 801 if charset is None:
(...skipping 109 matching lines...) Show 10 above Show 10 below
901 output = self._getoutput() 911 output = self._getoutput()
902 if isinstance(output, unicode): 912 if isinstance(output, unicode):
903 return output.encode(getattr(self, "encoding", "UTF-8")) 913 return output.encode(getattr(self, "encoding", "UTF-8"))
904 return output 914 return output
905 915
906 def _getoutput(self): 916 def _getoutput(self):
907 """convert the units back to lines""" 917 """convert the units back to lines"""
908 lines = [] 918 lines = []
909 for unit in self.units: 919 for unit in self.units:
910 unitsrc = str(unit) + "\n" 920 unitsrc = str(unit) + "\n"
911 lines.append(unitsrc) 921 lines.append(unitsrc)
912 lines = "".join(self.encode(lines)).rstrip() 922 lines = "".join(self.encode(lines)).rstrip()
913 #After the last pounit we will have \n\n and we only want to end in \n: 923 #After the last pounit we will have \n\n and we only want to end in \n:
914 if lines: lines += "\n" 924 if lines: lines += "\n"
915 return lines 925 return lines
916 926
917 def encode(self, lines): 927 def encode(self, lines):
918 """encode any unicode strings in lines in self._encoding""" 928 """encode any unicode strings in lines in self._encoding"""
919 newlines = [] 929 newlines = []
920 encoding = self._encoding 930 encoding = self._encoding
921 if encoding is None or encoding.lower() == "charset": 931 if encoding is None or encoding.lower() == "charset":
922 encoding = 'UTF-8' 932 encoding = 'UTF-8'
923 for line in lines: 933 for line in lines:
924 if isinstance(line, unicode): 934 if isinstance(line, unicode):
925 line = line.encode(encoding) 935 line = line.encode(encoding)
926 newlines.append(line) 936 newlines.append(line)
927 return newlines 937 return newlines
928 938
929 def decode(self, lines): 939 def decode(self, lines):
930 """decode any non-unicode strings in lines with self._encoding""" 940 """decode any non-unicode strings in lines with self._encoding"""
931 newlines = [] 941 newlines = []
932 for line in lines: 942 for line in lines:
933 if isinstance(line, str) and self._encoding is not None and self._encoding.lower() != "charset": 943 if isinstance(line, str) and self._encoding is not None and self._encoding.lower() != "charset":
934 try: 944 try:
935 line = line.decode(self._encoding) 945 line = line.decode(self._encoding)
936 except UnicodeError, e: 946 except UnicodeError, e:
937 raise UnicodeError("Error decoding line with encoding %r: %s. Line is %r" % (self._encoding, e, line)) 947 raise UnicodeError("Error decoding line with encoding %r: %s. Line is %r" % (self._encoding, e, line))
938 newlines.append(line) 948 newlines.append(line)
939 return newlines 949 return newlines
940 950
941 def unit_iter(self): 951 def unit_iter(self):
942 for unit in self.units: 952 for unit in self.units:
943 if not (unit.isheader() or unit.isobsolete()): 953 if not (unit.isheader() or unit.isobsolete()):
944 yield unit 954 yield unit
945 955
946 if __name__ == '__main__': 956 if __name__ == '__main__':
947 import sys 957 import sys
948 pf = pofile(sys.stdin) 958 pf = pofile(sys.stdin)
949 sys.stdout.write(str(pf)) 959 sys.stdout.write(str(pf))
950 960
OLD NEW

Powered by Google App Engine
This is Rietveld r159