Fix babel msgids to include tag attributes and entities

The messages for translation were omitting the attributes in
tags and HTML entities embedded in the translation strings,
thus they no longer matched the original HTML.

Additionally, HTML entities are mostly (but not always) decoded
by the browser before angular-gettext sees them, so we have to
emulate this behaviour in our babel extractor so the message
ids match.

Change-Id: Ie6ae0776a2c0d7db589b34a9e7676508178a473d
Closes-Bug: 1561550
Closes-Bug: 1561738
Closes-Bug: 1561761
(cherry picked from commit fdf0bf6f18)
This commit is contained in:
Richard Jones 2016-04-07 14:28:05 +10:00 committed by Rob Cresswell
parent 0b35ce5374
commit dfeacc294f
3 changed files with 49 additions and 2 deletions

View File

@ -1,3 +1,4 @@
# -*- encoding: UTF-8 -*-
# Copyright 2015, Rackspace, US, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
@ -170,3 +171,20 @@ class ExtractAngularTestCase(test.TestCase):
self.assertEqual(
[(1, 'gettext', 'hello <b>beautiful <i>world</i></b> !', [])],
messages)
def test_nested_variations(self):
    """Check msgids for inner tags with attributes, comments and entities.

    The expected messages show the inner tag's attribute and the HTML
    comment kept verbatim, ``&reg;`` decoded to its character, and the
    numeric character references left exactly as written.
    """
    # The fixture starts with a newline, so the first <p> is on line 2.
    markup = '''
<p translate>To <a href="link">link</a> here</p>
<p translate>To <!-- a comment!! --> here</p>
<p translate>To trademark&reg; &#62; &#x3E; here</p>
'''
    expected = [
        (2, u'gettext', 'To <a href="link">link</a> here', []),
        (3, u'gettext', 'To <!-- a comment!! --> here', []),
        (4, u'gettext', u'To trademark® &#62; &#x3E; here', []),
    ]
    extracted = list(extract_angular(StringIO(markup), [], [], {}))
    self.assertEqual(expected, extracted)

View File

@ -1,3 +1,4 @@
# -*- encoding: UTF-8 -*-
# Copyright 2015, Rackspace, US, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
@ -22,6 +23,16 @@ filter_regex = re.compile(
r"""{\$\s*('([^']|\\')+'|"([^"]|\\")+")\s*\|\s*translate\s*\$}"""
)
# The browser's innerHTML decodes some HTML entities automatically, so the
# msgid we extract has to mirror what Javascript will actually see: the
# entities named in HTML_ENTITY_PASSTHROUGH are left in their escaped form,
# and every other entity is replaced by its character via
# HTML_ENTITY_DECODED. Add entries to HTML_ENTITY_DECODED as necessary.
HTML_ENTITY_PASSTHROUGH = {
    'amp',
    'gt',
    'lt',
}
HTML_ENTITY_DECODED = dict(
    reg=u'®',
    times=u'×',
)
class AngularGettextHTMLParser(html_parser.HTMLParser):
"""Parse HTML to find translate directives.
@ -68,7 +79,10 @@ class AngularGettextHTMLParser(html_parser.HTMLParser):
if attr == 'translate-comment':
self.comments.append(value)
elif self.in_translate:
self.data += '<%s>' % tag
s = tag
if attrs:
s += ' ' + ' '.join('%s="%s"' % a for a in attrs)
self.data += '<%s>' % s
self.inner_tags.append(tag)
else:
for attr in attrs:
@ -89,6 +103,21 @@ class AngularGettextHTMLParser(html_parser.HTMLParser):
(self.line, u'gettext', match[0][1:-1], [])
)
def handle_entityref(self, name):
    """Append a named entity reference to the message being collected.

    Does nothing unless ``self.in_translate`` is set. Names listed in
    HTML_ENTITY_PASSTHROUGH are re-emitted in escaped ``&name;`` form;
    any other name is replaced by its entry in HTML_ENTITY_DECODED.

    :param name: entity name without the leading ``&`` and trailing ``;``
    :raises KeyError: if ``name`` is in neither table
    """
    # NOTE(review): on Python 3 the stdlib parser only calls this hook
    # when convert_charrefs is False - confirm how the parser is built.
    if not self.in_translate:
        return
    if name in HTML_ENTITY_PASSTHROUGH:
        entity_text = '&%s;' % name
    else:
        # Unmapped names fail loudly here so the table gets extended.
        entity_text = HTML_ENTITY_DECODED[name]
    self.data += entity_text
def handle_charref(self, name):
    """Append a numeric character reference, unchanged, to the message.

    Does nothing unless ``self.in_translate`` is set. The reference is
    re-emitted in its escaped ``&#...;`` form rather than decoded.

    :param name: the text between ``&#`` and ``;`` (e.g. ``62``, ``x3E``)
    """
    if not self.in_translate:
        return
    charref_text = '&#%s;' % name
    self.data += charref_text
def handle_comment(self, comment):
    """Append an HTML comment, re-wrapped in its delimiters, to the message.

    Does nothing unless ``self.in_translate`` is set.

    :param comment: the comment text between ``<!--`` and ``-->``
    """
    if not self.in_translate:
        return
    comment_markup = '<!--%s-->' % comment
    self.data += comment_markup
def handle_endtag(self, tag):
if self.in_translate:
if len(self.inner_tags) > 0:

View File

@ -26,7 +26,7 @@
<div class="form-group">
<label class="control-label" translate>Container Access</label>
<div>
<label for="id_public" translate>
<label for="id_public">
<input type="checkbox" ng-model="ctrl.model.public"
name="public" checked id="id_public">
<span translate>Public</span>