import warnings
from io import StringIO
from django.template.base import Lexer, TokenType
from django.utils.regex_helper import _lazy_re_compile
from . import TranslatorCommentWarning, trim_whitespace
# Marker that identifies a template comment as a note for translators: a
# comment line is treated as translator-targeted when, after stripping leading
# whitespace, it starts with this string.
TRANSLATOR_COMMENT_MARK = "Translators"
# Matches one non-whitespace character; blankout() substitutes every match.
dot_re = _lazy_re_compile(r"\S")
def blankout(src, char):
    """
    Return ``src`` with each non-whitespace character replaced by ``char``.

    Whitespace, including newlines, is left untouched. templatize() uses this
    to mask template content that is not a translatable message.
    """
    masked = dot_re.sub(char, src)
    return masked
# The patterns below are matched against the contents of template tokens in
# templatize().

# Extracts the quoted literal (group 1, quotes included) that follows the
# ``context`` keyword in a context clause captured by inline_re or block_re.
context_re = _lazy_re_compile(r"""^\s+.*context\s+((?:"[^"]*?")|(?:'[^']*?'))\s*""")
# Matches an inline ``trans``/``translate`` tag. Group 1 is the quoted message
# literal (quotes included); group 2 is the whole optional context clause.
inline_re = _lazy_re_compile(
# Match the trans/translate 'some text' part.
r"""^\s*trans(?:late)?\s+((?:"[^"]*?")|(?:'[^']*?'))"""
# Match and ignore optional filters
r"""(?:\s*\|\s*[^\s:]+(?::(?:[^\s'":]+|(?:"[^"]*?")|(?:'[^']*?')))?)*"""
# Match the optional context part
r"""(\s+.*context\s+((?:"[^"]*?")|(?:'[^']*?')))?\s*"""
)
# Matches an opening ``blocktrans``/``blocktranslate`` tag. Group 1 is the
# whole optional context clause.
block_re = _lazy_re_compile(
r"""^\s*blocktrans(?:late)?(\s+.*context\s+((?:"[^"]*?")|(?:'[^']*?')))?(?:\s+|$)"""
)
# Matches the closing ``endblocktrans``/``endblocktranslate`` tag.
endblock_re = _lazy_re_compile(r"""^\s*endblocktrans(?:late)?$""")
# Matches the ``plural`` tag that separates singular and plural forms.
plural_re = _lazy_re_compile(r"""^\s*plural$""")
# Matches a ``_("...")`` or ``_('...')`` constant; group 1 is the quoted
# literal, quotes included.
constant_re = _lazy_re_compile(r"""_\(((?:".*?")|(?:'.*?'))\)""")
def templatize(src, origin=None):
    """
    Rewrite a Django template as text that xgettext can extract from.

    Inline ``trans``/``translate`` tags, ``blocktrans``/``blocktranslate``
    blocks and ``_("...")`` constants are turned into gettext-style function
    calls, and translator comments are turned into ``#`` comments. Other
    template content is blanked out with placeholder characters or dropped.

    ``origin``, when truthy, is only used to name the file in warning and
    error messages. Raise SyntaxError if a translation block contains a block
    tag other than ``plural`` or its closing tag.
    """
    out = StringIO("")
    emit = out.write
    # Adding the u prefix allows gettext to recognize the string (#26093).
    raw_prefix = "u"

    # State of the translation block currently being collected, if any.
    intrans = False
    inplural = False
    trimmed = False
    message_context = None
    singular = []
    plural = []
    # State of the {% comment %} block currently being collected, if any.
    incomment = False
    comment = []
    # Translator comments written as `{# ... #}`, grouped by line number, and
    # the line number of the most recent one still awaiting a decision.
    lineno_comment_map = {}
    comment_lineno_cache = None

    def join_tokens(tokens, trim=False):
        joined = "".join(tokens)
        return trim_whitespace(joined) if trim else joined

    def unquote(literal):
        # Remove the quote characters surrounding a matched string literal.
        delimiter = literal[0]
        if delimiter in ('"', "'"):
            return literal.strip(delimiter)
        return literal

    def file_prefix():
        # Fragment naming the template file in messages; empty without origin.
        return "file %s, " % origin if origin else ""

    for token in Lexer(src).tokenize():
        kind = token.token_type
        text = token.contents

        # 1. Inside {% comment %} ... {% endcomment %}.
        if incomment:
            if kind != TokenType.BLOCK or text != "endcomment":
                comment.append(text)
                continue
            lines = "".join(comment).splitlines(True)
            marked = [
                index
                for index, line in enumerate(lines)
                if line.lstrip().startswith(TRANSLATOR_COMMENT_MARK)
            ]
            # Only the text from the last marked line onwards is kept; every
            # other line is replaced by an empty comment line.
            start = marked[-1] if marked else None
            for index, line in enumerate(lines):
                if start is None or index < start:
                    emit(" #\n")
                else:
                    emit(" # %s" % line)
            incomment = False
            comment = []
            continue

        # 2. Inside a translation block.
        if intrans:
            if kind == TokenType.BLOCK:
                if endblock_re.match(text):
                    first = join_tokens(singular, trimmed)
                    if inplural:
                        second = join_tokens(plural, trimmed)
                        if message_context:
                            call = " npgettext({p}{!r}, {p}{!r}, {p}{!r},count) ".format(
                                message_context, first, second, p=raw_prefix
                            )
                        else:
                            call = " ngettext({p}{!r}, {p}{!r}, count) ".format(
                                first, second, p=raw_prefix
                            )
                    elif message_context:
                        call = " pgettext({p}{!r}, {p}{!r}) ".format(
                            message_context, first, p=raw_prefix
                        )
                    else:
                        call = " gettext({p}{!r}) ".format(first, p=raw_prefix)
                    emit(call)
                    # Follow the call with the masked block contents.
                    out.writelines(blankout(part, "S") for part in singular)
                    if inplural:
                        out.writelines(blankout(part, "P") for part in plural)
                    message_context = None
                    intrans = False
                    inplural = False
                    singular = []
                    plural = []
                elif plural_re.match(text):
                    inplural = True
                else:
                    raise SyntaxError(
                        "Translation blocks must not include other block tags: "
                        "%s (%sline %d)" % (text, file_prefix(), token.lineno)
                    )
            elif kind == TokenType.VAR:
                # Variables become named %-format placeholders.
                (plural if inplural else singular).append("%%(%s)s" % text)
            elif kind == TokenType.TEXT:
                # Literal percent signs are escaped for %-formatting.
                (plural if inplural else singular).append(text.replace("%", "%%"))
            continue

        # 3. Ordinary template content.
        # Handle comment tokens (`{# ... #}`) plus other constructs on
        # the same line:
        if comment_lineno_cache is not None:
            pending = lineno_comment_map[comment_lineno_cache]
            ends_on = token.lineno + text.count("\n")
            if ends_on != comment_lineno_cache:
                emit("# %s" % " | ".join(pending))
            elif kind != TokenType.COMMENT:
                for ignored in pending:
                    warn_msg = (
                        "The translator-targeted comment '%s' "
                        "(%sline %d) was ignored, because it wasn't "
                        "the last item on the line."
                    ) % (ignored, file_prefix(), comment_lineno_cache)
                    warnings.warn(warn_msg, TranslatorCommentWarning)
                lineno_comment_map[comment_lineno_cache] = []
            comment_lineno_cache = None

        if kind == TokenType.BLOCK:
            inline = inline_re.match(text)
            if inline:
                message = unquote(inline[1]).replace("%", "%%")
                if inline[2]:
                    # A context is provided
                    message_context = unquote(context_re.match(inline[2])[1])
                    emit(
                        " pgettext({p}{!r}, {p}{!r}) ".format(
                            message_context, message, p=raw_prefix
                        )
                    )
                    message_context = None
                else:
                    emit(" gettext({p}{!r}) ".format(message, p=raw_prefix))
                continue

            opening = block_re.match(text)
            if opening:
                for constant in constant_re.findall(text):
                    emit(" _(%s) " % constant)
                if opening[1]:
                    # A context is provided
                    message_context = unquote(context_re.match(opening[1])[1])
                intrans = True
                inplural = False
                trimmed = "trimmed" in token.split_contents()
                singular = []
                plural = []
                continue

            constants = constant_re.findall(text)
            if constants:
                for constant in constants:
                    emit(" _(%s) " % constant)
            elif text == "comment":
                incomment = True
            else:
                emit(blankout(text, "B"))
        elif kind == TokenType.VAR:
            head, *filters = text.split("|")
            constant = constant_re.match(head)
            if constant:
                emit(" _(%s) " % constant[1])
            for piece in filters:
                if ":_(" in piece:
                    # Keep the filter argument, which holds the constant.
                    emit(" %s " % piece.partition(":")[2])
                else:
                    emit(blankout(piece, "F"))
        elif kind == TokenType.COMMENT:
            if text.lstrip().startswith(TRANSLATOR_COMMENT_MARK):
                lineno_comment_map.setdefault(token.lineno, []).append(text)
                comment_lineno_cache = token.lineno
        else:
            emit(blankout(text, "X"))

    return out.getvalue()