import fnmatch
import glob
import os
import re
import sys
from functools import total_ordering
from itertools import dropwhile
import django
from django.conf import settings
from django.core.exceptions import ImproperlyConfigured
from django.core.files.temp import NamedTemporaryFile
from django.core.management.base import BaseCommand, CommandError
from django.core.management.utils import (
find_command, handle_extensions, popen_wrapper,
)
from django.utils.encoding import DEFAULT_LOCALE_ENCODING
from django.utils.functional import cached_property
from django.utils.jslex import prepare_js_for_gettext
from django.utils.text import get_text_list
from django.utils.translation import templatize
plural_forms_re = re.compile(r'^(?P<value>"Plural-Forms.+?\\n")\s*$', re.MULTILINE | re.DOTALL)
STATUS_OK = 0
NO_LOCALE_DIR = object()
def check_programs(*programs):
for program in programs:
if find_command(program) is None:
raise CommandError(
"Can't find %s. Make sure you have GNU gettext tools 0.15 or "
"newer installed." % program
)
@total_ordering
class TranslatableFile:
def __init__(self, dirpath, file_name, locale_dir):
self.file = file_name
self.dirpath = dirpath
self.locale_dir = locale_dir
def __repr__(self):
return "<%s: %s>" % (
self.__class__.__name__,
os.sep.join([self.dirpath, self.file]),
)
def __eq__(self, other):
return self.path == other.path
def __lt__(self, other):
return self.path < other.path
@property
def path(self):
return os.path.join(self.dirpath, self.file)
class BuildFile:
"""
Represent the state of a translatable file during the build process.
"""
def __init__(self, command, domain, translatable):
self.command = command
self.domain = domain
self.translatable = translatable
@cached_property
def is_templatized(self):
if self.domain == 'djangojs':
return self.command.gettext_version < (0, 18, 3)
elif self.domain == 'django':
file_ext = os.path.splitext(self.translatable.file)[1]
return file_ext != '.py'
return False
@cached_property
def path(self):
return self.translatable.path
@cached_property
def work_path(self):
"""
Path to a file which is being fed into GNU gettext pipeline. This may
be either a translatable or its preprocessed version.
"""
if not self.is_templatized:
return self.path
extension = {
'djangojs': 'c',
'django': 'py',
}.get(self.domain)
filename = '%s.%s' % (self.translatable.file, extension)
return os.path.join(self.translatable.dirpath, filename)
def preprocess(self):
"""
Preprocess (if necessary) a translatable file before passing it to
xgettext GNU gettext utility.
"""
if not self.is_templatized:
return
encoding = settings.FILE_CHARSET if self.command.settings_available else 'utf-8'
with open(self.path, 'r', encoding=encoding) as fp:
src_data = fp.read()
if self.domain == 'djangojs':
content = prepare_js_for_gettext(src_data)
elif self.domain == 'django':
content = templatize(src_data, origin=self.path[2:])
with open(self.work_path, 'w', encoding='utf-8') as fp:
fp.write(content)
def postprocess_messages(self, msgs):
"""
Postprocess messages generated by xgettext GNU gettext utility.
Transform paths as if these messages were generated from original
translatable files rather than from preprocessed versions.
"""
if not self.is_templatized:
return msgs
# Remove '.py' suffix
if os.name == 'nt':
# Preserve '.\' prefix on Windows to respect gettext behavior
old_path = self.work_path
new_path = self.path
else:
old_path = self.work_path[2:]
new_path = self.path[2:]
return re.sub(
r'^(#: .*)(' + re.escape(old_path) + r')',
lambda match: match.group().replace(old_path, new_path),
msgs,
flags=re.MULTILINE
)
def cleanup(self):
"""
Remove a preprocessed copy of a translatable file (if any).
"""
if self.is_templatized:
# This check is needed for the case of a symlinked file and its
# source being processed inside a single group (locale dir);
# removing either of those two removes both.
if os.path.exists(self.work_path):
os.unlink(self.work_path)
def normalize_eols(raw_contents):
"""
Take a block of raw text that will be passed through str.splitlines() to
get universal newlines treatment.
Return the resulting block of text with normalized `\n` EOL sequences ready
to be written to disk using current platform's native EOLs.
"""
lines_list = raw_contents.splitlines()
# Ensure last line has its EOL
if lines_list and lines_list[-1]:
lines_list.append('')
return '\n'.join(lines_list)
def write_pot_file(potfile, msgs):
"""
Write the `potfile` with the `msgs` contents, making sure its format is
valid.
"""
pot_lines = msgs.splitlines()
if os.path.exists(potfile):
# Strip the header
lines = dropwhile(len, pot_lines)
else:
lines = []
found, header_read = False, False
for line in pot_lines:
if not found and not header_read:
if 'charset=CHARSET' in line:
found = True
line = line.replace('charset=CHARSET', 'charset=UTF-8')
if not line and not found:
header_read = True
lines.append(line)
msgs = '\n'.join(lines)
# Force newlines of POT files to '\n' to work around
# https://savannah.gnu.org/bugs/index.php?52395
with open(potfile, 'a', encoding='utf-8', newline='\n') as fp:
fp.write(msgs)
class Command(BaseCommand):
help = (
"Runs over the entire source tree of the current directory and "
"pulls out all strings marked for translation. It creates (or updates) a message "
"file in the conf/locale (in the django tree) or locale (for projects and "
"applications) directory.\n\nYou must run this command with one of either the "
"--locale, --exclude, or --all options."
)
translatable_file_class = TranslatableFile
build_file_class = BuildFile
requires_system_checks = False
msgmerge_options = ['-q', '--previous']
msguniq_options = ['--to-code=utf-8']
msgattrib_options = ['--no-obsolete']
xgettext_options = ['--from-code=UTF-8', '--add-comments=Translators']
def add_arguments(self, parser):
parser.add_argument(
'--locale', '-l', default=[], action='append',
help='Creates or updates the message files for the given locale(s) (e.g. pt_BR). '
'Can be used multiple times.',
)
parser.add_argument(
'--exclude', '-x', default=[], action='append',
help='Locales to exclude. Default is none. Can be used multiple times.',
)
parser.add_argument(
'--domain', '-d', default='django',
help='The domain of the message files (default: "django").',
)
parser.add_argument(
'--all', '-a', action='store_true',
help='Updates the message files for all existing locales.',
)
parser.add_argument(
'--extension', '-e', dest='extensions', action='append',
help='The file extension(s) to examine (default: "html,txt,py", or "js" '
'if the domain is "djangojs"). Separate multiple extensions with '
'commas, or use -e multiple times.',
)
parser.add_argument(
'--symlinks', '-s', action='store_true',
help='Follows symlinks to directories when examining source code '
'and templates for translation strings.',
)
parser.add_argument(
'--ignore', '-i', action='append', dest='ignore_patterns',
default=[], metavar='PATTERN',
help='Ignore files or directories matching this glob-style pattern. '
'Use multiple times to ignore more.',
)
parser.add_argument(
'--no-default-ignore', action='store_false', dest='use_default_ignore_patterns',
help="Don't ignore the common glob-style patterns 'CVS', '.*', '*~' and '*.pyc'.",
)
parser.add_argument(
'--no-wrap', action='store_true',
help="Don't break long message lines into several lines.",
)
parser.add_argument(
'--no-location', action='store_true',
help="Don't write '#: filename:line' lines.",
)
parser.add_argument(
'--add-location',
choices=('full', 'file', 'never'), const='full', nargs='?',
help=(
"Controls '#: filename:line' lines. If the option is 'full' "
"(the default if not given), the lines include both file name "
"and line number. If it's 'file', the line number is omitted. If "
"it's 'never', the lines are suppressed (same as --no-location). "
"--add-location requires gettext 0.19 or newer."
),
)
parser.add_argument(
'--no-obsolete', action='store_true',
help="Remove obsolete message strings.",
)
parser.add_argument(
'--keep-pot', action='store_true',
help="Keep .pot file after making messages. Useful when debugging.",
)
def handle(self, *args, **options):
locale = options['locale']
exclude = options['exclude']
self.domain = options['domain']
self.verbosity = options['verbosity']
process_all = options['all']
extensions = options['extensions']
self.symlinks = options['symlinks']
ignore_patterns = options['ignore_patterns']
if options['use_default_ignore_patterns']:
ignore_patterns += ['CVS', '.*', '*~', '*.pyc']
self.ignore_patterns = list(set(ignore_patterns))
# Avoid messing with mutable class variables
if options['no_wrap']:
self.msgmerge_options = self.msgmerge_options[:] + ['--no-wrap']
self.msguniq_options = self.msguniq_options[:] + ['--no-wrap']
self.msgattrib_options = self.msgattrib_options[:] + ['--no-wrap']
self.xgettext_options = self.xgettext_options[:] + ['--no-wrap']
if options['no_location']:
self.msgmerge_options = self.msgmerge_options[:] + ['--no-location']
self.msguniq_options = self.msguniq_options[:] + ['--no-location']
self.msgattrib_options = self.msgattrib_options[:] + ['--no-location']
self.xgettext_options = self.xgettext_options[:] + ['--no-location']
if options['add_location']:
if self.gettext_version < (0, 19):
raise CommandError(
"The --add-location option requires gettext 0.19 or later. "
"You have %s." % '.'.join(str(x) for x in self.gettext_version)
)
arg_add_location = "--add-location=%s" % options['add_location']
self.msgmerge_options = self.msgmerge_options[:] + [arg_add_location]
self.msguniq_options = self.msguniq_options[:] + [arg_add_location]
self.msgattrib_options = self.msgattrib_options[:] + [arg_add_location]
self.xgettext_options = self.xgettext_options[:] + [arg_add_location]
self.no_obsolete = options['no_obsolete']
self.keep_pot = options['keep_pot']
if self.domain not in ('django', 'djangojs'):
raise CommandError("currently makemessages only supports domains "
"'django' and 'djangojs'")
if self.domain == 'djangojs':
exts = extensions or ['js']
else:
exts = extensions or ['html', 'txt', 'py']
self.extensions = handle_extensions(exts)
if (locale is None and not exclude and not process_all) or self.domain is None:
raise CommandError(
"Type '%s help %s' for usage information."
% (os.path.basename(sys.argv[0]), sys.argv[1])
)
if self.verbosity > 1:
self.stdout.write(
'examining files with the extensions: %s\n'
% get_text_list(list(self.extensions), 'and')
)
self.invoked_for_django = False
self.locale_paths = []
self.default_locale_path = None
if os.path.isdir(os.path.join('conf', 'locale')):
self.locale_paths = [os.path.abspath(os.path.join('conf', 'locale'))]
self.default_locale_path = self.locale_paths[0]
self.invoked_for_django = True
else:
if self.settings_available:
self.locale_paths.extend(settings.LOCALE_PATHS)
# Allow to run makemessages inside an app dir
if os.path.isdir('locale'):
self.locale_paths.append(os.path.abspath('locale'))
if self.locale_paths:
self.default_locale_path = self.locale_paths[0]
if not os.path.exists(self.default_locale_path):
os.makedirs(self.default_locale_path)
# Build locale list
looks_like_locale = re.compile(r'[a-z]{2}')
locale_dirs = filter(os.path.isdir, glob.glob('%s/*' % self.default_locale_path))
all_locales = [
lang_code for lang_code in map(os.path.basename, locale_dirs)
if looks_like_locale.match(lang_code)
]
# Account for excluded locales
if process_all:
locales = all_locales
else:
locales = locale or all_locales
locales = set(locales).difference(exclude)
if locales:
check_programs('msguniq', 'msgmerge', 'msgattrib')
check_programs('xgettext')
try:
potfiles = self.build_potfiles()
# Build po files for each selected locale
for locale in locales:
if self.verbosity > 0:
self.stdout.write("processing locale %s\n" % locale)
for potfile in potfiles:
self.write_po_file(potfile, locale)
finally:
if not self.keep_pot:
self.remove_potfiles()
@cached_property
def gettext_version(self):
# Gettext tools will output system-encoded bytestrings instead of UTF-8,
# when looking up the version. It's especially a problem on Windows.
out, err, status = popen_wrapper(
['xgettext', '--version'],
stdout_encoding=DEFAULT_LOCALE_ENCODING,
)
m = re.search(r'(\d+)\.(\d+)\.?(\d+)?', out)
if m:
return tuple(int(d) for d in m.groups() if d is not None)
else:
raise CommandError("Unable to get gettext version. Is it installed?")
@cached_property
def settings_available(self):
try:
settings.LOCALE_PATHS
except ImproperlyConfigured:
if self.verbosity > 1:
self.stderr.write("Running without configured settings.")
return False
return True
def build_potfiles(self):
"""
Build pot files and apply msguniq to them.
"""
file_list = self.find_files(".")
self.remove_potfiles()
self.process_files(file_list)
potfiles = []
for path in self.locale_paths:
potfile = os.path.join(path, '%s.pot' % self.domain)
if not os.path.exists(potfile):
continue
args = ['msguniq'] + self.msguniq_options + [potfile]
msgs, errors, status = popen_wrapper(args)
if errors:
if status != STATUS_OK:
raise CommandError(
"errors happened while running msguniq\n%s" % errors)
elif self.verbosity > 0:
self.stdout.write(errors)
msgs = normalize_eols(msgs)
with open(potfile, 'w', encoding='utf-8') as fp:
fp.write(msgs)
potfiles.append(potfile)
return potfiles
def remove_potfiles(self):
for path in self.locale_paths:
pot_path = os.path.join(path, '%s.pot' % self.domain)
if os.path.exists(pot_path):
os.unlink(pot_path)
def find_files(self, root):
"""
Get all files in the given root. Also check that there is a matching
locale dir for each file.
"""
def is_ignored(path, ignore_patterns):
"""
Check if the given path should be ignored or not.
"""
filename = os.path.basename(path)
def ignore(pattern):
return fnmatch.fnmatchcase(filename, pattern) or fnmatch.fnmatchcase(path, pattern)
return any(ignore(pattern) for pattern in ignore_patterns)
ignore_patterns = [os.path.normcase(p) for p in self.ignore_patterns]
dir_suffixes = {'%s*' % path_sep for path_sep in {'/', os.sep}}
norm_patterns = []
for p in ignore_patterns:
for dir_suffix in dir_suffixes:
if p.endswith(dir_suffix):
norm_patterns.append(p[:-len(dir_suffix)])
break
else:
norm_patterns.append(p)
all_files = []
ignored_roots = []
if self.settings_available:
ignored_roots = [os.path.normpath(p) for p in (settings.MEDIA_ROOT, settings.STATIC_ROOT) if p]
for dirpath, dirnames, filenames in os.walk(root, topdown=True, followlinks=self.symlinks):
for dirname in dirnames[:]:
if (is_ignored(os.path.normpath(os.path.join(dirpath, dirname)), norm_patterns) or
os.path.join(os.path.abspath(dirpath), dirname) in ignored_roots):
dirnames.remove(dirname)
if self.verbosity > 1:
self.stdout.write('ignoring directory %s\n' % dirname)
elif dirname == 'locale':
dirnames.remove(dirname)
self.locale_paths.insert(0, os.path.join(os.path.abspath(dirpath), dirname))
for filename in filenames:
file_path = os.path.normpath(os.path.join(dirpath, filename))
file_ext = os.path.splitext(filename)[1]
if file_ext not in self.extensions or is_ignored(file_path, self.ignore_patterns):
if self.verbosity > 1:
self.stdout.write('ignoring file %s in %s\n' % (filename, dirpath))
else:
locale_dir = None
for path in self.locale_paths:
if os.path.abspath(dirpath).startswith(os.path.dirname(path)):
locale_dir = path
break
locale_dir = locale_dir or self.default_locale_path or NO_LOCALE_DIR
all_files.append(self.translatable_file_class(dirpath, filename, locale_dir))
return sorted(all_files)
def process_files(self, file_list):
"""
Group translatable files by locale directory and run pot file build
process for each group.
"""
file_groups = {}
for translatable in file_list:
file_group = file_groups.setdefault(translatable.locale_dir, [])
file_group.append(translatable)
for locale_dir, files in file_groups.items():
self.process_locale_dir(locale_dir, files)
def process_locale_dir(self, locale_dir, files):
"""
Extract translatable literals from the specified files, creating or
updating the POT file for a given locale directory.
Use the xgettext GNU gettext utility.
"""
build_files = []
for translatable in files:
if self.verbosity > 1:
self.stdout.write('processing file %s in %s\n' % (
translatable.file, translatable.dirpath
))
if self.domain not in ('djangojs', 'django'):
continue
build_file = self.build_file_class(self, self.domain, translatable)
try:
build_file.preprocess()
except UnicodeDecodeError as e:
self.stdout.write(
'UnicodeDecodeError: skipped file %s in %s (reason: %s)' % (
translatable.file, translatable.dirpath, e,
)
)
continue
build_files.append(build_file)
if self.domain == 'djangojs':
is_templatized = build_file.is_templatized
args = [
'xgettext',
'-d', self.domain,
'--language=%s' % ('C' if is_templatized else 'JavaScript',),
'--keyword=gettext_noop',
'--keyword=gettext_lazy',
'--keyword=ngettext_lazy:1,2',
'--keyword=pgettext:1c,2',
'--keyword=npgettext:1c,2,3',
'--output=-',
]
elif self.domain == 'django':
args = [
'xgettext',
'-d', self.domain,
'--language=Python',
'--keyword=gettext_noop',
'--keyword=gettext_lazy',
'--keyword=ngettext_lazy:1,2',
'--keyword=ugettext_noop',
'--keyword=ugettext_lazy',
'--keyword=ungettext_lazy:1,2',
'--keyword=pgettext:1c,2',
'--keyword=npgettext:1c,2,3',
'--keyword=pgettext_lazy:1c,2',
'--keyword=npgettext_lazy:1c,2,3',
'--output=-',
]
else:
return
input_files = [bf.work_path for bf in build_files]
with NamedTemporaryFile(mode='w+') as input_files_list:
input_files_list.write(('\n'.join(input_files)))
input_files_list.flush()
args.extend(['--files-from', input_files_list.name])
args.extend(self.xgettext_options)
msgs, errors, status = popen_wrapper(args)
if errors:
if status != STATUS_OK:
for build_file in build_files:
build_file.cleanup()
raise CommandError(
'errors happened while running xgettext on %s\n%s' %
('\n'.join(input_files), errors)
)
elif self.verbosity > 0:
# Print warnings
self.stdout.write(errors)
if msgs:
if locale_dir is NO_LOCALE_DIR:
file_path = os.path.normpath(build_files[0].path)
raise CommandError(
'Unable to find a locale path to store translations for '
'file %s' % file_path
)
for build_file in build_files:
msgs = build_file.postprocess_messages(msgs)
potfile = os.path.join(locale_dir, '%s.pot' % self.domain)
write_pot_file(potfile, msgs)
for build_file in build_files:
build_file.cleanup()
def write_po_file(self, potfile, locale):
"""
Create or update the PO file for self.domain and `locale`.
Use contents of the existing `potfile`.
Use msgmerge and msgattrib GNU gettext utilities.
"""
basedir = os.path.join(os.path.dirname(potfile), locale, 'LC_MESSAGES')
if not os.path.isdir(basedir):
os.makedirs(basedir)
pofile = os.path.join(basedir, '%s.po' % self.domain)
if os.path.exists(pofile):
args = ['msgmerge'] + self.msgmerge_options + [pofile, potfile]
msgs, errors, status = popen_wrapper(args)
if errors:
if status != STATUS_OK:
raise CommandError(
"errors happened while running msgmerge\n%s" % errors)
elif self.verbosity > 0:
self.stdout.write(errors)
else:
with open(potfile, 'r', encoding='utf-8') as fp:
msgs = fp.read()
if not self.invoked_for_django:
msgs = self.copy_plural_forms(msgs, locale)
msgs = normalize_eols(msgs)
msgs = msgs.replace(
"#. #-#-#-#-# %s.pot (PACKAGE VERSION) #-#-#-#-#\n" % self.domain, "")
with open(pofile, 'w', encoding='utf-8') as fp:
fp.write(msgs)
if self.no_obsolete:
args = ['msgattrib'] + self.msgattrib_options + ['-o', pofile, pofile]
msgs, errors, status = popen_wrapper(args)
if errors:
if status != STATUS_OK:
raise CommandError(
"errors happened while running msgattrib\n%s" % errors)
elif self.verbosity > 0:
self.stdout.write(errors)
def copy_plural_forms(self, msgs, locale):
"""
Copy plural forms header contents from a Django catalog of locale to
the msgs string, inserting it at the right place. msgs should be the
contents of a newly created .po file.
"""
django_dir = os.path.normpath(os.path.join(os.path.dirname(django.__file__)))
if self.domain == 'djangojs':
domains = ('djangojs', 'django')
else:
domains = ('django',)
for domain in domains:
django_po = os.path.join(django_dir, 'conf', 'locale', locale, 'LC_MESSAGES', '%s.po' % domain)
if os.path.exists(django_po):
with open(django_po, 'r', encoding='utf-8') as fp:
m = plural_forms_re.search(fp.read())
if m:
plural_form_line = m.group('value')
if self.verbosity > 1:
self.stdout.write("copying plural forms: %s\n" % plural_form_line)
lines = []
found = False
for line in msgs.splitlines():
if not found and (not line or plural_forms_re.search(line)):
line = plural_form_line
found = True
lines.append(line)
msgs = '\n'.join(lines)
break
return msgs