Utilities, assets and configurations used by I18n team
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

zanata_stats.py 16KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411
  1. #!/usr/bin/env python3
  2. # Licensed under the Apache License, Version 2.0 (the "License");
  3. # you may not use this file except in compliance with the License.
  4. # You may obtain a copy of the License at
  5. #
  6. # http://www.apache.org/licenses/LICENSE-2.0
  7. #
  8. # Unless required by applicable law or agreed to in writing, software
  9. # distributed under the License is distributed on an "AS IS" BASIS,
  10. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
  11. # implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. from __future__ import print_function
  15. import argparse
  16. import collections
  17. import csv
  18. import datetime
  19. import io
  20. import json
  21. import logging
  22. import random
  23. import re
  24. import sys
  25. import requests
  26. import six
  27. import yaml
# URL template of the Zanata REST API; '%s' is replaced by the resource
# path (for example 'projects').
ZANATA_URI = 'https://translate.openstack.org/rest/%s'
# Logger used by every function and class of this script.
LOG = logging.getLogger('zanata_stats')
# Version (iteration) IDs that are taken into account: 'master' optionally
# followed by dashes, commas or lowercase letters, 'stable-<name>', or the
# literal 'openstack-user-survey'.
# NOTE(review): the ',' inside '[-,a-z]' is matched literally -- confirm
# that it is intended.
ZANATA_VERSION_EXPR = r'^(master[-,a-z]*|stable-[a-z]+|openstack-user-survey)$'
ZANATA_VERSION_PATTERN = re.compile(ZANATA_VERSION_EXPR)
  32. class ZanataUtility(object):
  33. """Utilities to invoke Zanata REST API."""
  34. user_agents = [
  35. 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64) Gecko/20100101 Firefox/32.0',
  36. 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_6) AppleWebKit/537.78.2',
  37. 'Mozilla/5.0 (Windows NT 6.3; WOW64) Gecko/20100101 Firefox/32.0',
  38. 'Mozilla/5.0 (Macintosh; Intel Mac OS X) Chrome/37.0.2062.120',
  39. 'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko'
  40. ]
  41. def read_uri(self, uri, headers):
  42. try:
  43. headers['User-Agent'] = random.choice(ZanataUtility.user_agents)
  44. req = requests.get(uri, headers=headers)
  45. return req.text
  46. except Exception as e:
  47. LOG.error('Error "%(error)s" while reading uri %(uri)s',
  48. {'error': e, 'uri': uri})
  49. raise
  50. def read_json_from_uri(self, uri):
  51. data = self.read_uri(uri, {'Accept': 'application/json'})
  52. try:
  53. return json.loads(data)
  54. except Exception as e:
  55. LOG.error('Error "%(error)s" parsing json from uri %(uri)s',
  56. {'error': e, 'uri': uri})
  57. raise
  58. def get_projects(self):
  59. uri = ZANATA_URI % ('projects')
  60. LOG.debug("Reading projects from %s" % uri)
  61. projects_data = self.read_json_from_uri(uri)
  62. return [project['id'] for project in projects_data]
  63. @staticmethod
  64. def _is_valid_version(version):
  65. return bool(ZANATA_VERSION_PATTERN.match(version))
  66. def get_project_versions(self, project_id):
  67. uri = ZANATA_URI % ('projects/p/%s' % project_id)
  68. LOG.debug("Reading iterations for project %s" % project_id)
  69. project_data = self.read_json_from_uri(uri)
  70. if 'iterations' in project_data:
  71. return [interation_data['id']
  72. for interation_data in project_data['iterations']
  73. if self._is_valid_version(interation_data['id'])]
  74. else:
  75. return []
  76. def get_user_stats(self, project_id, iteration_id, zanata_user_id,
  77. start_date, end_date):
  78. uri = ZANATA_URI % ('stats/project/%s/version/%s/contributor/%s/%s..%s'
  79. % (project_id, iteration_id, zanata_user_id,
  80. start_date, end_date))
  81. return self.read_json_from_uri(uri)
  82. class LanguageTeam(object):
  83. def __init__(self, language_code, team_info):
  84. self.language_code = language_code
  85. self.language = team_info['language']
  86. # Zanata ID which only consists of numbers is a valid ID in Zanata.
  87. # Such entry is interpreted as integer unless it is quoted
  88. # in the YAML file. Ensure to stringify them.
  89. self.translators = [str(i) for i in team_info['translators']]
  90. self.reviewers = [str(i) for i in team_info.get('reviewers', [])]
  91. self.coordinators = [str(i) for i in team_info.get('coordinators', [])]
  92. @classmethod
  93. def load_from_language_team_yaml(cls, translation_team_uri, lang_list):
  94. LOG.debug('Process list of language team from uri: %s',
  95. translation_team_uri)
  96. content = yaml.safe_load(io.open(translation_team_uri, 'r'))
  97. if lang_list:
  98. lang_notfound = [lang_code for lang_code in lang_list
  99. if lang_code not in content]
  100. if lang_notfound:
  101. LOG.error('Language %s not tound in %s.',
  102. ', '.join(lang_notfound),
  103. translation_team_uri)
  104. sys.exit(1)
  105. return [cls(lang_code, team_info)
  106. for lang_code, team_info in content.items()
  107. if not lang_list or lang_code in lang_list]
  108. class User(object):
  109. trans_fields = ['total', 'translated', 'needReview',
  110. 'approved', 'rejected']
  111. review_fields = ['total', 'approved', 'rejected']
  112. def __init__(self, user_id, language_code):
  113. self.user_id = user_id
  114. self.lang = language_code
  115. self.stats = collections.defaultdict(dict)
  116. def __str__(self):
  117. return ("<%s: user_id=%s, lang=%s, stats=%s" %
  118. (self.__class__.__name__,
  119. self.user_id, self.lang, self.stats,))
  120. def __repr__(self):
  121. return repr(self.convert_to_serializable_data())
  122. def __lt__(self, other):
  123. if self.lang != other.lang:
  124. return self.lang < other.lang
  125. else:
  126. return self.user_id < other.user_id
  127. def read_from_zanata_stats(self, zanata_stats, project_id, version):
  128. # data format (Zanata 3.9.6)
  129. # {
  130. # "username": "amotoki",
  131. # "contributions": [
  132. # {
  133. # "locale": "ja",
  134. # "translation-stats": {
  135. # "translated": 7360,
  136. # "needReview": 0,
  137. # "approved": 152,
  138. # "rejected": 0
  139. # },
  140. # "review-stats": {
  141. # "approved": 220,
  142. # "rejected": 0
  143. # }
  144. # }
  145. # ]
  146. # }
  147. stats = [d for d in zanata_stats['contributions']
  148. if d['locale'] == self.lang]
  149. if not stats:
  150. return
  151. stats = stats[0]
  152. trans_stats = stats.get('translation-stats', {})
  153. if trans_stats:
  154. trans_stats['total'] = sum(trans_stats.values())
  155. review_stats = stats.get('review-stats', {})
  156. if review_stats:
  157. review_stats['total'] = sum(review_stats.values())
  158. self.stats[project_id][version] = {'translation-stats': trans_stats,
  159. 'review-stats': review_stats}
  160. def populate_total_stats(self):
  161. total_trans = dict([(k, 0) for k in self.trans_fields])
  162. total_review = dict([(k, 0) for k in self.review_fields])
  163. for project_id, versions in self.stats.items():
  164. for version, stats in versions.items():
  165. trans_stats = stats.get('translation-stats', {})
  166. for k in self.trans_fields:
  167. total_trans[k] += trans_stats.get(k, 0)
  168. review_stats = stats.get('review-stats', {})
  169. for k in self.review_fields:
  170. total_review[k] += review_stats.get(k, 0)
  171. self.stats['__total__']['translation-stats'] = total_trans
  172. self.stats['__total__']['review-stats'] = total_review
  173. def needs_output(self, include_no_activities):
  174. if include_no_activities:
  175. return True
  176. return bool(self.stats) and all(self.stats.values())
  177. @staticmethod
  178. def get_flattened_data_title():
  179. return [
  180. 'user_id',
  181. 'lang',
  182. 'project',
  183. 'version',
  184. 'translation-total',
  185. 'translated',
  186. 'needReview',
  187. 'approved',
  188. 'rejected',
  189. 'review-total',
  190. 'review-approved',
  191. 'review-rejected'
  192. ]
  193. def convert_to_flattened_data(self, detail=False):
  194. self.populate_total_stats()
  195. data = []
  196. for project_id, versions in self.stats.items():
  197. if project_id == '__total__':
  198. continue
  199. for version, stats in versions.items():
  200. trans_stats = stats.get('translation-stats', {})
  201. review_stats = stats.get('review-stats', {})
  202. if detail:
  203. data.append(
  204. [self.user_id, self.lang, project_id, version] +
  205. [trans_stats.get(k, 0) for k in self.trans_fields] +
  206. [review_stats.get(k, 0) for k in self.review_fields])
  207. data.append([self.user_id, self.lang, '-', '-'] +
  208. [self.stats['__total__']['translation-stats'][k]
  209. for k in self.trans_fields] +
  210. [self.stats['__total__']['review-stats'][k]
  211. for k in self.review_fields])
  212. return data
  213. def convert_to_serializable_data(self, detail):
  214. self.populate_total_stats()
  215. return {'user_id': self.user_id,
  216. 'lang': self.lang,
  217. 'stats': (self.stats if detail
  218. else self.stats['__total__'])}
  219. def get_zanata_stats(start_date, end_date, language_teams, project_list,
  220. version_list, user_list):
  221. LOG.info('Getting Zanata contributors statistics (from %s to %s) ...',
  222. start_date, end_date)
  223. zanataUtil = ZanataUtility()
  224. users = []
  225. for team in language_teams:
  226. users += [User(user_id, team.language_code)
  227. for user_id in team.translators]
  228. if not project_list:
  229. project_list = zanataUtil.get_projects()
  230. for project_id in project_list:
  231. for version in zanataUtil.get_project_versions(project_id):
  232. if version_list and version not in version_list:
  233. continue
  234. for user in users:
  235. if user_list and user.user_id not in user_list:
  236. continue
  237. LOG.info('Getting %(project_id)s %(version)s '
  238. 'for user %(user_id)s %(user_lang)s',
  239. {'project_id': project_id,
  240. 'version': version,
  241. 'user_id': user.user_id,
  242. 'user_lang': user.lang})
  243. data = zanataUtil.get_user_stats(
  244. project_id, version, user.user_id, start_date, end_date)
  245. LOG.debug('Got: %s', data)
  246. user.read_from_zanata_stats(data, project_id, version)
  247. LOG.debug('=> %s', user)
  248. return users
  249. def write_stats_to_file(users, output_file, file_format,
  250. include_no_activities, detail):
  251. users = sorted([user for user in users
  252. if user.needs_output(include_no_activities)])
  253. if file_format == 'csv':
  254. _write_stats_to_csvfile(users, output_file, detail)
  255. else:
  256. _write_stats_to_jsonfile(users, output_file, detail)
  257. LOG.info('Stats has been written to %s', output_file)
  258. def _write_stats_to_csvfile(users, output_file, detail):
  259. mode = 'w' if six.PY3 else 'wb'
  260. with open(output_file, mode) as csvfile:
  261. writer = csv.writer(csvfile)
  262. writer.writerow(User.get_flattened_data_title())
  263. for user in users:
  264. writer.writerows(user.convert_to_flattened_data(detail))
  265. def _write_stats_to_jsonfile(users, output_file, detail):
  266. users = [user.convert_to_serializable_data(detail)
  267. for user in users]
  268. with open(output_file, 'w') as f:
  269. f.write(json.dumps(users, indent=4, sort_keys=True))
  270. def _comma_separated_list(s):
  271. return s.split(',')
  272. def main():
  273. default_end_date = datetime.datetime.now()
  274. default_start_date = default_end_date - datetime.timedelta(days=180)
  275. default_start_date = default_start_date.strftime('%Y-%m-%d')
  276. default_end_date = default_end_date.strftime('%Y-%m-%d')
  277. parser = argparse.ArgumentParser()
  278. parser.add_argument("-s", "--start-date",
  279. default=default_start_date,
  280. help=("Specify the start date. "
  281. "Default:%s" % default_start_date))
  282. parser.add_argument("-e", "--end-date",
  283. default=default_end_date,
  284. help=("Specify the end date. "
  285. "Default:%s" % default_end_date))
  286. parser.add_argument("-o", "--output-file",
  287. help=("Specify the output file. "
  288. "Default: zanata_stats_output.{csv,json}."))
  289. parser.add_argument("-p", "--project",
  290. type=_comma_separated_list,
  291. help=("Specify project(s). Comma-separated list. "
  292. "Otherwise all Zanata projects are processed."))
  293. parser.add_argument("-l", "--lang",
  294. type=_comma_separated_list,
  295. help=("Specify language(s). Comma-separated list. "
  296. "Language code like zh-CN, ja needs to be used. "
  297. "Otherwise all languages are processed."))
  298. parser.add_argument("-t", "--target-version",
  299. type=_comma_separated_list,
  300. help=("Specify version(s). Comma-separated list. "
  301. "Otherwise all available versions are "
  302. "processed."))
  303. parser.add_argument("-u", "--user",
  304. type=_comma_separated_list,
  305. help=("Specify user(s). Comma-separated list. "
  306. "Otherwise all users are processed."))
  307. parser.add_argument('--detail',
  308. action='store_true',
  309. help=("If specified, statistics per project "
  310. "and version are output in addition to "
  311. "total statistics."))
  312. parser.add_argument("--include-no-activities",
  313. action='store_true',
  314. help=("If specified, stats for users with no "
  315. "activities are output as well."
  316. "By default, stats only for users with "
  317. "any activities are output."))
  318. parser.add_argument("-f", "--format",
  319. default='csv', choices=['csv', 'json'],
  320. help="Output file format.")
  321. parser.add_argument("--debug",
  322. action='store_true',
  323. help="Enable debug message.")
  324. parser.add_argument("user_yaml",
  325. help="YAML file of the user list")
  326. options = parser.parse_args()
  327. logging_level = logging.DEBUG if options.debug else logging.INFO
  328. formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
  329. handler = logging.StreamHandler()
  330. handler.setLevel(logging_level)
  331. handler.setFormatter(formatter)
  332. LOG.setLevel(logging_level)
  333. LOG.addHandler(handler)
  334. language_teams = LanguageTeam.load_from_language_team_yaml(
  335. options.user_yaml, options.lang)
  336. versions = [v.replace('/', '-') for v in options.target_version or []]
  337. users = get_zanata_stats(options.start_date, options.end_date,
  338. language_teams, options.project,
  339. versions, options.user)
  340. output_file = (options.output_file or
  341. 'zanata_stats_output.%s' % options.format)
  342. write_stats_to_file(users, output_file, options.format,
  343. options.include_no_activities,
  344. options.detail)
# Run main() only when the file is executed as a script, not on import.
if __name__ == '__main__':
    main()