Advertisement
darkmist

export_repo_issues_to_csv.py

Jul 20th, 2017
133
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.17 KB | None | 0 0
  1. """
  2. This is strongly based on https://gist.github.com/unbracketed/3380407;
  3. thanks to @unbracketed and the various commenters on the page.
  4.  
  5. I've mainly cleaned up the code into basic methods, and included the
  6. various suggestions in the comments. Hope this is useful to someone.
  7.  
  8. Make sure you have `requests` installed via pip (`csv` ships with Python), then run it:
  9. `python export_gh_issues_to_csv.py`
  10.  
  11. ---
  12.  
  13. Exports Issues from a specified repository to a CSV file
  14. Uses basic authentication (Github username + password) or token to retrieve Issues
  15. from a repository that username has access to. Supports Github API v3.
  16. """
  17. import csv
  18. import requests
  19.  
  20. GITHUB_USER = ''
  21. GITHUB_PASSWORD = ''
  22. GITHUB_TOKEN = ''
  23. REPO = ''  # format is username/repo
  24. ISSUES_FOR_REPO_URL = 'https://api.github.com/repos/%s/issues' % REPO
  25.  
  26. # Update your filter here.  See https://developer.github.com/v3/issues/#list-issues-for-a-repository
  27. # Note that filtering is powerful and there are lots of things available. Also that issues and PRs
  28. # arrive in the same results set
  29. params_payload = {'filter' : 'all', 'state' : 'open', 'type': 'issue' }
  30.  
  31. def write_issues(response, csvout):
  32.     "output a list of issues to csv"
  33.     print "  : Writing %s issues" % len(response.json())
  34.     for issue in response.json():
  35.         labels = issue['labels']
  36.         label_string = ''
  37.         for label in labels:
  38.             label_string = "%s, %s" % (label_string, label['name'])
  39.         label_string = label_string[2:]
  40.  
  41.         csvout.writerow([issue['number'], issue['title'].encode('utf-8'), issue['body'].encode('utf-8'), label_string.encode('utf-8'), issue['created_at'], issue['updated_at']])
  42.  
  43.  
  44. def get_issues(url):
  45.     kwargs = {
  46.         'headers': {
  47.             'Content-Type': 'application/vnd.github.v3.raw+json',
  48.             'User-Agent': 'GitHub issue exporter'
  49.         },
  50.         'params': params_payload
  51.     }
  52.     if GITHUB_TOKEN != '':
  53.         kwargs['headers']['Authorization'] = 'token %s' % GITHUB_TOKEN
  54.     else:
  55.         kwargs['auth'] = (GITHUB_USER, GITHUB_PASSWORD)
  56.  
  57.     print "GET %s" % url
  58.     resp = requests.get(url, **kwargs)
  59.     print "  : => %s" % resp.status_code
  60.  
  61.     # import ipdb; ipdb.set_trace()
  62.     if resp.status_code != 200:
  63.         raise Exception(resp.status_code)
  64.  
  65.     return resp
  66.  
  67.  
  68. def next_page(response):
  69.     #more pages? examine the 'link' header returned
  70.     if 'link' in response.headers:
  71.         pages = dict(
  72.             [(rel[6:-1], url[url.index('<')+1:-1]) for url, rel in
  73.                 [link.split(';') for link in
  74.                     response.headers['link'].split(',')]])
  75.         # import ipdb; ipdb.set_trace()
  76.         if 'last' in pages and 'next' in pages:
  77.             return pages['next']
  78.  
  79.     return None
  80.  
  81.  
  82. def process(csvout, url=ISSUES_FOR_REPO_URL):
  83.     resp = get_issues(url)
  84.     write_issues(resp, csvout)
  85.     next_ = next_page(resp)
  86.     if next_ is not None:
  87.         process(csvout, next_)
  88.  
  89.  
  90. def main():
  91.     csvfile = '%s-issues.csv' % (REPO.replace('/', '-'))
  92.     csvout = csv.writer(open(csvfile, 'wb'))
  93.     csvout.writerow(('id', 'Title', 'Body', 'Labels', 'Created At', 'Updated At'))
  94.     process(csvout)
  95.     csvfile.close()
  96.  
  97.  
  98.  
  99. main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement