Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python
- # Usage: download_issues.py owner repo postgres_dsn github_token
- # simple script for downloading all issues from the given repository
- # requires you to set up an API token with no special permissions
- # also requires a PostgreSQL database with the following tables:
- """
- create table issues (
- issue_number int unique not null,
- id bigint unique not null,
- state text,
- title text,
- body text,
- reporting_login text,
- created_at timestamptz,
- updated_at timestamptz,
- assignee text,
- raw jsonb
- );
- create table comments (
- issue_number int references issues(issue_number),
- id bigint unique not null,
- commenting_login text,
- body text,
- updated_at timestamptz,
- raw jsonb
- );
- create index on comments(issue_number);
- """
- import json
- import requests
- import github3
- import psycopg2
- import sys
- if len(sys.argv) < 4:
- print "Usage:\n download_issues.py owner repo postgres_dsn github_token"
- sys.exit(1)
- repo_owner = sys.argv[1]
- repo = sys.argv[2]
- pg_dsn = sys.argv[3]
- api_token = sys.argv[4]
- # connect to database
- conn=psycopg2.connect(pg_dsn)
- cur=conn.cursor()
- # connect to github
- gh = github3.login(token=api_token)
- issue_count=1
- # loop through issues
- for issue_obj in gh.iter_repo_issues(repo_owner,repo,state='open'):
- # we're going to work with the raw json for most things
- # because the github3 lib leaves out several fields we need
- issue = issue_obj.to_json()
- # save to database
- if issue["assignee"]:
- asignee = issue["assignee"]["login"]
- try:
- cur.execute("""INSERT INTO issues
- VALUES ( %s, %s, %s, %s, %s, %s, %s, %s, %s, %s )""",
- (issue["number"], issue["id"], issue["state"],
- issue["title"], issue["body"], issue["user"]["login"],
- issue["created_at"], issue["updated_at"], asignee,
- json.dumps(issue),))
- except Exception as e:
- print 'inserting issue failed:'
- print json.dumps(issue, sort_keys=True, indent=2)
- print str(e)
- sys.exit(3)
- # get comments
- for comment_obj in issue_obj.iter_comments():
- # again, get JSON to get the fields the object doesn't have
- comment = comment_obj.to_json()
- # save comments to database
- try:
- cur.execute("""INSERT INTO comments
- VALUES (%s, %s, %s, %s, %s, %s)""",
- (issue["number"],comment["id"],comment["user"]["login"],
- comment["body"],comment["created_at"], json.dumps(comment),))
- except:
- print 'inserting comment failed:'
- print json.dumps(comment, sort_keys=True, indent=2)
- sys.exit(3)
- conn.commit()
- issue_count += 1
- if ( issue_count % 10 ) == 0:
- print "{0} issues downloaded".format(issue_count)
- conn.close()
- print "DONE: {0} issues downloaded".format(issue_count)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement