Skip to content

Commit

Permalink
scrape.js: Use GraphQL API
Browse files Browse the repository at this point in the history
Use GitHub GraphQL API instead of REST API

Closes #111
  • Loading branch information
li-boxuan committed Oct 29, 2018
1 parent 2ea74bc commit 8da2685
Show file tree
Hide file tree
Showing 4 changed files with 84 additions and 10 deletions.
9 changes: 9 additions & 0 deletions lib/queries/github_search_org.graphql
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Search GitHub for accounts matching $query and return the login of the
# top hit when that hit is an organization.
query($query: String!) {
# Only the first match of a USER-type search is requested.
search(type: USER, query: $query, first: 1) {
nodes {
# `login` is selected only for nodes that are Organizations; a node of
# any other type comes back with no fields.
...on Organization {
login
}
}
}
}
6 changes: 6 additions & 0 deletions lib/queries/github_user_info.graphql
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Fetch a single GitHub user by login.
# Only `login` and `updatedAt` are selected, so consumers of this query
# receive exactly those two fields.
query($user: String!) {
user(login: $user) {
login
updatedAt
}
}
2 changes: 2 additions & 0 deletions lib/queries/index.js
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
const { loadQuery } = require('../utils')

// One export per GraphQL query used by the scraper. Each value is whatever
// `loadQuery` returns for the given name.
// NOTE(review): presumably `loadQuery` reads the matching `<name>.graphql`
// file next to this module — confirm in ../utils.
module.exports.GITHUB_REPO_INFO_QUERY = loadQuery('github_repo_info')
module.exports.GITHUB_SEARCH_ORG_QUERY = loadQuery('github_search_org')
module.exports.GITHUB_USER_INFO_QUERY = loadQuery('github_user_info')
77 changes: 67 additions & 10 deletions lib/scrape.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,11 @@ const validUsername = require('valid-github-username')
const wdk = require('wikidata-sdk')
const cheerio = require('cheerio')

const { GITHUB_REPO_INFO_QUERY } = require('./queries')
const {
GITHUB_REPO_INFO_QUERY,
GITHUB_SEARCH_ORG_QUERY,
GITHUB_USER_INFO_QUERY,
} = require('./queries')
const { getLatestCommitMessage } = require('./utils')

const GH_BASE = 'https://github.com'
Expand Down Expand Up @@ -137,6 +141,7 @@ async function fetchRepositoryInfo(org) {
;({ data, errors } = await client.query(GITHUB_REPO_INFO_QUERY, { org }))
} catch (error) {
console.warn(`GitHub query for org ${org} fails, error: ${error}`)
return []
}

if (data && data.organization) {
Expand Down Expand Up @@ -209,12 +214,34 @@ async function checkGitHubUserExists(user) {
}

/**
 * Searches GitHub for accounts matching `query`, combining the REST search
 * endpoint with the GraphQL search query.
 *
 * REST matches come first (they are the ones that may carry a `score`),
 * followed by GraphQL matches that were not already returned by REST.
 *
 * @param {string} query - Raw search string, e.g. `"coala type:org"`. It is
 *   URL-encoded here, so callers must pass it unencoded.
 * @returns {Promise<Object[]>} Matches that have a `login`, de-duplicated by
 *   `login`; empty when neither API yields a usable match.
 */
async function searchGitHubOrgs(query) {
  const candidates = []

  // REST search. The query may contain spaces and qualifiers such as
  // "type:org", so it has to be encoded before going into the URL.
  const res = await fetch(
    `${GH_API_BASE}/search/users?q=${encodeURIComponent(query)}`,
    GH_API_OPTIONS
  )
  const body = await res.json()
  // Error bodies have no `items`; only merge a real list so that no
  // `undefined` entry ends up in the result.
  if (body && Array.isArray(body.items)) {
    candidates.push(...body.items)
  }

  // GraphQL search.
  let data, errors
  try {
    ;({ data, errors } = await client.query(GITHUB_SEARCH_ORG_QUERY, { query }))
  } catch (error) {
    console.warn(`GitHub query ${query} fails, error: ${error}`)
  }

  if (data && data.search && data.search.nodes) {
    candidates.push(...data.search.nodes)
  } else {
    const errorMessage =
      errors && errors.length ? errors[0].message : 'unknown error'
    console.warn(
      `Cannot query ${query} from GitHub, error message: ${errorMessage}`
    )
  }

  // Both APIs can return the same account as distinct objects, and GraphQL
  // returns an empty object for matches that are not organizations. Keep the
  // first entry per login and drop entries without one.
  const seenLogins = new Set()
  return candidates.filter(candidate => {
    if (!candidate || !candidate.login || seenLogins.has(candidate.login)) {
      return false
    }
    seenLogins.add(candidate.login)
    return true
  })
}

async function getGitHubUserHistory(user, from, to) {
Expand Down Expand Up @@ -262,12 +289,34 @@ function findMatches(input, pattern) {
}

/**
 * Looks up a GitHub user, trying the GraphQL API first and falling back to
 * the REST API when GraphQL yields no user.
 *
 * @param {string} user - GitHub login to look up.
 * @returns {Promise<Object|undefined>} The `user` object from the GraphQL
 *   response when present; otherwise the parsed REST response body, or
 *   `undefined` when that body carries a `message` (the REST error shape
 *   this function checks for).
 */
async function getGitHubUser(user) {
  let data, errors
  try {
    ;({ data, errors } = await client.query(GITHUB_USER_INFO_QUERY, { user }))
  } catch (error) {
    console.warn(`GitHub query for user ${user} fails, error: ${error}`)
  }

  if (data && data.user) {
    return data.user
  }

  const errorMessage =
    errors && errors.length ? errors[0].message : 'unknown error'
  console.warn(
    `Cannot fetch user ${user} via GitHub GraphQL,`,
    `error message: ${errorMessage}, resorting to GitHub REST API hit`
  )

  // Encode the login so an unexpected character cannot alter the request path.
  const res = await fetch(
    `${GH_API_BASE}/users/${encodeURIComponent(user)}`,
    GH_API_OPTIONS
  )
  const response = await res.json()
  if (response && response.message) {
    console.warn(
      `Cannot fetch user ${user} via GitHub REST API,`,
      `error message: ${response.message}`
    )
    return undefined
  }
  return response
}

async function findOrganization({
Expand Down Expand Up @@ -307,10 +356,14 @@ async function findOrganization({
)

const removePattern = /the|project|\([a-zA-Z]+\)/gi
const searchQuery = name.replace(removePattern, '').trim()
const searchQuery = name.replace(removePattern, '').trim() + ' type:org'
const searchResults = await searchGitHubOrgs(searchQuery)

if (searchResults.length > 0 && searchResults[0].score > MIN_SEARCH_SCORE) {
if (searchResults.length > 0) {
if (searchResults[0].score && searchResults[0].score <= MIN_SEARCH_SCORE) {
// GitHub REST API returns a list of matches with confidence score
return null
}
return searchResults[0].login
}

Expand Down Expand Up @@ -658,6 +711,10 @@ async function fetchDates() {
return res.json()
}

/**
 * Builds a new array containing each distinct element of `arr` once, in
 * first-seen order.
 *
 * Elements are compared the way `Set` compares them, so two separate objects
 * with equal contents are both kept.
 *
 * @param {Iterable} arr - Values to de-duplicate.
 * @returns {Array} A new array without repeated elements.
 */
function unique(arr) {
  const distinct = new Set(arr)
  return [...distinct]
}

;(async () => {
const { competition_open_starts } = await fetchProgram()
COMPETITION_OPEN = new Date(competition_open_starts)
Expand Down

0 comments on commit 8da2685

Please sign in to comment.