Skip to content

Commit

Permalink
scrape.js: Use GraphQL API
Browse files Browse the repository at this point in the history
Use GitHub GraphQL API instead of REST API

Closes #111
  • Loading branch information
li-boxuan committed Oct 29, 2018
1 parent 2ea74bc commit 92c25b3
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 23 deletions.
9 changes: 9 additions & 0 deletions lib/queries/github_search_org.graphql
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Runs a GitHub search of type USER with the given search string and asks for
# at most one result (first:1).
# `login` is selected only for result nodes that are Organizations.
query($query: String!) {
search(type:USER, query:$query, first:1) {
nodes {
...on Organization {
login
}
}
}
}
6 changes: 6 additions & 0 deletions lib/queries/github_user_info.graphql
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Looks up a single GitHub user by login ($user) and selects that user's
# `login` and `updatedAt` fields.
query ($user: String!) {
user(login: $user) {
login
updatedAt
}
}
2 changes: 2 additions & 0 deletions lib/queries/index.js
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
const { loadQuery } = require('../utils')

// Publish every GraphQL query document under its constant name. Each one is
// obtained by passing the query's base name to loadQuery.
Object.assign(module.exports, {
  GITHUB_REPO_INFO_QUERY: loadQuery('github_repo_info'),
  GITHUB_SEARCH_ORG_QUERY: loadQuery('github_search_org'),
  GITHUB_USER_INFO_QUERY: loadQuery('github_user_info'),
})
41 changes: 18 additions & 23 deletions lib/scrape.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,19 @@ const validUsername = require('valid-github-username')
const wdk = require('wikidata-sdk')
const cheerio = require('cheerio')

const { GITHUB_REPO_INFO_QUERY } = require('./queries')
const {
GITHUB_REPO_INFO_QUERY,
GITHUB_SEARCH_ORG_QUERY,
GITHUB_USER_INFO_QUERY,
} = require('./queries')
const { getLatestCommitMessage } = require('./utils')

// Root URL of the GitHub website; the user and org bases below are built on it
const GH_BASE = 'https://github.com'
// `${GH_BASE}/users` path prefix
const GH_USER_BASE = `${GH_BASE}/users`
// `${GH_BASE}/orgs` path prefix
const GH_ORG_BASE = `${GH_BASE}/orgs`
// Base URL for requests to api.github.com
const GH_API_BASE = 'https://api.github.com'
// GitHub GraphQL endpoint
const GH_GQL_BASE = 'https://api.github.com/graphql'
// Base URL of the API served from codein.withgoogle.com
const GCI_API_BASE = 'https://codein.withgoogle.com/api'

// Threshold that a search result's `score` is compared against
const MIN_SEARCH_SCORE = 10

// The time to cache GitHub usernames for in milliseconds
// (2 days: 2 * 24 h * 60 min * 60 s * 1000 ms)
const GITHUB_CACHE_TIME = 2 * 24 * 60 * 60 * 1000

Expand Down Expand Up @@ -53,12 +54,6 @@ const CHAT_IMAGES = {
OTHER: 'images/chat.png',
}

// Request options for GitHub API calls. The headers start out empty and gain
// an `Authorization: token …` entry only when GITHUB_TOKEN is set in the
// environment.
const GH_API_OPTIONS = { headers: {} }
if (process.env.GITHUB_TOKEN) {
  GH_API_OPTIONS.headers = {
    Authorization: `token ${process.env.GITHUB_TOKEN}`,
  }
}

const GH_GQL_OPTIONS = {
url: GH_GQL_BASE,
headers: process.env.GITHUB_TOKEN
Expand Down Expand Up @@ -209,12 +204,12 @@ async function checkGitHubUserExists(user) {
}

/**
 * Searches GitHub for organizations through the GraphQL API.
 *
 * Sends GITHUB_SEARCH_ORG_QUERY with the search string as its `query`
 * variable and returns the result nodes that carry a `login`.
 *
 * @param {string} query - Search string forwarded to the GraphQL search.
 * @returns {Promise<Array<{login: string}>>} Result nodes with a `login`;
 *   an empty array when the response holds no usable search data.
 */
async function searchGitHubOrgs(query) {
  const res = await client.query(GITHUB_SEARCH_ORG_QUERY, { query })

  // `data` or `data.search` can be missing or null (for example when the
  // response only reports errors), so walk down the chain defensively.
  const nodes = res && res.data && res.data.search && res.data.search.nodes
  if (!Array.isArray(nodes)) {
    return []
  }

  // The query selects `login` only on Organization nodes; drop any node that
  // came back without one so callers can rely on `results[0].login`.
  return nodes.filter(node => Boolean(node && node.login))
}

async function getGitHubUserHistory(user, from, to) {
Expand Down Expand Up @@ -262,12 +257,12 @@ function findMatches(input, pattern) {
}

/**
 * Looks up a GitHub user by login through the GraphQL API.
 *
 * Sends GITHUB_USER_INFO_QUERY with the login as its `user` variable.
 *
 * @param {string} user - Login of the user to look up.
 * @returns {Promise<Object|undefined>} The `user` object from the response,
 *   or undefined when the response contains no user.
 */
async function getGitHubUser(user) {
  const res = await client.query(GITHUB_USER_INFO_QUERY, { user })

  // `data` itself may be missing or null (for example when the response only
  // reports errors), so check it before reading `.user`.
  if (res && res.data && res.data.user) {
    return res.data.user
  }
  return undefined
}

async function findOrganization({
Expand Down Expand Up @@ -307,10 +302,10 @@ async function findOrganization({
)

const removePattern = /the|project|\([a-zA-Z]+\)/gi
const searchQuery = name.replace(removePattern, '').trim()
const searchQuery = name.replace(removePattern, '').trim() + ' type:org'
const searchResults = await searchGitHubOrgs(searchQuery)

if (searchResults.length > 0 && searchResults[0].score > MIN_SEARCH_SCORE) {
if (searchResults.length > 0) {
return searchResults[0].login
}

Expand Down

0 comments on commit 92c25b3

Please sign in to comment.