__author__ = 'tsouza'

import datetime

import requests
from pprint import pprint

HN_URL = "http://hn.algolia.com/api/v1/search_by_date"


def top_of_the_hour(dt):
    """
    Get the epoch time at the top of the current hour.

    Do this so that I'll run at :01 past the hour to be safe.
    """
    dt = datetime.datetime(dt.year, dt.month, dt.day, dt.hour, 0)
    epoch = datetime.datetime.utcfromtimestamp(0)
    delta = dt - epoch
    return int(delta.total_seconds())


def get_page(start_time, end_time, page=0):
    """
    Get one page of Hacker News stories created between the start and end times.
    """
    params = {
        'tags': 'story',
        'numericFilters': 'created_at_i>{0},created_at_i<{1}'.format(start_time, end_time),
        'page': page,
        'hitsPerPage': 50
    }
    body = requests.get(HN_URL, params=params).json()
    return body


def get_githubs(end_time, start_time):
    """
    Get all the github links for the time range.
    """
    # get the first page
    page = get_page(start_time, end_time)
    hits = page['hits']

    # if more than one page, get the rest of them
    if page['nbPages'] > 1:
        for page_number in range(1, page['nbPages']):
            page = get_page(start_time, end_time, page_number)
            hits.extend(page['hits'])

    # Strip out all non github links (some stories, e.g. Ask HN, have no url).
    githubs = [hit for hit in hits if hit.get('url') and "github.com" in hit['url']]
    return githubs


def strip_fields(github):
    """
    Get rid of the unneeded fields.
    """
    return dict(
        source="hackernews",
        author=github['author'],
        created_at=github['created_at_i'],
        url=github['url'],
        title=github['title']
    )


def hacker_news(hours=1):
    # Use UTC so the epoch seconds line up with Algolia's created_at_i timestamps.
    end_time = top_of_the_hour(datetime.datetime.utcnow())
    start_time = end_time - ((60 * 60) * hours)
    return [strip_fields(github) for github in get_githubs(end_time, start_time)]
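

# A minimal usage sketch (an assumed entry point, not part of the documented API):
# collect the GitHub links posted in the previous hour and pretty-print them.
if __name__ == "__main__":
    pprint(hacker_news(hours=1))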