__author__ = 'tsouza'

import datetime

import requests

HN_URL = "http://hn.algolia.com/api/v1/search_by_date"


def top_of_the_hour(dt):
    """
    Get the epoch time at the top of the current hour. Do this so
    that I'll run at :01 past the hour to be safe.
    """
    dt = datetime.datetime(dt.year, dt.month, dt.day, dt.hour, 0)
    epoch = datetime.datetime.utcfromtimestamp(0)
    delta = dt - epoch
    return int(delta.total_seconds())


def get_page(start_time, end_time, page=0):
    """
    Get one page of Hacker News stories created between the start
    and end times.
    """
    params = {
        'tags': 'story',
        'numericFilters': 'created_at_i>{0},created_at_i<{1}'.format(start_time, end_time),
        'page': page,
        'hitsPerPage': 50
    }
    body = requests.get(HN_URL, params=params).json()
    return body


def get_githubs(start_time, end_time):
    """
    Get all the GitHub links for the time range.
    """
    # Get the first page.
    page = get_page(start_time, end_time)
    hits = page['hits']

    # If there is more than one page, get the rest of them.
    if page['nbPages'] > 1:
        for page_number in range(1, page['nbPages']):
            page = get_page(start_time, end_time, page_number)
            hits.extend(page['hits'])

    # Strip out all non-GitHub links. Stories without an external URL
    # (e.g. Ask HN posts) have a null url, so guard against that before
    # the substring test.
    githubs = [hit for hit in hits if hit.get('url') and "github.com" in hit['url']]
    return githubs


def strip_fields(github):
    """
    Get rid of the unneeded fields.
    """
    return dict(
        source="hackernews",
        author=github['author'],
        created_at=github['created_at_i'],
        url=github['url'],
        title=github['title']
    )


def hacker_news(hours=1):
    # Use UTC here: top_of_the_hour subtracts the UTC epoch, so a naive
    # local-time datetime would skew the window by the UTC offset.
    end_time = top_of_the_hour(datetime.datetime.utcnow())
    start_time = end_time - ((60 * 60) * hours)
    return [strip_fields(github) for github in get_githubs(start_time, end_time)]
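

# A minimal usage sketch, not part of the original module: running this
# file directly fetches the GitHub stories from the past hour and prints
# them. Printing with pprint is an assumption; hours=1 is the module's
# own default.
if __name__ == '__main__':
    from pprint import pprint

    for story in hacker_news(hours=1):
        pprint(story)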