initial commit
This commit is contained in:
commit
276880d6fd
110
.gitignore
vendored
Normal file
110
.gitignore
vendored
Normal file
@ -0,0 +1,110 @@
|
|||||||
|
# Created by https://www.gitignore.io
|
||||||
|
|
||||||
|
### Python ###
|
||||||
|
# Byte-compiled / optimized / DLL files
|
||||||
|
__pycache__/
|
||||||
|
*.py[cod]
|
||||||
|
|
||||||
|
# C extensions
|
||||||
|
*.so
|
||||||
|
|
||||||
|
# Distribution / packaging
|
||||||
|
.Python
|
||||||
|
env/
|
||||||
|
build/
|
||||||
|
develop-eggs/
|
||||||
|
dist/
|
||||||
|
downloads/
|
||||||
|
eggs/
|
||||||
|
lib/
|
||||||
|
lib64/
|
||||||
|
parts/
|
||||||
|
sdist/
|
||||||
|
var/
|
||||||
|
*.egg-info/
|
||||||
|
.installed.cfg
|
||||||
|
*.egg
|
||||||
|
|
||||||
|
# PyInstaller
|
||||||
|
# Usually these files are written by a python script from a template
|
||||||
|
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||||
|
*.manifest
|
||||||
|
*.spec
|
||||||
|
|
||||||
|
# Installer logs
|
||||||
|
pip-log.txt
|
||||||
|
pip-delete-this-directory.txt
|
||||||
|
|
||||||
|
# Unit test / coverage reports
|
||||||
|
htmlcov/
|
||||||
|
.tox/
|
||||||
|
.coverage
|
||||||
|
.cache
|
||||||
|
nosetests.xml
|
||||||
|
coverage.xml
|
||||||
|
|
||||||
|
# Translations
|
||||||
|
*.mo
|
||||||
|
*.pot
|
||||||
|
|
||||||
|
# Django stuff:
|
||||||
|
*.log
|
||||||
|
|
||||||
|
# Sphinx documentation
|
||||||
|
docs/_build/
|
||||||
|
|
||||||
|
# PyBuilder
|
||||||
|
target/
|
||||||
|
|
||||||
|
# Created by https://www.gitignore.io
|
||||||
|
|
||||||
|
### PyCharm ###
|
||||||
|
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm
|
||||||
|
|
||||||
|
*.iml
|
||||||
|
|
||||||
|
## Directory-based project format:
|
||||||
|
.idea/
|
||||||
|
# if you remove the above rule, at least ignore the following:
|
||||||
|
|
||||||
|
# User-specific stuff:
|
||||||
|
# .idea/workspace.xml
|
||||||
|
# .idea/tasks.xml
|
||||||
|
# .idea/dictionaries
|
||||||
|
|
||||||
|
# Sensitive or high-churn files:
|
||||||
|
# .idea/dataSources.ids
|
||||||
|
# .idea/dataSources.xml
|
||||||
|
# .idea/sqlDataSources.xml
|
||||||
|
# .idea/dynamic.xml
|
||||||
|
# .idea/uiDesigner.xml
|
||||||
|
|
||||||
|
# Gradle:
|
||||||
|
# .idea/gradle.xml
|
||||||
|
# .idea/libraries
|
||||||
|
|
||||||
|
# Mongo Explorer plugin:
|
||||||
|
# .idea/mongoSettings.xml
|
||||||
|
|
||||||
|
## File-based project format:
|
||||||
|
*.ipr
|
||||||
|
*.iws
|
||||||
|
|
||||||
|
## Plugin-specific files:
|
||||||
|
|
||||||
|
# IntelliJ
|
||||||
|
out/
|
||||||
|
|
||||||
|
# mpeltonen/sbt-idea plugin
|
||||||
|
.idea_modules/
|
||||||
|
|
||||||
|
# JIRA plugin
|
||||||
|
atlassian-ide-plugin.xml
|
||||||
|
|
||||||
|
# Crashlytics plugin (for Android Studio and IntelliJ)
|
||||||
|
com_crashlytics_export_strings.xml
|
||||||
|
crashlytics.properties
|
||||||
|
crashlytics-build.properties
|
||||||
|
|
||||||
|
|
||||||
|
*.sqlite3
|
20
gather_latest.py
Normal file
20
gather_latest.py
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
from get_new_hn import hacker_news
|
||||||
|
from pymongo import MongoClient
|
||||||
|
import datetime
|
||||||
|
|
||||||
|
|
||||||
|
def get_new_hackernews():
    """
    Store Hacker News GitHub stories that are not yet in MongoDB.

    Fetches the latest stories via ``hacker_news()`` and inserts every one
    whose ``url`` is not already present in the ``githubs.hacker_news``
    collection, then prints a timestamped count of the inserted documents.
    """
    client = MongoClient()
    try:
        hn_db = client.githubs.hacker_news

        inserted = 0
        for hn in hacker_news():
            # The URL is the de-duplication key: skip stories already stored.
            if hn_db.find_one({'url': hn['url']}) is None:
                # insert_one replaces Collection.insert, which was deprecated
                # in PyMongo 3 and removed in PyMongo 4.
                hn_db.insert_one(hn)
                inserted += 1
    finally:
        # Release the connection pool even if the fetch or an insert fails.
        client.close()

    # A single-argument print(...) emits the same text on Python 2 and 3.
    print("{0} Inserted {1}".format(datetime.datetime.now(), inserted))
|
||||||
|
|
||||||
|
|
||||||
|
# Run the import only when executed as a script, not when imported.
if __name__ == "__main__":
    get_new_hackernews()
|
68
get_new_hn.py
Normal file
68
get_new_hn.py
Normal file
@ -0,0 +1,68 @@
|
|||||||
|
__author__ = 'tsouza'
|
||||||
|
import datetime
|
||||||
|
import requests
|
||||||
|
from pprint import pprint
|
||||||
|
|
||||||
|
# Algolia-hosted Hacker News search endpoint. Use HTTPS so responses cannot
# be read or altered in transit.
HN_URL = "https://hn.algolia.com/api/v1/search_by_date"
|
||||||
|
|
||||||
|
|
||||||
|
def top_of_the_hour(dt):
    """
    Return epoch seconds for ``dt`` truncated to the start of its hour.

    Minutes, seconds and microseconds are discarded, and the result is
    measured from 1970-01-01 00:00:00 with ``dt`` taken as a naive value.
    Truncating means a job started at :01 past the hour still gets a clean
    hour boundary.
    """
    hour_start = datetime.datetime(dt.year, dt.month, dt.day, dt.hour)
    unix_epoch = datetime.datetime(1970, 1, 1)
    return int((hour_start - unix_epoch).total_seconds())
|
||||||
|
|
||||||
|
|
||||||
|
def get_page(start_time, end_time, page=0, timeout=30):
    """
    Fetch one page of Hacker News stories created inside a time window.

    The window is half-open, ``start_time <= created_at_i < end_time``, so
    back-to-back windows neither skip nor repeat a story created exactly on
    a boundary.

    :param start_time: inclusive lower bound, in epoch seconds.
    :param end_time: exclusive upper bound, in epoch seconds.
    :param page: zero-based page number to request.
    :param timeout: seconds to wait for the server before giving up.
    :return: the decoded JSON response body.
    :raises requests.exceptions.RequestException: on timeout, connection
        failure or an HTTP error status.
    """
    params = {
        'tags': 'story',
        'numericFilters': 'created_at_i>={0},created_at_i<{1}'.format(start_time, end_time),
        'page': page,
        'hitsPerPage': 50,
    }
    # Without a timeout requests may block forever on a stalled connection.
    response = requests.get(HN_URL, params=params, timeout=timeout)
    # Fail here on 4xx/5xx rather than with a confusing KeyError downstream.
    response.raise_for_status()
    return response.json()
|
||||||
|
|
||||||
|
|
||||||
|
def get_githubs(end_time, start_time):
    """
    Collect every GitHub story posted within the time range.

    Note the argument order: ``end_time`` comes first.

    :param end_time: upper bound of the range, in epoch seconds.
    :param start_time: lower bound of the range, in epoch seconds.
    :return: list of raw hit dicts whose ``url`` contains "github.com".
    """
    first_page = get_page(start_time, end_time)
    hits = list(first_page['hits'])

    # Page 0 is already in hand; the range is empty when there is one page.
    for page_number in range(1, first_page['nbPages']):
        hits.extend(get_page(start_time, end_time, page_number)['hits'])

    # Text-only stories can come back with a null or missing url; treat
    # those as non-matches instead of raising TypeError/KeyError.
    return [hit for hit in hits if "github.com" in (hit.get('url') or "")]
|
||||||
|
|
||||||
|
|
||||||
|
def strip_fields(github):
    """
    Reduce a raw hit to the fields that are kept.

    Copies ``author``, ``url`` and ``title`` as-is, renames
    ``created_at_i`` to ``created_at`` and tags the record with a fixed
    ``source`` of "hackernews". A missing key raises ``KeyError``.
    """
    return {
        'source': "hackernews",
        'author': github['author'],
        'created_at': github['created_at_i'],
        'url': github['url'],
        'title': github['title'],
    }
|
||||||
|
|
||||||
|
|
||||||
|
def hacker_news(hours=1):
    """
    Return the GitHub stories posted in the ``hours`` before the top of the
    current hour.

    :param hours: length of the look-back window, in hours.
    :return: list of trimmed story dicts as produced by ``strip_fields``.
    """
    # top_of_the_hour() measures from the UTC epoch, so it must be given UTC
    # wall-clock time; local time would shift the window by the UTC offset.
    end_time = top_of_the_hour(datetime.datetime.utcnow())
    start_time = end_time - hours * 60 * 60
    return [strip_fields(github) for github in get_githubs(end_time, start_time)]
|
2
requirements.txt
Normal file
2
requirements.txt
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
requests
|
||||||
|
pymongo
|
Loading…
Reference in New Issue
Block a user