Skip to content
New issue

Have a question about this project? # for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “#”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? # to your account

created rest api folder #15

Merged
merged 11 commits into from
Apr 19, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Logs
logs
logfile
*.log
npm-debug.log*

Expand Down
Empty file added CREATE
Empty file.
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@
$ cd ht-archive
$ npm install
```
### Database

[Use Docker or the command line](https://github.com/anidata/ht-archive/wiki) if you already have a PostgreSQL server running.

### Running

Expand Down
Binary file added rest_api/.DS_Store
Binary file not shown.
45 changes: 45 additions & 0 deletions rest_api/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
from restful import db


class Backpagecontent(db.Model):
    """Model exposing the id, backpagepostid, title, body and textsearch columns.

    Column names are taken from the attribute names.
    """

    id = db.Column(db.Integer, primary_key=True)
    backpagepostid = db.Column(db.Integer, unique=True)
    title = db.Column(db.String(120))
    body = db.Column(db.Unicode)
    textsearch = db.Column(db.String(80))


class Backpageemail(db.Model):
    """Model pairing a backpagepostid with an e-mail address."""

    backpagepostid = db.Column(db.Integer, primary_key=True)
    # The attribute is called ``email`` but it maps to the column named 'name'.
    email = db.Column('name', db.String(30))


class Backpageentities(db.Model):
    """Model linking an entity id to a backpagepostid.

    The primary-key column is ``entity_id``. It was previously mapped with the
    misspelling ``enitity_id``, which does not match the ``entity_id`` column
    name used by the raw SQL queries elsewhere in the project.
    """

    entity_id = db.Column('entity_id', db.Integer, primary_key=True)
    backpagepostid = db.Column('backpagepostid', db.Integer)

    # Backward-compatible alias so code that still uses the old misspelled
    # attribute name keeps working; it reads and writes ``entity_id``.
    enitity_id = db.synonym('entity_id')


class Backpagephone(db.Model):
    """Model pairing a backpagepostid with a phone number string."""

    # NOTE(review): backpagepostid is the sole primary key here; confirm the
    # table really holds at most one number per post.
    backpagepostid = db.Column(db.Integer, primary_key=True)
    number = db.Column(db.String(20))


class Backpagepost(db.Model):
    """Model exposing pageid, oid, posterage, postdate and backpagesiteid.

    Column names are taken from the attribute names.
    """

    id = db.Column(db.Integer, primary_key=True)
    pageid = db.Column(db.Integer, unique=True)
    oid = db.Column(db.Integer)
    posterage = db.Column(db.SmallInteger)
    # Stored as a timezone-aware timestamp.
    postdate = db.Column(db.DateTime(timezone=True))
    backpagesiteid = db.Column(db.Integer)


class Backpagesite(db.Model):
    """Model with an id and a name (served as the city name by the API)."""

    id = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.String(120))


class Crawler(db.Model):
    """Model with an id, a name and a version string."""

    id = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.String(32))
    version = db.Column(db.VARCHAR)

Binary file added rest_api/models.pyc
Binary file not shown.
104 changes: 104 additions & 0 deletions rest_api/restful.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
from flask import Flask, jsonify, request, render_template
from flask_sqlalchemy import SQLAlchemy
from healthcheck import HealthCheck, EnvironmentDump
from models import *


# Application object and its database settings.
app = Flask(__name__)
app.config.update(
    SQLALCHEMY_DATABASE_URI="postgresql://localhost:5432/crawler",
    SQLALCHEMY_TRACK_MODIFICATIONS=False,
)

# SQLAlchemy handle bound to the app; models.py imports this name.
db = SQLAlchemy(app)


# Wrap the Flask app and expose a healthcheck URL at /healthcheck.
health = HealthCheck(app, "/healthcheck")

def health_database_status():
    """Probe the database and report whether it can be queried.

    Returns:
        tuple: ``(is_database_working, output)`` where ``is_database_working``
        is a bool and ``output`` is either ``'database is ok'`` or the text of
        the exception raised by the probe query.
    """
    is_database_working = True
    output = 'database is ok'

    try:
        # Fetch at most one row: enough to prove the connection works and the
        # table exists, without reading the whole table on every check.
        db.session.execute(db.text('SELECT 1 FROM backpageemail LIMIT 1'))
    except Exception as e:
        # Deliberately broad: any failure means "unhealthy" and the message
        # is reported back rather than raised.
        output = str(e)
        is_database_working = False

    # Parenthesised form works as a statement on Python 2 and a call on 3.
    print(output)
    return is_database_working, output


# Register the probe so it runs on every request to the healthcheck URL
# instead of running once at import time with its result discarded.
health.add_check(health_database_status)


@app.route('/', methods=['GET'])
def test():
    """Return a fixed JSON message confirming the service responds."""
    return jsonify(message='It works!')


@app.route('/api/backpage/content/<int:backpage_content_id>', methods=['GET'])
def get_content(backpage_content_id):
    """Return id, postId and title for content rows with the given id.

    The JSON response has the shape ``{"data": [{...}, ...]}``; the list is
    empty when no row matches.
    """
    matches = Backpagecontent.query.filter_by(id=backpage_content_id).all()

    payload = []
    for row in matches:
        payload.append({
            'id': row.id,
            'postId': row.backpagepostid,
            'title': row.title,
        })

    return jsonify({'data': payload})


@app.route('/api/backpage/cities/', methods=['GET'])
def get_all_cities():
    """Return every Backpagesite row as ``{"id": ..., "city": ...}``.

    The JSON response has the shape ``{"data": [{...}, ...]}``.
    """
    cities = Backpagesite.query.all()

    return jsonify({'data': [
        dict(id=c.id, city=c.name)
        for c in cities
    ]})


@app.route('/api/backpage/phone/', methods=['GET'])
def get_all_numbers():
    """Return every Backpagephone row as backpagepostid/number pairs.

    The JSON response has the shape ``{"data": [{...}, ...]}``.
    """
    rows = Backpagephone.query.all()

    payload = []
    for row in rows:
        payload.append({
            'backpagepostid': row.backpagepostid,
            'number': row.number,
        })

    return jsonify({'data': payload})


@app.route('/api/backpage/phone/<int:backpagepost_id>', methods=['GET'])
def get_number(backpagepost_id):
    """Return the phone numbers stored for the given post id.

    The JSON response has the shape ``{"numbers": [...]}``.
    """
    query = Backpagephone.query.filter_by(backpagepostid=backpagepost_id)
    found = [row.number for row in query.all()]

    return jsonify({'numbers': found})


# NOTE(review): this shares its URL prefix with the <int:backpagepost_id>
# route; an all-digit number looks like it would be captured by that route
# instead of this one -- confirm against the routing rules.
@app.route('/api/backpage/phone/<string:number>', methods=['GET'])
def getid_from_number(number):
    """Return the post ids whose stored phone number equals ``number``.

    The JSON response has the shape ``{"backpagepost_ids": [...]}``.
    """
    query = Backpagephone.query.filter_by(number=number)
    post_ids = [row.backpagepostid for row in query.all()]

    return jsonify({'backpagepost_ids': post_ids})


@app.route('/api/backpage/email/<int:backpagepost_id>', methods=['GET'])
def get_email(backpagepost_id):
    """Return the e-mail addresses stored for the given post id.

    The JSON response has the shape ``{"Emails": [...]}``.
    """
    query = Backpageemail.query.filter_by(backpagepostid=backpagepost_id)
    addresses = [row.email for row in query.all()]

    return jsonify({'Emails': addresses})


@app.route('/api/backpage/email/<string:email>', methods=['GET'])
def getid_from_mail(email):
    """Return the post ids whose stored e-mail address equals ``email``.

    The JSON response has the shape ``{"backpagepost_ids": [...]}``.
    """
    query = Backpageemail.query.filter_by(email=email)
    post_ids = [row.backpagepostid for row in query.all()]

    return jsonify({'backpagepost_ids': post_ids})





if __name__ == "__main__":
    # Start Flask's built-in server on port 8000 with debug mode switched on.
    app.run(port=8000, debug=True)
Binary file added rest_api/restful.pyc
Binary file not shown.
Binary file added rest_api/templates/.DS_Store
Binary file not shown.
8 changes: 8 additions & 0 deletions rest_api/templates/index.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
<!DOCTYPE html>
<!-- Placeholder page for the database API; the body is intentionally empty. -->
<html lang="en">
  <head>
    <meta charset="utf-8">
    <title>Database API</title>
  </head>
  <body>

  </body>
</html>
51 changes: 41 additions & 10 deletions routes/queries.js
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ function QueryHandler(client) {
"emails": []
};


result.rows.map(function(row) {
if(row["name"] && data["emails"].indexOf(row["name"]) == -1) {
data["emails"].push(row["name"]);
Expand All @@ -51,8 +52,16 @@ function QueryHandler(client) {
});
});
}

var detail_q = "SELECT backpageentities.entity_id::integer AS id, ";
detail_q += "ARRAY_TO_STRING(ARRAY_AGG(DISTINCT backpagephone.number), ', ') phone, ";
detail_q += "ARRAY_TO_STRING(ARRAY_AGG(DISTINCT backpageemail.name), ', ') email, ";
detail_q += "COUNT(DISTINCT backpageentities.backpagepostid) postCount ";
detail_q += "FROM backpageentities ";
detail_q += "LEFT JOIN backpageemail ON backpageentities.backpagepostid::integer = backpageemail.backpagepostid::integer ";
detail_q += "LEFT JOIN backpagephone ON backpageentities.backpagepostid::integer = backpagephone.backpagepostid::integer ";

var search_q = "SELECT backpageentities.entity_id::integer AS id, ARRAY_TO_STRING(ARRAY_AGG(DISTINCT backpagephone.number), ', ') as phone, ARRAY_TO_STRING(ARRAY_AGG(DISTINCT backpageemail.name), ', ') as email, COUNT(DISTINCT backpageentities.backpagepostid) postCount FROM backpageentities";
var search_q = "SELECT DISTINCT backpageentities.entity_id::integer AS id FROM backpageentities"
search_q += " LEFT JOIN backpageemail ON backpageentities.backpagepostid::integer = backpageemail.backpagepostid";
search_q += " LEFT JOIN backpagephone ON backpageentities.backpagepostid::integer = backpagephone.backpagepostid";
search_q += " WHERE ";
Expand All @@ -61,6 +70,7 @@ function QueryHandler(client) {
var emails = [];
var phones = [];
var q = search_q;

if(req.query["email"] && req.query["email"].constructor === Array) {
emails = req.query["emais"];
} else if(req.query["email"]) {;
Expand All @@ -71,26 +81,47 @@ function QueryHandler(client) {
} else if(req.query["phone"]) {
phones.push(req.query["phone"]);
}
console.log(phones);
console.log("match: " + (phones[0] == '678-680-9278'));

q += 'backpageemail.name = ANY(\'{"' + emails.join('","') + '"}\'::text[]) OR backpagephone.number = ANY(\'{"' + phones.join('","') + '"}\'::text[])'
q += " GROUP BY backpageentities.entity_id;"

client.query(q, function(err, result) {
console.log(q);
if(err) {
console.error("error running query", err);
return res.status(500).json({
"message": err
});
}

var data = { "entities": [] };
console.log(result.rows);
result.rows.map(function(row) {
data["entities"].push(row);
var data = {entities: []};

var ids = result.rows.map(function(row) {
return row.id;
});

return res.status(200).json(data);
if (ids.length !== 0) {
detail_q += "WHERE backpageentities.entity_id = ";
detail_q += 'ANY(\'{"' + ids.join('","') + '"}\') ';
detail_q += "GROUP BY backpageentities.entity_id;"

client.query(detail_q, function(err, details) {

if(err) {
console.error("error running query", err);
return res.status(500).json({
"message": err
});
}

data['entities'] = details.rows;

return res.status(200).json(data);

});
}
else {
// Return an empty list
return res.status(200).json(data);
};
});
}
}
Expand Down