-
Notifications
You must be signed in to change notification settings - Fork 0
/
gen_dataframe.py
116 lines (95 loc) · 3.92 KB
/
gen_dataframe.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
"""Helper functions to generate desired dataframe."""
import pandas as pd
import sqlalchemy as sql
from sqlalchemy import func
from sqlalchemy.ext.declarative import declarative_base
Base = declarative_base()
class DonationData(Base):
"""Class for the donation data table."""
__tablename__ = 'donation_data'
Index = sql.Column(sql.Integer, primary_key=True)
Country_of_Giftor = sql.Column(sql.String)
Institution_name = sql.Column(sql.String)
ID = sql.Column(sql.Integer)
OPEID = sql.Column(sql.Integer)
City = sql.Column(sql.String)
State = sql.Column(sql.String)
Foreign_Gift_Received_Date = sql.Column(sql.String)
Foreign_Gift_Amount = sql.Column(sql.Integer)
Gift_Type = sql.Column(sql.String)
Giftor_Name = sql.Column(sql.String)
Country_to_merge_on = sql.Column(sql.String)
Rank = sql.Column(sql.Float)
Score = sql.Column(sql.Float)
Electoral_process_and_pluralism = sql.Column(sql.Float)
Functioning_of_government = sql.Column(sql.Float)
Political_participation = sql.Column(sql.Float)
Political_culture = sql.Column(sql.Float)
Civil_liberties = sql.Column(sql.Float)
Regime_type = sql.Column(sql.String)
Region = sql.Column(sql.String)
Country_Latitude = sql.Column(sql.Float)
Country_Longitude = sql.Column(sql.Float)
School_Latitude = sql.Column(sql.Float)
SChool_Longitude = sql.Column(sql.Float)
def generate_sql_connection(db_path):
"""Connect to database."""
sql_connection = sql.create_engine(db_path)
session_gen = sql.orm.sessionmaker(bind=sql_connection)
sql_session = session_gen()
return sql_connection, sql_session
def get_donations_df(db_path):
"""Generate df for all donations received grouped by schools."""
sql_connection, sql_session = generate_sql_connection(db_path)
overall_donations = (sql_session.query(
DonationData.Institution_name.label('School'),
func.sum(DonationData.Foreign_Gift_Amount).label('Amount')
)
.group_by(DonationData.Institution_name)
.order_by(sql.desc('Amount'))
)
df = pd.read_sql(overall_donations.statement, sql_connection)
sql_session.close()
sql_connection.dispose()
return df
def get_donationsTS_df(db_path, school):
"""Generate time series df of donations trends for the desired school."""
sql_connection, sql_session = generate_sql_connection(db_path)
donationTS = (sql_session.query(
DonationData.Institution_name.label('School'),
DonationData.Foreign_Gift_Received_Date.label('Date'),
DonationData.Foreign_Gift_Amount.label('Amount'),
DonationData.Country_of_Giftor.label('Donor Country'),
DonationData.Giftor_Name.label('Donor'),
).filter(DonationData.Institution_name == school)
.order_by('School')
)
df = pd.read_sql(
donationTS.statement,
sql_connection,
parse_dates=['Date']
).fillna('Unknown')
sql_session.close()
sql_connection.dispose()
return df
def get_donationsMAP_df(db_path, school):
"""Generate time series df of donations trends for the desired school."""
sql_connection, sql_session = generate_sql_connection(db_path)
donationTS = (sql_session.query(
DonationData.Institution_name.label('School'),
DonationData.Foreign_Gift_Received_Date.label('Date'),
DonationData.Foreign_Gift_Amount.label('Amount'),
DonationData.Country_of_Giftor.label('Donor Country'),
DonationData.Country_Latitude.label('Latitude'),
DonationData.Country_Longitude.label('Longitude'),
DonationData.Score.label('Score')
).filter(DonationData.Institution_name == school)
)
df = pd.read_sql(
donationTS.statement,
sql_connection,
parse_dates=['Date']
)
sql_session.close()
sql_connection.dispose()
return df