-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path07_corr_analysis.py
56 lines (46 loc) · 2.06 KB
/
07_corr_analysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
# -*- coding: utf-8 -*-
"""
-----------------------------------------------
# File: 07_corr_analysis.py
# This file is created by Chuanting Zhang
# Email: chuanting.zhang@kaust.edu.sa
# Date: 2021-03-21 (YYYY-MM-DD)
-----------------------------------------------
"""
import pandas as pd
import geopandas as gpd
import os
import numpy as np
import mobile_codes
import itertools
def main():
    """Assemble a per-country comparison of three indices and save it as CSV.

    Steps:
      1. Read ``data/MCI_Data_2020.xls`` (third sheet, first two rows
         skipped), keep the rows whose ``Year`` is 2019 and the columns
         ``Country``, ``ISO Code`` and ``Index`` (renamed ``GSMA_index``).
      2. Read ``data/Inclusive_index.xls``; it must contain exactly two
         columns, which are renamed ``Country`` and ``Inclusive_index``.
      3. Keep only the countries that have a value in both tables, matched
         on the ``Country`` name.
      4. For each of them, resolve the display name through
         ``mobile_codes.alpha3`` and look up the ``index`` column of
         ``data/division/final_index.csv`` by ``ISO Code``.
      5. Remove a fixed list of excluded countries and write the table to
         ``data/division/common_country_index.csv`` (no index column).

    Raises:
        IndexError: if the ISO code of a common country has no row in
            ``final_index.csv``.
    """
    folder = 'data/division/'
    gsma_file = 'data/MCI_Data_2020.xls'
    inclusive_file = 'data/Inclusive_index.xls'
    final_index_file = os.path.join(folder, 'final_index.csv')
    output_file = os.path.join(folder, 'common_country_index.csv')

    header = ['Country', 'GSMA_Index', 'Inclusive_Index', 'Inequality_Index']
    # Names are compared against the ones returned by mobile_codes.
    exclude_country = ['Latvia', 'Lithuania', 'Kuwait',
                       'Bahrain', 'Iran, Islamic Republic of', 'Myanmar']

    # sheet_name=2 selects the third sheet (zero-based position).
    gsma_data = pd.read_excel(gsma_file, skiprows=2, sheet_name=2)
    df_gsma = gsma_data.loc[gsma_data['Year'] == 2019,
                            ['Country', 'ISO Code', 'Index']]
    df_gsma = df_gsma.rename(columns={'Index': 'GSMA_index'})

    inclusive_data = pd.read_excel(inclusive_file)
    inclusive_data.columns = ['Country', 'Inclusive_index']

    # Countries present in both indices: a left merge followed by dropna
    # discards every row that lacks a value in any of the columns.
    common_country = pd.merge(left=inclusive_data, right=df_gsma,
                              on='Country', how='left').dropna()

    final_index = pd.read_csv(final_index_file)
    # One lookup table built up front instead of filtering the frame for
    # every country; the first row wins when an ISO code is repeated.
    first_rows = final_index.drop_duplicates(subset='ISO Code')
    our_index_by_iso = dict(zip(first_rows['ISO Code'], first_rows['index']))

    print(common_country.head(10))

    final_results = []
    # Walk the rows directly so each country reports its own values.
    rows = zip(common_country['ISO Code'],
               common_country['GSMA_index'],
               common_country['Inclusive_index'])
    for i, (name, gsma_index, inclusive_index) in enumerate(rows):
        whole_name = mobile_codes.alpha3(name).name
        print(i, name, whole_name)
        if name not in our_index_by_iso:
            # Same exception type the positional `.values[0]` lookup used
            # to raise, but with the offending country spelled out.
            raise IndexError(
                "ISO code {!r} ({}) has no row in {}".format(
                    name, whole_name, final_index_file))
        final_results.append(
            (whole_name, gsma_index, inclusive_index, our_index_by_iso[name]))

    df = pd.DataFrame(final_results, columns=header)
    df = df[~df['Country'].isin(exclude_country)]
    df.to_csv(output_file, index=False)
# Run the analysis only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()