-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathConverter.py
68 lines (53 loc) · 1.84 KB
/
Converter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import codecs
import pandas as pd
import numpy as np
class ConverterBase(object):
    """Base class that turns a raw comma-separated survey file into a cleaned DataFrame.

    Subclasses are expected to set two attributes in ``__init__``:

    * ``col`` -- names assigned, in order, to the leading columns of the file.
    * ``col_to_use`` -- the subset of ``col`` kept in the result. It must
      contain ``Label`` and ``Comment``, because ``getData`` reads both.
    """

    def __init__(self):
        # Left as None here; getData() cannot run until a subclass (or the
        # caller) assigns real column lists.
        self.col = None
        self.col_to_use = None

    def getData(self, filepath):
        """Read *filepath* and return its cleaned rows.

        Args:
            filepath: Path to a comma-separated file encoded in Shift-JIS.
                Bytes that cannot be decoded are silently dropped.

        Returns:
            A DataFrame holding the ``col_to_use`` columns. Rows whose label
            is one of the two section-heading strings, and rows with any
            missing value, are removed; the first character of every
            ``Comment`` value is stripped. The original row index is kept.
        """
        # Open the file to convert.
        with codecs.open(filepath, "r", "Shift-JIS", "ignore") as file:
            df = pd.read_table(file, delimiter=",")

        # Drop the first eight parsed rows and any surplus trailing columns,
        # then name the remaining columns.
        # NOTE: ``DataFrame.ix`` was removed in pandas 1.0; ``iloc`` performs
        # the same positional selection.
        df = df.iloc[8:, :len(self.col)]
        df.columns = self.col
        df = df[self.col_to_use]

        # Treat the placeholder cells '*' and '−' as missing values.
        df = df.replace(['*', '−'], np.nan)

        # Remove rows whose label is a section heading rather than a rating.
        is_heading = (df.Label == '景気の現状判断') | (df.Label == '景気の先行き判断')
        df = df[~is_heading]

        # Remove every row that still has a missing value.
        df = df.dropna()

        # Strip the leading bullet from each comment. ``assign`` returns a new
        # frame, so no write happens on a slice of the filtered data.
        return df.assign(Comment=list(self.getComment(df.Comment)))

    def getComment(self, comment):
        """Yield each item of *comment* with its first character removed.

        The character is dropped unconditionally. The intent, per the
        original comment, is to strip the leading '・' bullet, but no check
        is made that the text actually starts with one.
        """
        for c in comment:
            yield c[1:]
class ConverterGenjou(ConverterBase):
    """Converter configured with the six-column "Genjou" file layout.

    Only the column lists differ from the base class; all processing is
    inherited.
    """

    def __init__(self):
        super(ConverterGenjou, self).__init__()
        # Names for the leading columns of the input file, in file order.
        all_columns = ['Fields', 'TokyoFlag', 'Label', 'Job', 'Reason', 'Comment']
        self.col = all_columns
        # Keep everything except the first two columns (a separate list object).
        self.col_to_use = all_columns[2:]
class ConverterSakiyuki(ConverterBase):
    """Converter configured with the five-column "Sakiyuki" file layout.

    Only the column lists differ from the base class; all processing is
    inherited.
    """

    def __init__(self):
        super(ConverterSakiyuki, self).__init__()
        # Names for the leading columns of the input file, in file order.
        all_columns = ['Fields', 'TokyoFlag', 'Label', 'Job', 'Comment']
        self.col = all_columns
        # Keep everything except the first two columns (a separate list object).
        self.col_to_use = all_columns[2:]
if __name__ == '__main__':
    # Manual smoke test: convert one sample file with each converter and
    # print the result. The constructors take no arguments; the file path is
    # a parameter of getData().
    cg = ConverterGenjou()
    print(cg.getData('CSVFiles/201711watcher4.csv'))
    cs = ConverterSakiyuki()
    print(cs.getData('CSVFiles/201711watcher5.csv'))