-
Notifications
You must be signed in to change notification settings - Fork 0
/
chatbot.py
63 lines (55 loc) · 1.96 KB
/
chatbot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
#IMPORTING and CREATING SQLite DATABASE
#
import sqlite3
import json
from datetime import datetime #this logs the time
print("***IMPORTING AND CREATING SQLITE DATABASE***")

# Rows are meant to be collected into one large SQL transaction so that
# multiple statements can be executed together instead of one at a time.
# `timeframe` names the month of data being processed; it is used below for
# the database filename and later for the input file path.
timeframe = '2015-05'
sql_transaction = list()
print("CREATING SINGLE SQL CODE STRUCTURE")

# Connect to the database.
# Download the dataset first; to keep the database next to the data, use:
# connection = sqlite3.connect('PATH_NAME_OF_DATA/{}.db'.format(timeframe))
# As written, the database file "<timeframe>.db" is opened relative to the
# current working directory.
connection = sqlite3.connect('{}.db'.format(timeframe))
# Module-level cursor shared by the helper functions defined below.
c = connection.cursor()
def create_table():
    """Create the ``parent_reply`` table if it does not already exist.

    The statement is executed through the module-level cursor ``c``.
    Columns: ``parent_id`` (TEXT, primary key), ``comment_id`` (TEXT,
    unique), ``parent``, ``comment``, ``subreddit`` (TEXT), ``unix`` and
    ``score`` (INT).
    """
    # Adjacent literals concatenate into a single one-line DDL statement.
    ddl = (
        "CREATE TABLE IF NOT EXISTS parent_reply("
        "parent_id TEXT PRIMARY KEY, "
        "comment_id TEXT UNIQUE, "
        "parent TEXT, "
        "comment TEXT, "
        "subreddit TEXT, "
        "unix INT, "
        "score INT)"
    )
    c.execute(ddl)
def format_data(data):
    """Normalize a comment body so it fits on one line with single quotes only.

    Each line feed and each carriage return is replaced by the token
    " newlinechar " (with surrounding spaces), and every double quote is
    replaced by a single quote.

    Args:
        data: The comment text to normalize (a ``str``).

    Returns:
        The normalized text.
    """
    # The second replacement targets the carriage-return character. It was
    # previously the two-character text "/r", which left real carriage
    # returns in place and mangled text such as "/r/python".
    data = (
        data.replace("\n", " newlinechar ")
        .replace("\r", " newlinechar ")
        .replace('"', "'")
    )
    return data
def find_parent(pid):
    """Look up the stored comment text for the comment whose id is *pid*.

    Queries the ``parent_reply`` table through the module-level cursor ``c``
    for a row whose ``comment_id`` equals *pid*.

    Args:
        pid: The comment id to search for.

    Returns:
        The ``comment`` column of the first matching row, or ``False`` when
        no row matches or the database reports an error (for example, the
        table does not exist yet).
    """
    try:
        # Bind pid as a parameter instead of formatting it into the SQL text,
        # so ids containing quotes cannot break or alter the statement.
        c.execute(
            "SELECT comment FROM parent_reply WHERE comment_id = ? LIMIT 1",
            (pid,),
        )
        result = c.fetchone()
    except sqlite3.Error as e:
        # Best-effort lookup: a database failure is reported as "not found".
        #print ("find_parent", e)
        return False
    if result is not None:
        return result[0]
    return False
if __name__ == "__main__":
    # Script entry point: make sure the table exists, then read the comment
    # dump one line at a time, decode each line as JSON, and look up the text
    # of the comment it replies to.
    create_table()
    row_counter = 0  # number of lines read from the dump so far
    # NOTE(review): paired_rows is never updated in this file; presumably it
    # is meant to count rows that were matched with a parent. Confirm.
    paired_rows = 0

    # Raw string literal: in a normal literal the "\U" in "C:\Users" starts
    # a Unicode escape, which makes the whole file fail to compile on
    # Python 3. The placeholders are the year and the full timeframe.
    dump_path = r"C:\Users\Admin\Desktop\learning_python\VSCODE\Beginner-chatbot\DATA\{}\RC_{}".format(
        timeframe.split('-')[0], timeframe
    )

    # Decode explicitly as UTF-8 (the standard encoding for JSON) instead of
    # relying on the platform default, which on Windows is a legacy code page.
    with open(dump_path, buffering=1000, encoding="utf-8") as f:
        for row in f:
            print (row) #use to print row and show progress in json file
            row_counter += 1
            # Each line of the dump is parsed as one JSON object.
            row = json.loads(row)
            parent_id = row['parent_id']
            body = format_data(row['body'])
            created_utc = row['created_utc']
            score = row['score']
            subreddit = row['subreddit']
            # Text of the parent comment if already stored, otherwise False.
            parent_data = find_parent(parent_id)