-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmodelGeneration.py
108 lines (83 loc) · 3.54 KB
/
modelGeneration.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import csv
import os
import execnet
def _write_counted_file(path, count, lines):
    """Write ``count`` on the first line of ``path``, followed by ``lines``."""
    with open(path, 'w') as out:
        out.write(str(count) + '\n')
        out.writelines(lines)


#Create files in format acceptable for OpenKE and save them in openKE/benchmarks/newsData
def createOpenkeFiles():
    """Convert ``textTriples.csv`` into the id-mapped files OpenKE trains on.

    Reads ``openKE/benchmarks/newsData/textTriples.csv`` (relative to the
    current working directory; comma-delimited, ``|`` as quote character) and
    takes the head entity from column 1, the tail entity from column 2 and the
    relation from column 3.  Column 0 is not read.

    Four files are written to the same directory:

    * ``entity2id.txt``: entity count, then one ``<entity> <id>`` line each.
    * ``relation2id.txt``: relation count, then one ``<relation> <id>`` line
      each.
    * ``train2id.txt``: triple count, then one
      ``<head id> <tail id> <relation id>`` line per processed row.
    * ``type_constrain.txt``: relation count, then two tab-separated lines
      per relation: ``<relation id> <n> <head ids...>`` followed by
      ``<relation id> <n> <tail ids...>``, listing the distinct entities seen
      on each side of that relation.

    Ids are assigned from 0 in order of first appearance.  Only rows numbered
    below 28000 (counting from 1) are used.  The running row number is printed
    for every processed row.

    Differences from the previous implementation:

    * Each count header is written as a complete first line.  Previously a
      two-character placeholder was overwritten through ``seek(0)``, which
      clobbered the start of the data as soon as a count had two digits.
    * The ``train2id.txt`` header is the number of triples actually written,
      not the number of CSV rows read.
    * Lookups use dicts/sets instead of ``list.index``, and every file is
      closed even when an error occurs.
    * Empty CSV rows are skipped instead of raising ``IndexError``.

    NOTE(review): the indentation of the original was lost, so the 28000-row
    cap is assumed to guard the whole per-row body - confirm.

    Raises:
        IndexError: if a non-empty row has fewer than four columns.
        OSError: if the input cannot be read or an output cannot be written.
    """
    row_limit = 28000  # rows numbered row_limit or higher are ignored
    head_col, tail_col, relation_col = 1, 2, 3

    entity_ids = {}       # entity name -> id
    relation_ids = {}     # relation name -> id
    entity_lines = []
    relation_lines = []
    triple_lines = []
    constrains = []       # indexed by relation id: {'head': [ids], 'tail': [ids]}
    seen_constrains = set()  # (relation id, side, entity id) already recorded

    # newline='' is what the csv module documents for files handed to a reader.
    with open('openKE/benchmarks/newsData/textTriples.csv', 'r', newline='') as csvfile:
        reader = csv.reader(csvfile, delimiter=',', quotechar='|')
        for count, row in enumerate(reader, start=1):
            if count >= row_limit:
                break
            if not row:
                continue
            print(count)
            head = row[head_col]
            tail = row[tail_col]
            relation = row[relation_col]

            # Id assignment order (head, relation, tail) matches the order in
            # which the lines have always been emitted.
            if head not in entity_ids:
                entity_ids[head] = len(entity_ids)
                entity_lines.append(head + ' ' + str(entity_ids[head]) + '\n')
            if relation not in relation_ids:
                relation_ids[relation] = len(relation_ids)
                relation_lines.append(relation + ' ' + str(relation_ids[relation]) + '\n')
                constrains.append({'head': [], 'tail': []})
            if tail not in entity_ids:
                entity_ids[tail] = len(entity_ids)
                entity_lines.append(tail + ' ' + str(entity_ids[tail]) + '\n')

            head_id = entity_ids[head]
            tail_id = entity_ids[tail]
            relation_id = relation_ids[relation]

            for side, entity_id in (('head', head_id), ('tail', tail_id)):
                marker = (relation_id, side, entity_id)
                if marker not in seen_constrains:
                    seen_constrains.add(marker)
                    constrains[relation_id][side].append(entity_id)

            triple_lines.append(str(head_id) + ' ' + str(tail_id) + ' ' + str(relation_id) + '\n')

    constrain_lines = []
    for relation_id, sides in enumerate(constrains):
        for side in ('head', 'tail'):
            fields = [relation_id, len(sides[side])] + sides[side]
            constrain_lines.append('\t'.join(str(field) for field in fields) + '\n')

    _write_counted_file('./openKE/benchmarks/newsData/entity2id.txt',
                        len(entity_lines), entity_lines)
    _write_counted_file('./openKE/benchmarks/newsData/relation2id.txt',
                        len(relation_lines), relation_lines)
    _write_counted_file('./openKE/benchmarks/newsData/train2id.txt',
                        len(triple_lines), triple_lines)
    # type_constrain.txt is headed by the relation count, not its line count.
    _write_counted_file('./openKE/benchmarks/newsData/type_constrain.txt',
                        len(relation_lines), constrain_lines)
#Calling a different python version.
#Needed because OpenKE is in Python2 and this project is in Python3
def call_python_version(Version, Module, Function, ArgumentList):
    """Call ``Module.Function(*ArgumentList)`` in another Python interpreter.

    Opens an execnet ``popen`` gateway to the executable ``python<Version>``,
    imports ``Function`` from ``Module`` on the remote side, calls it with the
    unpacked ``ArgumentList`` and sends the result back over the channel.

    Args:
        Version: interpreter version suffix appended to ``python``, e.g.
            ``"2.7"`` selects ``python2.7``.
        Module: name of the module to import in the remote interpreter; it is
            resolved by that interpreter, so it must be importable from there.
        Function: name of the callable to import from ``Module``.
        ArgumentList: iterable of positional arguments.  It is unpacked with
            ``*`` remotely, so a plain string is passed one character per
            argument.

    Returns:
        The value returned by the remote function, as received over the
        execnet channel.

    The gateway is shut down once the result has been received or an error
    occurred; previously it was left running after every call.
    """
    gw = execnet.makegateway("popen//python=python%s" % Version)
    try:
        channel = gw.remote_exec("""
from %s import % s as the_function
channel.send(the_function(*channel.receive()))
""" % (Module,Function))
        channel.send(ArgumentList)
        return channel.receive()
    finally:
        # Release the remote interpreter even when the remote call fails.
        gw.exit()
#Main model generation. Dummy variable needed as it is expected by call_python_version
#Calling OpenKE's 'example_train_transe.py' file which produces the embedding model
def generateModel(dummy):
    """Regenerate the OpenKE input files and run the training script.

    First calls ``createOpenkeFiles()``, then changes into ``openKE/`` and
    invokes ``example`` from ``example_train_transe`` under Python 2.7 through
    ``call_python_version``.  The original working directory is restored
    afterwards, including when the training call raises (previously a failure
    left the process inside ``openKE/``).

    Args:
        dummy: unused; accepted only so the function can be called with one
            positional argument.
    """
    createOpenkeFiles()
    owd = os.getcwd()
    os.chdir('openKE/')
    try:
        call_python_version("2.7","example_train_transe","example", "d")
    finally:
        os.chdir(owd)