Skip to content

Commit 5d21abd

Browse files
committed
Removed requirements.txt
Added Poetry
1 parent 697020f commit 5d21abd

6 files changed

+4061
-309
lines changed

.python-version

-1
This file was deleted.

docs/index.html

+1-1
Large diffs are not rendered by default.

extract_graph.ipynb

+52-75
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
},
1010
{
1111
"cell_type": "code",
12-
"execution_count": 19,
12+
"execution_count": 1,
1313
"metadata": {},
1414
"outputs": [],
1515
"source": [
@@ -39,14 +39,14 @@
3939
},
4040
{
4141
"cell_type": "code",
42-
"execution_count": 20,
42+
"execution_count": 2,
4343
"metadata": {},
4444
"outputs": [
4545
{
4646
"name": "stderr",
4747
"output_type": "stream",
4848
"text": [
49-
"100%|██████████| 1/1 [00:00<00:00, 17.20it/s]"
49+
"100%|██████████| 1/1 [00:01<00:00, 1.82s/it]"
5050
]
5151
},
5252
{
@@ -94,7 +94,7 @@
9494
},
9595
{
9696
"cell_type": "code",
97-
"execution_count": 21,
97+
"execution_count": 3,
9898
"metadata": {},
9999
"outputs": [
100100
{
@@ -135,31 +135,31 @@
135135
" <th>0</th>\n",
136136
" <td>Abstract India’s health indicators have improv...</td>\n",
137137
" <td>data_input/cureus/cureus-0015-00000040274.txt</td>\n",
138-
" <td>d6c50af2f56d482db830d502516192d6</td>\n",
138+
" <td>0f56d8fbefa04f1e877f573938f78ff1</td>\n",
139139
" </tr>\n",
140140
" <tr>\n",
141141
" <th>1</th>\n",
142142
" <td>Categories: Public Health, Epidemiology/Public...</td>\n",
143143
" <td>data_input/cureus/cureus-0015-00000040274.txt</td>\n",
144-
" <td>278e3d8ae6714f889a984c217a3d166d</td>\n",
144+
" <td>92789b719a254c8385327b9d243935b6</td>\n",
145145
" </tr>\n",
146146
" <tr>\n",
147147
" <th>2</th>\n",
148148
" <td>Introduction And Background India’s health ind...</td>\n",
149149
" <td>data_input/cureus/cureus-0015-00000040274.txt</td>\n",
150-
" <td>813107203afb4531ae915f7bf459b501</td>\n",
150+
" <td>9eefb3bf352a459c8895f272b632724e</td>\n",
151151
" </tr>\n",
152152
" <tr>\n",
153153
" <th>3</th>\n",
154154
" <td>An extensive literature search was performed, ...</td>\n",
155155
" <td>data_input/cureus/cureus-0015-00000040274.txt</td>\n",
156-
" <td>67d0704481e243539db4effb9664c48f</td>\n",
156+
" <td>7c21bdb708d14855b7b3de9d8564b175</td>\n",
157157
" </tr>\n",
158158
" <tr>\n",
159159
" <th>4</th>\n",
160160
" <td>Review Overview of the public and private heal...</td>\n",
161161
" <td>data_input/cureus/cureus-0015-00000040274.txt</td>\n",
162-
" <td>01029501dd3249738f761a6fe1d19ece</td>\n",
162+
" <td>bfc37e1213e7428d963fdac63eb80079</td>\n",
163163
" </tr>\n",
164164
" </tbody>\n",
165165
"</table>\n",
@@ -181,14 +181,14 @@
181181
"4 data_input/cureus/cureus-0015-00000040274.txt \n",
182182
"\n",
183183
" chunk_id \n",
184-
"0 d6c50af2f56d482db830d502516192d6 \n",
185-
"1 278e3d8ae6714f889a984c217a3d166d \n",
186-
"2 813107203afb4531ae915f7bf459b501 \n",
187-
"3 67d0704481e243539db4effb9664c48f \n",
188-
"4 01029501dd3249738f761a6fe1d19ece "
184+
"0 0f56d8fbefa04f1e877f573938f78ff1 \n",
185+
"1 92789b719a254c8385327b9d243935b6 \n",
186+
"2 9eefb3bf352a459c8895f272b632724e \n",
187+
"3 7c21bdb708d14855b7b3de9d8564b175 \n",
188+
"4 bfc37e1213e7428d963fdac63eb80079 "
189189
]
190190
},
191-
"execution_count": 21,
191+
"execution_count": 3,
192192
"metadata": {},
193193
"output_type": "execute_result"
194194
}
@@ -209,7 +209,7 @@
209209
},
210210
{
211211
"cell_type": "code",
212-
"execution_count": 22,
212+
"execution_count": 4,
213213
"metadata": {},
214214
"outputs": [],
215215
"source": [
@@ -234,7 +234,7 @@
234234
},
235235
{
236236
"cell_type": "code",
237-
"execution_count": 23,
237+
"execution_count": 5,
238238
"metadata": {},
239239
"outputs": [
240240
{
@@ -340,7 +340,7 @@
340340
"7 ae0fd26675d645e787964255667e90f4 4 "
341341
]
342342
},
343-
"execution_count": 23,
343+
"execution_count": 5,
344344
"metadata": {},
345345
"output_type": "execute_result"
346346
}
@@ -378,7 +378,7 @@
378378
},
379379
{
380380
"cell_type": "code",
381-
"execution_count": 24,
381+
"execution_count": 6,
382382
"metadata": {},
383383
"outputs": [
384384
{
@@ -477,7 +477,7 @@
477477
"2831 contextual proximity "
478478
]
479479
},
480-
"execution_count": 24,
480+
"execution_count": 6,
481481
"metadata": {},
482482
"output_type": "execute_result"
483483
}
@@ -522,7 +522,7 @@
522522
},
523523
{
524524
"cell_type": "code",
525-
"execution_count": 25,
525+
"execution_count": 7,
526526
"metadata": {},
527527
"outputs": [
528528
{
@@ -703,7 +703,7 @@
703703
"[758 rows x 5 columns]"
704704
]
705705
},
706-
"execution_count": 25,
706+
"execution_count": 7,
707707
"metadata": {},
708708
"output_type": "execute_result"
709709
}
@@ -727,7 +727,7 @@
727727
},
728728
{
729729
"cell_type": "code",
730-
"execution_count": 26,
730+
"execution_count": 8,
731731
"metadata": {},
732732
"outputs": [
733733
{
@@ -736,7 +736,7 @@
736736
"(215,)"
737737
]
738738
},
739-
"execution_count": 26,
739+
"execution_count": 8,
740740
"metadata": {},
741741
"output_type": "execute_result"
742742
}
@@ -748,7 +748,7 @@
748748
},
749749
{
750750
"cell_type": "code",
751-
"execution_count": 27,
751+
"execution_count": 9,
752752
"metadata": {},
753753
"outputs": [],
754754
"source": [
@@ -780,7 +780,7 @@
780780
},
781781
{
782782
"cell_type": "code",
783-
"execution_count": 28,
783+
"execution_count": 10,
784784
"metadata": {},
785785
"outputs": [
786786
{
@@ -810,7 +810,7 @@
810810
},
811811
{
812812
"cell_type": "code",
813-
"execution_count": 29,
813+
"execution_count": 11,
814814
"metadata": {},
815815
"outputs": [
816816
{
@@ -843,31 +843,31 @@
843843
" <tr>\n",
844844
" <th>0</th>\n",
845845
" <td>56 articles</td>\n",
846-
" <td>#ad57db</td>\n",
846+
" <td>#db57db</td>\n",
847847
" <td>1</td>\n",
848848
" </tr>\n",
849849
" <tr>\n",
850850
" <th>1</th>\n",
851851
" <td>analysis</td>\n",
852-
" <td>#ad57db</td>\n",
852+
" <td>#db57db</td>\n",
853853
" <td>1</td>\n",
854854
" </tr>\n",
855855
" <tr>\n",
856856
" <th>2</th>\n",
857857
" <td>corresponding authors' experiential knowledge</td>\n",
858-
" <td>#ad57db</td>\n",
858+
" <td>#db57db</td>\n",
859859
" <td>1</td>\n",
860860
" </tr>\n",
861861
" <tr>\n",
862862
" <th>3</th>\n",
863863
" <td>extensive literature search</td>\n",
864-
" <td>#ad57db</td>\n",
864+
" <td>#db57db</td>\n",
865865
" <td>1</td>\n",
866866
" </tr>\n",
867867
" <tr>\n",
868868
" <th>4</th>\n",
869869
" <td>peer-reviewed journals</td>\n",
870-
" <td>#ad57db</td>\n",
870+
" <td>#db57db</td>\n",
871871
" <td>1</td>\n",
872872
" </tr>\n",
873873
" <tr>\n",
@@ -879,31 +879,31 @@
879879
" <tr>\n",
880880
" <th>210</th>\n",
881881
" <td>rural medical assistants (rmas)</td>\n",
882-
" <td>#57db6e</td>\n",
882+
" <td>#57bcdb</td>\n",
883883
" <td>15</td>\n",
884884
" </tr>\n",
885885
" <tr>\n",
886886
" <th>211</th>\n",
887887
" <td>limited uptake</td>\n",
888-
" <td>#57bcdb</td>\n",
888+
" <td>#db57ac</td>\n",
889889
" <td>16</td>\n",
890890
" </tr>\n",
891891
" <tr>\n",
892892
" <th>212</th>\n",
893893
" <td>national health protection mission</td>\n",
894-
" <td>#57bcdb</td>\n",
894+
" <td>#db57ac</td>\n",
895895
" <td>16</td>\n",
896896
" </tr>\n",
897897
" <tr>\n",
898898
" <th>213</th>\n",
899899
" <td>private health sector systems</td>\n",
900-
" <td>#578ddb</td>\n",
900+
" <td>#57dbcc</td>\n",
901901
" <td>17</td>\n",
902902
" </tr>\n",
903903
" <tr>\n",
904904
" <th>214</th>\n",
905905
" <td>public</td>\n",
906-
" <td>#578ddb</td>\n",
906+
" <td>#57dbcc</td>\n",
907907
" <td>17</td>\n",
908908
" </tr>\n",
909909
" </tbody>\n",
@@ -913,22 +913,22 @@
913913
],
914914
"text/plain": [
915915
" node color group\n",
916-
"0 56 articles #ad57db 1\n",
917-
"1 analysis #ad57db 1\n",
918-
"2 corresponding authors' experiential knowledge #ad57db 1\n",
919-
"3 extensive literature search #ad57db 1\n",
920-
"4 peer-reviewed journals #ad57db 1\n",
916+
"0 56 articles #db57db 1\n",
917+
"1 analysis #db57db 1\n",
918+
"2 corresponding authors' experiential knowledge #db57db 1\n",
919+
"3 extensive literature search #db57db 1\n",
920+
"4 peer-reviewed journals #db57db 1\n",
921921
".. ... ... ...\n",
922-
"210 rural medical assistants (rmas) #57db6e 15\n",
923-
"211 limited uptake #57bcdb 16\n",
924-
"212 national health protection mission #57bcdb 16\n",
925-
"213 private health sector systems #578ddb 17\n",
926-
"214 public #578ddb 17\n",
922+
"210 rural medical assistants (rmas) #57bcdb 15\n",
923+
"211 limited uptake #db57ac 16\n",
924+
"212 national health protection mission #db57ac 16\n",
925+
"213 private health sector systems #57dbcc 17\n",
926+
"214 public #57dbcc 17\n",
927927
"\n",
928928
"[215 rows x 3 columns]"
929929
]
930930
},
931-
"execution_count": 29,
931+
"execution_count": 11,
932932
"metadata": {},
933933
"output_type": "execute_result"
934934
}
@@ -966,7 +966,7 @@
966966
},
967967
{
968968
"cell_type": "code",
969-
"execution_count": 30,
969+
"execution_count": 12,
970970
"metadata": {},
971971
"outputs": [],
972972
"source": [
@@ -978,7 +978,7 @@
978978
},
979979
{
980980
"cell_type": "code",
981-
"execution_count": 32,
981+
"execution_count": 13,
982982
"metadata": {},
983983
"outputs": [
984984
{
@@ -1019,30 +1019,7 @@
10191019
"execution_count": null,
10201020
"metadata": {},
10211021
"outputs": [],
1022-
"source": [
1023-
"SYS_PROMPT = (\n",
1024-
" \"You are a network graph maker who extracts terms and their relations from a given context. \"\n",
1025-
" \"You are provided with a context chunk (delimited by ```) Your task is to extract the ontology \"\n",
1026-
" \"of terms mentioned in the given context. These terms should represent the key concepts as per the context. \\n\"\n",
1027-
" \"Thought 1: While traversing through each sentence, Think about the key terms mentioned in it.\\n\"\n",
1028-
" \"\\tTerms may include object, entity, location, organization, person, \\n\"\n",
1029-
" \"\\tcondition, acronym, documents, service, concept, etc.\\n\"\n",
1030-
" \"\\tTerms should be as atomistic as possible\\n\\n\"\n",
1031-
" \"Thought 2: Think about how these terms can have one on one relation with other terms.\\n\"\n",
1032-
" \"\\tTerms that are mentioned in the same sentence or the same paragraph are typically related to each other.\\n\"\n",
1033-
" \"\\tTerms can be related to many other terms\\n\\n\"\n",
1034-
" \"Thought 3: Find out the relation between each such related pair of terms. \\n\\n\"\n",
1035-
" \"Format your output as a list of json. Each element of the list contains a pair of terms\"\n",
1036-
" \"and the relation between them, like the follwing: \\n\"\n",
1037-
" \"[\\n\"\n",
1038-
" \" {\\n\"\n",
1039-
" ' \"node_1\": \"A concept from extracted ontology\",\\n'\n",
1040-
" ' \"node_2\": \"A related concept from extracted ontology\",\\n'\n",
1041-
" ' \"edge\": \"relationship between the two concepts, node_1 and node_2 in one or two sentences\"\\n'\n",
1042-
" \" }, {...}\\n\"\n",
1043-
" \"]\"\n",
1044-
")"
1045-
]
1022+
"source": []
10461023
}
10471024
],
10481025
"metadata": {
@@ -1061,7 +1038,7 @@
10611038
"name": "python",
10621039
"nbconvert_exporter": "python",
10631040
"pygments_lexer": "ipython3",
1064-
"version": "3.11.1"
1041+
"version": "3.11.4"
10651042
}
10661043
},
10671044
"nbformat": 4,

0 commit comments

Comments
 (0)