|
9 | 9 | },
|
10 | 10 | {
|
11 | 11 | "cell_type": "code",
|
12 |
| - "execution_count": 19, |
| 12 | + "execution_count": 1, |
13 | 13 | "metadata": {},
|
14 | 14 | "outputs": [],
|
15 | 15 | "source": [
|
|
39 | 39 | },
|
40 | 40 | {
|
41 | 41 | "cell_type": "code",
|
42 |
| - "execution_count": 20, |
| 42 | + "execution_count": 2, |
43 | 43 | "metadata": {},
|
44 | 44 | "outputs": [
|
45 | 45 | {
|
46 | 46 | "name": "stderr",
|
47 | 47 | "output_type": "stream",
|
48 | 48 | "text": [
|
49 |
| - "100%|██████████| 1/1 [00:00<00:00, 17.20it/s]" |
| 49 | + "100%|██████████| 1/1 [00:01<00:00, 1.82s/it]" |
50 | 50 | ]
|
51 | 51 | },
|
52 | 52 | {
|
|
94 | 94 | },
|
95 | 95 | {
|
96 | 96 | "cell_type": "code",
|
97 |
| - "execution_count": 21, |
| 97 | + "execution_count": 3, |
98 | 98 | "metadata": {},
|
99 | 99 | "outputs": [
|
100 | 100 | {
|
|
135 | 135 | " <th>0</th>\n",
|
136 | 136 | " <td>Abstract India’s health indicators have improv...</td>\n",
|
137 | 137 | " <td>data_input/cureus/cureus-0015-00000040274.txt</td>\n",
|
138 |
| - " <td>d6c50af2f56d482db830d502516192d6</td>\n", |
| 138 | + " <td>0f56d8fbefa04f1e877f573938f78ff1</td>\n", |
139 | 139 | " </tr>\n",
|
140 | 140 | " <tr>\n",
|
141 | 141 | " <th>1</th>\n",
|
142 | 142 | " <td>Categories: Public Health, Epidemiology/Public...</td>\n",
|
143 | 143 | " <td>data_input/cureus/cureus-0015-00000040274.txt</td>\n",
|
144 |
| - " <td>278e3d8ae6714f889a984c217a3d166d</td>\n", |
| 144 | + " <td>92789b719a254c8385327b9d243935b6</td>\n", |
145 | 145 | " </tr>\n",
|
146 | 146 | " <tr>\n",
|
147 | 147 | " <th>2</th>\n",
|
148 | 148 | " <td>Introduction And Background India’s health ind...</td>\n",
|
149 | 149 | " <td>data_input/cureus/cureus-0015-00000040274.txt</td>\n",
|
150 |
| - " <td>813107203afb4531ae915f7bf459b501</td>\n", |
| 150 | + " <td>9eefb3bf352a459c8895f272b632724e</td>\n", |
151 | 151 | " </tr>\n",
|
152 | 152 | " <tr>\n",
|
153 | 153 | " <th>3</th>\n",
|
154 | 154 | " <td>An extensive literature search was performed, ...</td>\n",
|
155 | 155 | " <td>data_input/cureus/cureus-0015-00000040274.txt</td>\n",
|
156 |
| - " <td>67d0704481e243539db4effb9664c48f</td>\n", |
| 156 | + " <td>7c21bdb708d14855b7b3de9d8564b175</td>\n", |
157 | 157 | " </tr>\n",
|
158 | 158 | " <tr>\n",
|
159 | 159 | " <th>4</th>\n",
|
160 | 160 | " <td>Review Overview of the public and private heal...</td>\n",
|
161 | 161 | " <td>data_input/cureus/cureus-0015-00000040274.txt</td>\n",
|
162 |
| - " <td>01029501dd3249738f761a6fe1d19ece</td>\n", |
| 162 | + " <td>bfc37e1213e7428d963fdac63eb80079</td>\n", |
163 | 163 | " </tr>\n",
|
164 | 164 | " </tbody>\n",
|
165 | 165 | "</table>\n",
|
|
181 | 181 | "4 data_input/cureus/cureus-0015-00000040274.txt \n",
|
182 | 182 | "\n",
|
183 | 183 | " chunk_id \n",
|
184 |
| - "0 d6c50af2f56d482db830d502516192d6 \n", |
185 |
| - "1 278e3d8ae6714f889a984c217a3d166d \n", |
186 |
| - "2 813107203afb4531ae915f7bf459b501 \n", |
187 |
| - "3 67d0704481e243539db4effb9664c48f \n", |
188 |
| - "4 01029501dd3249738f761a6fe1d19ece " |
| 184 | + "0 0f56d8fbefa04f1e877f573938f78ff1 \n", |
| 185 | + "1 92789b719a254c8385327b9d243935b6 \n", |
| 186 | + "2 9eefb3bf352a459c8895f272b632724e \n", |
| 187 | + "3 7c21bdb708d14855b7b3de9d8564b175 \n", |
| 188 | + "4 bfc37e1213e7428d963fdac63eb80079 " |
189 | 189 | ]
|
190 | 190 | },
|
191 |
| - "execution_count": 21, |
| 191 | + "execution_count": 3, |
192 | 192 | "metadata": {},
|
193 | 193 | "output_type": "execute_result"
|
194 | 194 | }
|
|
209 | 209 | },
|
210 | 210 | {
|
211 | 211 | "cell_type": "code",
|
212 |
| - "execution_count": 22, |
| 212 | + "execution_count": 4, |
213 | 213 | "metadata": {},
|
214 | 214 | "outputs": [],
|
215 | 215 | "source": [
|
|
234 | 234 | },
|
235 | 235 | {
|
236 | 236 | "cell_type": "code",
|
237 |
| - "execution_count": 23, |
| 237 | + "execution_count": 5, |
238 | 238 | "metadata": {},
|
239 | 239 | "outputs": [
|
240 | 240 | {
|
|
340 | 340 | "7 ae0fd26675d645e787964255667e90f4 4 "
|
341 | 341 | ]
|
342 | 342 | },
|
343 |
| - "execution_count": 23, |
| 343 | + "execution_count": 5, |
344 | 344 | "metadata": {},
|
345 | 345 | "output_type": "execute_result"
|
346 | 346 | }
|
|
378 | 378 | },
|
379 | 379 | {
|
380 | 380 | "cell_type": "code",
|
381 |
| - "execution_count": 24, |
| 381 | + "execution_count": 6, |
382 | 382 | "metadata": {},
|
383 | 383 | "outputs": [
|
384 | 384 | {
|
|
477 | 477 | "2831 contextual proximity "
|
478 | 478 | ]
|
479 | 479 | },
|
480 |
| - "execution_count": 24, |
| 480 | + "execution_count": 6, |
481 | 481 | "metadata": {},
|
482 | 482 | "output_type": "execute_result"
|
483 | 483 | }
|
|
522 | 522 | },
|
523 | 523 | {
|
524 | 524 | "cell_type": "code",
|
525 |
| - "execution_count": 25, |
| 525 | + "execution_count": 7, |
526 | 526 | "metadata": {},
|
527 | 527 | "outputs": [
|
528 | 528 | {
|
|
703 | 703 | "[758 rows x 5 columns]"
|
704 | 704 | ]
|
705 | 705 | },
|
706 |
| - "execution_count": 25, |
| 706 | + "execution_count": 7, |
707 | 707 | "metadata": {},
|
708 | 708 | "output_type": "execute_result"
|
709 | 709 | }
|
|
727 | 727 | },
|
728 | 728 | {
|
729 | 729 | "cell_type": "code",
|
730 |
| - "execution_count": 26, |
| 730 | + "execution_count": 8, |
731 | 731 | "metadata": {},
|
732 | 732 | "outputs": [
|
733 | 733 | {
|
|
736 | 736 | "(215,)"
|
737 | 737 | ]
|
738 | 738 | },
|
739 |
| - "execution_count": 26, |
| 739 | + "execution_count": 8, |
740 | 740 | "metadata": {},
|
741 | 741 | "output_type": "execute_result"
|
742 | 742 | }
|
|
748 | 748 | },
|
749 | 749 | {
|
750 | 750 | "cell_type": "code",
|
751 |
| - "execution_count": 27, |
| 751 | + "execution_count": 9, |
752 | 752 | "metadata": {},
|
753 | 753 | "outputs": [],
|
754 | 754 | "source": [
|
|
780 | 780 | },
|
781 | 781 | {
|
782 | 782 | "cell_type": "code",
|
783 |
| - "execution_count": 28, |
| 783 | + "execution_count": 10, |
784 | 784 | "metadata": {},
|
785 | 785 | "outputs": [
|
786 | 786 | {
|
|
810 | 810 | },
|
811 | 811 | {
|
812 | 812 | "cell_type": "code",
|
813 |
| - "execution_count": 29, |
| 813 | + "execution_count": 11, |
814 | 814 | "metadata": {},
|
815 | 815 | "outputs": [
|
816 | 816 | {
|
|
843 | 843 | " <tr>\n",
|
844 | 844 | " <th>0</th>\n",
|
845 | 845 | " <td>56 articles</td>\n",
|
846 |
| - " <td>#ad57db</td>\n", |
| 846 | + " <td>#db57db</td>\n", |
847 | 847 | " <td>1</td>\n",
|
848 | 848 | " </tr>\n",
|
849 | 849 | " <tr>\n",
|
850 | 850 | " <th>1</th>\n",
|
851 | 851 | " <td>analysis</td>\n",
|
852 |
| - " <td>#ad57db</td>\n", |
| 852 | + " <td>#db57db</td>\n", |
853 | 853 | " <td>1</td>\n",
|
854 | 854 | " </tr>\n",
|
855 | 855 | " <tr>\n",
|
856 | 856 | " <th>2</th>\n",
|
857 | 857 | " <td>corresponding authors' experiential knowledge</td>\n",
|
858 |
| - " <td>#ad57db</td>\n", |
| 858 | + " <td>#db57db</td>\n", |
859 | 859 | " <td>1</td>\n",
|
860 | 860 | " </tr>\n",
|
861 | 861 | " <tr>\n",
|
862 | 862 | " <th>3</th>\n",
|
863 | 863 | " <td>extensive literature search</td>\n",
|
864 |
| - " <td>#ad57db</td>\n", |
| 864 | + " <td>#db57db</td>\n", |
865 | 865 | " <td>1</td>\n",
|
866 | 866 | " </tr>\n",
|
867 | 867 | " <tr>\n",
|
868 | 868 | " <th>4</th>\n",
|
869 | 869 | " <td>peer-reviewed journals</td>\n",
|
870 |
| - " <td>#ad57db</td>\n", |
| 870 | + " <td>#db57db</td>\n", |
871 | 871 | " <td>1</td>\n",
|
872 | 872 | " </tr>\n",
|
873 | 873 | " <tr>\n",
|
|
879 | 879 | " <tr>\n",
|
880 | 880 | " <th>210</th>\n",
|
881 | 881 | " <td>rural medical assistants (rmas)</td>\n",
|
882 |
| - " <td>#57db6e</td>\n", |
| 882 | + " <td>#57bcdb</td>\n", |
883 | 883 | " <td>15</td>\n",
|
884 | 884 | " </tr>\n",
|
885 | 885 | " <tr>\n",
|
886 | 886 | " <th>211</th>\n",
|
887 | 887 | " <td>limited uptake</td>\n",
|
888 |
| - " <td>#57bcdb</td>\n", |
| 888 | + " <td>#db57ac</td>\n", |
889 | 889 | " <td>16</td>\n",
|
890 | 890 | " </tr>\n",
|
891 | 891 | " <tr>\n",
|
892 | 892 | " <th>212</th>\n",
|
893 | 893 | " <td>national health protection mission</td>\n",
|
894 |
| - " <td>#57bcdb</td>\n", |
| 894 | + " <td>#db57ac</td>\n", |
895 | 895 | " <td>16</td>\n",
|
896 | 896 | " </tr>\n",
|
897 | 897 | " <tr>\n",
|
898 | 898 | " <th>213</th>\n",
|
899 | 899 | " <td>private health sector systems</td>\n",
|
900 |
| - " <td>#578ddb</td>\n", |
| 900 | + " <td>#57dbcc</td>\n", |
901 | 901 | " <td>17</td>\n",
|
902 | 902 | " </tr>\n",
|
903 | 903 | " <tr>\n",
|
904 | 904 | " <th>214</th>\n",
|
905 | 905 | " <td>public</td>\n",
|
906 |
| - " <td>#578ddb</td>\n", |
| 906 | + " <td>#57dbcc</td>\n", |
907 | 907 | " <td>17</td>\n",
|
908 | 908 | " </tr>\n",
|
909 | 909 | " </tbody>\n",
|
|
913 | 913 | ],
|
914 | 914 | "text/plain": [
|
915 | 915 | " node color group\n",
|
916 |
| - "0 56 articles #ad57db 1\n", |
917 |
| - "1 analysis #ad57db 1\n", |
918 |
| - "2 corresponding authors' experiential knowledge #ad57db 1\n", |
919 |
| - "3 extensive literature search #ad57db 1\n", |
920 |
| - "4 peer-reviewed journals #ad57db 1\n", |
| 916 | + "0 56 articles #db57db 1\n", |
| 917 | + "1 analysis #db57db 1\n", |
| 918 | + "2 corresponding authors' experiential knowledge #db57db 1\n", |
| 919 | + "3 extensive literature search #db57db 1\n", |
| 920 | + "4 peer-reviewed journals #db57db 1\n", |
921 | 921 | ".. ... ... ...\n",
|
922 |
| - "210 rural medical assistants (rmas) #57db6e 15\n", |
923 |
| - "211 limited uptake #57bcdb 16\n", |
924 |
| - "212 national health protection mission #57bcdb 16\n", |
925 |
| - "213 private health sector systems #578ddb 17\n", |
926 |
| - "214 public #578ddb 17\n", |
| 922 | + "210 rural medical assistants (rmas) #57bcdb 15\n", |
| 923 | + "211 limited uptake #db57ac 16\n", |
| 924 | + "212 national health protection mission #db57ac 16\n", |
| 925 | + "213 private health sector systems #57dbcc 17\n", |
| 926 | + "214 public #57dbcc 17\n", |
927 | 927 | "\n",
|
928 | 928 | "[215 rows x 3 columns]"
|
929 | 929 | ]
|
930 | 930 | },
|
931 |
| - "execution_count": 29, |
| 931 | + "execution_count": 11, |
932 | 932 | "metadata": {},
|
933 | 933 | "output_type": "execute_result"
|
934 | 934 | }
|
|
966 | 966 | },
|
967 | 967 | {
|
968 | 968 | "cell_type": "code",
|
969 |
| - "execution_count": 30, |
| 969 | + "execution_count": 12, |
970 | 970 | "metadata": {},
|
971 | 971 | "outputs": [],
|
972 | 972 | "source": [
|
|
978 | 978 | },
|
979 | 979 | {
|
980 | 980 | "cell_type": "code",
|
981 |
| - "execution_count": 32, |
| 981 | + "execution_count": 13, |
982 | 982 | "metadata": {},
|
983 | 983 | "outputs": [
|
984 | 984 | {
|
|
1019 | 1019 | "execution_count": null,
|
1020 | 1020 | "metadata": {},
|
1021 | 1021 | "outputs": [],
|
1022 |
| - "source": [ |
1023 |
| - "SYS_PROMPT = (\n", |
1024 |
| - " \"You are a network graph maker who extracts terms and their relations from a given context. \"\n", |
1025 |
| - " \"You are provided with a context chunk (delimited by ```) Your task is to extract the ontology \"\n", |
1026 |
| - " \"of terms mentioned in the given context. These terms should represent the key concepts as per the context. \\n\"\n", |
1027 |
| - " \"Thought 1: While traversing through each sentence, Think about the key terms mentioned in it.\\n\"\n", |
1028 |
| - " \"\\tTerms may include object, entity, location, organization, person, \\n\"\n", |
1029 |
| - " \"\\tcondition, acronym, documents, service, concept, etc.\\n\"\n", |
1030 |
| - " \"\\tTerms should be as atomistic as possible\\n\\n\"\n", |
1031 |
| - " \"Thought 2: Think about how these terms can have one on one relation with other terms.\\n\"\n", |
1032 |
| - " \"\\tTerms that are mentioned in the same sentence or the same paragraph are typically related to each other.\\n\"\n", |
1033 |
| - " \"\\tTerms can be related to many other terms\\n\\n\"\n", |
1034 |
| - " \"Thought 3: Find out the relation between each such related pair of terms. \\n\\n\"\n", |
1035 |
| - " \"Format your output as a list of json. Each element of the list contains a pair of terms\"\n", |
1036 |
| - " \"and the relation between them, like the follwing: \\n\"\n", |
1037 |
| - " \"[\\n\"\n", |
1038 |
| - " \" {\\n\"\n", |
1039 |
| - " ' \"node_1\": \"A concept from extracted ontology\",\\n'\n", |
1040 |
| - " ' \"node_2\": \"A related concept from extracted ontology\",\\n'\n", |
1041 |
| - " ' \"edge\": \"relationship between the two concepts, node_1 and node_2 in one or two sentences\"\\n'\n", |
1042 |
| - " \" }, {...}\\n\"\n", |
1043 |
| - " \"]\"\n", |
1044 |
| - ")" |
1045 |
| - ] |
| 1022 | + "source": [] |
1046 | 1023 | }
|
1047 | 1024 | ],
|
1048 | 1025 | "metadata": {
|
|
1061 | 1038 | "name": "python",
|
1062 | 1039 | "nbconvert_exporter": "python",
|
1063 | 1040 | "pygments_lexer": "ipython3",
|
1064 |
| - "version": "3.11.1" |
| 1041 | + "version": "3.11.4" |
1065 | 1042 | }
|
1066 | 1043 | },
|
1067 | 1044 | "nbformat": 4,
|
|
0 commit comments