-
Notifications
You must be signed in to change notification settings - Fork 193
/
Copy pathcountries.xml
106 lines (94 loc) · 3.07 KB
/
countries.xml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
<!--
For more information, see
https://github.com/larsga/Duke/wiki/
Improvements needed:
- some area numbers have spaces in them
- not stripping accents from names
-->
<duke>
<!-- Declares an instance of the NumericComparator class under the name
     "AreaComparator", with its min-ratio parameter set to 0.7. -->
<object name="AreaComparator"
        class="no.priv.garshol.duke.comparators.NumericComparator">
  <param name="min-ratio" value="0.7"/>
</object>
<!-- Schema: the properties of a country record and, for each compared
     property, the comparator plus its low/high values. -->
<schema>
  <!-- Schema-wide threshold value. -->
  <threshold>0.7</threshold>

  <!-- Identifier property; it declares no comparator and no low/high. -->
  <property type="id">
    <name>ID</name>
  </property>

  <!-- Country name, compared with the Levenshtein comparator. -->
  <property>
    <name>NAME</name>
    <comparator>no.priv.garshol.duke.comparators.Levenshtein</comparator>
    <low>0.09</low>
    <high>0.93</high>
  </property>

  <!-- Area. The comparator is given as the name of the AreaComparator object
       declared in this file, not as the bare NumericComparator class name,
       so that the min-ratio value configured on that object is the one in
       effect for this property. -->
  <property>
    <name>AREA</name>
    <comparator>AreaComparator</comparator>
    <low>0.04</low>
    <high>0.73</high>
  </property>

  <!-- Capital city name, compared with the Levenshtein comparator. -->
  <property>
    <name>CAPITAL</name>
    <comparator>no.priv.garshol.duke.comparators.Levenshtein</comparator>
    <low>0.12</low>
    <high>0.61</high>
  </property>
</schema>
<!-- Database implementation: the InMemoryDatabase class, with no parameters
     set on it. -->
<database class="no.priv.garshol.duke.databases.InMemoryDatabase"/>
<!-- First group: CSV source countries-dbpedia.csv. header-line is set to
     false, and the columns are referred to by the names "1" through "4". -->
<group>
  <csv>
    <param name="input-file" value="countries-dbpedia.csv"/>
    <param name="header-line" value="false"/>

    <!-- Column-to-property mapping; NAME and CAPITAL each pass through a
         cleaner class from the Duke examples package. -->
    <column name="1" property="ID"/>
    <column name="2" property="NAME"
            cleaner="no.priv.garshol.duke.examples.CountryNameCleaner"/>
    <column name="3" property="AREA"/>
    <column name="4" property="CAPITAL"
            cleaner="no.priv.garshol.duke.examples.CapitalCleaner"/>
  </csv>
</group>
<!-- Second group: CSV source countries-mondial.csv. No header-line parameter
     is given here, and the columns are referred to by name (id, country,
     capital, area). -->
<group>
  <csv>
    <param name="input-file" value="countries-mondial.csv"/>

    <!-- Column-to-property mapping; NAME uses the same cleaner as the other
         group, CAPITAL uses LowerCaseNormalizeCleaner. -->
    <column name="id" property="ID"/>
    <column name="country" property="NAME"
            cleaner="no.priv.garshol.duke.examples.CountryNameCleaner"/>
    <column name="capital" property="CAPITAL"
            cleaner="no.priv.garshol.duke.cleaners.LowerCaseNormalizeCleaner"/>
    <column name="area" property="AREA"/>
  </csv>
</group>
<!-- It is possible to get the DBpedia data directly from the SPARQL endpoint
of DBpedia, but that puts load on their servers, so we don't do that
by default. If you wish, you can use the method below instead. -->
<!--sparql>
<param name="endpoint" value="http://dbpedia.org/sparql"/>
<param name="triple-mode" value="false"/>
<param name="query" value='
PREFIX dbprop: <http://dbpedia.org/property/>
PREFIX dbowl: <http://dbpedia.org/ontology/>
SELECT DISTINCT ?country ?name ?area ?capitalname
WHERE
{ ?country rdf:type dbpedia-owl:Country;
dbprop:commonName ?name ;
dbprop:areaKm ?area ;
dbowl:capital ?capital .
?capital rdfs:label ?capitalname .
OPTIONAL {?country dbpprop:yearEnd ?yearEnd}
FILTER (!bound(?yearEnd))
FILTER (lang(?capitalname) = "en")
} '/>
<column name="country"
property="ID"/>
<column name="name"
cleaner="no.priv.garshol.duke.cleaners.LowerCaseNormalizeCleaner"
property="NAME"/>
</sparql-->
</duke>