Skip to content

Commit a38517c

Browse files
authored
Merge pull request #70 from MycroftAI/feature/pymediawiki
Major refactor including switch to pymediawiki
2 parents 4887a36 + e3f7e2e commit a38517c

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

57 files changed

+1029
-349
lines changed

Diff for: .gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
*.pyc
2+
.pytest_cache/
23
settings.json

Diff for: __init__.py

+284 -273
Large diffs are not rendered by default.

Diff for: dialog/ca-es/disambiguate-intro.dialog

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
volíeu dir
2+
us agradaria sentir alguna cosa sobre

Diff for: dialog/ca-es/disambiguate.dialog

-2
This file was deleted.
File renamed without changes.

Diff for: dialog/da-dk/disambiguate-intro.dialog

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
mente du
2+
kunne du tænke dig at høre om

Diff for: dialog/da-dk/disambiguate.dialog

-2
This file was deleted.

Diff for: dialog/de-de/disambiguate-intro.dialog

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
meinst du
2+
möchtest du etwas über

Diff for: dialog/de-de/disambiguate.dialog

-2
This file was deleted.

Diff for: dialog/el-gr/disambiguate-intro.dialog

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
μήπως εννοούσες
2+
θα ήθελες να ακούσεις για

Diff for: dialog/el-gr/disambiguate.dialog

-2
This file was deleted.

Diff for: dialog/en-us/connection-error.dialog

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
I can't connect to Wikipedia. Please try again later.

Diff for: dialog/en-us/disambiguate-exists.dialog

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Did I get that right?

Diff for: dialog/en-us/disambiguate-intro.dialog

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
did you mean
2+
would you like to hear about

Diff for: dialog/en-us/disambiguate.dialog

-2
This file was deleted.

Diff for: dialog/en-us/no entry found.dialog

+2 -2
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
1-
I can't find a related Wikipedia article
2-
I'm afraid there isn't any article about that
1+
I can't find a Wikipedia article about {topic}
2+
I'm afraid there isn't any article about {topic}

Diff for: dialog/en-us/question_words.list

+1
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,4 @@ who
22
whom
33
what
44
when
5+
where

Diff for: dialog/es-es/disambiguate-intro.dialog

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
quieres decir
2+
quieres escuchar sobre

Diff for: dialog/es-es/disambiguate.dialog

-2
This file was deleted.

Diff for: dialog/es-lm/disambiguate-intro.dialog

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
quieres decir
2+
quieres escuchar sobre

Diff for: dialog/es-lm/disambiguate.dialog

-2
This file was deleted.
File renamed without changes.

Diff for: dialog/fa-fa/disambiguate-intro.dialog

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
آیا منظورتون?
2+
می خوای درباره?

Diff for: dialog/fr-fr/disambiguate-intro.dialog

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
voulez-vous dire
2+
voulez-vous entendre parler de

Diff for: dialog/fr-fr/disambiguate.dialog

-2
This file was deleted.

Diff for: dialog/gl-es/disambiguate-intro.dialog

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
quixeches dicir
2+
gustaríache oír sobre

Diff for: dialog/gl-es/disambiguate.dialog

-2
This file was deleted.

Diff for: dialog/it-it/disambiguate-intro.dialog

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
volevi dire
2+
vuoi sentire qualcosa su

Diff for: dialog/it-it/disambiguate.dialog

-2
This file was deleted.

Diff for: dialog/nl-nl/disambiguate-intro.dialog

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
bedoel je
2+
wil je meer weten over

Diff for: dialog/nl-nl/disambiguate.dialog

-2
This file was deleted.

Diff for: dialog/pl-pl/disambiguate-intro.dialog

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
czy chodziło Ci o
2+
czy chciałbyś usłyszeć o

Diff for: dialog/pl-pl/disambiguate.dialog

-2
This file was deleted.

Diff for: dialog/pt-br/disambiguate-intro.dialog

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
você quis dizer
2+
você gostaria de ouvir sobre

Diff for: dialog/pt-br/disambiguate.dialog

-2
This file was deleted.

Diff for: dialog/ro-ro/disambiguate-intro.dialog

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
vrei să spui
2+
doriți să aflați despre

Diff for: dialog/ro-ro/disambiguate.dialog

-2
This file was deleted.

Diff for: dialog/ru-ru/disambiguate-intro.dialog

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
ты имел ввиду
2+
хотел бы ты услышать про

Diff for: dialog/ru-ru/disambiguate.dialog

-2
This file was deleted.

Diff for: dialog/sv-se/disambiguate-intro.dialog

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
menade du
2+
vill för höra om

Diff for: dialog/sv-se/disambiguate.dialog

-2
This file was deleted.

Diff for: requirements.txt

+2 -1
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
1-
wikipedia==1.4.0
1+
pymediawiki~=0.7.0
2+
requests

Diff for: test/__init__.py

+13
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# Copyright 2021, Mycroft AI Inc.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.

Diff for: test/behave/steps/wiki.py

+8 -25
Original file line numberDiff line numberDiff line change
@@ -1,41 +1,24 @@
1-
import time
1+
import time
22

3-
from behave import then, when
3+
from behave import then
44
from mycroft.messagebus import Message
5-
from mycroft.audio import wait_while_speaking
65
from test.integrationtests.voight_kampff import then_wait
76

7+
88
@then('dialog is stopped')
99
def dialog_is_stopped(context):
1010
def check_dialog_tts_stop(message):
1111
who = message.data.get('by', '')
1212
return (who == 'TTS', '')
1313

14-
def check_dialog_mycroft_stop(message):
15-
return True, ''
16-
1714
context.bus.emit(Message('mycroft.audio.speech.stop',
1815
data={},
1916
context={}))
20-
status, debug = then_wait("mycroft.stop.handled", check_dialog_tts_stop, context, 5)
21-
if status:
22-
return status, debug
23-
24-
return then_wait("mycroft.stop", check_dialog_mycroft_stop, context, 5)
25-
17+
status, debug = then_wait("mycroft.stop.handled",
18+
check_dialog_tts_stop, context, 5)
19+
return status, debug
2620

27-
@then('"{skill}" should reply with dialog "{dialog}"')
28-
def then_dialog(context, skill, dialog):
29-
def check_dialog(message):
30-
utt_dialog = message.data.get('utterance', '')
31-
return (utt_dialog == 'Here is your answer from wiki peedia', '')
3221

22+
@then('there will be a short delay')
23+
def short_sleep(context):
3324
time.sleep(1)
34-
passed, debug = then_wait('speak', check_dialog, context)
35-
if not passed:
36-
assert_msg = debug
37-
38-
assert passed, assert_msg or 'Mycroft didn\'t respond'
39-
40-
41-

Diff for: test/behave/wiki.feature

+5
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ Feature: Wikipedia Skill
44
Given an english speaking user
55
When the user says "<tell me about a person>"
66
Then "skill-wiki" should reply with dialog from "searching.dialog"
7+
Then there will be a short delay
78
And mycroft reply should contain "<person>"
89
Then dialog is stopped
910

@@ -23,6 +24,7 @@ Feature: Wikipedia Skill
2324
Given an english speaking user
2425
When the user says "<tell me about a person>"
2526
Then "skill-wiki" should reply with dialog from "searching.dialog"
27+
Then there will be a short delay
2628
And mycroft reply should contain "<person>"
2729
Then dialog is stopped
2830

@@ -34,6 +36,7 @@ Feature: Wikipedia Skill
3436
Given an english speaking user
3537
When the user says "<tell me about a place>"
3638
Then "mycroft-wiki" should reply with dialog from "searching.dialog"
39+
Then there will be a short delay
3740
And mycroft reply should contain "<place>"
3841
Then dialog is stopped
3942

@@ -49,6 +52,7 @@ Feature: Wikipedia Skill
4952
Given an english speaking user
5053
When the user says "<tell me about a thing>"
5154
Then "mycroft-wiki" should reply with dialog from "searching.dialog"
55+
Then there will be a short delay
5256
And mycroft reply should contain "<thing>"
5357
Then dialog is stopped
5458

@@ -64,6 +68,7 @@ Feature: Wikipedia Skill
6468
Given an english speaking user
6569
When the user says "<tell me about an idea>"
6670
Then "mycroft-wiki" should reply with dialog from "searching.dialog"
71+
Then there will be a short delay
6772
And mycroft reply should contain "<idea>"
6873
Then dialog is stopped
6974

Diff for: test/unit/__init__.py

+13
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# Copyright 2021, Mycroft AI Inc.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.

Diff for: test/unit/test_util.py

+30
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
# Copyright 2021, Mycroft AI Inc.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import unittest
16+
17+
from wiki.util import remove_nested_parentheses
18+
19+
class TestUtil(unittest.TestCase):
20+
21+
def test_remove_nested_parentheses(self):
22+
test_strings = [
23+
["No change", "No change"],
24+
["a (simple) one", "a one"],
25+
["Ləmurs (/ˈliːmər/ (listen) LEE-mər)", "Ləmurs "],
26+
["No (end (parentheses)", "No "]
27+
]
28+
for input, expected in test_strings:
29+
output = remove_nested_parentheses(input)
30+
self.assertEqual(output, expected)

Diff for: test/unit/test_wiki.py

+115
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
# Copyright 2021, Mycroft AI Inc.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import unittest
16+
17+
from wiki import Wiki, DisambiguationError, MediaWikiPage
18+
19+
class TestWiki(unittest.TestCase):
20+
def setUp(self):
21+
self.wiki = Wiki('en', auto_more=False)
22+
self.test_pages = {}
23+
test_titles = ['Elon Musk', 'Lemur', 'Car', 'Nike, Inc.']
24+
for title in test_titles:
25+
self.test_pages[title] = self.wiki.get_page(title)
26+
27+
def test_wiki_search(self):
28+
results = self.wiki.search('cars')
29+
self.assertIsInstance(results, list)
30+
self.assertTrue(len(results) > 0)
31+
self.assertEqual(results[0], 'Car')
32+
33+
def test_wiki_search_returns_disambiguation(self):
34+
results = self.wiki.search('george church')
35+
with self.assertRaises(DisambiguationError):
36+
_ = self.wiki.get_page(results[0])
37+
38+
def test_get_best_image(self):
39+
for page in self.test_pages.values():
40+
image = self.wiki.get_best_image_url(page, 50)
41+
self.assertIsInstance(image, str)
42+
self.assertEqual(image[:5], 'https')
43+
self.assertEqual(image[-4:], '.jpg')
44+
45+
# def test_get_disambiguation_from_results(self):
46+
# results = self.wiki.search('george church')
47+
# title = self.wiki.get_disambiguation_page(results)
48+
# self.assertIsInstance(title, str)
49+
# self.assertTrue(len(title) > 0)
50+
# with self.assertRaises(DisambiguationError):
51+
# _ = self.wiki.get_page(title)
52+
53+
def test_get_page(self):
54+
bitcoin_page = self.wiki.get_page('bitcoin')
55+
self.assertIsInstance(bitcoin_page, MediaWikiPage)
56+
self.assertEqual(bitcoin_page.title, 'Bitcoin')
57+
self.assertTrue('crypto' in bitcoin_page.summarize())
58+
59+
def test_get_random_page(self):
60+
random_page = self.wiki.get_random_page()
61+
self.assertIsInstance(random_page, MediaWikiPage)
62+
self.assertIsInstance(random_page.title, str)
63+
self.assertTrue(len(random_page.title) > 0)
64+
65+
def test_set_language(self):
66+
changed = self.wiki.set_language('es')
67+
self.assertTrue(changed)
68+
self.assertEqual(self.wiki.wiki.language, 'es')
69+
page = self.wiki.get_page('barcelona')
70+
self.assertEqual(page.title, 'Barcelona')
71+
summary = page.summarize(sentences=1)
72+
expected_start = 'Barcelona es una ciudad española'
73+
self.assertEqual(summary[:len(expected_start)], expected_start)
74+
75+
# Try to change to unsupported language
76+
changed = self.wiki.set_language('notlang')
77+
self.assertTrue(not changed)
78+
self.assertEqual(self.wiki.wiki.language, 'es')
79+
80+
# Change to same language
81+
changed = self.wiki.set_language('es')
82+
self.assertTrue(not changed)
83+
self.assertEqual(self.wiki.wiki.language, 'es')
84+
85+
# Change to default
86+
changed = self.wiki.set_language('en')
87+
self.assertTrue(changed)
88+
self.assertEqual(self.wiki.wiki.language, 'en')
89+
90+
# Set to default language
91+
self.wiki.set_language('fr')
92+
self.assertEqual(self.wiki.wiki.language, 'fr')
93+
self.wiki.set_language()
94+
self.assertEqual(self.wiki.wiki.language, 'en')
95+
96+
def test_summarize_page(self):
97+
for page in self.test_pages.values():
98+
summary = self.wiki.summarize_page(page, sentences=2)
99+
self.assertIsInstance(summary, str)
100+
self.assertTrue('(' not in summary)
101+
self.assertTrue(')' not in summary)
102+
self.assertTrue(' ' not in summary)
103+
self.assertTrue(0 < len(summary) < 500)
104+
105+
def test_summary_next_lines(self):
106+
for page in self.test_pages.values():
107+
summary_intro, intro_length = self.wiki.get_summary_intro(page)
108+
new_lines = 2
109+
summary_follow_up, total_lines = self.wiki.get_summary_next_lines(page, intro_length, new_lines)
110+
self.assertIsInstance(summary_follow_up, str)
111+
self.assertTrue('(' not in summary_follow_up)
112+
self.assertTrue(')' not in summary_follow_up)
113+
self.assertTrue(' ' not in summary_follow_up)
114+
self.assertTrue(summary_intro not in summary_follow_up)
115+
self.assertEqual(total_lines, intro_length + new_lines)

0 commit comments

Comments
 (0)