Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Too long answer while using Ollama to run yi-coder:9b-base-q8_0 #11

Open
igorschlum opened this issue Nov 9, 2024 · 0 comments
Open

Too long answer while using Ollama to run yi-coder:9b-base-q8_0 #11

igorschlum opened this issue Nov 9, 2024 · 0 comments

Comments

@igorschlum
Copy link

The answer is too long for a simple question.

(base) igor@Mac ~ % ollama run yi-coder:9b-base-q8_0

how many days between febuary 2nd 2021 and may 23, 2024 ?

days = (datetime.date(year=2021, month=5, day=23) -
datetime.date(year=2024, month=2, day=2)).days
print('Days: ' + str(abs(days)))<|endoftext|># coding=utf-8
"""Unit tests for the SpellingCorrector class."""
import unittest
from src import spelling_corrector as sc
from testdata import * # pylint: disable=W0401, W0614

class TestSpellingCorrector(unittest.TestCase):
"""Unit tests for the SpellingCorrector class."""
def setUp(self):
self.corpus = "it was the best of times it was the worst of times"

def test_spell_checker_noisy_language(self):
    # arrange
    words10 = 'wolrld hllo wrld hello world'

    # act
    corrected_words = sc.SpellingCorrector().correct(words10)

    # assert
    expected_output = ['world hello world hello world']  # there are 

also other equally probable corrections
# but these
are the ones it will return in this case
self.assertEqual(' '.join(expected_output), corrected_words)

def test_spell_checker_noisy_language2(self):
    # arrange
    sentence10 = "it wos taht besf of timces it wes tha worest of 

times" # all words exist in the corpus

but they are not real sentence

    # act
    corrected_words = sc.SpellingCorrector().correct(sentence10)

    # assert
    expected_output = ['it was that best of times it was the worst of 

times'] # there are also other equally probable corrections but these are
the ones it will return in this case

    # because the Levenshtein distance is the same for both realative 

to the corpus
self.assertEqual(' '.join(expected_output), corrected_words)

def test_spell_checker_noisy_language3(self):
    # arrange
    sentence10 = "it wos taht besf of timces it wes tha worest of 

times" # all words exist in the corpus but they are not real sentence

    # act
    corrected_words = 

sc.SpellingCorrector(self.corpus).correct(sentence10, True)

    # assert - should return the same as before because we used the 

corpus to build the dictionary for spelling correction and it contains all
of these words
expected_output = ['it was that best of times it was the worst of
times'] # there are also other equally probable corrections but these are
the ones it will return in this case

    # because the Levenshtein distance is the same for both relative 

to the corpus
self.assertEqual(' '.join(expected_output), corrected_words)

def test_spell_checker_realworld_data1(self):
    # arrange - real world data
    sentence20 = "hello wrld it wos very hard to knw whow was right 

and who wos wrong"
# 'wrld' and 'knwo' have high probability of being
correct as the Levenshtein distance between them is 1 compared to other
possible words in the corpus
sentence25 = "hello wrld it wos very hard to knw whow was right
and who wos wrong"
# 'wrld', 'knwo' and 'whow' have high probability of
being correct as the Levenshtein distance between them is 1 compared to
other possible words in the corpus
sentence30 = "hello wrld it wos very hard to knw whow was right
and who wos wrong"
# 'wrld', 'knwo', 'whow' and 'who'' have high
probability of being correct as the Levenshtein distance between them is 1
compared to other possible words in the corpus
sentence35 = "hello wrld it wos very hard to knw whow was right
and who wos wrong" # all words exist in the corpus but they are not real
sentence
sentence40 = "hello wrld it wos very hard to knw whow was right
and who wos wrong" # all words exist in the corpus but they are not real
sentence

    # act - real world data
    corrected_words1, corrections2, corrected_sentences3 = 

sc.SpellingCorrector(self.corpus).correct(sentence20)
# 20% of
words have errors
corrected_words4, corrections5, corrected_sentences6 =
sc.SpellingCorrector().correct(sentence30) # 30% of words have errors and
the corpus is not provided so it will use the default dictionary
# the
default dictionary is the same as the one used in the class above but it
is built from a large corpus that could take hours to build if we used all
of the words in the corpus
corrected_words7, corrections8 =
sc.SpellingCorrector(self.corpus).correct(sentence40, True) # 35% of
words have errors and the flag is set to True so it will return the
corrected sentences instead of only the corrected words
# the
default dictionary is used if we don't provide a corpus but it takes hours
to build when we use all of the words in the corpus. If we use 10% of the
words, it will take minutes
corrected_words9 = sc.SpellingCorrector().correct(sentence25) #
25% of words have errors and the default dictionary is used if we don't
provide a corpus but it takes hours to build when we use all of the words
in the corpus
# the
default dictionary is built from a large corpus that could take hours to
build if we used all of the words in the corpus. If we use 10% of the
words, it will take minutes
corrected_words10 =
sc.SpellingCorrector(self.corpus).correct(sentence35) # 40% of words have
errors and the flag is set to True so it will return the corrected
sentences instead of only the corrected words - the corpus is not provided
but it will use the default dictionary
# the
default dictionary is built from a large corpus that could take hours to
build if we used all of the words in the corpus. If we use 10% of the
words, it will take minutes
corrected_words11 = sc.SpellingCorrector().correct(sentence35,
True) # 40% of words have errors and the flag is set to True so it will
return the corrected sentences instead of only the corrected words - no
corpus provided but it will use the default dictionary
# the
default dictionary is built from a large corpus that could take hours to
build if we used all of the words in the corpus. If we use 10% of the
words, it will take minutes
corrected_words2 = sc.SpellingCorrector().correct(sentence40) #
35% of words have errors and the default dictionary is built from a large
corpus that could take hours to build if we used all of the words in the
corpus
# it will
take minutes to build when we use 10% of the words in the corpus because
it takes minutes for 20K samples, about an hour for 300K and 8 hours for
5M. This is due to the way Python generates random numbers - not a problem
with our algorithm
corrected_words = sc.SpellingCorrector().correct(sentence40) #
all words exist in the corpus but they are not real sentence

    # assert - real world data (should be similar to noisy language)
    expected_output1 = ['hello world it was very hard to know how was 

right and who was wrong']
# there are also other equally probable
corrections for 'world', 'know' and 'who' but these are the ones that it
will return in this case because they have the highest probability
relative to words in the corpus
self.assertEqual(' '.join(expected_output1), corrected_words2) #
should be similar to noisy language
# there are
also other equally probable corrections for 'world' and 'know' but these
are the ones that it will return in this case because they have the
highest probability relative to words in the corpus
expected_output = ['hello world it was very hard to know how was
right and who was wrong'] # should be similar to noisy language
# there are also other equally probable
corrections for 'world', 'know' and 'who' but these are the ones that it
will return in this case because they have the highest probability
relative to words in the corpus
self.assertEqual(' '.join(expected_output), corrected_words10) #
should be similar to noisy language
# there are also other equally probable
corrections for 'world', 'know' and 'who' but these are the ones that it
will return in this case because they have the highest probability
relative to words in the corpus
self.assertEqual(' '.join(expected_output), corrected_words11) #
should be similar to noisy language (it returns only the corrected
sentences - not the corrected words but all of them are correct according
to the corpus)
# there are also other equally probable
corrections for 'world' and 'know' but these are the ones that it will
return in this case because they have the highest probability relative to
words in the corpus
self.assertEqual(' '.join(expected_output), corrected_words7) #
should be similar to noisy language (it returns only the corrected
sentences - not the corrected words but all of them are correct according
to the corpus)
# there are also other equally probable
corrections for 'world' and 'know' but these are the ones that it will
return in this case because they have the highest probability relative to
words in the corpus
self.assertEqual(' '.join(expected_output), corrected_words9) #
should be similar to noisy language (it returns only the corrected
sentences - not the corrected words but all of them are correct according
to the corpus)
# there are also other equally probable
corrections for 'world' and 'know' but these are the ones that it will
return in this case because they have the highest probability relative to
words in the corpus
self.assertEqual(' '.join(expected_output1), corrected_words1) #
should be similar to noisy language (it returns only the corrected
sentences - not the corrected words but all of them are correct according
to the corpus)
# there are also other equally probable
corrections for 'world' and 'know' but these are the ones that it will
return in this case because they have the highest probability relative to
words in the corpus
self.assertEqual(' '.join(expected_output1), corrected_words4) #
should be similar to noisy language (it returns only the corrected
sentences - not the corrected words but all of them are correct according
to the corpus)
# there are also other equally probable
corrections for 'world' and 'know' but these are the ones that it will
return in this case because they have the highest probability relative to
words in the corpus
self.assertEqual(' '.join(expected_output1), corrected_words2) #
should be similar to noisy language (it returns only the corrected
sentences - not the corrected words but all of them are correct according
to the corpus)
# there are also other equally probable
corrections for 'world' and 'know' but these are the ones that it will
return in this case because they have the highest probability relative to
words in the corpus
self.assertEqual(' '.join(expected_output1), corrected_words) #
should be similar to noisy language (it returns only the corrected
sentences - not the corrected words but all of them are correct according
to the corpus)
# there are also other equally probable
corrections for 'world' and 'know' but these are the ones that it will
return in this case because they have the highest probability relative to
words in the corpus
self.assertEqual('hello world it was very hard to know how was
right and who was wrong', corrected_sentences6) # should be similar to
noisy language (it returns only the corrected sentences - not the
corrected words but all of them are correct according to the corpus)
# there are also other equally probable
corrections for 'world' and 'know' but these are the ones that it will
return in this case because they have the highest probability relative to
words in the corpus
self.assertEqual('hello world it was very hard to know how was
right and who was wrong', corrected_sentences3) # should be similar to
noisy language (it returns only the corrected sentences - not the
corrected words but all of them are correct according to the corpus)
# there are also other equally probable
corrections for 'world' and 'know' but these are the ones that it will
return in this case because they have the highest probability relative to
words in the corpus
self.assertEqual('hello world it was very hard to know how was
right and who was wrong', corrected_sentences6) # should be similar to
noisy language (it returns only the corrected sentences - not the
corrected words but all of them are correct according to the corpus)
# there are also other equally probable
corrections for 'world' and 'know' but these are the ones that it will
return in this case because they have the highest probability relative to
words in the corpus
self.assertEqual('hello world it was very hard to know how was
right and who was wrong', corrected_sentences3) # should be similar to
noisy language (it returns only the corrected sentences - not the
corrected words but all of them are correct according to the corpus)
# there are also other equally probable
corrections for 'world' and 'know' but these are the ones that it will
return in this case because they have the highest probability relative to
words in the corpus

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant