From 9dae6e2085837c5ba1e27d8ae24aee30f2361ae7 Mon Sep 17 00:00:00 2001 From: CLearERR Date: Sun, 29 Oct 2017 01:57:49 +0500 Subject: [PATCH 1/5] fix for issue about mutable vector returned by KeyedVectors.word_vector --- gensim/models/keyedvectors.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/gensim/models/keyedvectors.py b/gensim/models/keyedvectors.py index b46c3a21ec..9897c5f325 100644 --- a/gensim/models/keyedvectors.py +++ b/gensim/models/keyedvectors.py @@ -278,11 +278,12 @@ def word_vec(self, word, use_norm=False): """ if word in self.vocab: if use_norm: - return self.syn0norm[self.vocab[word].index] + result = self.syn0norm[self.vocab[word].index] else: - return self.syn0[self.vocab[word].index] - else: - raise KeyError("word '%s' not in vocabulary" % word) + result = self.syn0[self.vocab[word].index] + + result.setflag(write=False) + return result def most_similar(self, positive=None, negative=None, topn=10, restrict_vocab=None, indexer=None): """ From 7e7778caded7a922f67c248ff0fdeb04101303be Mon Sep 17 00:00:00 2001 From: CLearERR Date: Sun, 29 Oct 2017 22:12:49 +0500 Subject: [PATCH 2/5] fix for previous fix --- gensim/models/keyedvectors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gensim/models/keyedvectors.py b/gensim/models/keyedvectors.py index 9897c5f325..4476b8aea2 100644 --- a/gensim/models/keyedvectors.py +++ b/gensim/models/keyedvectors.py @@ -282,7 +282,7 @@ def word_vec(self, word, use_norm=False): else: result = self.syn0[self.vocab[word].index] - result.setflag(write=False) + result.setflags(write=False) return result def most_similar(self, positive=None, negative=None, topn=10, restrict_vocab=None, indexer=None): From 42f480a29cb03e1109cdd7fd5636cff7d19b46d7 Mon Sep 17 00:00:00 2001 From: CLearERR Date: Mon, 30 Oct 2017 00:49:03 +0500 Subject: [PATCH 3/5] Added negative test for word_vec, also returned not-a-word check --- gensim/models/keyedvectors.py | 2 ++ 
gensim/test/test_doc2vec.py | 9 +++++++++ 2 files changed, 11 insertions(+) diff --git a/gensim/models/keyedvectors.py b/gensim/models/keyedvectors.py index 4476b8aea2..183f45ada3 100644 --- a/gensim/models/keyedvectors.py +++ b/gensim/models/keyedvectors.py @@ -284,6 +284,8 @@ def word_vec(self, word, use_norm=False): result.setflags(write=False) return result + else: + raise KeyError("word '%s' not in vocabulary" % word) def most_similar(self, positive=None, negative=None, topn=10, restrict_vocab=None, indexer=None): """ diff --git a/gensim/test/test_doc2vec.py b/gensim/test/test_doc2vec.py index 4f0d4865f5..26b46772b1 100644 --- a/gensim/test/test_doc2vec.py +++ b/gensim/test/test_doc2vec.py @@ -386,6 +386,15 @@ def test_delete_temporary_training_data(self): self.model_sanity(model, keep_training=False) self.assertTrue(hasattr(model, 'syn1neg')) + def test_word_vec(self): + model = keyedvectors.KeyedVectors.load_word2vec_format(datapath('word2vec_pre_kv_c')) + vector = model['says'] + with self.assertRaises(ValueError): + vector *= 0 + + + + @log_capture() def testBuildVocabWarning(self, l): """Test if logger warning is raised on non-ideal input to a doc2vec model""" From f467a9f486dcdb7e9f761783bef2dda5d85ae033 Mon Sep 17 00:00:00 2001 From: Menshikh Ivan Date: Mon, 30 Oct 2017 00:51:45 +0500 Subject: [PATCH 4/5] fix PEP8 --- gensim/test/test_doc2vec.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/gensim/test/test_doc2vec.py b/gensim/test/test_doc2vec.py index 26b46772b1..7e5d4b1b17 100644 --- a/gensim/test/test_doc2vec.py +++ b/gensim/test/test_doc2vec.py @@ -392,9 +392,6 @@ def test_word_vec(self): with self.assertRaises(ValueError): vector *= 0 - - - @log_capture() def testBuildVocabWarning(self, l): """Test if logger warning is raised on non-ideal input to a doc2vec model""" From b042209069f71939ff01a8856d6c5d21a52acf0d Mon Sep 17 00:00:00 2001 From: Menshikh Ivan Date: Mon, 30 Oct 2017 01:27:36 +0500 Subject: [PATCH 5/5] Change test name --- 
gensim/test/test_doc2vec.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gensim/test/test_doc2vec.py b/gensim/test/test_doc2vec.py index 7e5d4b1b17..6feeab3bd2 100644 --- a/gensim/test/test_doc2vec.py +++ b/gensim/test/test_doc2vec.py @@ -386,7 +386,7 @@ def test_delete_temporary_training_data(self): self.model_sanity(model, keep_training=False) self.assertTrue(hasattr(model, 'syn1neg')) - def test_word_vec(self): + def test_word_vec_non_writeable(self): model = keyedvectors.KeyedVectors.load_word2vec_format(datapath('word2vec_pre_kv_c')) vector = model['says'] with self.assertRaises(ValueError):