diff --git a/python/src/sentencepiece/__init__.py b/python/src/sentencepiece/__init__.py index b5fe7c4c..5fd29684 100644 --- a/python/src/sentencepiece/__init__.py +++ b/python/src/sentencepiece/__init__.py @@ -116,6 +116,9 @@ def SampleEncodeAsIds(self, input, nbest_size, alpha): def DecodePieces(self, pieces): return _sentencepiece.SentencePieceProcessor_DecodePieces(self, pieces) + def DecodeIds(self, ids): + return _sentencepiece.SentencePieceProcessor_DecodeIds(self, ids) + def EncodeAsSerializedProto(self, input): return _sentencepiece.SentencePieceProcessor_EncodeAsSerializedProto(self, input) @@ -128,6 +131,9 @@ def NBestEncodeAsSerializedProto(self, input, nbest_size): def DecodePiecesAsSerializedProto(self, pieces): return _sentencepiece.SentencePieceProcessor_DecodePiecesAsSerializedProto(self, pieces) + def DecodeIdsAsSerializedProto(self, ids): + return _sentencepiece.SentencePieceProcessor_DecodeIdsAsSerializedProto(self, ids) + def GetPieceSize(self): return _sentencepiece.SentencePieceProcessor_GetPieceSize(self) @@ -312,7 +318,7 @@ def _decode(input): if not input: return self.DecodeIds([]) if type(input[0]) is int: - return self.DecodeId(input) + return self.DecodeIds(input) return self.DecodePieces(input) if type(input[0]) is list: diff --git a/python/src/sentencepiece/sentencepiece.i b/python/src/sentencepiece/sentencepiece.i index ef37ff9e..253d1b2e 100644 --- a/python/src/sentencepiece/sentencepiece.i +++ b/python/src/sentencepiece/sentencepiece.i @@ -176,8 +176,6 @@ class PySentenceIterator : public sentencepiece::SentenceIterator { %ignore sentencepiece::SentencePieceProcessor::SampleEncode; %ignore sentencepiece::SentencePieceProcessor::NBestEncode; %ignore sentencepiece::SentencePieceProcessor::Decode; -%ignore sentencepiece::SentencePieceProcessor::DecodeIds; -%ignore sentencepiece::SentencePieceProcessor::DecodeIdsAsSerializedProto; %ignore sentencepiece::SentencePieceProcessor::model_proto; %ignore sentencepiece::SentencePieceProcessor::Load; %ignore sentencepiece::SentencePieceProcessor::LoadOrDie; @@ -341,7 +339,7 @@ class PySentenceIterator : public sentencepiece::SentenceIterator { if not input: return self.DecodeIds([]) if type(input[0]) is int: - return self.DecodeId(input) + return self.DecodeIds(input) return self.DecodePieces(input) if type(input[0]) is list: diff --git a/python/src/sentencepiece/sentencepiece_wrap.cxx b/python/src/sentencepiece/sentencepiece_wrap.cxx index 7451d604..25388427 100644 --- a/python/src/sentencepiece/sentencepiece_wrap.cxx +++ b/python/src/sentencepiece/sentencepiece_wrap.cxx @@ -2698,9 +2698,10 @@ SWIGINTERN PyObject *SWIG_PyStaticMethod_New(PyObject *SWIGUNUSEDPARM(self), PyO #define SWIGTYPE_p_sentencepiece__SentencePieceTrainer swig_types[3] #define SWIGTYPE_p_std__string swig_types[4] #define SWIGTYPE_p_std__unordered_mapT_std__string_std__string_t swig_types[5] -#define SWIGTYPE_p_std__vectorT_std__string_t swig_types[6] -static swig_type_info *swig_types[8]; -static swig_module_info swig_module = {swig_types, 7, 0, 0, 0, 0}; +#define SWIGTYPE_p_std__vectorT_int_t swig_types[6] +#define SWIGTYPE_p_std__vectorT_std__string_t swig_types[7] +static swig_type_info *swig_types[9]; +static swig_module_info swig_module = {swig_types, 8, 0, 0, 0, 0}; #define SWIG_TypeQuery(name) SWIG_TypeQueryModule(&swig_module, &swig_module, name) #define SWIG_MangledTypeQuery(name) SWIG_MangledTypeQueryModule(&swig_module, &swig_module, name) @@ -4214,6 +4215,66 @@ SWIGINTERN PyObject *_wrap_SentencePieceProcessor_DecodePieces(PyObject *SWIGUNU } +SWIGINTERN PyObject *_wrap_SentencePieceProcessor_DecodeIds(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { + PyObject *resultobj = 0; + sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ; + std::vector< int > *arg2 = 0 ; + void *argp1 = 0 ; + int res1 = 0 ; + PyObject *swig_obj[2] ; + std::string result; + + if (!SWIG_Python_UnpackTuple(args, "SentencePieceProcessor_DecodeIds", 2, 2, swig_obj)) SWIG_fail; + res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__SentencePieceProcessor, 0 | 0 ); + if (!SWIG_IsOK(res1)) { + SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor_DecodeIds" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor const *""'"); + } + arg1 = reinterpret_cast< sentencepiece::SentencePieceProcessor * >(argp1); + { + std::vector *out = nullptr; + if (PyList_Check(swig_obj[1])) { + const size_t size = PyList_Size(swig_obj[1]); + out = new std::vector(size); + for (size_t i = 0; i < size; ++i) { + PyObject *o = PyList_GetItem(swig_obj[1], i); + if (PyInt_Check(o)) { + (*out)[i] = static_cast(PyInt_AsLong(o)); + } else { + PyErr_SetString(PyExc_TypeError,"list must contain integers"); + SWIG_fail; + } + } + } else { + PyErr_SetString(PyExc_TypeError,"not a list"); + SWIG_fail; + } + arg2 = out; + } + { + try { + result = ((sentencepiece::SentencePieceProcessor const *)arg1)->DecodeIds((std::vector< int > const &)*arg2); + ReleaseResultObject(resultobj); + } + catch (const sentencepiece::util::Status &status) { + SWIG_exception(ToSwigError(status.code()), status.ToString().c_str()); + } + } + { + PyObject *input_type = resultobj; + resultobj = MakePyOutputString(result, input_type); + } + { + delete arg2; + } + return resultobj; +fail: + { + delete arg2; + } + return NULL; +} + + SWIGINTERN PyObject *_wrap_SentencePieceProcessor_EncodeAsSerializedProto(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { PyObject *resultobj = 0; sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ; @@ -4424,6 +4485,65 @@ SWIGINTERN PyObject *_wrap_SentencePieceProcessor_DecodePiecesAsSerializedProto( } +SWIGINTERN PyObject *_wrap_SentencePieceProcessor_DecodeIdsAsSerializedProto(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { + PyObject *resultobj = 0; + sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ; + std::vector< int > *arg2 = 0 ; + void *argp1 = 0 ; + int res1 = 0 ; + PyObject *swig_obj[2] ; + sentencepiece::util::bytes result; + + if (!SWIG_Python_UnpackTuple(args, "SentencePieceProcessor_DecodeIdsAsSerializedProto", 2, 2, swig_obj)) SWIG_fail; + res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__SentencePieceProcessor, 0 | 0 ); + if (!SWIG_IsOK(res1)) { + SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor_DecodeIdsAsSerializedProto" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor const *""'"); + } + arg1 = reinterpret_cast< sentencepiece::SentencePieceProcessor * >(argp1); + { + std::vector *out = nullptr; + if (PyList_Check(swig_obj[1])) { + const size_t size = PyList_Size(swig_obj[1]); + out = new std::vector(size); + for (size_t i = 0; i < size; ++i) { + PyObject *o = PyList_GetItem(swig_obj[1], i); + if (PyInt_Check(o)) { + (*out)[i] = static_cast(PyInt_AsLong(o)); + } else { + PyErr_SetString(PyExc_TypeError,"list must contain integers"); + SWIG_fail; + } + } + } else { + PyErr_SetString(PyExc_TypeError,"not a list"); + SWIG_fail; + } + arg2 = out; + } + { + try { + result = ((sentencepiece::SentencePieceProcessor const *)arg1)->DecodeIdsAsSerializedProto((std::vector< int > const &)*arg2); + ReleaseResultObject(resultobj); + } + catch (const sentencepiece::util::Status &status) { + SWIG_exception(ToSwigError(status.code()), status.ToString().c_str()); + } + } + { + resultobj = MakePyOutputBytes(result); + } + { + delete arg2; + } + return resultobj; +fail: + { + delete arg2; + } + return NULL; +} + + SWIGINTERN PyObject *_wrap_SentencePieceProcessor_GetPieceSize(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { PyObject *resultobj = 0; sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ; @@ -5277,10 +5397,12 @@ static PyMethodDef SwigMethods[] = { { "SentencePieceProcessor_SampleEncodeAsPieces", _wrap_SentencePieceProcessor_SampleEncodeAsPieces, METH_VARARGS, NULL}, { "SentencePieceProcessor_SampleEncodeAsIds", _wrap_SentencePieceProcessor_SampleEncodeAsIds, METH_VARARGS, NULL}, { "SentencePieceProcessor_DecodePieces", _wrap_SentencePieceProcessor_DecodePieces, METH_VARARGS, NULL}, + { "SentencePieceProcessor_DecodeIds", _wrap_SentencePieceProcessor_DecodeIds, METH_VARARGS, NULL}, { "SentencePieceProcessor_EncodeAsSerializedProto", _wrap_SentencePieceProcessor_EncodeAsSerializedProto, METH_VARARGS, NULL}, { "SentencePieceProcessor_SampleEncodeAsSerializedProto", _wrap_SentencePieceProcessor_SampleEncodeAsSerializedProto, METH_VARARGS, NULL}, { "SentencePieceProcessor_NBestEncodeAsSerializedProto", _wrap_SentencePieceProcessor_NBestEncodeAsSerializedProto, METH_VARARGS, NULL}, { "SentencePieceProcessor_DecodePiecesAsSerializedProto", _wrap_SentencePieceProcessor_DecodePiecesAsSerializedProto, METH_VARARGS, NULL}, + { "SentencePieceProcessor_DecodeIdsAsSerializedProto", _wrap_SentencePieceProcessor_DecodeIdsAsSerializedProto, METH_VARARGS, NULL}, { "SentencePieceProcessor_GetPieceSize", _wrap_SentencePieceProcessor_GetPieceSize, METH_O, NULL}, { "SentencePieceProcessor_PieceToId", _wrap_SentencePieceProcessor_PieceToId, METH_VARARGS, NULL}, { "SentencePieceProcessor_IdToPiece", _wrap_SentencePieceProcessor_IdToPiece, METH_VARARGS, NULL}, @@ -5320,6 +5442,7 @@ static swig_type_info _swigt__p_sentencepiece__SentencePieceProcessor = {"_p_sen static swig_type_info _swigt__p_sentencepiece__SentencePieceTrainer = {"_p_sentencepiece__SentencePieceTrainer", "sentencepiece::SentencePieceTrainer *", 0, 0, (void*)0, 0}; static swig_type_info _swigt__p_std__string = {"_p_std__string", "sentencepiece::util::bytes *|std::string *", 0, 0, (void*)0, 0}; static swig_type_info _swigt__p_std__unordered_mapT_std__string_std__string_t = {"_p_std__unordered_mapT_std__string_std__string_t", "std::unordered_map< std::string,std::string > *", 0, 0, (void*)0, 0}; +static swig_type_info _swigt__p_std__vectorT_int_t = {"_p_std__vectorT_int_t", "std::vector< int > *", 0, 0, (void*)0, 0}; static swig_type_info _swigt__p_std__vectorT_std__string_t = {"_p_std__vectorT_std__string_t", "std::vector< std::string > *", 0, 0, (void*)0, 0}; static swig_type_info *swig_type_initial[] = { @@ -5329,6 +5452,7 @@ static swig_type_info *swig_type_initial[] = { &_swigt__p_sentencepiece__SentencePieceTrainer, &_swigt__p_std__string, &_swigt__p_std__unordered_mapT_std__string_std__string_t, + &_swigt__p_std__vectorT_int_t, &_swigt__p_std__vectorT_std__string_t, }; @@ -5338,6 +5462,7 @@ static swig_cast_info _swigc__p_sentencepiece__SentencePieceProcessor[] = { {&_ static swig_cast_info _swigc__p_sentencepiece__SentencePieceTrainer[] = { {&_swigt__p_sentencepiece__SentencePieceTrainer, 0, 0, 0},{0, 0, 0, 0}}; static swig_cast_info _swigc__p_std__string[] = { {&_swigt__p_std__string, 0, 0, 0},{0, 0, 0, 0}}; static swig_cast_info _swigc__p_std__unordered_mapT_std__string_std__string_t[] = { {&_swigt__p_std__unordered_mapT_std__string_std__string_t, 0, 0, 0},{0, 0, 0, 0}}; +static swig_cast_info _swigc__p_std__vectorT_int_t[] = { {&_swigt__p_std__vectorT_int_t, 0, 0, 0},{0, 0, 0, 0}}; static swig_cast_info _swigc__p_std__vectorT_std__string_t[] = { {&_swigt__p_std__vectorT_std__string_t, 0, 0, 0},{0, 0, 0, 0}}; static swig_cast_info *swig_cast_initial[] = { @@ -5347,6 +5472,7 @@ static swig_cast_info *swig_cast_initial[] = { _swigc__p_sentencepiece__SentencePieceTrainer, _swigc__p_std__string, _swigc__p_std__unordered_mapT_std__string_std__string_t, + _swigc__p_std__vectorT_int_t, _swigc__p_std__vectorT_std__string_t, };