-
Notifications
You must be signed in to change notification settings - Fork 26
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
failed to cast tokenizer
#32
Comments
Using Hugot as the library to generate embeddings. |
Debug logs with full Rust backtrace: $ RUST_BACKTRACE=full CGO_LDFLAGS="-L/usr/lib/" go test -v ./internal/embed
=== RUN TestConfigOptions
=== RUN TestConfigOptions/with_custom_model_dir
=== RUN TestConfigOptions/with_custom_model_name
=== RUN TestConfigOptions/with_both_options
--- PASS: TestConfigOptions (0.00s)
--- PASS: TestConfigOptions/with_custom_model_dir (0.00s)
--- PASS: TestConfigOptions/with_custom_model_name (0.00s)
--- PASS: TestConfigOptions/with_both_options (0.00s)
=== RUN TestEmbedder
=== RUN TestEmbedder/initialisation
=== RUN TestEmbedder/singleton_pattern
=== RUN TestEmbedder/embedding_generation
thread '<unnamed>' panicked at 'failed to cast tokenizer', src/lib.rs:86:54
stack backtrace:
0: 0xc9a6a1 - <unknown>
1: 0xcf7f2f - <unknown>
2: 0xc8f1c1 - <unknown>
3: 0xc9a4b5 - <unknown>
4: 0xc9d177 - <unknown>
5: 0xc9cf64 - <unknown>
6: 0xc9d6ec - <unknown>
7: 0xc9d5e7 - <unknown>
8: 0xc9aad6 - <unknown>
9: 0xc9d332 - <unknown>
10: 0x422363 - <unknown>
11: 0x422323 - <unknown>
12: 0x97638e - <unknown>
13: 0x96d3b8 - <unknown>
14: 0x49e2e4 - <unknown>
fatal runtime error: failed to initiate panic, error 5
SIGABRT: abort
PC=0x74a06aa9eb1c m=8 sigcode=18446744073709551610
signal arrived during cgo execution
goroutine 121 gp=0xc000104fc0 m=8 mp=0xc000580008 [syscall]:
runtime.cgocall(0x96d380, 0xc0000b1688)
/usr/local/go/src/runtime/cgocall.go:167 +0x4b fp=0xc0000b1660 sp=0xc0000b1628 pc=0x49002b
github.com/daulet/tokenizers._Cfunc_encode(0x0, 0x749ff413a6d0, 0xc000598820)
_cgo_gotypes.go:160 +0x6b fp=0xc0000b1688 sp=0xc0000b1660 pc=0x91d18b
github.com/daulet/tokenizers.(*Tokenizer).EncodeWithOptions.func2(0x0?, 0x749ff413a6d0, 0xc0005988
20)
/home/fred/go/pkg/mod/github.com/daulet/tokenizers@v1.20.2/tokenizer.go:368 +0x91 fp=0xc00
00b1728 sp=0xc0000b1688 pc=0x91e0f1
github.com/daulet/tokenizers.(*Tokenizer).EncodeWithOptions(0xc0000a00a0, {0xeeb462?, 0x44543c?},
0x1, {0xc00013c100, 0x3, 0x432ebe?})
/home/fred/go/pkg/mod/github.com/daulet/tokenizers@v1.20.2/tokenizer.go:368 +0x12d fp=0xc0
000b1910 sp=0xc0000b1728 pc=0x91d94d
github.com/knights-analytics/hugot/pipelineBackends.tokenizeInputsRust(0xc0000b1d08, 0xc000206450,
{0xc0000b1ed0, 0x1, 0x9214c6?})
/home/fred/go/pkg/mod/github.com/knights-analytics/hugot@v0.3.0/pipelineBackends/tokenizer
_rust.go:55 +0x134 fp=0xc0000b1b88 sp=0xc0000b1910 pc=0x921934
github.com/knights-analytics/hugot/pipelineBackends.TokenizeInputs(...)
/home/fred/go/pkg/mod/github.com/knights-analytics/hugot@v0.3.0/pipelineBackends/tokenizer
.go:34
github.com/knights-analytics/hugot/pipelines.(*FeatureExtractionPipeline).Preprocess(0xc0004460a0,
0xc0000b1d08, {0xc0000b1ed0, 0x1, 0x1})
/home/fred/go/pkg/mod/github.com/knights-analytics/hugot@v0.3.0/pipelines/featureExtractio
n.go:151 +0x7e fp=0xc0000b1bd8 sp=0xc0000b1b88 pc=0x95399e
github.com/knights-analytics/hugot/pipelines.(*FeatureExtractionPipeline).RunPipeline(0xc0004460a0
, {0xc0002536d0?, 0x53fd5e?, 0xc000204820?})
/home/fred/go/pkg/mod/github.com/knights-analytics/hugot@v0.3.0/pipelines/featureExtractio
n.go:235 +0x11e fp=0xc0000b1da0 sp=0xc0000b1bd8 pc=0x95425e
github.com/Predixus/DynaRAG/internal/embed.(*Embedder).GetEmbeddings(0xfcfc78?, {0xc0002536d0?, 0x
0?, 0x0?})
/home/fred/git/predixus/DynaRAG/internal/embed/main.go:146 +0xc5 fp=0xc0000b1e18 sp=0xc000
0b1da0 pc=0x968905
github.com/Predixus/DynaRAG/internal/embed.TestEmbedder.func4(0xc000204820)
/home/fred/git/predixus/DynaRAG/internal/embed/main_test.go:168 +0x29e fp=0xc0000b1f70 sp=
0xc0000b1e18 pc=0x9698de
testing.tRunner(0xc000204820, 0xc0002b00d8)
/usr/local/go/src/testing/testing.go:1690 +0xf4 fp=0xc0000b1fc0 sp=0xc0000b1f70 pc=0x53b1f
4
testing.(*T).Run.gowrap1()
/usr/local/go/src/testing/testing.go:1743 +0x25 fp=0xc0000b1fe0 sp=0xc0000b1fc0 pc=0x53c1e
5
runtime.goexit({})
/usr/local/go/src/runtime/asm_amd64.s:1700 +0x1 fp=0xc0000b1fe8 sp=0xc0000b1fe0 pc=0x49e66
1
created by testing.(*T).Run in goroutine 24
/usr/local/go/src/testing/testing.go:1743 +0x390
goroutine 1 gp=0xc0000061c0 m=nil [chan receive]:
runtime.gopark(0x210340?, 0x74a0240c1688?, 0x18?, 0x0?, 0xe51d20?)
/usr/local/go/src/runtime/proc.go:424 +0xce fp=0xc00018f9c8 sp=0xc00018f9a8 pc=0x49650e
runtime.chanrecv(0xc000112a10, 0xc00018faaf, 0x1)
/usr/local/go/src/runtime/chan.go:639 +0x41c fp=0xc00018fa40 sp=0xc00018f9c8 pc=0x42c7bc
runtime.chanrecv1(0x15dfd00?, 0xe09ce0?)
/usr/local/go/src/runtime/chan.go:489 +0x12 fp=0xc00018fa68 sp=0xc00018fa40 pc=0x42c372
testing.(*T).Run(0xc0002044e0, {0xeeab88?, 0x0?}, 0xf2b0f8)
/usr/local/go/src/testing/testing.go:1751 +0x3ab fp=0xc00018fb28 sp=0xc00018fa68 pc=0x53c0
8b
testing.runTests.func1(0xc0002044e0)
/usr/local/go/src/testing/testing.go:2168 +0x37 fp=0xc00018fb68 sp=0xc00018fb28 pc=0x53e35
7
testing.tRunner(0xc0002044e0, 0xc00018fc70)
/usr/local/go/src/testing/testing.go:1690 +0xf4 fp=0xc00018fbb8 sp=0xc00018fb68 pc=0x53b1f
4
testing.runTests(0xc0001387f8, {0x159bf80, 0x2, 0x2}, {0x494390?, 0x493ffa?, 0x15e05a0?})
/usr/local/go/src/testing/testing.go:2166 +0x43d fp=0xc00018fca0 sp=0xc00018fbb8 pc=0x53e2
3d
testing.(*M).Run(0xc0001db900)
/usr/local/go/src/testing/testing.go:2034 +0x64a fp=0xc00018fed0 sp=0xc00018fca0 pc=0x53cc
6a
main.main()
_testmain.go:47 +0x9b fp=0xc00018ff50 sp=0xc00018fed0 pc=0x96b4bb
runtime.main()
/usr/local/go/src/runtime/proc.go:272 +0x28b fp=0xc00018ffe0 sp=0xc00018ff50 pc=0x46052b
runtime.goexit({})
/usr/local/go/src/runtime/asm_amd64.s:1700 +0x1 fp=0xc00018ffe8 sp=0xc00018ffe0 pc=0x49e66
1
goroutine 2 gp=0xc000006c40 m=nil [force gc (idle)]:
|
It's finding the file just fine. Seems to have an issue with parsing it. |
@frederickmannings can you share the config? Perhaps a minimized version, if you can reduce it to an essential repro? |
Thanks @daulet for replying. I will try to shrink this down to a minimal example. |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Strange behaviour when trying to run a Hugging Face embedding pipeline within a test, resulting in the error "failed to cast tokenizer":
The same process works just fine when running outside of tests. @daulet any ideas about what might be the issue here?
tokenizers.a
is present in /usr/lib/.
The text was updated successfully, but these errors were encountered: