New option: --openai.apibase [$OPENAI_API_BASE] #135

Merged: 4 commits, Oct 20, 2024
9 changes: 5 additions & 4 deletions README.md
@@ -95,7 +95,7 @@ Nothing needed to enable CAS integration, it is enabled by default. To disable i

**OpenAI integration**

-Setting `--openai.token [$OPENAI_PROMPT]` enables OpenAI integration. All other parameters for OpenAI integration are optional and have reasonable defaults, for more details see [All Application Options](#all-application-options) section below.
+Setting `--openai.token [$OPENAI_TOKEN]` or `--openai.apibase [$OPENAI_API_BASE]` enables OpenAI integration. All other parameters for OpenAI integration are optional and have reasonable defaults, for more details see [All Application Options](#all-application-options) section below.
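For reference, the same pair of options can also be supplied through their environment variables; a hypothetical docker-compose fragment (the image tag and endpoint URL are placeholders, not from this PR):

```yaml
services:
  tg-spam:
    image: umputun/tg-spam:latest
    environment:
      - OPENAI_TOKEN=sk-example
      - OPENAI_API_BASE=http://localhost:8000/v1
```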

To keep the number of calls low and the price manageable, the bot uses the following approach:

@@ -273,9 +273,10 @@ meta:

openai:
--openai.token= openai token, disabled if not set [$OPENAI_TOKEN]
+--openai.apibase= custom openai API base, default is https://api.openai.com/v1 [$OPENAI_API_BASE]
--openai.veto veto mode, confirm detected spam [$OPENAI_VETO]
--openai.prompt= openai system prompt, if empty uses builtin default [$OPENAI_PROMPT]
---openai.model= openai model (default: gpt-4) [$OPENAI_MODEL]
+--openai.model= openai model (default: gpt-4o-mini) [$OPENAI_MODEL]
--openai.max-tokens-response= openai max tokens in response (default: 1024) [$OPENAI_MAX_TOKENS_RESPONSE]
--openai.max-tokens-request= openai max tokens in request (default: 2048) [$OPENAI_MAX_TOKENS_REQUEST]
--openai.max-symbols-request= openai max symbols in request, failback if tokenizer failed (default: 16000) [$OPENAI_MAX_SYMBOLS_REQUEST]
@@ -344,7 +345,7 @@ Pls note: Missed spam messages forwarded to the admin chat will be removed from

The bot can be run with a webapi server. This is useful for integration with other tools. The server is disabled by default, to enable it pass `--server.enabled [$SERVER_ENABLED]`. The server will listen on the port specified by `--server.listen [$SERVER_LISTEN]` parameter (default is `:8080`).

-By default, the server is protected by basic auth with user `tg-bot` and randomly generated password. This password is printed to the console on startup. If user wants to set a custom auth password, it can be done with `--server.auth [$SERVER_AUTH]` parameter. Setting it to empty string will disable basic auth protection.
+By default, the server is protected by basic auth with user `tg-spam` and randomly generated password. This password is printed to the console on startup. If user wants to set a custom auth password, it can be done with `--server.auth [$SERVER_AUTH]` parameter. Setting it to empty string will disable basic auth protection.

It is truly a **bad idea** to run the server without basic auth protection, as it allows adding/removing users and updating spam samples to anyone who knows the endpoint. The only reason to run it without protection is inside the trusted network or for testing purposes. Exposing the server directly to the internet is not recommended either, as basic auth is not secure enough if used without SSL. It is better to use a reverse proxy with TLS termination in front of the server.

@@ -470,7 +471,7 @@ A small utility and docker container provided to update spam and ham samples fro

It also has an example of [docker-compose.yml](https://github.com/umputun/tg-spam/tree/master/updater/docker-compose.yml) to run it as a container side-by-side with the bot.

-## Running tgspam for multiple groups
+## Running tg-spam for multiple groups

It is not possible to run the bot for multiple groups, as the bot is designed to work with a single group only. However, it is possible to run multiple instances of the bot with different tokens and different groups. Note: it has to have a token per bot, because TG doesn't allow using the same token for multiple bots at the same time, and such a reuse attempt will prevent the bot from working properly.

11 changes: 7 additions & 4 deletions app/main.go
@@ -73,9 +73,10 @@ type options struct {

OpenAI struct {
Token string `long:"token" env:"TOKEN" description:"openai token, disabled if not set"`
+APIBase string `long:"apibase" env:"API_BASE" description:"custom openai API base, default is https://api.openai.com/v1"`
Veto bool `long:"veto" env:"VETO" description:"veto mode, confirm detected spam"`
Prompt string `long:"prompt" env:"PROMPT" default:"" description:"openai system prompt, if empty uses builtin default"`
-Model string `long:"model" env:"MODEL" default:"gpt-4" description:"openai model"`
+Model string `long:"model" env:"MODEL" default:"gpt-4o-mini" description:"openai model"`
MaxTokensResponse int `long:"max-tokens-response" env:"MAX_TOKENS_RESPONSE" default:"1024" description:"openai max tokens in response"`
MaxTokensRequest int `long:"max-tokens-request" env:"MAX_TOKENS_REQUEST" default:"2048" description:"openai max tokens in request"`
MaxSymbolsRequest int `long:"max-symbols-request" env:"MAX_SYMBOLS_REQUEST" default:"16000" description:"openai max symbols in request, failback if tokenizer failed"`
@@ -360,7 +361,7 @@ func activateServer(ctx context.Context, opts options, sf *bot.SpamFilter, loc *
MetaLinksOnly: opts.Meta.LinksOnly,
MetaImageOnly: opts.Meta.ImageOnly,
MultiLangLimit: opts.MultiLangWords,
-OpenAIEnabled: opts.OpenAI.Token != "",
+OpenAIEnabled: opts.OpenAI.Token != "" || opts.OpenAI.APIBase != "",
SamplesDataPath: opts.Files.SamplesDataPath,
DynamicDataPath: opts.Files.DynamicDataPath,
WatchIntervalSecs: int(opts.Files.WatchInterval.Seconds()),
@@ -423,7 +424,7 @@ func makeDetector(opts options) *tgspam.Detector {
detector := tgspam.NewDetector(detectorConfig)
log.Printf("[DEBUG] detector config: %+v", detectorConfig)

-if opts.OpenAI.Token != "" {
+if opts.OpenAI.Token != "" || opts.OpenAI.APIBase != "" {
log.Printf("[WARN] openai enabled")
openAIConfig := tgspam.OpenAIConfig{
SystemPrompt: opts.OpenAI.Prompt,
MaxSymbolsRequest: opts.OpenAI.MaxSymbolsRequest,
}
log.Printf("[DEBUG] openai config: %+v", openAIConfig)
-detector.WithOpenAIChecker(openai.NewClient(opts.OpenAI.Token), openAIConfig)
+config := openai.DefaultConfig(opts.OpenAI.Token)
+config.BaseURL = opts.OpenAI.APIBase
+detector.WithOpenAIChecker(openai.NewClientWithConfig(config), openAIConfig)
}

metaChecks := []tgspam.MetaCheck{}
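The gating change in `makeDetector` can be illustrated with a self-contained sketch; the helper name `openAIEnabled` is invented here for illustration and does not appear in the PR:

```go
package main

import "fmt"

// openAIEnabled mirrors the updated condition: the OpenAI integration
// turns on when either a token or a custom API base is provided.
func openAIEnabled(token, apiBase string) bool {
	return token != "" || apiBase != ""
}

func main() {
	fmt.Println(openAIEnabled("", ""))                         // false
	fmt.Println(openAIEnabled("sk-example", ""))               // true
	fmt.Println(openAIEnabled("", "http://localhost:8000/v1")) // true
}
```

The second case matters for self-hosted OpenAI-compatible endpoints, which may not require a token at all.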
2 changes: 1 addition & 1 deletion lib/tgspam/openai.go
@@ -58,7 +58,7 @@ func newOpenAIChecker(client openAIClient, params OpenAIConfig) *openAIChecker {
params.MaxSymbolsRequest = 8192
}
if params.Model == "" {
-params.Model = "gpt-4"
+params.Model = "gpt-4o-mini"
}
return &openAIChecker{client: client, params: params}
}
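The defaulting pattern in `newOpenAIChecker` can be sketched standalone; the helper name `defaultModel` is hypothetical:

```go
package main

import "fmt"

// defaultModel mirrors the fallback above: an empty model string
// resolves to the new default, gpt-4o-mini.
func defaultModel(model string) string {
	if model == "" {
		return "gpt-4o-mini"
	}
	return model
}

func main() {
	fmt.Println(defaultModel(""))       // gpt-4o-mini
	fmt.Println(defaultModel("gpt-4o")) // gpt-4o
}
```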
2 changes: 1 addition & 1 deletion lib/tgspam/openai_test.go
@@ -27,7 +27,7 @@ func TestOpenAIChecker_Check(t *testing.T) {
MaxTokensResponse: 300,
MaxTokensRequest: 3000,
MaxSymbolsRequest: 12000,
-Model: "gpt-4",
+Model: "gpt-4o-mini",
})

t.Run("spam response", func(t *testing.T) {