From 3bdb8f2419075858ef8b922a094f875fe46e830f Mon Sep 17 00:00:00 2001
From: Jan Svabenik
Date: Fri, 27 Mar 2026 08:30:13 +0100
Subject: [PATCH] fix: support gpt-5 token parameter handling

---
 README.md                                  |  1 +
 docs/TARGET_STATE_AND_ROADMAP.md           |  1 +
 internal/domain/llm_settings.go            |  2 +
 internal/llmruntime/runtime.go             | 17 +++++-
 internal/llmruntime/runtime_test.go        | 55 ++++++++++++++++---
 .../provider_suggestion_generator_test.go  |  9 ++-
 6 files changed, 74 insertions(+), 11 deletions(-)

diff --git a/README.md b/README.md
index 564648e..4e5d2c6 100644
--- a/README.md
+++ b/README.md
@@ -12,6 +12,7 @@ Today the app can:
 - Maintain a global master prompt in Settings and configure prompt blocks as defaults for the later LLM flow.
 - Maintain the base LLM configuration in the Settings/Config area: active provider, active model (provider-aware static selection list), base URL for Ollama/compatible endpoints, temperature/max tokens, and separate API key storage per provider (OpenAI, Anthropic, Google, xAI, Ollama).
 - Check the LLM provider configuration in Settings via a lightweight validate action (active provider/model/key/base URL via a short runtime request).
+- OpenAI-compatible runtime requests internally choose the token-limit parameter to match the model (`max_completion_tokens` for OpenAI GPT-5 models, otherwise `max_tokens`), including the Settings validate action.
 - Focus the user flow in the draft/build UI on master data, intake/website context, style selection, and template fields; prompt internals live in Settings.
 - Map internal semantic target slots (e.g. `hero.title`, `service_items[n].description`) onto template fields in preparation for later LLM autofill.
 - Repeated areas in semantic slots are separated by block/role (e.g. services/team/testimonials per item instead of a combined slot).
diff --git a/docs/TARGET_STATE_AND_ROADMAP.md b/docs/TARGET_STATE_AND_ROADMAP.md
index e29e3bf..8801d08 100644
--- a/docs/TARGET_STATE_AND_ROADMAP.md
+++ b/docs/TARGET_STATE_AND_ROADMAP.md
@@ -43,6 +43,7 @@ Current state:
 - Repeated sections (services/team/testimonials, among others) are separated per item by block and role in the slot preview instead of collapsing into combined slots.
 - LLM-first suggestion state for the draft/build UI is in place: suggestions are stored separately from field values and controlled explicitly via generate/regenerate/apply (globally and per field); the rule-based path stays active as the last fallback/test path.
 - The provider-aware suggestion runtime is active: settings (`llm_active_provider`, `llm_active_model`, `llm_temperature`, `llm_max_tokens`, provider-specific API key, `llm_base_url` for Ollama/compatible endpoints) drive the primary runtime path; the existing QC path is kept as a compatibility fallback.
+- OpenAI-compatible requests internally use the token-limit parameter appropriate to the model (`max_completion_tokens` for OpenAI GPT-5 models, otherwise `max_tokens`), including the Settings validate path.
 - Settings contain a lightweight validate action for the active provider configuration (short runtime check) without bypassing the draft/review flow.
 - Model selection is implemented as a provider-aware static list and structured so that dynamic model lists/refresh can be plugged in later.
 - Technical field details (e.g. field paths/slots/suggestion metadata) can optionally be shown in the UI via a debug toggle.
diff --git a/internal/domain/llm_settings.go b/internal/domain/llm_settings.go
index 3c9c94a..1b96c3c 100644
--- a/internal/domain/llm_settings.go
+++ b/internal/domain/llm_settings.go
@@ -43,6 +43,8 @@ func LLMProviderOptions() []LLMProviderOption {
 			Models: []LLMModelOption{
 				{Value: "gpt-5.2", Label: "gpt-5.2"},
 				{Value: "gpt-5.4", Label: "gpt-5.4"},
+				{Value: "gpt-5.4-mini", Label: "gpt-5.4-mini"},
+				{Value: "gpt-5.4-nano", Label: "gpt-5.4-nano"},
 			},
 		},
 		{
diff --git a/internal/llmruntime/runtime.go b/internal/llmruntime/runtime.go
index b815f97..f687684 100644
--- a/internal/llmruntime/runtime.go
+++ b/internal/llmruntime/runtime.go
@@ -74,12 +74,12 @@ func (c *openAICompatibleClient) Generate(ctx context.Context, req Request) (str
 	payload := map[string]any{
 		"model":       strings.TrimSpace(req.Model),
 		"temperature": optionalFloat64(req.Temperature, 0),
-		"max_tokens":  optionalInt(req.MaxTokens, 1200),
 		"messages": []map[string]string{
 			{"role": "system", "content": strings.TrimSpace(req.SystemPrompt)},
 			{"role": "user", "content": strings.TrimSpace(req.UserPrompt)},
 		},
 	}
+	payload[openAICompatibleMaxTokensField(req.Provider, req.Model)] = optionalInt(req.MaxTokens, 1200)
 
 	body, err := doJSON(ctx, c.httpClient, http.MethodPost, baseURL+"/v1/chat/completions", req.APIKey, nil, payload)
 	if err != nil {
@@ -263,6 +263,21 @@ func optionalInt(value *int, fallback int) int {
 	return *value
 }
 
+func openAICompatibleMaxTokensField(provider, model string) string {
+	if isOpenAIGPT5Model(provider, model) {
+		return "max_completion_tokens"
+	}
+	return "max_tokens"
+}
+
+func isOpenAIGPT5Model(provider, model string) bool {
+	if !strings.EqualFold(strings.TrimSpace(provider), "openai") {
+		return false
+	}
+	normalizedModel := strings.ToLower(strings.TrimSpace(model))
+	return strings.HasPrefix(normalizedModel, "gpt-5")
+}
+
 func trimProviderErrorMessage(respBody []byte) string {
 	message := extractProviderErrorMessage(respBody)
 	if len(message) > 500 {
diff --git a/internal/llmruntime/runtime_test.go b/internal/llmruntime/runtime_test.go
index 736f852..1d570f0 100644
--- a/internal/llmruntime/runtime_test.go
+++ b/internal/llmruntime/runtime_test.go
@@ -13,10 +13,7 @@ import (
 func TestOpenAICompatibleClient_ForwardsTemperatureAndMaxTokens(t *testing.T) {
 	t.Parallel()
 
-	var got struct {
-		Temperature float64 `json:"temperature"`
-		MaxTokens   int     `json:"max_tokens"`
-	}
+	var got map[string]any
 	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
 		_ = json.NewDecoder(r.Body).Decode(&got)
 		_, _ = w.Write([]byte(`{"choices":[{"message":{"content":"ok"}}]}`))
 	}))
 	defer server.Close()
@@ -43,11 +40,53 @@ func TestOpenAICompatibleClient_ForwardsTemperatureAndMaxTokens(t *testing.T) {
 	if err != nil {
 		t.Fatalf("generate failed: %v", err)
 	}
-	if got.Temperature != 0.77 {
-		t.Fatalf("unexpected temperature: %v", got.Temperature)
+	gotTemperature, _ := got["temperature"].(float64)
+	if gotTemperature != 0.77 {
+		t.Fatalf("unexpected temperature: %v", gotTemperature)
+	}
+	if _, exists := got["max_tokens"]; exists {
+		t.Fatalf("did not expect max_tokens for openai gpt-5 models")
+	}
+	gotMaxCompletionTokens, _ := got["max_completion_tokens"].(float64)
+	if gotMaxCompletionTokens != 777 {
+		t.Fatalf("unexpected max_completion_tokens: %v", gotMaxCompletionTokens)
+	}
+}
+
+func TestOpenAICompatibleClient_UsesMaxTokensForOlderOpenAIModels(t *testing.T) {
+	t.Parallel()
+
+	var got map[string]any
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		_ = json.NewDecoder(r.Body).Decode(&got)
+		_, _ = w.Write([]byte(`{"choices":[{"message":{"content":"ok"}}]}`))
+	}))
+	defer server.Close()
+
+	factory := NewFactory(2 * time.Second)
+	client, err := factory.ClientFor("openai")
+	if err != nil {
+		t.Fatalf("client creation failed: %v", err)
+	}
+	maxTokens := 512
+	_, err = client.Generate(context.Background(), Request{
+		Provider:     "openai",
+		BaseURL:      server.URL,
+		Model:        "gpt-4.1",
+		APIKey:       "key",
+		MaxTokens:    &maxTokens,
+		SystemPrompt: "system",
+		UserPrompt:   "user",
+	})
+	if err != nil {
+		t.Fatalf("generate failed: %v", err)
+	}
+	if _, exists := got["max_completion_tokens"]; exists {
+		t.Fatalf("did not expect max_completion_tokens for non-gpt-5 model")
 	}
-	if got.MaxTokens != 777 {
-		t.Fatalf("unexpected max tokens: %v", got.MaxTokens)
+	gotMaxTokens, _ := got["max_tokens"].(float64)
+	if gotMaxTokens != 512 {
+		t.Fatalf("unexpected max_tokens: %v", gotMaxTokens)
 	}
 }
diff --git a/internal/mapping/provider_suggestion_generator_test.go b/internal/mapping/provider_suggestion_generator_test.go
index d0c9440..943723e 100644
--- a/internal/mapping/provider_suggestion_generator_test.go
+++ b/internal/mapping/provider_suggestion_generator_test.go
@@ -34,6 +34,7 @@ func TestProviderAwareSuggestionGenerator_UsesActiveProviderModelAndKey(t *testi
 		gotModel       string
 		gotTemperature float64
 		gotMaxTokens   float64
+		gotMaxComp     float64
 	)
 	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
 		gotPath = r.URL.Path
@@ -43,6 +44,7 @@ func TestProviderAwareSuggestionGenerator_UsesActiveProviderModelAndKey(t *testi
 		gotModel, _ = payload["model"].(string)
 		gotTemperature, _ = payload["temperature"].(float64)
 		gotMaxTokens, _ = payload["max_tokens"].(float64)
+		gotMaxComp, _ = payload["max_completion_tokens"].(float64)
 		_, _ = w.Write([]byte(`{"choices":[{"message":{"content":"{\"suggestions\":[{\"fieldPath\":\"text.textTitle_m1710_1\",\"value\":\"Provider Hero\",\"reason\":\"focused hero\"}]}"}}]}`))
 	}))
 	defer server.Close()
@@ -84,8 +86,11 @@ func TestProviderAwareSuggestionGenerator_UsesActiveProviderModelAndKey(t *testi
 	if gotTemperature != 0.65 {
 		t.Fatalf("unexpected temperature: %v", gotTemperature)
 	}
-	if gotMaxTokens != 333 {
-		t.Fatalf("unexpected max_tokens: %v", gotMaxTokens)
+	if gotMaxTokens != 0 {
+		t.Fatalf("unexpected max_tokens for openai gpt-5 model: %v", gotMaxTokens)
+	}
+	if gotMaxComp != 333 {
+		t.Fatalf("unexpected max_completion_tokens: %v", gotMaxComp)
 	}
 }
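
For context, a minimal standalone sketch of the selection rule this patch introduces in internal/llmruntime/runtime.go. The helper name `maxTokensField`, the `main` driver, and the sample provider/model pairs below are illustrative only and not part of the codebase:

```go
package main

import (
	"encoding/json"
	"fmt"
	"strings"
)

// maxTokensField mirrors the helper added in the patch: OpenAI GPT-5-family
// models get "max_completion_tokens", every other provider/model combination
// keeps the classic "max_tokens" field.
func maxTokensField(provider, model string) string {
	isOpenAI := strings.EqualFold(strings.TrimSpace(provider), "openai")
	isGPT5 := strings.HasPrefix(strings.ToLower(strings.TrimSpace(model)), "gpt-5")
	if isOpenAI && isGPT5 {
		return "max_completion_tokens"
	}
	return "max_tokens"
}

func main() {
	for _, tc := range []struct{ provider, model string }{
		{"openai", "gpt-5.4-mini"}, // -> max_completion_tokens
		{"openai", "gpt-4.1"},      // -> max_tokens
		{"ollama", "llama3"},       // -> max_tokens (non-OpenAI providers untouched)
	} {
		// Build the request payload and attach the limit under the chosen key.
		payload := map[string]any{"model": tc.model}
		payload[maxTokensField(tc.provider, tc.model)] = 1200
		b, _ := json.Marshal(payload)
		fmt.Println(string(b))
	}
}
```

Because the field choice is keyed on provider plus model prefix, pre-GPT-5 OpenAI models and Ollama/compatible endpoints keep sending `max_tokens`, so existing configurations remain unaffected.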