From 3c1496d2bbeecfde1cb53b4bcf50c3ab9471d357 Mon Sep 17 00:00:00 2001 From: Denis Shulyaka Date: Sun, 12 Oct 2025 19:44:38 +0300 Subject: [PATCH] Add gpt-image-1-mini support (#154316) --- .../components/openai_conversation/ai_task.py | 10 ++++++++-- .../openai_conversation/config_flow.py | 15 +++++++++++++++ .../components/openai_conversation/const.py | 2 ++ .../components/openai_conversation/entity.py | 16 ++++++++++------ .../components/openai_conversation/strings.json | 4 ++++ .../openai_conversation/test_ai_task.py | 10 +++++++++- .../openai_conversation/test_config_flow.py | 2 ++ 7 files changed, 50 insertions(+), 9 deletions(-) diff --git a/homeassistant/components/openai_conversation/ai_task.py b/homeassistant/components/openai_conversation/ai_task.py index bc05671e48f..91933a36bb9 100644 --- a/homeassistant/components/openai_conversation/ai_task.py +++ b/homeassistant/components/openai_conversation/ai_task.py @@ -16,7 +16,13 @@ from homeassistant.exceptions import HomeAssistantError from homeassistant.helpers.entity_platform import AddConfigEntryEntitiesCallback from homeassistant.util.json import json_loads -from .const import CONF_CHAT_MODEL, RECOMMENDED_CHAT_MODEL, UNSUPPORTED_IMAGE_MODELS +from .const import ( + CONF_CHAT_MODEL, + CONF_IMAGE_MODEL, + RECOMMENDED_CHAT_MODEL, + RECOMMENDED_IMAGE_MODEL, + UNSUPPORTED_IMAGE_MODELS, +) from .entity import OpenAIBaseLLMEntity if TYPE_CHECKING: @@ -142,7 +148,7 @@ class OpenAITaskEntity( mime_type=mime_type, width=int(width) if width else None, height=int(height) if height else None, - model="gpt-image-1", + model=self.subentry.data.get(CONF_IMAGE_MODEL, RECOMMENDED_IMAGE_MODEL), revised_prompt=image_call.revised_prompt if hasattr(image_call, "revised_prompt") else None, diff --git a/homeassistant/components/openai_conversation/config_flow.py b/homeassistant/components/openai_conversation/config_flow.py index e21005d8541..a9fdf5fd771 100644 --- a/homeassistant/components/openai_conversation/config_flow.py +++ b/homeassistant/components/openai_conversation/config_flow.py @@ -43,6 +43,7 @@ from homeassistant.helpers.typing import VolDictType from .const import ( CONF_CHAT_MODEL, CONF_CODE_INTERPRETER, + CONF_IMAGE_MODEL, CONF_MAX_TOKENS, CONF_PROMPT, CONF_REASONING_EFFORT, @@ -64,6 +65,7 @@ from .const import ( RECOMMENDED_CHAT_MODEL, RECOMMENDED_CODE_INTERPRETER, RECOMMENDED_CONVERSATION_OPTIONS, + RECOMMENDED_IMAGE_MODEL, RECOMMENDED_MAX_TOKENS, RECOMMENDED_REASONING_EFFORT, RECOMMENDED_TEMPERATURE, @@ -72,6 +74,7 @@ from .const import ( RECOMMENDED_WEB_SEARCH, RECOMMENDED_WEB_SEARCH_CONTEXT_SIZE, RECOMMENDED_WEB_SEARCH_USER_LOCATION, + UNSUPPORTED_IMAGE_MODELS, UNSUPPORTED_MODELS, UNSUPPORTED_WEB_SEARCH_MODELS, ) @@ -411,6 +414,18 @@ class OpenAISubentryFlowHandler(ConfigSubentryFlow): ) } + if self._subentry_type == "ai_task_data" and not model.startswith( + tuple(UNSUPPORTED_IMAGE_MODELS) + ): + step_schema[ + vol.Optional(CONF_IMAGE_MODEL, default=RECOMMENDED_IMAGE_MODEL) + ] = SelectSelector( + SelectSelectorConfig( + options=["gpt-image-1", "gpt-image-1-mini"], + mode=SelectSelectorMode.DROPDOWN, + ) + ) + if user_input is not None: if user_input.get(CONF_WEB_SEARCH): if user_input.get(CONF_WEB_SEARCH_USER_LOCATION): diff --git a/homeassistant/components/openai_conversation/const.py b/homeassistant/components/openai_conversation/const.py index fda862e1dbe..9d936e03348 100644 --- a/homeassistant/components/openai_conversation/const.py +++ b/homeassistant/components/openai_conversation/const.py @@ -13,6 +13,7 @@ DEFAULT_AI_TASK_NAME = "OpenAI AI Task" DEFAULT_NAME = "OpenAI Conversation" CONF_CHAT_MODEL = "chat_model" +CONF_IMAGE_MODEL = "image_model" CONF_CODE_INTERPRETER = "code_interpreter" CONF_FILENAMES = "filenames" CONF_MAX_TOKENS = "max_tokens" @@ -31,6 +32,7 @@ CONF_WEB_SEARCH_COUNTRY = "country" CONF_WEB_SEARCH_TIMEZONE = "timezone" RECOMMENDED_CODE_INTERPRETER = False RECOMMENDED_CHAT_MODEL = "gpt-4o-mini" +RECOMMENDED_IMAGE_MODEL = "gpt-image-1" RECOMMENDED_MAX_TOKENS = 3000 RECOMMENDED_REASONING_EFFORT = "low" RECOMMENDED_TEMPERATURE = 1.0 diff --git a/homeassistant/components/openai_conversation/entity.py b/homeassistant/components/openai_conversation/entity.py index 0ff6e662918..845c8a7bfed 100644 --- a/homeassistant/components/openai_conversation/entity.py +++ b/homeassistant/components/openai_conversation/entity.py @@ -67,6 +67,7 @@ from homeassistant.util import slugify from .const import ( CONF_CHAT_MODEL, CONF_CODE_INTERPRETER, + CONF_IMAGE_MODEL, CONF_MAX_TOKENS, CONF_REASONING_EFFORT, CONF_TEMPERATURE, @@ -82,6 +83,7 @@ from .const import ( DOMAIN, LOGGER, RECOMMENDED_CHAT_MODEL, + RECOMMENDED_IMAGE_MODEL, RECOMMENDED_MAX_TOKENS, RECOMMENDED_REASONING_EFFORT, RECOMMENDED_TEMPERATURE, @@ -516,13 +518,15 @@ class OpenAIBaseLLMEntity(Entity): model_args.setdefault("include", []).append("code_interpreter_call.outputs") # type: ignore[union-attr] if force_image: - tools.append( - ImageGeneration( - type="image_generation", - input_fidelity="high", - output_format="png", - ) + image_model = options.get(CONF_IMAGE_MODEL, RECOMMENDED_IMAGE_MODEL) + image_tool = ImageGeneration( + type="image_generation", + model=image_model, + output_format="png", ) + if image_model == "gpt-image-1": + image_tool["input_fidelity"] = "high" + tools.append(image_tool) model_args["tool_choice"] = ToolChoiceTypesParam(type="image_generation") model_args["store"] = True # Avoid sending image data back and forth diff --git a/homeassistant/components/openai_conversation/strings.json b/homeassistant/components/openai_conversation/strings.json index 190e86e87b8..e5b3cb30646 100644 --- a/homeassistant/components/openai_conversation/strings.json +++ b/homeassistant/components/openai_conversation/strings.json @@ -50,6 +50,7 @@ "data": { "code_interpreter": "Enable code interpreter tool", "reasoning_effort": "Reasoning effort", + "image_model": "Image generation model", "web_search": "Enable web search", "search_context_size": "Search context size", "user_location": "Include home location" @@ -57,6 +58,7 @@ "data_description": { "code_interpreter": "This tool, also known as the python tool to the model, allows it to run code to answer questions", "reasoning_effort": "How many reasoning tokens the model should generate before creating a response to the prompt", + "image_model": "The model to use when generating images", "web_search": "Allow the model to search the web for the latest information before generating a response", "search_context_size": "High level guidance for the amount of context window space to use for the search", "user_location": "Refine search results based on geography" @@ -97,12 +99,14 @@ "title": "[%key:component::openai_conversation::config_subentries::conversation::step::model::title%]", "data": { "reasoning_effort": "[%key:component::openai_conversation::config_subentries::conversation::step::model::data::reasoning_effort%]", + "image_model": "[%key:component::openai_conversation::config_subentries::conversation::step::model::data::image_model%]", "web_search": "[%key:component::openai_conversation::config_subentries::conversation::step::model::data::web_search%]", "search_context_size": "[%key:component::openai_conversation::config_subentries::conversation::step::model::data::search_context_size%]", "user_location": "[%key:component::openai_conversation::config_subentries::conversation::step::model::data::user_location%]" }, "data_description": { "reasoning_effort": "[%key:component::openai_conversation::config_subentries::conversation::step::model::data_description::reasoning_effort%]", + "image_model": "[%key:component::openai_conversation::config_subentries::conversation::step::model::data_description::image_model%]", "web_search": "[%key:component::openai_conversation::config_subentries::conversation::step::model::data_description::web_search%]", "search_context_size": "[%key:component::openai_conversation::config_subentries::conversation::step::model::data_description::search_context_size%]", "user_location": "[%key:component::openai_conversation::config_subentries::conversation::step::model::data_description::user_location%]" diff --git a/tests/components/openai_conversation/test_ai_task.py b/tests/components/openai_conversation/test_ai_task.py index b9a69e5f77e..77d878bcfa1 100644 --- a/tests/components/openai_conversation/test_ai_task.py +++ b/tests/components/openai_conversation/test_ai_task.py @@ -213,12 +213,14 @@ async def test_generate_data_with_attachments( @pytest.mark.usefixtures("mock_init_component") @freeze_time("2025-06-14 22:59:00") +@pytest.mark.parametrize("image_model", ["gpt-image-1", "gpt-image-1-mini"]) async def test_generate_image( hass: HomeAssistant, mock_config_entry: MockConfigEntry, mock_create_stream: AsyncMock, entity_registry: er.EntityRegistry, issue_registry: ir.IssueRegistry, + image_model: str, ) -> None: """Test AI Task image generation.""" entity_id = "ai_task.openai_ai_task" @@ -232,6 +234,12 @@ async def test_generate_image( if entry.subentry_type == "ai_task_data" ) ) + hass.config_entries.async_update_subentry( + mock_config_entry, + ai_task_entry, + data={"image_model": image_model}, + ) + await hass.async_block_till_done() assert entity_entry is not None assert entity_entry.config_entry_id == mock_config_entry.entry_id assert entity_entry.config_subentry_id == ai_task_entry.subentry_id @@ -258,7 +266,7 @@ async def test_generate_image( assert result["width"] == 1536 assert result["revised_prompt"] == "Mock revised prompt." assert result["mime_type"] == "image/png" - assert result["model"] == "gpt-image-1" + assert result["model"] == image_model mock_upload_media.assert_called_once() image_data = mock_upload_media.call_args[0][1] diff --git a/tests/components/openai_conversation/test_config_flow.py b/tests/components/openai_conversation/test_config_flow.py index 4a87365a4a6..ce0a64ea718 100644 --- a/tests/components/openai_conversation/test_config_flow.py +++ b/tests/components/openai_conversation/test_config_flow.py @@ -14,6 +14,7 @@ from homeassistant.components.openai_conversation.config_flow import ( from homeassistant.components.openai_conversation.const import ( CONF_CHAT_MODEL, CONF_CODE_INTERPRETER, + CONF_IMAGE_MODEL, CONF_MAX_TOKENS, CONF_PROMPT, CONF_REASONING_EFFORT, @@ -917,6 +918,7 @@ async def test_creating_ai_task_subentry_advanced( assert result4.get("data") == { CONF_RECOMMENDED: False, CONF_CHAT_MODEL: "gpt-4o", + CONF_IMAGE_MODEL: "gpt-image-1", CONF_MAX_TOKENS: 200, CONF_TEMPERATURE: 0.5, CONF_TOP_P: 0.9,