
WIP - Support thinking mode for Anthropic models #170


Draft: wants to merge 21 commits into main

Commits (21)
81ed712
feat: wip - add thinking content to messages
rhys117 May 14, 2025
a47870a
Merge branch 'main' into 154-thinking
rhys117 Jun 6, 2025
b6e1bb0
chore: add thinking to capabilities
rhys117 Jun 6, 2025
ecb69c9
chore: pass thinking through from chat initialisation
rhys117 Jun 6, 2025
a014b77
chore: add very basic config for thinking budget through global confi…
rhys117 Jun 6, 2025
ddb0ae1
bug: fix config missing comma
rhys117 Jun 6, 2025
6d66491
chore: add streaming content
rhys117 Jun 6, 2025
7da672e
chore: rename to use existing reasoning capability
rhys117 Jun 6, 2025
c948b0e
Merge branch 'main' into 154-thinking
rhys117 Jun 22, 2025
6b4fb83
chore: rename to thinking
rhys117 Jun 22, 2025
7ec6733
Get thinking working with bedrock
hiemanshu Jun 27, 2025
8709018
Merge branch 'main' into 154-thinking
crmne Jul 16, 2025
b8fb932
Merge pull request #1 from recitalsoftware/154-thinking
rhys117 Jul 17, 2025
5577bae
chore: update anthropic capabilities with thinking
rhys117 Jul 18, 2025
5c02af2
chore: move temperature setting to param
rhys117 Jul 18, 2025
153440c
chore: use 'thinking' capability instead of reasoning in Model::Info
rhys117 Jul 18, 2025
627ffe0
chore: allow thinking capabilities on assumed models
rhys117 Jul 18, 2025
8a6453d
bug: fix call to check if thinking supported in 'with_thinking'
rhys117 Jul 18, 2025
cc1ce5f
test: add basic spec for anthropic models
rhys117 Jul 18, 2025
87fa6a5
Merge branch 'main' into 154-thinking
rhys117 Jul 18, 2025
06daa1c
bug: ensure render_payload args compatibility across all providers
rhys117 Jul 18, 2025
24 changes: 23 additions & 1 deletion lib/ruby_llm/chat.rb
@@ -22,7 +22,9 @@ def initialize(model: nil, provider: nil, assume_model_exists: false, context: n
@config = context&.config || RubyLLM.config
model_id = model || @config.default_model
with_model(model_id, provider: provider, assume_exists: assume_model_exists)
@temperature = 0.7
@thinking = @config.default_thinking
@thinking_budget = @config.default_thinking_budget
@temperature = @config.default_temperature
Review comment:
This default_temperature doesn't exist:

undefined method 'default_temperature' for an instance of RubyLLM::Configuration

Contributor Author:
This has been corrected, thanks to @hiemanshu

@messages = []
@tools = {}
@on = {
@@ -63,6 +65,8 @@ def with_tools(*tools)
def with_model(model_id, provider: nil, assume_exists: false)
@model, @provider = Models.resolve(model_id, provider:, assume_exists:)
@connection = @context ? @context.connection_for(@provider) : @provider.connection(@config)
# TODO: Currently the unsupported errors will not retrigger after model reassignment.

self
end

@@ -71,6 +75,18 @@ def with_temperature(temperature)
self
end

def with_thinking(thinking: true, budget: nil, temperature: 1)
raise UnsupportedThinkingError, "Model #{@model.id} doesn't support thinking" if thinking && !@model.thinking?

@thinking = thinking

# Most thinking models require temperature to be 1, so force it here while allowing override via param.
@temperature = temperature
@thinking_budget = budget if budget

self
end

def with_context(context)
@context = context
@config = context.config
@@ -98,6 +114,8 @@ def complete(&)
tools: @tools,
temperature: @temperature,
model: @model.id,
thinking: @thinking,
thinking_budget: @thinking_budget,
connection: @connection,
&wrap_streaming_block(&)
)
@@ -123,6 +141,10 @@ def reset_messages!
@messages.clear
end

def thinking?
@thinking
end

private

def wrap_streaming_block(&block)
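
For orientation, a minimal usage sketch of the API these Chat changes introduce. It is illustrative only: the model ID is an assumption, and response.thinking relies on the Message change later in this diff.

require 'ruby_llm'

# with_thinking raises UnsupportedThinkingError if the resolved model
# lacks the 'thinking' capability.
chat = RubyLLM.chat(model: 'claude-3-7-sonnet-20250219')
              .with_thinking(budget: 4_096)

response = chat.ask('Walk through 17 * 24 step by step.')

puts response.thinking # thinking blocks returned by the model, if any
puts response.content  # the final answer text
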
9 changes: 9 additions & 0 deletions lib/ruby_llm/configuration.rb
@@ -28,6 +28,10 @@ class Configuration
:default_model,
:default_embedding_model,
:default_image_model,
# Default model settings
:default_temperature,
:default_thinking,
:default_thinking_budget,
# Connection configuration
:request_timeout,
:max_retries,
@@ -55,6 +59,11 @@ def initialize
@default_embedding_model = 'text-embedding-3-small'
@default_image_model = 'dall-e-3'

# Default model settings
@default_thinking = false
@default_thinking_budget = 2048
@default_temperature = 0.7

# Logging configuration
@log_file = $stdout
@log_level = ENV['RUBYLLM_DEBUG'] ? Logger::DEBUG : Logger::INFO
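
A hedged sketch of setting the new defaults globally; the key names come from this diff, while the API-key setter is assumed from the gem's existing configuration.

RubyLLM.configure do |config|
  config.anthropic_api_key = ENV['ANTHROPIC_API_KEY'] # existing setting, shown for context

  # New defaults added by this PR
  config.default_temperature     = 0.7
  config.default_thinking        = true   # opt chats into thinking unless overridden
  config.default_thinking_budget = 2_048  # forwarded as Anthropic's budget_tokens
end
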
1 change: 1 addition & 0 deletions lib/ruby_llm/error.rb
@@ -25,6 +25,7 @@ class InvalidRoleError < StandardError; end
class ModelNotFoundError < StandardError; end
class UnsupportedFunctionsError < StandardError; end
class UnsupportedAttachmentError < StandardError; end
class UnsupportedThinkingError < StandardError; end

# Error classes for different HTTP status codes
class BadRequestError < Error; end
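
Callers can guard against the new error class in the usual way; a small sketch:

begin
  chat.with_thinking(budget: 8_192)
rescue RubyLLM::UnsupportedThinkingError => e
  RubyLLM.logger.warn("Extended thinking unavailable: #{e.message}")
  # continue with a plain completion instead
end
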
3 changes: 2 additions & 1 deletion lib/ruby_llm/message.rb
@@ -7,11 +7,12 @@ module RubyLLM
class Message
ROLES = %i[system user assistant tool].freeze

attr_reader :role, :tool_calls, :tool_call_id, :input_tokens, :output_tokens, :model_id
attr_reader :role, :tool_calls, :tool_call_id, :input_tokens, :output_tokens, :model_id, :thinking

def initialize(options = {})
@role = options.fetch(:role).to_sym
@content = normalize_content(options.fetch(:content))
@thinking = options[:thinking]
@tool_calls = options[:tool_calls]
@input_tokens = options[:input_tokens]
@output_tokens = options[:output_tokens]
2 changes: 1 addition & 1 deletion lib/ruby_llm/model/info.rb
@@ -35,7 +35,7 @@ def supports?(capability)
capabilities.include?(capability.to_s)
end

%w[function_calling structured_output batch reasoning citations streaming].each do |cap|
%w[function_calling structured_output batch reasoning citations streaming thinking].each do |cap|
define_method "#{cap}?" do
supports?(cap)
end
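
Because the capability list above is metaprogrammed into predicate methods, a 'thinking' entry in a model's capabilities becomes a thinking? query. A sketch, assuming the gem's model registry lookup:

model = RubyLLM.models.find('claude-3-7-sonnet')
model.supports?(:thinking) # => true if 'thinking' is listed in capabilities
model.thinking?            # => same check via the generated predicate
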
19 changes: 12 additions & 7 deletions lib/ruby_llm/models.json
@@ -182,7 +182,8 @@
]
},
"capabilities": [
"function_calling"
"function_calling",
"thinking"
],
"pricing": {
"text_tokens": {
@@ -317,7 +318,8 @@
]
},
"capabilities": [
"function_calling"
"function_calling",
"thinking"
],
"pricing": {
"text_tokens": {
@@ -349,7 +351,8 @@
]
},
"capabilities": [
"function_calling"
"function_calling",
"thinking"
],
"pricing": {
"text_tokens": {
@@ -1754,7 +1757,7 @@
"streaming",
"function_calling",
"structured_output",
"reasoning",
"thinking",
"batch",
"citations"
],
@@ -1807,7 +1810,8 @@
"capabilities": [
"streaming",
"function_calling",
"structured_output"
"structured_output",
"thinking"
],
"pricing": {
"text_tokens": {
@@ -1858,7 +1862,8 @@
"capabilities": [
"streaming",
"function_calling",
"structured_output"
"structured_output",
"thinking"
],
"pricing": {
"text_tokens": {
@@ -27601,4 +27606,4 @@
]
}
}
]
]
2 changes: 1 addition & 1 deletion lib/ruby_llm/models.rb
@@ -57,7 +57,7 @@ def resolve(model_id, provider: nil, assume_exists: false) # rubocop:disable Met
id: model_id,
name: model_id.gsub('-', ' ').capitalize,
provider: provider.slug,
capabilities: %w[function_calling streaming],
capabilities: %w[function_calling streaming thinking],
modalities: { input: %w[text image], output: %w[text] },
metadata: { warning: 'Assuming model exists, capabilities may not be accurate' }
)
4 changes: 3 additions & 1 deletion lib/ruby_llm/provider.rb
@@ -10,13 +10,15 @@ module Provider
module Methods
extend Streaming

def complete(messages, tools:, temperature:, model:, connection:, &)
def complete(messages, tools:, temperature:, model:, thinking:, thinking_budget:, connection:, &) # rubocop:disable Metrics/ParameterLists
normalized_temperature = maybe_normalize_temperature(temperature, model)

payload = render_payload(messages,
tools: tools,
temperature: normalized_temperature,
model: model,
thinking: thinking,
thinking_budget: thinking_budget,
stream: block_given?)

if block_given?
20 changes: 11 additions & 9 deletions lib/ruby_llm/providers/anthropic/capabilities.rb
@@ -65,14 +65,16 @@ def supports_json_mode?(model_id)
# @param model_id [String] the model identifier
# @return [Boolean] true if the model supports extended thinking
def supports_extended_thinking?(model_id)
model_id.match?(/claude-3-7-sonnet/)
model_id.match?(/claude-3-7-sonnet|claude-sonnet-4|claude-opus-4/)
end

# Determines the model family for a given model ID
# @param model_id [String] the model identifier
# @return [Symbol] the model family identifier
def model_family(model_id)
case model_id
when /claude-sonnet-4/ then 'claude-sonnet-4'
when /claude-opus-4/ then 'claude-opus-4'
when /claude-3-7-sonnet/ then 'claude-3-7-sonnet'
when /claude-3-5-sonnet/ then 'claude-3-5-sonnet'
when /claude-3-5-haiku/ then 'claude-3-5-haiku'
@@ -131,17 +133,17 @@ def capabilities_for(model_id)
capabilities = ['streaming']

# Function calling for Claude 3+
if model_id.match?(/claude-3/)
if model_id.match?(/claude-3|claude-sonnet-4|claude-opus-4/)
capabilities << 'function_calling'
capabilities << 'structured_output'
capabilities << 'batch'
end

# Extended thinking (reasoning) for Claude 3.7
capabilities << 'reasoning' if model_id.match?(/claude-3-7/)
# Extended thinking for Claude 3.7 and Claude 4
capabilities << 'thinking' if supports_extended_thinking?(model_id)

# Citations
capabilities << 'citations' if model_id.match?(/claude-3\.5|claude-3-7/)
capabilities << 'citations' if model_id.match?(/claude-3\.5|claude-3-7|claude-sonnet-4|claude-opus-4/)

capabilities
end
@@ -161,10 +163,10 @@ def pricing_for(model_id)
output_per_million: prices[:output] * 0.5
}

# Add reasoning output pricing for 3.7 models
if model_id.match?(/claude-3-7/)
standard_pricing[:reasoning_output_per_million] = prices[:output] * 2.5
batch_pricing[:reasoning_output_per_million] = prices[:output] * 1.25
# Add thinking output pricing for 3.7 and 4 models
if model_id.match?(/claude-3-7|claude-sonnet-4|claude-opus-4/)
standard_pricing[:thinking_output_per_million] = prices[:output] * 2.5
batch_pricing[:thinking_output_per_million] = prices[:output] * 1.25
end

{
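
Roughly what the updated detection yields, as a sketch (module methods called directly, as elsewhere in the gem; the dated model IDs are assumptions):

caps = RubyLLM::Providers::Anthropic::Capabilities

caps.supports_extended_thinking?('claude-3-7-sonnet-20250219') # => true
caps.supports_extended_thinking?('claude-3-5-haiku-20241022')  # => false

caps.capabilities_for('claude-opus-4-20250514')
# => ["streaming", "function_calling", "structured_output", "batch", "thinking", "citations"]
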
25 changes: 20 additions & 5 deletions lib/ruby_llm/providers/anthropic/chat.rb
@@ -11,12 +11,12 @@ def completion_url
'/v1/messages'
end

def render_payload(messages, tools:, temperature:, model:, stream: false)
def render_payload(messages, tools:, temperature:, model:, thinking:, thinking_budget:, stream: false) # rubocop:disable Metrics/ParameterLists
system_messages, chat_messages = separate_messages(messages)
system_content = build_system_content(system_messages)

build_base_payload(chat_messages, temperature, model, stream).tap do |payload|
add_optional_fields(payload, system_content:, tools:)
add_optional_fields(payload, system_content:, tools:, thinking:, thinking_budget:)
end
end

@@ -45,30 +45,45 @@ def build_base_payload(chat_messages, temperature, model, stream)
}
end

def add_optional_fields(payload, system_content:, tools:)
def add_optional_fields(payload, system_content:, tools:, thinking:, thinking_budget:)
payload[:tools] = tools.values.map { |t| Tools.function_for(t) } if tools.any?
payload[:system] = system_content unless system_content.empty?
return unless thinking

payload[:thinking] = {
type: 'enabled',
budget_tokens: thinking_budget
}
end

def parse_completion_response(response)
data = response.body
RubyLLM.logger.debug("Anthropic response: #{data}")

content_blocks = data['content'] || []

thinking_content = extract_thinking_content(content_blocks)
text_content = extract_text_content(content_blocks)
tool_use = Tools.find_tool_use(content_blocks)

build_message(data, text_content, tool_use)
build_message(data, text_content, tool_use, thinking_content)
end

def extract_thinking_content(blocks)
thinking_blocks = blocks.select { |c| c['type'] == 'thinking' }
thinking_blocks.map { |c| c['thinking'] }.join
end

def extract_text_content(blocks)
text_blocks = blocks.select { |c| c['type'] == 'text' }
text_blocks.map { |c| c['text'] }.join
end

def build_message(data, content, tool_use)
def build_message(data, content, tool_use, thinking_content)
Message.new(
role: :assistant,
content: content,
thinking: thinking_content,
tool_calls: Tools.parse_tool_calls(tool_use),
input_tokens: data.dig('usage', 'input_tokens'),
output_tokens: data.dig('usage', 'output_tokens'),
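
Put together, the rendered Anthropic request body looks roughly like this when thinking is enabled (a sketch; max_tokens comes from the existing base payload and all values shown are illustrative):

{
  model: 'claude-3-7-sonnet-20250219',
  messages: [{ role: 'user', content: 'Walk through 17 * 24 step by step.' }],
  temperature: 1,        # with_thinking defaults temperature to 1
  stream: false,
  max_tokens: 8_192,
  thinking: {
    type: 'enabled',
    budget_tokens: 2_048 # @thinking_budget, defaulting to config.default_thinking_budget
  }
}

The temperature: 1 default in with_thinking lines up with the comment in chat.rb: thinking-capable Anthropic models generally expect an unmodified temperature while extended thinking is enabled.
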
1 change: 1 addition & 0 deletions lib/ruby_llm/providers/anthropic/streaming.rb
@@ -16,6 +16,7 @@ def build_chunk(data)
role: :assistant,
model_id: extract_model_id(data),
content: data.dig('delta', 'text'),
thinking: data.dig('delta', 'thinking'),
input_tokens: extract_input_tokens(data),
output_tokens: extract_output_tokens(data),
tool_calls: extract_tool_calls(data)
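
A sketch of consuming the streamed deltas; text and thinking arrive in separate chunks, so both fields are checked (block streaming follows the gem's existing chunk API):

chat.with_thinking(budget: 4_096).ask('Plan a three-step refactor.') do |chunk|
  print chunk.thinking if chunk.thinking # thinking_delta events
  print chunk.content  if chunk.content  # text_delta events
end
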
12 changes: 9 additions & 3 deletions lib/ruby_llm/providers/bedrock/capabilities.rb
@@ -108,6 +108,10 @@ def supports_structured_output?(model_id)
model_id.match?(/anthropic\.claude/)
end

def supports_extended_thinking?(model_id)
model_id.match?(/claude-3-7-sonnet|claude-sonnet-4|claude-opus-4/)
end

# Model family patterns for capability lookup
MODEL_FAMILIES = {
/anthropic\.claude-3-opus/ => :claude3_opus,
@@ -117,7 +121,9 @@ def supports_structured_output?(model_id)
/anthropic\.claude-3-haiku/ => :claude3_haiku,
/anthropic\.claude-3-5-haiku/ => :claude3_5_haiku,
/anthropic\.claude-v2/ => :claude2,
/anthropic\.claude-instant/ => :claude_instant
/anthropic\.claude-instant/ => :claude_instant,
/anthropic\.claude-sonnet-4/ => :claude_sonnet4,
/anthropic\.claude-opus-4/ => :claude_opus4
}.freeze

# Determines the model family for pricing and capability lookup
@@ -187,8 +193,8 @@ def capabilities_for(model_id)

capabilities << 'structured_output' if supports_json_mode?(model_id)

# Extended thinking for 3.7 models
capabilities << 'reasoning' if model_id.match?(/claude-3-7/)
# Extended thinking for 3.7 and 4 models
capabilities << 'thinking' if supports_extended_thinking?(model_id)

# Batch capabilities for newer Claude models
if model_id.match?(/claude-3\.5|claude-3-7/)
4 changes: 2 additions & 2 deletions lib/ruby_llm/providers/bedrock/chat.rb
@@ -39,15 +39,15 @@ def completion_url
"model/#{@model_id}/invoke"
end

def render_payload(messages, tools:, temperature:, model:, stream: false) # rubocop:disable Lint/UnusedMethodArgument
def render_payload(messages, tools:, temperature:, model:, thinking:, thinking_budget:, **) # rubocop:disable Metrics/ParameterLists
# Hold model_id in instance variable for use in completion_url and stream_url
@model_id = model

system_messages, chat_messages = Anthropic::Chat.separate_messages(messages)
system_content = Anthropic::Chat.build_system_content(system_messages)

build_base_payload(chat_messages, temperature, model).tap do |payload|
Anthropic::Chat.add_optional_fields(payload, system_content:, tools:)
Anthropic::Chat.add_optional_fields(payload, system_content:, tools:, thinking:, thinking_budget:)
end
end

2 changes: 1 addition & 1 deletion lib/ruby_llm/providers/gemini/chat.rb
@@ -11,7 +11,7 @@ def completion_url
"models/#{@model}:generateContent"
end

def render_payload(messages, tools:, temperature:, model:, stream: false) # rubocop:disable Lint/UnusedMethodArgument
Contributor Author:

Discarding unused params using '**' is my preference here, but would be keen to hear others' opinions please

def render_payload(messages, tools:, temperature:, model:, **)
@model = model # Store model for completion_url/stream_url
payload = {
contents: format_messages(messages),
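
For reference, a standalone illustration of the '**' approach discussed above (plain Ruby, not code from this PR): a bare double splat accepts and discards keyword arguments the method does not use, which lets every provider share the same render_payload call site even when a provider ignores the thinking options.

def render_payload(messages, tools:, temperature:, model:, **)
  # thinking:, thinking_budget:, stream:, etc. are accepted and silently discarded
  { model: model, messages: messages, tools: tools, temperature: temperature }
end

render_payload([], tools: {}, temperature: 0.7, model: 'demo-model',
               thinking: true, thinking_budget: 1_024, stream: false)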