From b9adf25e2042e706e8393606f57ea9adae12dd0d Mon Sep 17 00:00:00 2001 From: Dan Singerman Date: Fri, 4 Jul 2025 11:36:02 +0100 Subject: [PATCH 1/3] Added proper handling of streaming error responses across both Faraday V1 and V2 --- lib/ruby_llm/streaming.rb | 28 ++++- ...241022_supports_streaming_error_chunks.yml | 87 +++++++++++++++ ...241022_supports_streaming_error_events.yml | 105 ++++++++++++++++++ spec/ruby_llm/chat_streaming_spec.rb | 76 +++++++++++++ 4 files changed, 291 insertions(+), 5 deletions(-) create mode 100644 spec/fixtures/vcr_cassettes/chat_streaming_responses_anthropic_claude-3-5-haiku-20241022_supports_streaming_error_chunks.yml create mode 100644 spec/fixtures/vcr_cassettes/chat_streaming_responses_anthropic_claude-3-5-haiku-20241022_supports_streaming_error_events.yml diff --git a/lib/ruby_llm/streaming.rb b/lib/ruby_llm/streaming.rb index b7017896..86e9f56a 100644 --- a/lib/ruby_llm/streaming.rb +++ b/lib/ruby_llm/streaming.rb @@ -55,13 +55,13 @@ def create_stream_processor(parser, buffer, &) end end - def process_stream_chunk(chunk, parser, _env, &) + def process_stream_chunk(chunk, parser, env, &) RubyLLM.logger.debug "Received chunk: #{chunk}" if error_chunk?(chunk) - handle_error_chunk(chunk, nil) + handle_error_chunk(chunk, env) else - yield handle_sse(chunk, parser, nil, &) + yield handle_sse(chunk, parser, env, &) end end @@ -88,7 +88,16 @@ def error_chunk?(chunk) def handle_error_chunk(chunk, env) error_data = chunk.split("\n")[1].delete_prefix('data: ') status, _message = parse_streaming_error(error_data) - error_response = env.merge(body: JSON.parse(error_data), status: status) + parsed_data = JSON.parse(error_data) + + # Create a response-like object that works for both Faraday v1 and v2 + error_response = if env + env.merge(body: parsed_data, status: status) + else + # For Faraday v1, create a simple object that responds to .status and .body + Struct.new(:body, :status).new(parsed_data, status) + end + ErrorMiddleware.parse_error(provider: self, response: error_response) rescue JSON::ParserError => e RubyLLM.logger.debug "Failed to parse error chunk: #{e.message}" @@ -122,7 +131,16 @@ def handle_data(data) def handle_error_event(data, env) status, _message = parse_streaming_error(data) - error_response = env.merge(body: JSON.parse(data), status: status) + parsed_data = JSON.parse(data) + + # Create a response-like object that works for both Faraday v1 and v2 + error_response = if env + env.merge(body: parsed_data, status: status) + else + # For Faraday v1, create a simple object that responds to .status and .body + Struct.new(:body, :status).new(parsed_data, status) + end + ErrorMiddleware.parse_error(provider: self, response: error_response) rescue JSON::ParserError => e RubyLLM.logger.debug "Failed to parse error event: #{e.message}" diff --git a/spec/fixtures/vcr_cassettes/chat_streaming_responses_anthropic_claude-3-5-haiku-20241022_supports_streaming_error_chunks.yml b/spec/fixtures/vcr_cassettes/chat_streaming_responses_anthropic_claude-3-5-haiku-20241022_supports_streaming_error_chunks.yml new file mode 100644 index 00000000..c8252b1f --- /dev/null +++ b/spec/fixtures/vcr_cassettes/chat_streaming_responses_anthropic_claude-3-5-haiku-20241022_supports_streaming_error_chunks.yml @@ -0,0 +1,87 @@ +--- +http_interactions: +- request: + method: post + uri: https://api.anthropic.com/v1/messages + body: + encoding: UTF-8 + string: '{"model":"claude-3-5-haiku-20241022","messages":[{"role":"user","content":[{"type":"text","text":"Count + from 1 to 
3"}]}],"temperature":0.7,"stream":true,"max_tokens":8192}' + headers: + User-Agent: + - Faraday v2.13.1 + X-Api-Key: + - "" + Anthropic-Version: + - '2023-06-01' + Content-Type: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 200 + message: OK + headers: + Date: + - Wed, 11 Jun 2025 12:53:20 GMT + Content-Type: + - text/event-stream; charset=utf-8 + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Cache-Control: + - no-cache + Anthropic-Ratelimit-Input-Tokens-Limit: + - '100000' + Anthropic-Ratelimit-Input-Tokens-Remaining: + - '100000' + Anthropic-Ratelimit-Input-Tokens-Reset: + - '2025-06-11T12:53:18Z' + Anthropic-Ratelimit-Output-Tokens-Limit: + - '20000' + Anthropic-Ratelimit-Output-Tokens-Remaining: + - '20000' + Anthropic-Ratelimit-Output-Tokens-Reset: + - '2025-06-11T12:53:18Z' + Anthropic-Ratelimit-Requests-Limit: + - '1000' + Anthropic-Ratelimit-Requests-Remaining: + - '999' + Anthropic-Ratelimit-Requests-Reset: + - '2025-06-11T12:53:19Z' + Anthropic-Ratelimit-Tokens-Limit: + - '120000' + Anthropic-Ratelimit-Tokens-Remaining: + - '120000' + Anthropic-Ratelimit-Tokens-Reset: + - '2025-06-11T12:53:18Z' + Request-Id: + - "" + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Anthropic-Organization-Id: + - 0137b15c-16bf-490d-9f90-8cfd7e325ec0 + Via: + - 1.1 google + Cf-Cache-Status: + - DYNAMIC + X-Robots-Tag: + - none + Server: + - cloudflare + Cf-Ray: + - "" + body: + encoding: UTF-8 + string: |+ + event: error + data: {"type":"error","error":{"details":null,"type":"overloaded_error","message":"Overloaded"} } + + + recorded_at: Wed, 11 Jun 2025 12:53:21 GMT +recorded_with: VCR 6.3.1 +... diff --git a/spec/fixtures/vcr_cassettes/chat_streaming_responses_anthropic_claude-3-5-haiku-20241022_supports_streaming_error_events.yml b/spec/fixtures/vcr_cassettes/chat_streaming_responses_anthropic_claude-3-5-haiku-20241022_supports_streaming_error_events.yml new file mode 100644 index 00000000..2f219382 --- /dev/null +++ b/spec/fixtures/vcr_cassettes/chat_streaming_responses_anthropic_claude-3-5-haiku-20241022_supports_streaming_error_events.yml @@ -0,0 +1,105 @@ +--- +http_interactions: +- request: + method: post + uri: https://api.anthropic.com/v1/messages + body: + encoding: UTF-8 + string: '{"model":"claude-3-5-haiku-20241022","messages":[{"role":"user","content":[{"type":"text","text":"Count + from 1 to 3"}]}],"temperature":0.7,"stream":true,"max_tokens":8192}' + headers: + User-Agent: + - Faraday v2.13.1 + X-Api-Key: + - "" + Anthropic-Version: + - '2023-06-01' + Content-Type: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 200 + message: OK + headers: + Date: + - Wed, 11 Jun 2025 12:53:20 GMT + Content-Type: + - text/event-stream; charset=utf-8 + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Cache-Control: + - no-cache + Anthropic-Ratelimit-Input-Tokens-Limit: + - '100000' + Anthropic-Ratelimit-Input-Tokens-Remaining: + - '100000' + Anthropic-Ratelimit-Input-Tokens-Reset: + - '2025-06-11T12:53:18Z' + Anthropic-Ratelimit-Output-Tokens-Limit: + - '20000' + Anthropic-Ratelimit-Output-Tokens-Remaining: + - '20000' + Anthropic-Ratelimit-Output-Tokens-Reset: + - '2025-06-11T12:53:18Z' + Anthropic-Ratelimit-Requests-Limit: + - '1000' + Anthropic-Ratelimit-Requests-Remaining: + - '999' + Anthropic-Ratelimit-Requests-Reset: + - '2025-06-11T12:53:19Z' + 
Anthropic-Ratelimit-Tokens-Limit: + - '120000' + Anthropic-Ratelimit-Tokens-Remaining: + - '120000' + Anthropic-Ratelimit-Tokens-Reset: + - '2025-06-11T12:53:18Z' + Request-Id: + - "" + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Anthropic-Organization-Id: + - 0137b15c-16bf-490d-9f90-8cfd7e325ec0 + Via: + - 1.1 google + Cf-Cache-Status: + - DYNAMIC + X-Robots-Tag: + - none + Server: + - cloudflare + Cf-Ray: + - "" + body: + encoding: UTF-8 + string: |+ + event: message_start + data: {"type":"message_start","message":{"id":"msg_01C9wXLHGibzr3JZM3HQSiRd","type":"message","role":"assistant","model":"claude-3-5-haiku-20241022","content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":15,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"output_tokens":1,"service_tier":"standard"}}} + + event: content_block_start + data: {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""} } + + event: ping + data: {"type": "ping"} + + event: content_block_delta + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"Here"} } + + event: content_block_delta + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"'s counting from 1 to"} } + + event: content_block_delta + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" 3:\n\n1"} } + + event: error + data: {"type":"error","error":{"details":null,"type":"overloaded_error","message":"Overloaded"} } + + + recorded_at: Wed, 11 Jun 2025 12:53:21 GMT +recorded_with: VCR 6.3.1 +... diff --git a/spec/ruby_llm/chat_streaming_spec.rb b/spec/ruby_llm/chat_streaming_spec.rb index 8ea5f614..956f6d77 100644 --- a/spec/ruby_llm/chat_streaming_spec.rb +++ b/spec/ruby_llm/chat_streaming_spec.rb @@ -41,4 +41,80 @@ end end end + + describe 'Error handling' do + let(:chat) { RubyLLM.chat(model: 'claude-3-5-haiku-20241022', provider: :anthropic) } + + describe 'Faraday version 1' do + before do + stub_const('Faraday::VERSION', '1.10.0') + end + + it 'anthropic/claude-3-5-haiku-20241022 supports handling streaming error chunks' do # rubocop:disable RSpec/ExampleLength + VCR.use_cassette( + 'chat_streaming_responses_anthropic_claude-3-5-haiku-20241022_supports_streaming_error_chunks', + record: :none + ) do + chunks = [] + + expect do + chat.ask('Count from 1 to 3') do |chunk| + chunks << chunk + end + end.to raise_error(RubyLLM::Error, /Overloaded/) + end + end + + it 'anthropic/claude-3-5-haiku-20241022 supports handling streaming error events' do # rubocop:disable RSpec/ExampleLength + VCR.use_cassette( + 'chat_streaming_responses_anthropic_claude-3-5-haiku-20241022_supports_streaming_error_events', + record: :none + ) do + chunks = [] + + expect do + chat.ask('Count from 1 to 3') do |chunk| + chunks << chunk + end + end.to raise_error(RubyLLM::Error, /Overloaded/) + end + end + end + + describe 'Faraday version 2' do + before do + stub_const('Faraday::VERSION', '2.0.0') + end + + it 'anthropic/claude-3-5-haiku-20241022 supports handling streaming error chunks' do # rubocop:disable RSpec/ExampleLength + VCR.use_cassette( + 'chat_streaming_responses_anthropic_claude-3-5-haiku-20241022_supports_streaming_error_chunks', + record: :none + ) do + chunks = [] + + expect do + chat.ask('Count from 1 to 3') do |chunk| + chunks << chunk + end + end.to raise_error(RubyLLM::Error, /Overloaded/) + end + end + + it 'anthropic/claude-3-5-haiku-20241022 supports handling streaming error events' do # rubocop:disable 
RSpec/ExampleLength + VCR.use_cassette( + 'chat_streaming_responses_anthropic_claude-3-5-haiku-20241022_supports_streaming_error_events', + record: :none + ) do + chunks = [] + + expect do + chat.ask('Count from 1 to 3') do |chunk| + chunks << chunk + end + end.to raise_error(RubyLLM::Error, /Overloaded/) + end + end + end + end end From bbabf607a5733dfda94b630cbd363ad61ea855b7 Mon Sep 17 00:00:00 2001 From: Dan Singerman Date: Wed, 16 Jul 2025 20:36:46 +0100 Subject: [PATCH 2/3] replace VCR with mocks --- ...241022_supports_streaming_error_chunks.yml | 87 -------------- ...241022_supports_streaming_error_events.yml | 105 ---------------- spec/ruby_llm/chat_streaming_spec.rb | 113 ++++++++++-------- 3 files changed, 65 insertions(+), 240 deletions(-) delete mode 100644 spec/fixtures/vcr_cassettes/chat_streaming_responses_anthropic_claude-3-5-haiku-20241022_supports_streaming_error_chunks.yml delete mode 100644 spec/fixtures/vcr_cassettes/chat_streaming_responses_anthropic_claude-3-5-haiku-20241022_supports_streaming_error_events.yml diff --git a/spec/fixtures/vcr_cassettes/chat_streaming_responses_anthropic_claude-3-5-haiku-20241022_supports_streaming_error_chunks.yml b/spec/fixtures/vcr_cassettes/chat_streaming_responses_anthropic_claude-3-5-haiku-20241022_supports_streaming_error_chunks.yml deleted file mode 100644 index c8252b1f..00000000 --- a/spec/fixtures/vcr_cassettes/chat_streaming_responses_anthropic_claude-3-5-haiku-20241022_supports_streaming_error_chunks.yml +++ /dev/null @@ -1,87 +0,0 @@ ---- -http_interactions: -- request: - method: post - uri: https://api.anthropic.com/v1/messages - body: - encoding: UTF-8 - string: '{"model":"claude-3-5-haiku-20241022","messages":[{"role":"user","content":[{"type":"text","text":"Count - from 1 to 3"}]}],"temperature":0.7,"stream":true,"max_tokens":8192}' - headers: - User-Agent: - - Faraday v2.13.1 - X-Api-Key: - - "" - Anthropic-Version: - - '2023-06-01' - Content-Type: - - application/json - Accept-Encoding: - - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 - Accept: - - "*/*" - response: - status: - code: 200 - message: OK - headers: - Date: - - Wed, 11 Jun 2025 12:53:20 GMT - Content-Type: - - text/event-stream; charset=utf-8 - Transfer-Encoding: - - chunked - Connection: - - keep-alive - Cache-Control: - - no-cache - Anthropic-Ratelimit-Input-Tokens-Limit: - - '100000' - Anthropic-Ratelimit-Input-Tokens-Remaining: - - '100000' - Anthropic-Ratelimit-Input-Tokens-Reset: - - '2025-06-11T12:53:18Z' - Anthropic-Ratelimit-Output-Tokens-Limit: - - '20000' - Anthropic-Ratelimit-Output-Tokens-Remaining: - - '20000' - Anthropic-Ratelimit-Output-Tokens-Reset: - - '2025-06-11T12:53:18Z' - Anthropic-Ratelimit-Requests-Limit: - - '1000' - Anthropic-Ratelimit-Requests-Remaining: - - '999' - Anthropic-Ratelimit-Requests-Reset: - - '2025-06-11T12:53:19Z' - Anthropic-Ratelimit-Tokens-Limit: - - '120000' - Anthropic-Ratelimit-Tokens-Remaining: - - '120000' - Anthropic-Ratelimit-Tokens-Reset: - - '2025-06-11T12:53:18Z' - Request-Id: - - "" - Strict-Transport-Security: - - max-age=31536000; includeSubDomains; preload - Anthropic-Organization-Id: - - 0137b15c-16bf-490d-9f90-8cfd7e325ec0 - Via: - - 1.1 google - Cf-Cache-Status: - - DYNAMIC - X-Robots-Tag: - - none - Server: - - cloudflare - Cf-Ray: - - "" - body: - encoding: UTF-8 - string: |+ - event: error - data: {"type":"error","error":{"details":null,"type":"overloaded_error","message":"Overloaded"} } - - - recorded_at: Wed, 11 Jun 2025 12:53:21 GMT -recorded_with: VCR 6.3.1 -... 
diff --git a/spec/fixtures/vcr_cassettes/chat_streaming_responses_anthropic_claude-3-5-haiku-20241022_supports_streaming_error_events.yml b/spec/fixtures/vcr_cassettes/chat_streaming_responses_anthropic_claude-3-5-haiku-20241022_supports_streaming_error_events.yml deleted file mode 100644 index 2f219382..00000000 --- a/spec/fixtures/vcr_cassettes/chat_streaming_responses_anthropic_claude-3-5-haiku-20241022_supports_streaming_error_events.yml +++ /dev/null @@ -1,105 +0,0 @@ ---- -http_interactions: -- request: - method: post - uri: https://api.anthropic.com/v1/messages - body: - encoding: UTF-8 - string: '{"model":"claude-3-5-haiku-20241022","messages":[{"role":"user","content":[{"type":"text","text":"Count - from 1 to 3"}]}],"temperature":0.7,"stream":true,"max_tokens":8192}' - headers: - User-Agent: - - Faraday v2.13.1 - X-Api-Key: - - "" - Anthropic-Version: - - '2023-06-01' - Content-Type: - - application/json - Accept-Encoding: - - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 - Accept: - - "*/*" - response: - status: - code: 200 - message: OK - headers: - Date: - - Wed, 11 Jun 2025 12:53:20 GMT - Content-Type: - - text/event-stream; charset=utf-8 - Transfer-Encoding: - - chunked - Connection: - - keep-alive - Cache-Control: - - no-cache - Anthropic-Ratelimit-Input-Tokens-Limit: - - '100000' - Anthropic-Ratelimit-Input-Tokens-Remaining: - - '100000' - Anthropic-Ratelimit-Input-Tokens-Reset: - - '2025-06-11T12:53:18Z' - Anthropic-Ratelimit-Output-Tokens-Limit: - - '20000' - Anthropic-Ratelimit-Output-Tokens-Remaining: - - '20000' - Anthropic-Ratelimit-Output-Tokens-Reset: - - '2025-06-11T12:53:18Z' - Anthropic-Ratelimit-Requests-Limit: - - '1000' - Anthropic-Ratelimit-Requests-Remaining: - - '999' - Anthropic-Ratelimit-Requests-Reset: - - '2025-06-11T12:53:19Z' - Anthropic-Ratelimit-Tokens-Limit: - - '120000' - Anthropic-Ratelimit-Tokens-Remaining: - - '120000' - Anthropic-Ratelimit-Tokens-Reset: - - '2025-06-11T12:53:18Z' - Request-Id: - - "" - Strict-Transport-Security: - - max-age=31536000; includeSubDomains; preload - Anthropic-Organization-Id: - - 0137b15c-16bf-490d-9f90-8cfd7e325ec0 - Via: - - 1.1 google - Cf-Cache-Status: - - DYNAMIC - X-Robots-Tag: - - none - Server: - - cloudflare - Cf-Ray: - - "" - body: - encoding: UTF-8 - string: |+ - event: message_start - data: {"type":"message_start","message":{"id":"msg_01C9wXLHGibzr3JZM3HQSiRd","type":"message","role":"assistant","model":"claude-3-5-haiku-20241022","content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":15,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"output_tokens":1,"service_tier":"standard"}}} - - event: content_block_start - data: {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""} } - - event: ping - data: {"type": "ping"} - - event: content_block_delta - data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"Here"} } - - event: content_block_delta - data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"'s counting from 1 to"} } - - event: content_block_delta - data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" 3:\n\n1"} } - - event: error - data: {"type":"error","error":{"details":null,"type":"overloaded_error","message":"Overloaded"} } - - - recorded_at: Wed, 11 Jun 2025 12:53:21 GMT -recorded_with: VCR 6.3.1 -... 
diff --git a/spec/ruby_llm/chat_streaming_spec.rb b/spec/ruby_llm/chat_streaming_spec.rb index 956f6d77..8228d537 100644 --- a/spec/ruby_llm/chat_streaming_spec.rb +++ b/spec/ruby_llm/chat_streaming_spec.rb @@ -44,6 +44,15 @@ describe 'Error handling' do let(:chat) { RubyLLM.chat(model: 'claude-3-5-haiku-20241022', provider: :anthropic) } + let(:error_response) do + { + type: 'error', + error: { + type: 'overloaded_error', + message: 'Overloaded' + } + }.to_json + end describe 'Faraday version 1' do before do @@ -51,33 +60,37 @@ end it 'anthropic/claude-3-5-haiku-20241022 supports handling streaming error chunks' do # rubocop:disable RSpec/ExampleLength - VCR.use_cassette( - 'chat_streaming_responses_anthropic_claude-3-5-haiku-20241022_supports_streaming_error_chunks', - record: :none - ) do - chunks = [] - - expect do - chat.ask('Count from 1 to 3') do |chunk| - chunks << chunk - end - end.to raise_error(RubyLLM::Error, /Overloaded/) - end + stub_request(:post, 'https://api.anthropic.com/v1/messages') + .to_return( + status: 529, + body: "data: #{error_response}\n\n", + headers: { 'Content-Type' => 'text/event-stream' } + ) + + chunks = [] + + expect do + chat.ask('Count from 1 to 3') do |chunk| + chunks << chunk + end + end.to raise_error(RubyLLM::OverloadedError) end it 'anthropic/claude-3-5-haiku-20241022 supports handling streaming error events' do # rubocop:disable RSpec/ExampleLength - VCR.use_cassette( - 'chat_streaming_responses_anthropic_claude-3-5-haiku-20241022_supports_streaming_error_events', - record: :none - ) do - chunks = [] - - expect do - chat.ask('Count from 1 to 3') do |chunk| - chunks << chunk - end - end.to raise_error(RubyLLM::Error, /Overloaded/) - end + stub_request(:post, 'https://api.anthropic.com/v1/messages') + .to_return( + status: 200, + body: "event: error\ndata: #{error_response}\n\n", + headers: { 'Content-Type' => 'text/event-stream' } + ) + + chunks = [] + + expect do + chat.ask('Count from 1 to 3') do |chunk| + chunks << chunk + end + end.to raise_error(RubyLLM::OverloadedError) end end @@ -87,33 +100,37 @@ end it 'anthropic/claude-3-5-haiku-20241022 supports handling streaming error chunks' do # rubocop:disable RSpec/ExampleLength - VCR.use_cassette( - 'chat_streaming_responses_anthropic_claude-3-5-haiku-20241022_supports_streaming_error_chunks', - record: :none - ) do - chunks = [] - - expect do - chat.ask('Count from 1 to 3') do |chunk| - chunks << chunk - end - end.to raise_error(RubyLLM::Error, /Overloaded/) - end + stub_request(:post, 'https://api.anthropic.com/v1/messages') + .to_return( + status: 529, + body: "data: #{error_response}\n\n", + headers: { 'Content-Type' => 'text/event-stream' } + ) + + chunks = [] + + expect do + chat.ask('Count from 1 to 3') do |chunk| + chunks << chunk + end + end.to raise_error(RubyLLM::OverloadedError) end it 'anthropic/claude-3-5-haiku-20241022 supports handling streaming error events' do # rubocop:disable RSpec/ExampleLength - VCR.use_cassette( - 'chat_streaming_responses_anthropic_claude-3-5-haiku-20241022_supports_streaming_error_events', - record: :none - ) do - chunks = [] - - expect do - chat.ask('Count from 1 to 3') do |chunk| - chunks << chunk - end - end.to raise_error(RubyLLM::Error, /Overloaded/) - end + stub_request(:post, 'https://api.anthropic.com/v1/messages') + .to_return( + status: 200, + body: "event: error\ndata: #{error_response}\n\n", + headers: { 'Content-Type' => 'text/event-stream' } + ) + + chunks = [] + + expect do + chat.ask('Count from 1 to 3') do |chunk| + chunks << chunk + end + 
end.to raise_error(RubyLLM::OverloadedError) end end end From 038f8fcfa0d0006a84dcdc435a34f70b3f11c4d1 Mon Sep 17 00:00:00 2001 From: Dan Singerman Date: Thu, 17 Jul 2025 11:57:29 +0100 Subject: [PATCH 3/3] Replace cassettes with mock responses, and add support for other providers --- lib/ruby_llm/providers/openai/streaming.rb | 14 +++ spec/ruby_llm/chat_streaming_spec.rb | 127 +++++++++------------ spec/spec_helper.rb | 1 + spec/support/streaming_error_helpers.rb | 111 ++++++++++++++++++ 4 files changed, 183 insertions(+), 70 deletions(-) create mode 100644 spec/support/streaming_error_helpers.rb diff --git a/lib/ruby_llm/providers/openai/streaming.rb b/lib/ruby_llm/providers/openai/streaming.rb index ba313447..36d05a5d 100644 --- a/lib/ruby_llm/providers/openai/streaming.rb +++ b/lib/ruby_llm/providers/openai/streaming.rb @@ -21,6 +21,20 @@ def build_chunk(data) output_tokens: data.dig('usage', 'completion_tokens') ) end + + def parse_streaming_error(data) + error_data = JSON.parse(data) + return unless error_data['error'] + + case error_data.dig('error', 'type') + when 'server_error' + [500, error_data['error']['message']] + when 'rate_limit_exceeded', 'insufficient_quota' + [429, error_data['error']['message']] + else + [400, error_data['error']['message']] + end + end end end end diff --git a/spec/ruby_llm/chat_streaming_spec.rb b/spec/ruby_llm/chat_streaming_spec.rb index 8228d537..13a947c5 100644 --- a/spec/ruby_llm/chat_streaming_spec.rb +++ b/spec/ruby_llm/chat_streaming_spec.rb @@ -4,6 +4,7 @@ RSpec.describe RubyLLM::Chat do include_context 'with configured RubyLLM' + include StreamingErrorHelpers describe 'streaming responses' do CHAT_MODELS.each do |model_info| @@ -43,94 +44,80 @@ end describe 'Error handling' do - let(:chat) { RubyLLM.chat(model: 'claude-3-5-haiku-20241022', provider: :anthropic) } - let(:error_response) do - { - type: 'error', - error: { - type: 'overloaded_error', - message: 'Overloaded' - } - }.to_json - end + CHAT_MODELS.each do |model_info| + model = model_info[:model] + provider = model_info[:provider] - describe 'Faraday version 1' do - before do - stub_const('Faraday::VERSION', '1.10.0') - end + context "with #{provider}/#{model}" do + let(:chat) { RubyLLM.chat(model: model, provider: provider) } + + describe 'Faraday version 1' do # rubocop:disable RSpec/NestedGroups + before do + stub_const('Faraday::VERSION', '1.10.0') + end - it 'anthropic/claude-3-5-haiku-20241022 supports handling streaming error chunks' do # rubocop:disable RSpec/ExampleLength - stub_request(:post, 'https://api.anthropic.com/v1/messages') - .to_return( - status: 529, - body: "data: #{error_response}\n\n", - headers: { 'Content-Type' => 'text/event-stream' } - ) + it "#{provider}/#{model} supports handling streaming error chunks" do # rubocop:disable RSpec/ExampleLength + skip('Error handling not implemented yet') unless error_handling_supported?(provider) - chunks = [] + stub_error_response(provider, :chunk) + + chunks = [] - expect do - chat.ask('Count from 1 to 3') do |chunk| - chunks << chunk + expect do + chat.ask('Count from 1 to 3') do |chunk| + chunks << chunk + end + end.to raise_error(expected_error_for(provider)) end - end.to raise_error(RubyLLM::OverloadedError) - end - it 'anthropic/claude-3-5-haiku-20241022 supports handling streaming error events' do # rubocop:disable RSpec/ExampleLength - stub_request(:post, 'https://api.anthropic.com/v1/messages') - .to_return( - status: 200, - body: "event: error\ndata: #{error_response}\n\n", - headers: { 'Content-Type' => 
'text/event-stream' } - ) + it "#{provider}/#{model} supports handling streaming error events" do # rubocop:disable RSpec/ExampleLength + skip('Error handling not implemented yet') unless error_handling_supported?(provider) - chunks = [] + stub_error_response(provider, :event) + + chunks = [] - expect do - chat.ask('Count from 1 to 3') do |chunk| - chunks << chunk + expect do + chat.ask('Count from 1 to 3') do |chunk| + chunks << chunk + end + end.to raise_error(expected_error_for(provider)) end - end.to raise_error(RubyLLM::OverloadedError) - end - end + end - describe 'Faraday version 2' do - before do - stub_const('Faraday::VERSION', '2.0.0') - end + describe 'Faraday version 2' do # rubocop:disable RSpec/NestedGroups + before do + stub_const('Faraday::VERSION', '2.0.0') + end - it 'anthropic/claude-3-5-haiku-20241022 supports handling streaming error chunks' do # rubocop:disable RSpec/ExampleLength - stub_request(:post, 'https://api.anthropic.com/v1/messages') - .to_return( - status: 529, - body: "data: #{error_response}\n\n", - headers: { 'Content-Type' => 'text/event-stream' } - ) + it "#{provider}/#{model} supports handling streaming error chunks" do # rubocop:disable RSpec/ExampleLength + skip('Error handling not implemented yet') unless error_handling_supported?(provider) - chunks = [] + stub_error_response(provider, :chunk) - expect do - chat.ask('Count from 1 to 3') do |chunk| - chunks << chunk + chunks = [] + + expect do + chat.ask('Count from 1 to 3') do |chunk| + chunks << chunk + end + end.to raise_error(expected_error_for(provider)) end - end.to raise_error(RubyLLM::OverloadedError) - end - it 'anthropic/claude-3-5-haiku-20241022 supports handling streaming error events' do # rubocop:disable RSpec/ExampleLength - stub_request(:post, 'https://api.anthropic.com/v1/messages') - .to_return( - status: 200, - body: "event: error\ndata: #{error_response}\n\n", - headers: { 'Content-Type' => 'text/event-stream' } - ) + it "#{provider}/#{model} supports handling streaming error events" do # rubocop:disable RSpec/ExampleLength + skip('Error handling not implemented yet') unless error_handling_supported?(provider) - chunks = [] + stub_error_response(provider, :event) - expect do - chat.ask('Count from 1 to 3') do |chunk| - chunks << chunk + chunks = [] + + expect do + chat.ask('Count from 1 to 3') do |chunk| + chunks << chunk + end + end.to raise_error(expected_error_for(provider)) end - end.to raise_error(RubyLLM::OverloadedError) + end end end end diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index e25f67e8..d208dc55 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -42,6 +42,7 @@ require 'fileutils' require 'ruby_llm' require 'webmock/rspec' +require_relative 'support/streaming_error_helpers' # VCR Configuration VCR.configure do |config| diff --git a/spec/support/streaming_error_helpers.rb b/spec/support/streaming_error_helpers.rb new file mode 100644 index 00000000..54d10273 --- /dev/null +++ b/spec/support/streaming_error_helpers.rb @@ -0,0 +1,111 @@ +# frozen_string_literal: true + +module StreamingErrorHelpers + ERROR_HANDLING_CONFIGS = { + anthropic: { + url: 'https://api.anthropic.com/v1/messages', + error_response: { + type: 'error', + error: { + type: 'overloaded_error', + message: 'Overloaded' + } + }, + chunk_status: 529, + expected_error: RubyLLM::OverloadedError + }, + openai: { + url: 'https://api.openai.com/v1/chat/completions', + error_response: { + error: { + message: 'The server is temporarily overloaded. 
Please try again later.', + type: 'server_error', + param: nil, + code: nil + } + }, + chunk_status: 500, + expected_error: RubyLLM::ServerError + }, + gemini: { + url: 'https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:streamGenerateContent?alt=sse', + error_response: { + error: { + code: 529, + message: 'Service overloaded - please try again later', + status: 'RESOURCE_EXHAUSTED' + } + }, + chunk_status: 529, + expected_error: RubyLLM::OverloadedError + }, + deepseek: { + url: 'https://api.deepseek.com/chat/completions', + error_response: { + error: { + message: 'Service overloaded - please try again later', + type: 'server_error', + param: nil, + code: nil + } + }, + chunk_status: 500, + expected_error: RubyLLM::ServerError + }, + openrouter: { + url: 'https://openrouter.ai/api/v1/chat/completions', + error_response: { + error: { + message: 'Service overloaded - please try again later', + type: 'server_error', + param: nil, + code: nil + } + }, + chunk_status: 500, + expected_error: RubyLLM::ServerError + }, + ollama: { + url: 'http://localhost:11434/v1/chat/completions', + error_response: { + error: { + message: 'Service overloaded - please try again later', + type: 'server_error', + param: nil, + code: nil + } + }, + chunk_status: 500, + expected_error: RubyLLM::ServerError + } + }.freeze + + def error_handling_supported?(provider) + ERROR_HANDLING_CONFIGS.key?(provider) + end + + def expected_error_for(provider) + ERROR_HANDLING_CONFIGS[provider][:expected_error] + end + + def stub_error_response(provider, type) + config = ERROR_HANDLING_CONFIGS[provider] + return unless config + + body = case type + when :chunk + "#{config[:error_response].to_json}\n\n" + when :event + "event: error\ndata: #{config[:error_response].to_json}\n\n" + end + + status = type == :chunk ? config[:chunk_status] : 200 + + stub_request(:post, config[:url]) + .to_return( + status: status, + body: body, + headers: { 'Content-Type' => 'text/event-stream' } + ) + end +end
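
---

Editor's note (not part of the patch series): a minimal sketch of how an application might consume the behavior these commits add — rescuing the typed streaming errors referenced in the specs (`RubyLLM::OverloadedError`, `RubyLLM::ServerError`). The model name and the warn-and-continue handling here are illustrative assumptions, not part of the PR; `chunk.content` follows RubyLLM's documented streaming block API.

```ruby
require 'ruby_llm'

chat = RubyLLM.chat(model: 'claude-3-5-haiku-20241022', provider: :anthropic)

begin
  chat.ask('Count from 1 to 3') do |chunk|
    # Chunks received before a mid-stream error are still yielded,
    # so partial output can be rendered as it arrives.
    print chunk.content
  end
rescue RubyLLM::OverloadedError, RubyLLM::ServerError => e
  # With this patch, an `event: error` payload (or an error chunk on a
  # non-200 response) surfaces as the same typed error a non-streaming
  # request would raise, under both Faraday v1 and v2.
  warn "Provider error during stream: #{e.message}"
end
```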