From b9adf25e2042e706e8393606f57ea9adae12dd0d Mon Sep 17 00:00:00 2001 From: Dan Singerman Date: Fri, 4 Jul 2025 11:36:02 +0100 Subject: [PATCH 1/3] Added proper handling of streaming error responses across both Faraday V1 and V2 --- lib/ruby_llm/streaming.rb | 28 ++++- ...241022_supports_streaming_error_chunks.yml | 87 +++++++++++++++ ...241022_supports_streaming_error_events.yml | 105 ++++++++++++++++++ spec/ruby_llm/chat_streaming_spec.rb | 76 +++++++++++++ 4 files changed, 291 insertions(+), 5 deletions(-) create mode 100644 spec/fixtures/vcr_cassettes/chat_streaming_responses_anthropic_claude-3-5-haiku-20241022_supports_streaming_error_chunks.yml create mode 100644 spec/fixtures/vcr_cassettes/chat_streaming_responses_anthropic_claude-3-5-haiku-20241022_supports_streaming_error_events.yml diff --git a/lib/ruby_llm/streaming.rb b/lib/ruby_llm/streaming.rb index b7017896..86e9f56a 100644 --- a/lib/ruby_llm/streaming.rb +++ b/lib/ruby_llm/streaming.rb @@ -55,13 +55,13 @@ def create_stream_processor(parser, buffer, &) end end - def process_stream_chunk(chunk, parser, _env, &) + def process_stream_chunk(chunk, parser, env, &) RubyLLM.logger.debug "Received chunk: #{chunk}" if error_chunk?(chunk) - handle_error_chunk(chunk, nil) + handle_error_chunk(chunk, env) else - yield handle_sse(chunk, parser, nil, &) + yield handle_sse(chunk, parser, env, &) end end @@ -88,7 +88,16 @@ def error_chunk?(chunk) def handle_error_chunk(chunk, env) error_data = chunk.split("\n")[1].delete_prefix('data: ') status, _message = parse_streaming_error(error_data) - error_response = env.merge(body: JSON.parse(error_data), status: status) + parsed_data = JSON.parse(error_data) + + # Create a response-like object that works for both Faraday v1 and v2 + error_response = if env + env.merge(body: parsed_data, status: status) + else + # For Faraday v1, create a simple object that responds to .status and .body + Struct.new(:body, :status).new(parsed_data, status) + end + ErrorMiddleware.parse_error(provider: self, response: error_response) rescue JSON::ParserError => e RubyLLM.logger.debug "Failed to parse error chunk: #{e.message}" @@ -122,7 +131,16 @@ def handle_data(data) def handle_error_event(data, env) status, _message = parse_streaming_error(data) - error_response = env.merge(body: JSON.parse(data), status: status) + parsed_data = JSON.parse(data) + + # Create a response-like object that works for both Faraday v1 and v2 + error_response = if env + env.merge(body: parsed_data, status: status) + else + # For Faraday v1, create a simple object that responds to .status and .body + Struct.new(:body, :status).new(parsed_data, status) + end + ErrorMiddleware.parse_error(provider: self, response: error_response) rescue JSON::ParserError => e RubyLLM.logger.debug "Failed to parse error event: #{e.message}" diff --git a/spec/fixtures/vcr_cassettes/chat_streaming_responses_anthropic_claude-3-5-haiku-20241022_supports_streaming_error_chunks.yml b/spec/fixtures/vcr_cassettes/chat_streaming_responses_anthropic_claude-3-5-haiku-20241022_supports_streaming_error_chunks.yml new file mode 100644 index 00000000..c8252b1f --- /dev/null +++ b/spec/fixtures/vcr_cassettes/chat_streaming_responses_anthropic_claude-3-5-haiku-20241022_supports_streaming_error_chunks.yml @@ -0,0 +1,87 @@ +--- +http_interactions: +- request: + method: post + uri: https://api.anthropic.com/v1/messages + body: + encoding: UTF-8 + string: '{"model":"claude-3-5-haiku-20241022","messages":[{"role":"user","content":[{"type":"text","text":"Count + from 1 to 
3"}]}],"temperature":0.7,"stream":true,"max_tokens":8192}' + headers: + User-Agent: + - Faraday v2.13.1 + X-Api-Key: + - "" + Anthropic-Version: + - '2023-06-01' + Content-Type: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 200 + message: OK + headers: + Date: + - Wed, 11 Jun 2025 12:53:20 GMT + Content-Type: + - text/event-stream; charset=utf-8 + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Cache-Control: + - no-cache + Anthropic-Ratelimit-Input-Tokens-Limit: + - '100000' + Anthropic-Ratelimit-Input-Tokens-Remaining: + - '100000' + Anthropic-Ratelimit-Input-Tokens-Reset: + - '2025-06-11T12:53:18Z' + Anthropic-Ratelimit-Output-Tokens-Limit: + - '20000' + Anthropic-Ratelimit-Output-Tokens-Remaining: + - '20000' + Anthropic-Ratelimit-Output-Tokens-Reset: + - '2025-06-11T12:53:18Z' + Anthropic-Ratelimit-Requests-Limit: + - '1000' + Anthropic-Ratelimit-Requests-Remaining: + - '999' + Anthropic-Ratelimit-Requests-Reset: + - '2025-06-11T12:53:19Z' + Anthropic-Ratelimit-Tokens-Limit: + - '120000' + Anthropic-Ratelimit-Tokens-Remaining: + - '120000' + Anthropic-Ratelimit-Tokens-Reset: + - '2025-06-11T12:53:18Z' + Request-Id: + - "" + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Anthropic-Organization-Id: + - 0137b15c-16bf-490d-9f90-8cfd7e325ec0 + Via: + - 1.1 google + Cf-Cache-Status: + - DYNAMIC + X-Robots-Tag: + - none + Server: + - cloudflare + Cf-Ray: + - "" + body: + encoding: UTF-8 + string: |+ + event: error + data: {"type":"error","error":{"details":null,"type":"overloaded_error","message":"Overloaded"} } + + + recorded_at: Wed, 11 Jun 2025 12:53:21 GMT +recorded_with: VCR 6.3.1 +... diff --git a/spec/fixtures/vcr_cassettes/chat_streaming_responses_anthropic_claude-3-5-haiku-20241022_supports_streaming_error_events.yml b/spec/fixtures/vcr_cassettes/chat_streaming_responses_anthropic_claude-3-5-haiku-20241022_supports_streaming_error_events.yml new file mode 100644 index 00000000..2f219382 --- /dev/null +++ b/spec/fixtures/vcr_cassettes/chat_streaming_responses_anthropic_claude-3-5-haiku-20241022_supports_streaming_error_events.yml @@ -0,0 +1,105 @@ +--- +http_interactions: +- request: + method: post + uri: https://api.anthropic.com/v1/messages + body: + encoding: UTF-8 + string: '{"model":"claude-3-5-haiku-20241022","messages":[{"role":"user","content":[{"type":"text","text":"Count + from 1 to 3"}]}],"temperature":0.7,"stream":true,"max_tokens":8192}' + headers: + User-Agent: + - Faraday v2.13.1 + X-Api-Key: + - "" + Anthropic-Version: + - '2023-06-01' + Content-Type: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 200 + message: OK + headers: + Date: + - Wed, 11 Jun 2025 12:53:20 GMT + Content-Type: + - text/event-stream; charset=utf-8 + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Cache-Control: + - no-cache + Anthropic-Ratelimit-Input-Tokens-Limit: + - '100000' + Anthropic-Ratelimit-Input-Tokens-Remaining: + - '100000' + Anthropic-Ratelimit-Input-Tokens-Reset: + - '2025-06-11T12:53:18Z' + Anthropic-Ratelimit-Output-Tokens-Limit: + - '20000' + Anthropic-Ratelimit-Output-Tokens-Remaining: + - '20000' + Anthropic-Ratelimit-Output-Tokens-Reset: + - '2025-06-11T12:53:18Z' + Anthropic-Ratelimit-Requests-Limit: + - '1000' + Anthropic-Ratelimit-Requests-Remaining: + - '999' + Anthropic-Ratelimit-Requests-Reset: + - '2025-06-11T12:53:19Z' + 
Anthropic-Ratelimit-Tokens-Limit: + - '120000' + Anthropic-Ratelimit-Tokens-Remaining: + - '120000' + Anthropic-Ratelimit-Tokens-Reset: + - '2025-06-11T12:53:18Z' + Request-Id: + - "" + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Anthropic-Organization-Id: + - 0137b15c-16bf-490d-9f90-8cfd7e325ec0 + Via: + - 1.1 google + Cf-Cache-Status: + - DYNAMIC + X-Robots-Tag: + - none + Server: + - cloudflare + Cf-Ray: + - "" + body: + encoding: UTF-8 + string: |+ + event: message_start + data: {"type":"message_start","message":{"id":"msg_01C9wXLHGibzr3JZM3HQSiRd","type":"message","role":"assistant","model":"claude-3-5-haiku-20241022","content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":15,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"output_tokens":1,"service_tier":"standard"}}} + + event: content_block_start + data: {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""} } + + event: ping + data: {"type": "ping"} + + event: content_block_delta + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"Here"} } + + event: content_block_delta + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"'s counting from 1 to"} } + + event: content_block_delta + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" 3:\n\n1"} } + + event: error + data: {"type":"error","error":{"details":null,"type":"overloaded_error","message":"Overloaded"} } + + + recorded_at: Wed, 11 Jun 2025 12:53:21 GMT +recorded_with: VCR 6.3.1 +... diff --git a/spec/ruby_llm/chat_streaming_spec.rb b/spec/ruby_llm/chat_streaming_spec.rb index 8ea5f614..956f6d77 100644 --- a/spec/ruby_llm/chat_streaming_spec.rb +++ b/spec/ruby_llm/chat_streaming_spec.rb @@ -41,4 +41,80 @@ end end end + + describe 'Error handling' do + let(:chat) { RubyLLM.chat(model: 'claude-3-5-haiku-20241022', provider: :anthropic) } + + describe 'Faraday version 1' do + before do + stub_const('Faraday::VERSION', '1.10.0') + end + + it 'anthropic/claude-3-5-haiku-20241022 supports handling streaming error chunks' do # rubocop:disable RSpec/ExampleLength + VCR.use_cassette( + 'chat_streaming_responses_anthropic_claude-3-5-haiku-20241022_supports_streaming_error_chunks', + record: :none + ) do + chunks = [] + + expect do + chat.ask('Count from 1 to 3') do |chunk| + chunks << chunk + end + end.to raise_error(RubyLLM::Error, /Overloaded/) + end + end + + it 'anthropic/claude-3-5-haiku-20241022 supports handling streaming error events' do # rubocop:disable RSpec/ExampleLength + VCR.use_cassette( + 'chat_streaming_responses_anthropic_claude-3-5-haiku-20241022_supports_streaming_error_events', + record: :none + ) do + chunks = [] + + expect do + chat.ask('Count from 1 to 3') do |chunk| + chunks << chunk + end + end.to raise_error(RubyLLM::Error, /Overloaded/) + end + end + end + + describe 'Faraday version 2' do + before do + stub_const('Faraday::VERSION', '2.0.0') + end + + it 'anthropic/claude-3-5-haiku-20241022 supports handling streaming error chunks' do # rubocop:disable RSpec/ExampleLength + VCR.use_cassette( + 'chat_streaming_responses_anthropic_claude-3-5-haiku-20241022_supports_streaming_error_chunks', + record: :none + ) do + chunks = [] + + expect do + chat.ask('Count from 1 to 3') do |chunk| + chunks << chunk + end + end.to raise_error(RubyLLM::Error, /Overloaded/) + end + end + + it 'anthropic/claude-3-5-haiku-20241022 supports handling streaming error events' do # rubocop:disable 
RSpec/ExampleLength + VCR.use_cassette( + 'chat_streaming_responses_anthropic_claude-3-5-haiku-20241022_supports_streaming_error_events', + record: :none + ) do + chunks = [] + + expect do + chat.ask('Count from 1 to 3') do |chunk| + chunks << chunk + end + end.to raise_error(RubyLLM::Error, /Overloaded/) + end + end + end + end end From bbabf607a5733dfda94b630cbd363ad61ea855b7 Mon Sep 17 00:00:00 2001 From: Dan Singerman Date: Wed, 16 Jul 2025 20:36:46 +0100 Subject: [PATCH 2/3] replace VCR with mocks --- ...241022_supports_streaming_error_chunks.yml | 87 -------------- ...241022_supports_streaming_error_events.yml | 105 ---------------- spec/ruby_llm/chat_streaming_spec.rb | 113 ++++++++++-------- 3 files changed, 65 insertions(+), 240 deletions(-) delete mode 100644 spec/fixtures/vcr_cassettes/chat_streaming_responses_anthropic_claude-3-5-haiku-20241022_supports_streaming_error_chunks.yml delete mode 100644 spec/fixtures/vcr_cassettes/chat_streaming_responses_anthropic_claude-3-5-haiku-20241022_supports_streaming_error_events.yml diff --git a/spec/fixtures/vcr_cassettes/chat_streaming_responses_anthropic_claude-3-5-haiku-20241022_supports_streaming_error_chunks.yml b/spec/fixtures/vcr_cassettes/chat_streaming_responses_anthropic_claude-3-5-haiku-20241022_supports_streaming_error_chunks.yml deleted file mode 100644 index c8252b1f..00000000 --- a/spec/fixtures/vcr_cassettes/chat_streaming_responses_anthropic_claude-3-5-haiku-20241022_supports_streaming_error_chunks.yml +++ /dev/null @@ -1,87 +0,0 @@ ---- -http_interactions: -- request: - method: post - uri: https://api.anthropic.com/v1/messages - body: - encoding: UTF-8 - string: '{"model":"claude-3-5-haiku-20241022","messages":[{"role":"user","content":[{"type":"text","text":"Count - from 1 to 3"}]}],"temperature":0.7,"stream":true,"max_tokens":8192}' - headers: - User-Agent: - - Faraday v2.13.1 - X-Api-Key: - - "" - Anthropic-Version: - - '2023-06-01' - Content-Type: - - application/json - Accept-Encoding: - - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 - Accept: - - "*/*" - response: - status: - code: 200 - message: OK - headers: - Date: - - Wed, 11 Jun 2025 12:53:20 GMT - Content-Type: - - text/event-stream; charset=utf-8 - Transfer-Encoding: - - chunked - Connection: - - keep-alive - Cache-Control: - - no-cache - Anthropic-Ratelimit-Input-Tokens-Limit: - - '100000' - Anthropic-Ratelimit-Input-Tokens-Remaining: - - '100000' - Anthropic-Ratelimit-Input-Tokens-Reset: - - '2025-06-11T12:53:18Z' - Anthropic-Ratelimit-Output-Tokens-Limit: - - '20000' - Anthropic-Ratelimit-Output-Tokens-Remaining: - - '20000' - Anthropic-Ratelimit-Output-Tokens-Reset: - - '2025-06-11T12:53:18Z' - Anthropic-Ratelimit-Requests-Limit: - - '1000' - Anthropic-Ratelimit-Requests-Remaining: - - '999' - Anthropic-Ratelimit-Requests-Reset: - - '2025-06-11T12:53:19Z' - Anthropic-Ratelimit-Tokens-Limit: - - '120000' - Anthropic-Ratelimit-Tokens-Remaining: - - '120000' - Anthropic-Ratelimit-Tokens-Reset: - - '2025-06-11T12:53:18Z' - Request-Id: - - "" - Strict-Transport-Security: - - max-age=31536000; includeSubDomains; preload - Anthropic-Organization-Id: - - 0137b15c-16bf-490d-9f90-8cfd7e325ec0 - Via: - - 1.1 google - Cf-Cache-Status: - - DYNAMIC - X-Robots-Tag: - - none - Server: - - cloudflare - Cf-Ray: - - "" - body: - encoding: UTF-8 - string: |+ - event: error - data: {"type":"error","error":{"details":null,"type":"overloaded_error","message":"Overloaded"} } - - - recorded_at: Wed, 11 Jun 2025 12:53:21 GMT -recorded_with: VCR 6.3.1 -... 
diff --git a/spec/fixtures/vcr_cassettes/chat_streaming_responses_anthropic_claude-3-5-haiku-20241022_supports_streaming_error_events.yml b/spec/fixtures/vcr_cassettes/chat_streaming_responses_anthropic_claude-3-5-haiku-20241022_supports_streaming_error_events.yml deleted file mode 100644 index 2f219382..00000000 --- a/spec/fixtures/vcr_cassettes/chat_streaming_responses_anthropic_claude-3-5-haiku-20241022_supports_streaming_error_events.yml +++ /dev/null @@ -1,105 +0,0 @@ ---- -http_interactions: -- request: - method: post - uri: https://api.anthropic.com/v1/messages - body: - encoding: UTF-8 - string: '{"model":"claude-3-5-haiku-20241022","messages":[{"role":"user","content":[{"type":"text","text":"Count - from 1 to 3"}]}],"temperature":0.7,"stream":true,"max_tokens":8192}' - headers: - User-Agent: - - Faraday v2.13.1 - X-Api-Key: - - "" - Anthropic-Version: - - '2023-06-01' - Content-Type: - - application/json - Accept-Encoding: - - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 - Accept: - - "*/*" - response: - status: - code: 200 - message: OK - headers: - Date: - - Wed, 11 Jun 2025 12:53:20 GMT - Content-Type: - - text/event-stream; charset=utf-8 - Transfer-Encoding: - - chunked - Connection: - - keep-alive - Cache-Control: - - no-cache - Anthropic-Ratelimit-Input-Tokens-Limit: - - '100000' - Anthropic-Ratelimit-Input-Tokens-Remaining: - - '100000' - Anthropic-Ratelimit-Input-Tokens-Reset: - - '2025-06-11T12:53:18Z' - Anthropic-Ratelimit-Output-Tokens-Limit: - - '20000' - Anthropic-Ratelimit-Output-Tokens-Remaining: - - '20000' - Anthropic-Ratelimit-Output-Tokens-Reset: - - '2025-06-11T12:53:18Z' - Anthropic-Ratelimit-Requests-Limit: - - '1000' - Anthropic-Ratelimit-Requests-Remaining: - - '999' - Anthropic-Ratelimit-Requests-Reset: - - '2025-06-11T12:53:19Z' - Anthropic-Ratelimit-Tokens-Limit: - - '120000' - Anthropic-Ratelimit-Tokens-Remaining: - - '120000' - Anthropic-Ratelimit-Tokens-Reset: - - '2025-06-11T12:53:18Z' - Request-Id: - - "" - Strict-Transport-Security: - - max-age=31536000; includeSubDomains; preload - Anthropic-Organization-Id: - - 0137b15c-16bf-490d-9f90-8cfd7e325ec0 - Via: - - 1.1 google - Cf-Cache-Status: - - DYNAMIC - X-Robots-Tag: - - none - Server: - - cloudflare - Cf-Ray: - - "" - body: - encoding: UTF-8 - string: |+ - event: message_start - data: {"type":"message_start","message":{"id":"msg_01C9wXLHGibzr3JZM3HQSiRd","type":"message","role":"assistant","model":"claude-3-5-haiku-20241022","content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":15,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"output_tokens":1,"service_tier":"standard"}}} - - event: content_block_start - data: {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""} } - - event: ping - data: {"type": "ping"} - - event: content_block_delta - data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"Here"} } - - event: content_block_delta - data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"'s counting from 1 to"} } - - event: content_block_delta - data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" 3:\n\n1"} } - - event: error - data: {"type":"error","error":{"details":null,"type":"overloaded_error","message":"Overloaded"} } - - - recorded_at: Wed, 11 Jun 2025 12:53:21 GMT -recorded_with: VCR 6.3.1 -... 
diff --git a/spec/ruby_llm/chat_streaming_spec.rb b/spec/ruby_llm/chat_streaming_spec.rb index 956f6d77..8228d537 100644 --- a/spec/ruby_llm/chat_streaming_spec.rb +++ b/spec/ruby_llm/chat_streaming_spec.rb @@ -44,6 +44,15 @@ describe 'Error handling' do let(:chat) { RubyLLM.chat(model: 'claude-3-5-haiku-20241022', provider: :anthropic) } + let(:error_response) do + { + type: 'error', + error: { + type: 'overloaded_error', + message: 'Overloaded' + } + }.to_json + end describe 'Faraday version 1' do before do @@ -51,33 +60,37 @@ end it 'anthropic/claude-3-5-haiku-20241022 supports handling streaming error chunks' do # rubocop:disable RSpec/ExampleLength - VCR.use_cassette( - 'chat_streaming_responses_anthropic_claude-3-5-haiku-20241022_supports_streaming_error_chunks', - record: :none - ) do - chunks = [] - - expect do - chat.ask('Count from 1 to 3') do |chunk| - chunks << chunk - end - end.to raise_error(RubyLLM::Error, /Overloaded/) - end + stub_request(:post, 'https://api.anthropic.com/v1/messages') + .to_return( + status: 529, + body: "data: #{error_response}\n\n", + headers: { 'Content-Type' => 'text/event-stream' } + ) + + chunks = [] + + expect do + chat.ask('Count from 1 to 3') do |chunk| + chunks << chunk + end + end.to raise_error(RubyLLM::OverloadedError) end it 'anthropic/claude-3-5-haiku-20241022 supports handling streaming error events' do # rubocop:disable RSpec/ExampleLength - VCR.use_cassette( - 'chat_streaming_responses_anthropic_claude-3-5-haiku-20241022_supports_streaming_error_events', - record: :none - ) do - chunks = [] - - expect do - chat.ask('Count from 1 to 3') do |chunk| - chunks << chunk - end - end.to raise_error(RubyLLM::Error, /Overloaded/) - end + stub_request(:post, 'https://api.anthropic.com/v1/messages') + .to_return( + status: 200, + body: "event: error\ndata: #{error_response}\n\n", + headers: { 'Content-Type' => 'text/event-stream' } + ) + + chunks = [] + + expect do + chat.ask('Count from 1 to 3') do |chunk| + chunks << chunk + end + end.to raise_error(RubyLLM::OverloadedError) end end @@ -87,33 +100,37 @@ end it 'anthropic/claude-3-5-haiku-20241022 supports handling streaming error chunks' do # rubocop:disable RSpec/ExampleLength - VCR.use_cassette( - 'chat_streaming_responses_anthropic_claude-3-5-haiku-20241022_supports_streaming_error_chunks', - record: :none - ) do - chunks = [] - - expect do - chat.ask('Count from 1 to 3') do |chunk| - chunks << chunk - end - end.to raise_error(RubyLLM::Error, /Overloaded/) - end + stub_request(:post, 'https://api.anthropic.com/v1/messages') + .to_return( + status: 529, + body: "data: #{error_response}\n\n", + headers: { 'Content-Type' => 'text/event-stream' } + ) + + chunks = [] + + expect do + chat.ask('Count from 1 to 3') do |chunk| + chunks << chunk + end + end.to raise_error(RubyLLM::OverloadedError) end it 'anthropic/claude-3-5-haiku-20241022 supports handling streaming error events' do # rubocop:disable RSpec/ExampleLength - VCR.use_cassette( - 'chat_streaming_responses_anthropic_claude-3-5-haiku-20241022_supports_streaming_error_events', - record: :none - ) do - chunks = [] - - expect do - chat.ask('Count from 1 to 3') do |chunk| - chunks << chunk - end - end.to raise_error(RubyLLM::Error, /Overloaded/) - end + stub_request(:post, 'https://api.anthropic.com/v1/messages') + .to_return( + status: 200, + body: "event: error\ndata: #{error_response}\n\n", + headers: { 'Content-Type' => 'text/event-stream' } + ) + + chunks = [] + + expect do + chat.ask('Count from 1 to 3') do |chunk| + chunks << chunk + end + 
end.to raise_error(RubyLLM::OverloadedError) end end end From 038f8fcfa0d0006a84dcdc435a34f70b3f11c4d1 Mon Sep 17 00:00:00 2001 From: Dan Singerman Date: Thu, 17 Jul 2025 11:57:29 +0100 Subject: [PATCH 3/3] Replace cassettes with mock responses, and add support for other providers --- lib/ruby_llm/providers/openai/streaming.rb | 14 +++ spec/ruby_llm/chat_streaming_spec.rb | 127 +++++++++------------ spec/spec_helper.rb | 1 + spec/support/streaming_error_helpers.rb | 111 ++++++++++++++++++ 4 files changed, 183 insertions(+), 70 deletions(-) create mode 100644 spec/support/streaming_error_helpers.rb diff --git a/lib/ruby_llm/providers/openai/streaming.rb b/lib/ruby_llm/providers/openai/streaming.rb index ba313447..36d05a5d 100644 --- a/lib/ruby_llm/providers/openai/streaming.rb +++ b/lib/ruby_llm/providers/openai/streaming.rb @@ -21,6 +21,20 @@ def build_chunk(data) output_tokens: data.dig('usage', 'completion_tokens') ) end + + def parse_streaming_error(data) + error_data = JSON.parse(data) + return unless error_data['error'] + + case error_data.dig('error', 'type') + when 'server_error' + [500, error_data['error']['message']] + when 'rate_limit_exceeded', 'insufficient_quota' + [429, error_data['error']['message']] + else + [400, error_data['error']['message']] + end + end end end end diff --git a/spec/ruby_llm/chat_streaming_spec.rb b/spec/ruby_llm/chat_streaming_spec.rb index 8228d537..13a947c5 100644 --- a/spec/ruby_llm/chat_streaming_spec.rb +++ b/spec/ruby_llm/chat_streaming_spec.rb @@ -4,6 +4,7 @@ RSpec.describe RubyLLM::Chat do include_context 'with configured RubyLLM' + include StreamingErrorHelpers describe 'streaming responses' do CHAT_MODELS.each do |model_info| @@ -43,94 +44,80 @@ end describe 'Error handling' do - let(:chat) { RubyLLM.chat(model: 'claude-3-5-haiku-20241022', provider: :anthropic) } - let(:error_response) do - { - type: 'error', - error: { - type: 'overloaded_error', - message: 'Overloaded' - } - }.to_json - end + CHAT_MODELS.each do |model_info| + model = model_info[:model] + provider = model_info[:provider] - describe 'Faraday version 1' do - before do - stub_const('Faraday::VERSION', '1.10.0') - end + context "with #{provider}/#{model}" do + let(:chat) { RubyLLM.chat(model: model, provider: provider) } + + describe 'Faraday version 1' do # rubocop:disable RSpec/NestedGroups + before do + stub_const('Faraday::VERSION', '1.10.0') + end - it 'anthropic/claude-3-5-haiku-20241022 supports handling streaming error chunks' do # rubocop:disable RSpec/ExampleLength - stub_request(:post, 'https://api.anthropic.com/v1/messages') - .to_return( - status: 529, - body: "data: #{error_response}\n\n", - headers: { 'Content-Type' => 'text/event-stream' } - ) + it "#{provider}/#{model} supports handling streaming error chunks" do # rubocop:disable RSpec/ExampleLength + skip('Error handling not implemented yet') unless error_handling_supported?(provider) - chunks = [] + stub_error_response(provider, :chunk) + + chunks = [] - expect do - chat.ask('Count from 1 to 3') do |chunk| - chunks << chunk + expect do + chat.ask('Count from 1 to 3') do |chunk| + chunks << chunk + end + end.to raise_error(expected_error_for(provider)) end - end.to raise_error(RubyLLM::OverloadedError) - end - it 'anthropic/claude-3-5-haiku-20241022 supports handling streaming error events' do # rubocop:disable RSpec/ExampleLength - stub_request(:post, 'https://api.anthropic.com/v1/messages') - .to_return( - status: 200, - body: "event: error\ndata: #{error_response}\n\n", - headers: { 'Content-Type' => 
'text/event-stream' } - ) + it "#{provider}/#{model} supports handling streaming error events" do # rubocop:disable RSpec/ExampleLength + skip('Error handling not implemented yet') unless error_handling_supported?(provider) - chunks = [] + stub_error_response(provider, :event) + + chunks = [] - expect do - chat.ask('Count from 1 to 3') do |chunk| - chunks << chunk + expect do + chat.ask('Count from 1 to 3') do |chunk| + chunks << chunk + end + end.to raise_error(expected_error_for(provider)) end - end.to raise_error(RubyLLM::OverloadedError) - end - end + end - describe 'Faraday version 2' do - before do - stub_const('Faraday::VERSION', '2.0.0') - end + describe 'Faraday version 2' do # rubocop:disable RSpec/NestedGroups + before do + stub_const('Faraday::VERSION', '2.0.0') + end - it 'anthropic/claude-3-5-haiku-20241022 supports handling streaming error chunks' do # rubocop:disable RSpec/ExampleLength - stub_request(:post, 'https://api.anthropic.com/v1/messages') - .to_return( - status: 529, - body: "data: #{error_response}\n\n", - headers: { 'Content-Type' => 'text/event-stream' } - ) + it "#{provider}/#{model} supports handling streaming error chunks" do # rubocop:disable RSpec/ExampleLength + skip('Error handling not implemented yet') unless error_handling_supported?(provider) - chunks = [] + stub_error_response(provider, :chunk) - expect do - chat.ask('Count from 1 to 3') do |chunk| - chunks << chunk + chunks = [] + + expect do + chat.ask('Count from 1 to 3') do |chunk| + chunks << chunk + end + end.to raise_error(expected_error_for(provider)) end - end.to raise_error(RubyLLM::OverloadedError) - end - it 'anthropic/claude-3-5-haiku-20241022 supports handling streaming error events' do # rubocop:disable RSpec/ExampleLength - stub_request(:post, 'https://api.anthropic.com/v1/messages') - .to_return( - status: 200, - body: "event: error\ndata: #{error_response}\n\n", - headers: { 'Content-Type' => 'text/event-stream' } - ) + it "#{provider}/#{model} supports handling streaming error events" do # rubocop:disable RSpec/ExampleLength + skip('Error handling not implemented yet') unless error_handling_supported?(provider) - chunks = [] + stub_error_response(provider, :event) - expect do - chat.ask('Count from 1 to 3') do |chunk| - chunks << chunk + chunks = [] + + expect do + chat.ask('Count from 1 to 3') do |chunk| + chunks << chunk + end + end.to raise_error(expected_error_for(provider)) end - end.to raise_error(RubyLLM::OverloadedError) + end end end end diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index e25f67e8..d208dc55 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -42,6 +42,7 @@ require 'fileutils' require 'ruby_llm' require 'webmock/rspec' +require_relative 'support/streaming_error_helpers' # VCR Configuration VCR.configure do |config| diff --git a/spec/support/streaming_error_helpers.rb b/spec/support/streaming_error_helpers.rb new file mode 100644 index 00000000..54d10273 --- /dev/null +++ b/spec/support/streaming_error_helpers.rb @@ -0,0 +1,111 @@ +# frozen_string_literal: true + +module StreamingErrorHelpers + ERROR_HANDLING_CONFIGS = { + anthropic: { + url: 'https://api.anthropic.com/v1/messages', + error_response: { + type: 'error', + error: { + type: 'overloaded_error', + message: 'Overloaded' + } + }, + chunk_status: 529, + expected_error: RubyLLM::OverloadedError + }, + openai: { + url: 'https://api.openai.com/v1/chat/completions', + error_response: { + error: { + message: 'The server is temporarily overloaded. 
Please try again later.', + type: 'server_error', + param: nil, + code: nil + } + }, + chunk_status: 500, + expected_error: RubyLLM::ServerError + }, + gemini: { + url: 'https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:streamGenerateContent?alt=sse', + error_response: { + error: { + code: 529, + message: 'Service overloaded - please try again later', + status: 'RESOURCE_EXHAUSTED' + } + }, + chunk_status: 529, + expected_error: RubyLLM::OverloadedError + }, + deepseek: { + url: 'https://api.deepseek.com/chat/completions', + error_response: { + error: { + message: 'Service overloaded - please try again later', + type: 'server_error', + param: nil, + code: nil + } + }, + chunk_status: 500, + expected_error: RubyLLM::ServerError + }, + openrouter: { + url: 'https://openrouter.ai/api/v1/chat/completions', + error_response: { + error: { + message: 'Service overloaded - please try again later', + type: 'server_error', + param: nil, + code: nil + } + }, + chunk_status: 500, + expected_error: RubyLLM::ServerError + }, + ollama: { + url: 'http://localhost:11434/v1/chat/completions', + error_response: { + error: { + message: 'Service overloaded - please try again later', + type: 'server_error', + param: nil, + code: nil + } + }, + chunk_status: 500, + expected_error: RubyLLM::ServerError + } + }.freeze + + def error_handling_supported?(provider) + ERROR_HANDLING_CONFIGS.key?(provider) + end + + def expected_error_for(provider) + ERROR_HANDLING_CONFIGS[provider][:expected_error] + end + + def stub_error_response(provider, type) + config = ERROR_HANDLING_CONFIGS[provider] + return unless config + + body = case type + when :chunk + "#{config[:error_response].to_json}\n\n" + when :event + "event: error\ndata: #{config[:error_response].to_json}\n\n" + end + + status = type == :chunk ? config[:chunk_status] : 200 + + stub_request(:post, config[:url]) + .to_return( + status: status, + body: body, + headers: { 'Content-Type' => 'text/event-stream' } + ) + end +end
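
---

Editor's note (not part of the patch series): a minimal sketch of how an application might consume the behavior these commits add — rescuing the typed streaming errors referenced in the specs (`RubyLLM::OverloadedError`, `RubyLLM::ServerError`). The model name and the warn-and-continue handling here are illustrative assumptions, not part of the PR; `chunk.content` follows RubyLLM's documented streaming block API.

```ruby
require 'ruby_llm'

chat = RubyLLM.chat(model: 'claude-3-5-haiku-20241022', provider: :anthropic)

begin
  chat.ask('Count from 1 to 3') do |chunk|
    # Chunks received before a mid-stream error are still yielded,
    # so partial output can be rendered as it arrives.
    print chunk.content
  end
rescue RubyLLM::OverloadedError, RubyLLM::ServerError => e
  # With this patch, an `event: error` payload (or an error chunk on a
  # non-200 response) surfaces as the same typed error a non-streaming
  # request would raise, under both Faraday v1 and v2.
  warn "Provider error during stream: #{e.message}"
end
```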