# frozen_string_literal: true

# Integration test demonstrating how to escape producer fencing by using the producer.reload
# event to modify transactional.id. This test shows the recommended pattern for handling fencing
# in production environments.
#
# When a producer gets fenced, simply reloading with the same transactional.id creates an
# infinite loop. But by subscribing to the producer.reload event and modifying the
# transactional.id, the producer can escape fencing and continue operating with a new identity.
#
# Exit status: 0 when exactly one reload (attempt 1) was observed, 1 otherwise.

require 'waterdrop'
require 'logger'
require 'securerandom'

BOOTSTRAP_SERVERS = ENV.fetch('BOOTSTRAP_SERVERS', '127.0.0.1:9092')
# Same ID for both producers initially, unique per run thanks to the UUID suffix
TRANSACTIONAL_ID = "fence-escape-test-#{SecureRandom.uuid}".freeze

# Track instrumentation events
reload_events = []
# Collected for diagnostics only; not part of the pass/fail condition below
error_events = []

# Create first producer with reload enabled
producer1 = WaterDrop::Producer.new do |config|
  config.kafka = {
    'bootstrap.servers': BOOTSTRAP_SERVERS,
    'transactional.id': TRANSACTIONAL_ID,
    'transaction.timeout.ms': 30_000,
    'message.timeout.ms': 30_000
  }
  config.max_wait_timeout = 5_000
  config.logger = Logger.new($stdout, level: Logger::INFO)
  config.reload_on_transaction_fatal_error = true
  # IMPORTANT: Empty the list so that :fenced is no longer treated as non-reloadable and
  # reload attempts are allowed
  config.non_reloadable_errors = []
  config.max_attempts_on_transaction_fatal_error = 5
  config.wait_backoff_on_transaction_fatal_error = 100
end

# Subscribe to producer.reload event and modify transactional.id to escape fencing.
# The key must be the same symbol used in config.kafka above, so the original value is replaced.
producer1.monitor.subscribe('producer.reload') do |event|
  config = event[:caller].config
  config.kafka[:'transactional.id'] = "#{TRANSACTIONAL_ID}-recovered-#{Time.now.to_i}"
end

producer1.monitor.subscribe('producer.reloaded') { |event| reload_events << event }
producer1.monitor.subscribe('error.occurred') { |event| error_events << event }

topic_name = "it-fence-escape-#{SecureRandom.hex(6)}"

# First transaction with producer1
producer1.transaction do
  producer1.produce_sync(topic: topic_name, payload: 'message1')
end

# Create second producer with same ID to cause fencing
producer2 = WaterDrop::Producer.new do |config|
  config.kafka = {
    'bootstrap.servers': BOOTSTRAP_SERVERS,
    'transactional.id': TRANSACTIONAL_ID,
    'transaction.timeout.ms': 30_000,
    'message.timeout.ms': 30_000
  }
  config.max_wait_timeout = 5_000
  config.logger = Logger.new($stdout, level: Logger::INFO)
end

# This transaction will fence producer1
producer2.transaction do
  producer2.produce_sync(topic: topic_name, payload: 'message2')
end

# This attempt runs into the fence and should trigger a reload with the transactional.id change.
# The fencing error itself may still be re-raised to the caller.
begin
  producer1.transaction do
    producer1.produce_sync(topic: topic_name, payload: 'message3-recovered')
  end
rescue Rdkafka::RdkafkaError => e
  # This is expected. The user needs to retry the transaction if they want to.
  # Reloading does not mean that fencing is not re-raised in the transactional mode.
  # Any other error code is a test failure.
  exit(1) unless e.code == :fenced
end

# After the reload both producers are expected to keep working side by side: producer1 now uses
# the modified transactional.id, so alternating transactions should not fence each other again.
10.times do
  producer1.transaction do
    producer1.produce_sync(topic: topic_name, payload: 'message3-recovered')
  end

  producer2.transaction do
    producer2.produce_sync(topic: topic_name, payload: 'message2')
  end
end

producer1.close
producer2.close

# Verify results
# Should have exactly 1 reload (not multiple like in the loop case)
success = reload_events.size == 1 && reload_events.first[:attempt] == 1

exit(success ? 0 : 1)