Changes from all commits (49 commits)
6cb37fe
Initial setup
jterapin Aug 27, 2025
d31ae4d
Minor adjustments
jterapin Aug 29, 2025
74ed189
Directory downloader impl
jterapin Aug 29, 2025
4e8db17
Directory uploader impl
jterapin Aug 29, 2025
098049d
Merge branch 'version-3' into tm-directory-features
jterapin Aug 29, 2025
8f387d2
Merge branch 'version-3' into tm-directory-features
jterapin Sep 8, 2025
7749ba5
Add default executor
jterapin Sep 9, 2025
99f0de6
Add running check to default executor
jterapin Sep 9, 2025
441fa82
Refactor MultipartFileUploader with executor
jterapin Sep 9, 2025
c792439
Fix typo in MultipartFileUploader
jterapin Sep 9, 2025
adce496
Update TM upload file with executor
jterapin Sep 9, 2025
012c2bc
Merge branch 'version-3' into tm-directory-features
jterapin Sep 9, 2025
ee9c9da
Merge branch 'version-3' into tm-directory-features
jterapin Sep 10, 2025
75df844
Merge from version-3
jterapin Sep 12, 2025
e5d3245
Merge branch 'version-3' into tm-directory-features
jterapin Sep 15, 2025
173f5e4
Merge branch 'version-3' into tm-directory-features
jterapin Sep 17, 2025
cf88ff2
Merge branch 'version-3' into tm-directory-features
jterapin Sep 17, 2025
2758c4d
Update to only spawn workers when needed
jterapin Sep 17, 2025
b92d3b3
Update directory uploader
jterapin Sep 18, 2025
6afb495
Update directory uploader
jterapin Sep 18, 2025
86b53e8
Update uploader
jterapin Sep 18, 2025
d587ae1
Merge branch 'version-3' into tm-directory-features
jterapin Sep 18, 2025
eae3814
Add minor improvements to directory uploader
jterapin Sep 19, 2025
14010ef
Merge branch 'version-3' into tm-directory-features
jterapin Sep 23, 2025
8ab4edc
Fix specs
jterapin Sep 23, 2025
face84d
Minor updates to multipart file uploader
jterapin Sep 23, 2025
36a1e87
Minor refactors
jterapin Sep 24, 2025
7dd9f98
Fix options
jterapin Sep 24, 2025
77ab1ba
Refactor DirectoryUploader
jterapin Sep 24, 2025
e843137
Merge version-3 into branch
jterapin Sep 24, 2025
009127d
Update multipartfileuploader
jterapin Sep 24, 2025
39912fd
Refactor FileDownloader
jterapin Sep 25, 2025
f9fb117
Implement Directory Downloader
jterapin Sep 25, 2025
d307555
Add TODO
jterapin Sep 25, 2025
a14649a
Merge version-3 into branch
jterapin Sep 29, 2025
b9231e7
Feedback - update default executor
jterapin Sep 29, 2025
d991128
Refactor file downloader
jterapin Sep 29, 2025
bc533a0
Support FileDownloader changes
jterapin Sep 29, 2025
9efc77f
Extra updates to FileDownloader
jterapin Sep 29, 2025
1cc3fcf
Address feedback for FileUploader and MultipartFileUploader
jterapin Sep 29, 2025
7b6b220
Merge branch 'version-3' into tm-directory-features
jterapin Oct 3, 2025
45d2f5d
Add improvements to directory uploader
jterapin Oct 6, 2025
64d481e
Update DirectoryDownloader based on feedbacks
jterapin Oct 6, 2025
2ab63fb
Minor feedback updates
jterapin Oct 6, 2025
7af3e32
Merge branch 'version-3' into tm-directory-features
jterapin Oct 6, 2025
747965f
Update executor
jterapin Oct 7, 2025
2230478
Improve Directory Uploader
jterapin Oct 7, 2025
cb145a0
Handle failure cases correctly
jterapin Oct 7, 2025
0cb35cd
Improve Executor
jterapin Oct 7, 2025
2 changes: 2 additions & 0 deletions gems/aws-sdk-s3/CHANGELOG.md
@@ -1,6 +1,8 @@
Unreleased Changes
------------------

* Feature - TODO

1.199.1 (2025-09-25)
------------------

4 changes: 4 additions & 0 deletions gems/aws-sdk-s3/lib/aws-sdk-s3/customizations.rb
@@ -6,6 +6,7 @@ module S3
autoload :BucketRegionCache, 'aws-sdk-s3/bucket_region_cache'
autoload :Encryption, 'aws-sdk-s3/encryption'
autoload :EncryptionV2, 'aws-sdk-s3/encryption_v2'
autoload :DefaultExecutor, 'aws-sdk-s3/default_executor'
autoload :FilePart, 'aws-sdk-s3/file_part'
autoload :FileUploader, 'aws-sdk-s3/file_uploader'
autoload :FileDownloader, 'aws-sdk-s3/file_downloader'
@@ -18,6 +19,9 @@ module S3
autoload :ObjectMultipartCopier, 'aws-sdk-s3/object_multipart_copier'
autoload :PresignedPost, 'aws-sdk-s3/presigned_post'
autoload :Presigner, 'aws-sdk-s3/presigner'
autoload :DirectoryProgress, 'aws-sdk-s3/directory_progress'
autoload :DirectoryUploader, 'aws-sdk-s3/directory_uploader'
autoload :DirectoryDownloader, 'aws-sdk-s3/directory_downloader'
autoload :TransferManager, 'aws-sdk-s3/transfer_manager'

# s3 express session auth
12 changes: 10 additions & 2 deletions gems/aws-sdk-s3/lib/aws-sdk-s3/customizations/object.rb
@@ -459,11 +459,17 @@ def upload_stream(options = {}, &block)
# @see Client#upload_part
def upload_file(source, options = {})
uploading_options = options.dup
uploader = FileUploader.new(multipart_threshold: uploading_options.delete(:multipart_threshold), client: client)
executor = DefaultExecutor.new(max_threads: uploading_options.delete(:thread_count))
uploader = FileUploader.new(
client: client,
executor: executor,
multipart_threshold: uploading_options.delete(:multipart_threshold)
)
response = Aws::Plugins::UserAgent.metric('RESOURCE_MODEL') do
uploader.upload(source, uploading_options.merge(bucket: bucket_name, key: key))
end
yield response if block_given?
executor.shutdown
true
end
deprecated(:upload_file, use: 'Aws::S3::TransferManager#upload_file', version: 'next major version')
@@ -539,10 +545,12 @@ def upload_file(source, options = {})
# @see Client#get_object
# @see Client#head_object
def download_file(destination, options = {})
downloader = FileDownloader.new(client: client)
executor = DefaultExecutor.new(max_threads: options[:thread_count])
downloader = FileDownloader.new(client: client, executor: executor)
Aws::Plugins::UserAgent.metric('RESOURCE_MODEL') do
downloader.download(destination, options.merge(bucket: bucket_name, key: key))
end
executor.shutdown
true
end
deprecated(:download_file, use: 'Aws::S3::TransferManager#download_file', version: 'next major version')
98 changes: 98 additions & 0 deletions gems/aws-sdk-s3/lib/aws-sdk-s3/default_executor.rb
@@ -0,0 +1,98 @@
# frozen_string_literal: true

module Aws
module S3
# @api private
class DefaultExecutor
RUNNING = :running
SHUTTING_DOWN = :shutting_down
SHUTDOWN = :shutdown

def initialize(options = {})
@max_threads = options[:max_threads] || 10
@state = RUNNING
@queue = Queue.new
@pool = []
@mutex = Mutex.new
end

def post(*args, &block)
@mutex.synchronize do
raise 'Executor has been shutdown and is no longer accepting tasks' unless @state == RUNNING

@queue << [args, block]
ensure_worker_available
end
true
end

def kill
@mutex.synchronize do
@state = SHUTDOWN
@pool.each(&:kill)
@pool.clear
@queue.clear
end
true
end

def shutdown(timeout = nil)
@mutex.synchronize do
return true if @state == SHUTDOWN

@state = SHUTTING_DOWN
@pool.size.times { @queue << :shutdown }
end

if timeout
deadline = Time.now + timeout
@pool.each do |thread|
remaining = deadline - Time.now
break if remaining <= 0

thread.join([remaining, 0].max)
end
@pool.select(&:alive?).each(&:kill)
else
@pool.each(&:join)
end

@pool.clear
@state = SHUTDOWN
true
end

def running?
@state == RUNNING
end

def shutting_down?
@state == SHUTTING_DOWN
end

def shutdown?
@state == SHUTDOWN
end

private

def ensure_worker_available
return unless @state == RUNNING

@pool.select!(&:alive?)
@pool << spawn_worker if @pool.size < @max_threads
end

def spawn_worker
Thread.new do
while (job = @queue.shift)
break if job == :shutdown

args, block = job
block.call(*args)
end
end
end
end
end
end
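For context, a minimal usage sketch of DefaultExecutor as it is exercised by the customizations above (the task body and thread count here are illustrative, not part of the diff):

executor = Aws::S3::DefaultExecutor.new(max_threads: 5)
executor.post('my-key') do |key|
  # runs on a pool worker; workers are spawned lazily, up to max_threads
  puts "processing #{key}"
end
executor.shutdown # drains queued tasks, joins the workers, then clears the pool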
175 changes: 175 additions & 0 deletions gems/aws-sdk-s3/lib/aws-sdk-s3/directory_downloader.rb
@@ -0,0 +1,175 @@
# frozen_string_literal: true

module Aws
module S3
# Raised when DirectoryDownloader fails to download objects from an S3 bucket
class DirectoryDownloadError < StandardError
Review comment (Contributor): By convention we were putting these in separate files right? If you want to promote the other two (multipart errors) to the files where they are used that's fine too, but let's stay consistent.

Reply (Contributor Author): Yup, I'm planning to separate them out.
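A rough sketch of the split under discussion (the file path and autoload entry are assumptions, following the gem's existing one-class-per-file layout):

# gems/aws-sdk-s3/lib/aws-sdk-s3/directory_download_error.rb would hold DirectoryDownloadError,
# and customizations.rb would gain:
autoload :DirectoryDownloadError, 'aws-sdk-s3/directory_download_error'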

def initialize(message, errors = [])
@errors = errors
super(message)
end

# @return [Array<StandardError>] The list of errors encountered when downloading objects
attr_reader :errors
end

# @api private
class DirectoryDownloader
def initialize(options = {})
@client = options[:client]
@executor = options[:executor]
@abort_requested = false
@mutex = Mutex.new
end

attr_reader :abort_requested

def download(destination, bucket:, **options)
if File.exist?(destination)
raise ArgumentError, 'invalid destination, expected a directory' unless File.directory?(destination)
else
FileUtils.mkdir_p(destination)
end

download_opts = build_download_opts(destination, bucket, options)
downloader = FileDownloader.new(client: @client, executor: @executor)
producer = ObjectProducer.new(download_opts.merge(client: @client, directory_downloader: self))
downloads, errors = process_download_queue(producer, downloader, download_opts)
build_result(downloads, errors)
ensure
@abort_requested = false
end

private

def request_abort
@mutex.synchronize { @abort_requested = true }
end

def build_download_opts(destination, bucket, opts)
{
destination: destination,
bucket: bucket,
s3_prefix: opts.delete(:s3_prefix),
ignore_failure: opts.delete(:ignore_failure) || false,
filter_callback: opts.delete(:filter_callback),
progress_callback: opts.delete(:progress_callback)
}
end

def build_result(download_count, errors)
if @abort_requested
msg = "directory download failed: #{errors.map(&:message).join('; ')}"
raise DirectoryDownloadError.new(msg, errors)
else
{
completed_downloads: [download_count - errors.count, 0].max,
failed_downloads: errors.count,
errors: errors.any? ? errors : nil
}.compact
end
end

def handle_error(executor, opts)
return if opts[:ignore_failure]

request_abort
executor.kill
end

def process_download_queue(producer, downloader, opts)
# Separate executor for lightweight queuing tasks,
# avoiding interference with main @executor lifecycle
queue_executor = DefaultExecutor.new
progress = DirectoryProgress.new(opts[:progress_callback]) if opts[:progress_callback]
download_attempts = 0
errors = []
begin
producer.each do |object|
break if @abort_requested

download_attempts += 1
queue_executor.post(object) do |o|
dir_path = File.dirname(o[:path])
FileUtils.mkdir_p(dir_path) unless dir_path == opts[:destination] || Dir.exist?(dir_path)

downloader.download(o[:path], bucket: opts[:bucket], key: o[:key])
progress&.call(File.size(o[:path]))
rescue StandardError => e
errors << e
handle_error(queue_executor, opts)
end
end
rescue StandardError => e
errors << e
handle_error(queue_executor, opts)
end
queue_executor.shutdown
[download_attempts, errors]
end

# @api private
class ObjectProducer
include Enumerable

DEFAULT_QUEUE_SIZE = 100

def initialize(options = {})
@destination_dir = options[:destination]
@client = options[:client]
@bucket = options[:bucket]
@s3_prefix = options[:s3_prefix]
@filter_callback = options[:filter_callback]
@directory_downloader = options[:directory_downloader]
@object_queue = SizedQueue.new(DEFAULT_QUEUE_SIZE)
end

def each
producer_thread = Thread.new do
stream_objects
ensure
@object_queue << :done
end

# Yield objects from internal queue
while (object = @object_queue.shift) != :done
break if @directory_downloader.abort_requested

yield object
end
ensure
producer_thread.join
end

private

def build_object_entry(key)
{ path: File.join(@destination_dir, normalize_key(key)), key: key }
end

# TODO: double check handling of objects whose keys end with /
def stream_objects(continuation_token: nil)
resp = @client.list_objects_v2(bucket: @bucket, prefix: @s3_prefix, continuation_token: continuation_token)
resp.contents.each do |o|
break if @directory_downloader.abort_requested
next if o.key.end_with?('/')
next unless include_object?(o.key)

@object_queue << build_object_entry(o.key)
end
stream_objects(continuation_token: resp.next_continuation_token) if resp.next_continuation_token
end

def include_object?(key)
return true unless @filter_callback

@filter_callback.call(key)
end

def normalize_key(key)
key = key.delete_prefix(@s3_prefix) if @s3_prefix
File::SEPARATOR == '/' ? key : key.tr('/', File::SEPARATOR)
end
end
end
end
end
24 changes: 24 additions & 0 deletions gems/aws-sdk-s3/lib/aws-sdk-s3/directory_progress.rb
@@ -0,0 +1,24 @@
# frozen_string_literal: true

module Aws
module S3
# @api private
class DirectoryProgress
def initialize(progress_callback)
@transferred_bytes = 0
@transferred_files = 0
@progress_callback = progress_callback
@mutex = Mutex.new
end

def call(bytes_transferred)
@mutex.synchronize do
@transferred_bytes += bytes_transferred
@transferred_files += 1

@progress_callback.call(@transferred_bytes, @transferred_files)
end
end
end
end
end
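For illustration, a progress_callback compatible with DirectoryProgress receives the running totals passed by #call (the callback body here is hypothetical):

progress_callback = proc do |transferred_bytes, transferred_files|
  # cumulative totals across the whole directory transfer
  puts "#{transferred_files} files, #{transferred_bytes} bytes transferred"
end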