Skip to content

Commit 2adc605

Browse files
Merge pull request #16 from synccomputingcode/graham/jobs-submit
Fix some errors that occur when applying cluster recommendations to DBX jobs
2 parents 66df82f + 900223c commit 2adc605

File tree

3 files changed

+25
-5
lines changed

3 files changed

+25
-5
lines changed

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ path = "sync/__init__.py"
5454
sync-cli = "sync.__main__:main"
5555

5656
[project.urls]
57-  Home = "https://github.com/synccomputingcode/sync_sdk"
57+  Home = "https://github.com/synccomputingcode/syncsparkpy"
5858

5959
[tool.black]
6060
line-length = 100

sync/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
"""Library for leveraging the power of Sync"""
2-  __version__ = "0.0.6"
2+  __version__ = "0.0.7"
33

44
TIME_FORMAT = "%Y-%m-%dT%H:%M:%SZ"

sync/awsdatabricks.py

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -271,9 +271,18 @@ def get_prediction_job(
271271
if tasks := job_settings.get("tasks", []):
272272
cluster_response = _get_job_cluster(tasks, job_settings.get("job_clusters", []))
273273
if cluster := cluster_response.result:
274-            prediction_cluster = _deep_update(
275-                cluster, prediction["solutions"][preference]["configuration"]
276-            )
274+            # num_workers/autoscale are mutually exclusive settings, and we are relying on our Prediction
275+            # Recommendations to set these appropriately. Since we may recommend a Static cluster (i.e. a cluster
276+            # with `num_workers`) for a cluster that was originally autoscaled, we want to make sure to remove this
277+            # prior configuration
278+            if "num_workers" in cluster:
279+                del cluster["num_workers"]
280+
281+            if "autoscale" in cluster:
282+                del cluster["autoscale"]
283+
284+            prediction_cluster = _deep_update(cluster, prediction["solutions"][preference]["configuration"])
285+
277286
if cluster_key := tasks[0].get("job_cluster_key"):
278287
job_settings["job_clusters"] = [
279288
j
@@ -380,9 +389,20 @@ def run_job_object(job: dict) -> Response[str]:
380389

381390
if cluster := cluster_response.result:
382391
if len(tasks) == 1:
392+                # For `new_cluster` definitions, Databricks will automatically assign the newly created cluster a name,
393+                # and will reject any run submissions where the `cluster_name` is pre-populated
394+                if "cluster_name" in cluster:
395+                    del cluster["cluster_name"]
396+
383397
tasks[0]["new_cluster"] = cluster
384398
del tasks[0]["job_cluster_key"]
385399
else:
400+                # If the original Job has a pre-existing Policy, we want to remove this from the `create_cluster` payload,
401+                # since we are not allowed to create clusters with certain policies via that endpoint, e.g. we cannot
402+                # create a `Job Compute` cluster via this endpoint.
403+                if "policy_id" in cluster:
404+                    del cluster["policy_id"]
405+
386406
# Create an "All-Purpose Compute" cluster
387407
cluster["cluster_name"] = cluster["cluster_name"] or job["settings"]["name"]
388408
cluster["autotermination_minutes"] = 10 # 10 minutes is the minimum

0 commit comments

Comments (0)