Commit e43d10b

Merge pull request #46 from synccomputingcode/add-dbfs-cluster-log-support-to-projects
add support for dbfs cluster log locations
2 parents 91fba90 + cda7cbc

File tree: 12 files changed, +312 −35 lines

sync/_databricks.py

108 additions, 6 deletions
@@ -15,7 +15,11 @@
 import boto3 as boto

 from sync.api.predictions import create_prediction_with_eventlog_bytes, get_prediction
-from sync.api.projects import create_project_submission_with_eventlog_bytes, get_project
+from sync.api.projects import (
+    create_project_submission_with_eventlog_bytes,
+    get_project,
+    get_project_recommendation,
+)
 from sync.clients.databricks import get_default_client
 from sync.config import CONFIG
 from sync.models import DatabricksAPIError, DatabricksClusterReport, DatabricksError, Response
@@ -381,11 +385,11 @@ def get_cluster_report(
     if "error_code" in run:
         return Response(error=DatabricksAPIError(**run))

-    project_cluster_tasks = _get_project_cluster_tasks(run, exclude_tasks, project_id)
+    project_cluster_tasks = _get_project_cluster_tasks(run, exclude_tasks)
     cluster_tasks = project_cluster_tasks.get(project_id)
     if not cluster_tasks:
         return Response(
-            error=DatabricksError(f"Failed to locate cluster for project ID {project_id}")
+            error=DatabricksError(message=f"Failed to locate cluster for project ID {project_id}")
         )

     return _get_cluster_report(
@@ -582,6 +586,92 @@ def get_prediction_cluster(
     return prediction_response


+def get_recommendation_job(job_id: str, project_id: str, recommendation_id: str) -> Response[dict]:
+    """Apply the recommendation to the specified job.
+
+    The basis job can only have tasks that run on the same job cluster. That cluster is updated
+    with the configuration from the recommendation and returned in the result job configuration.
+    Use this function to apply a recommendation to an existing job or to test one with a one-off
+    run.
+
+    :param job_id: basis job ID
+    :type job_id: str
+    :param project_id: Sync project ID
+    :type project_id: str
+    :param recommendation_id: recommendation ID
+    :type recommendation_id: str
+    :return: job object with the recommendation applied to it
+    :rtype: Response[dict]
+    """
+    job = get_default_client().get_job(job_id)
+
+    if "error_code" in job:
+        return Response(error=DatabricksAPIError(**job))
+
+    job_settings = job["settings"]
+    tasks = job_settings.get("tasks", [])
+    if tasks:
+        cluster_response = _get_job_cluster(tasks, job_settings.get("job_clusters", []))
+        cluster = cluster_response.result
+        if cluster:
+            recommendation_cluster_response = get_recommendation_cluster(
+                cluster, project_id, recommendation_id
+            )
+            recommendation_cluster = recommendation_cluster_response.result
+            if recommendation_cluster:
+                cluster_key = tasks[0].get("job_cluster_key")
+                if cluster_key:
+                    job_settings["job_clusters"] = [
+                        j
+                        for j in job_settings["job_clusters"]
+                        if j.get("job_cluster_key") != cluster_key
+                    ] + [{"job_cluster_key": cluster_key, "new_cluster": recommendation_cluster}]
+                else:
+                    # For `new_cluster` definitions, Databricks automatically assigns the newly
+                    # created cluster a name, and rejects any run submissions where the
+                    # `cluster_name` is pre-populated
+                    if "cluster_name" in recommendation_cluster:
+                        del recommendation_cluster["cluster_name"]
+                    tasks[0]["new_cluster"] = recommendation_cluster
+                return Response(result=job)
+            return recommendation_cluster_response
+        return cluster_response
+    return Response(error=DatabricksError(message="No task found in job"))
+
+
+def get_recommendation_cluster(
+    cluster: dict, project_id: str, recommendation_id: str
+) -> Response[dict]:
+    """Apply the recommendation to the provided cluster.
+
+    The cluster is updated with configuration from the recommendation and returned in the result.
+
+    :param cluster: Databricks cluster object
+    :type cluster: dict
+    :param project_id: Sync project ID
+    :type project_id: str
+    :param recommendation_id: ID of the recommendation to fetch and apply to the given cluster
+    :type recommendation_id: str
+    :return: cluster object with the recommendation applied to it
+    :rtype: Response[dict]
+    """
+    recommendation_response = get_project_recommendation(project_id, recommendation_id)
+    recommendation = (recommendation_response.result or {}).get("recommendation")
+    if recommendation:
+        # num_workers/autoscale are mutually exclusive settings, and we rely on the
+        # recommendation to set these appropriately. Since we may recommend a static cluster
+        # (i.e. a cluster with `num_workers`) for a cluster that was originally autoscaled,
+        # make sure to remove the prior configuration
+        if "num_workers" in cluster:
+            del cluster["num_workers"]
+
+        if "autoscale" in cluster:
+            del cluster["autoscale"]
+
+        recommendation_cluster = _deep_update(cluster, recommendation["configuration"])
+
+        return Response(result=recommendation_cluster)
+    return recommendation_response


 def get_project_job(job_id: str, project_id: str, region_name: str = None) -> Response[dict]:
     """Apply project configuration to a job.
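The two new functions compose: get_recommendation_job fetches the job, resolves its single job cluster via _get_job_cluster, and hands that cluster to get_recommendation_cluster, which overlays the recommendation's configuration. A minimal usage sketch, assuming the functions are importable from sync._databricks as the file path suggests; the IDs below are hypothetical placeholders:

from sync._databricks import get_recommendation_job

# Hypothetical IDs; substitute values from your Databricks workspace and Sync project
response = get_recommendation_job(
    job_id="123",
    project_id="my-project-id",
    recommendation_id="my-recommendation-id",
)
if response.result:
    job = response.result  # job["settings"] now carries the recommended cluster config
else:
    print(response.error)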
@@ -672,20 +762,32 @@ def get_project_cluster_settings(project_id: str, region_name: str = None) -> Re
             }
         }

-        s3_url = project.get("s3_url")
-        if s3_url:
+        cluster_log_url = urlparse(project.get("cluster_log_url"))
+        if cluster_log_url.scheme == "s3":
             result.update(
                 {
                     "cluster_log_conf": {
                         "s3": {
-                            "destination": f"{s3_url}/{project_id}",
+                            "destination": f"{cluster_log_url.geturl()}/{project_id}",
                             "enable_encryption": True,
                             "region": region_name or boto.client("s3").meta.region_name,
                             "canned_acl": "bucket-owner-full-control",
                         }
                     }
                 }
             )
+        elif cluster_log_url.scheme == "dbfs":
+            result.update(
+                {
+                    "cluster_log_conf": {
+                        "dbfs": {
+                            "destination": f"{cluster_log_url.geturl()}/{project_id}",
+                        }
+                    }
+                }
+            )
+
         return Response(result=result)
     return project_response
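For reference, a sketch of the branching the updated get_project_cluster_settings performs, assuming a hypothetical project whose cluster_log_url is "dbfs:/cluster-logs"; urlparse handles both schemes uniformly:

from urllib.parse import urlparse

cluster_log_url = urlparse("dbfs:/cluster-logs")  # hypothetical project setting
project_id = "my-project-id"  # hypothetical

if cluster_log_url.scheme == "dbfs":
    cluster_log_conf = {
        "dbfs": {"destination": f"{cluster_log_url.geturl()}/{project_id}"}
    }
# cluster_log_conf == {"dbfs": {"destination": "dbfs:/cluster-logs/my-project-id"}}

Unlike the S3 branch, the DBFS destination needs no region, encryption, or ACL settings, which is why the new branch emits only the destination key.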

sync/api/projects.py

119 additions, 20 deletions
@@ -2,14 +2,22 @@
 """
 import io
 import logging
+from time import sleep
 from typing import List
 from urllib.parse import urlparse

 import httpx

 from sync.api.predictions import generate_presigned_url, get_predictions
 from sync.clients.sync import get_default_client
-from sync.models import Platform, Preference, ProjectError, Response, SubmissionError
+from sync.models import (
+    Platform,
+    Preference,
+    ProjectError,
+    RecommendationError,
+    Response,
+    SubmissionError,
+)

 logger = logging.getLogger()

@@ -45,7 +53,7 @@ def create_project(
     product_code: str,
     description: str = None,
     job_id: str = None,
-    s3_url: str = None,
+    cluster_log_url: str = None,
     prediction_preference: Preference = Preference.ECONOMY,
     prediction_params: dict = None,
     app_id: str = None,
@@ -60,8 +68,8 @@
     :type description: str, optional
     :param job_id: Databricks job ID, defaults to None
     :type job_id: str, optional
-    :param s3_url: S3 URL under which to store project configurations and logs, defaults to None
-    :type s3_url: str, optional
+    :param cluster_log_url: S3 or DBFS URL under which to store project configurations and logs, defaults to None
+    :type cluster_log_url: str, optional
     :param prediction_preference: preferred prediction solution, defaults to `Preference.ECONOMY`
     :type prediction_preference: Preference, optional
     :param prediction_params: dictionary of prediction parameters, defaults to None. Valid options are documented `here <https://developers.synccomputing.com/reference/create_project_v1_projects_post>`__
@@ -78,7 +86,7 @@
         "product_code": product_code,
         "description": description,
         "job_id": job_id,
-        "s3_url": s3_url,
+        "cluster_log_url": cluster_log_url,
         "prediction_preference": prediction_preference,
         "prediction_params": prediction_params,
         "app_id": app_id,
@@ -101,7 +109,7 @@ def get_project(project_id: str) -> Response[dict]:
 def update_project(
     project_id: str,
     description: str = None,
-    s3_url: str = None,
+    cluster_log_url: str = None,
     app_id: str = None,
     prediction_preference: Preference = None,
     prediction_params: dict = None,
@@ -112,8 +120,8 @@
     :type project_id: str
     :param description: description, defaults to None
     :type description: str, optional
-    :param s3_url: location of project event logs and configurations, defaults to None
-    :type s3_url: str, optional
+    :param cluster_log_url: location of project event logs and configurations, defaults to None
+    :type cluster_log_url: str, optional
     :param app_id: external identifier, defaults to None
     :type app_id: str, optional
     :param prediction_preference: default preference for predictions, defaults to None
@@ -126,8 +134,8 @@
     project_update = {}
     if description:
         project_update["description"] = description
-    if s3_url:
-        project_update["s3_url"] = s3_url
+    if cluster_log_url:
+        project_update["cluster_log_url"] = cluster_log_url
     if app_id:
         project_update["app_id"] = app_id
     if prediction_preference:
@@ -187,7 +195,7 @@ def delete_project(project_id: str) -> Response[str]:
 def create_project_submission(
     platform: Platform, cluster_report: dict, eventlog_url: str, project_id: str
 ) -> Response[str]:
-    """Create prediction
+    """Create a submission

     :param platform: platform, e.g. "aws-emr"
     :type platform: Platform
@@ -211,13 +219,17 @@
     else:
         return Response(error=SubmissionError(message="Unsupported event log URL scheme"))

+    payload = {
+        "product": platform,
+        "cluster_report": cluster_report,
+        "event_log_uri": eventlog_http_url,
+    }
+
+    logger.info(payload)
+
     response = get_default_client().create_project_submission(
         project_id,
-        {
-            "product": platform,
-            "cluster_report": cluster_report,
-            "event_log_uri": eventlog_http_url,
-        },
+        payload,
     )

     if response.get("error"):
@@ -226,14 +238,43 @@
     return Response(result=response["result"]["submission_id"])


+def _clear_cluster_report_errors(cluster_report_orig: dict) -> dict:
+    """Clears error messages from the cluster_events field.
+
+    This circumvents issues where certain strange characters in the error fields of Azure cluster
+    reports were causing the client to throw errors when trying to make submissions.
+
+    :param cluster_report_orig: cluster report
+    :type cluster_report_orig: dict
+    :return: cleared cluster report
+    :rtype: dict
+    """
+    cluster_report = cluster_report_orig.copy()
+
+    def clear_error(event: dict):
+        try:
+            del event["details"]["reason"]["parameters"]["azure_error_message"]
+        except KeyError:
+            pass
+        try:
+            del event["details"]["reason"]["parameters"]["databricks_error_message"]
+        except KeyError:
+            pass
+
+    try:
+        list(map(clear_error, cluster_report["cluster_events"]["events"]))
+    except KeyError:
+        pass
+    return cluster_report
+
+
 def create_project_submission_with_eventlog_bytes(
     platform: Platform,
     cluster_report: dict,
     eventlog_name: str,
     eventlog_bytes: bytes,
     project_id: str,
 ) -> Response[str]:
-    """Creates a prediction giving event log bytes instead of a URL
+    """Creates a submission given event log bytes instead of a URL

     :param platform: platform, e.g. "aws-emr"
     :type platform: Platform
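A quick sketch of what the new _clear_cluster_report_errors helper strips, using a minimal, hypothetical cluster report; only the two *_error_message keys are removed, and any other event fields survive:

from sync.api.projects import _clear_cluster_report_errors

report = {  # hypothetical, minimal cluster report
    "cluster_events": {
        "events": [
            {
                "details": {
                    "reason": {
                        "parameters": {
                            "azure_error_message": "garbled characters here",
                            "databricks_error_message": "more of the same",
                            "type": "kept",  # hypothetical extra field
                        }
                    }
                }
            }
        ]
    }
}

cleared = _clear_cluster_report_errors(report)
# cleared["cluster_events"]["events"][0]["details"]["reason"]["parameters"] == {"type": "kept"}

Note the helper takes a shallow copy, so the nested event dicts are shared with the original report.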
@@ -243,14 +284,15 @@ def create_project_submission_with_eventlog_bytes(
     :type eventlog_name: str
     :param eventlog_bytes: encoded event log
     :type eventlog_bytes: bytes
-    :param project_id: ID of project to which the prediction belongs, defaults to None
-    :type project_id: str, optional
+    :param project_id: ID of project to which the submission belongs
+    :type project_id: str
     :return: submission ID
     :rtype: Response[str]
     """
     # TODO - best way to handle "no eventlog"
+    cluster_report_clear = _clear_cluster_report_errors(cluster_report)
     response = get_default_client().create_project_submission(
-        project_id, {"product_code": platform, "cluster_report": cluster_report}
+        project_id, {"product_code": platform, "cluster_report": cluster_report_clear}
     )

     if response.get("error"):
@@ -269,3 +311,60 @@ def create_project_submission_with_eventlog_bytes(
         return Response(error=SubmissionError(message="Failed to upload event log"))

     return Response(result=response["result"]["submission_id"])
+
+
+def create_project_recommendation(project_id: str, **options) -> Response[str]:
+    """Creates a recommendation for the given project
+
+    :param project_id: ID of the project for which the recommendation is created
+    :type project_id: str
+    :return: recommendation ID
+    :rtype: Response[str]
+    """
+    response = get_default_client().create_project_recommendation(project_id, **options)
+
+    if response.get("error"):
+        return Response(**response)
+
+    return Response(result=response["result"]["id"])
+
+
+def wait_for_recommendation(project_id: str, recommendation_id: str) -> Response[dict]:
+    """Get a recommendation, waiting for it if it is not yet ready
+
+    :param project_id: project ID
+    :type project_id: str
+    :param recommendation_id: recommendation ID
+    :type recommendation_id: str
+    :return: recommendation object
+    :rtype: Response[dict]
+    """
+    response = get_project_recommendation(project_id, recommendation_id)
+    while response:
+        result = response.result
+        if result:
+            if result["state"] == "SUCCESS":
+                return Response(result=result)
+            if result["state"] == "FAILURE":
+                return Response(error=RecommendationError(message="Recommendation failed"))
+        logger.info("Waiting for recommendation")
+        sleep(10)
+        response = get_project_recommendation(project_id, recommendation_id)
+
+
+def get_project_recommendation(project_id: str, recommendation_id: str) -> Response[dict]:
+    """Get a specific recommendation for a project
+
+    :param project_id: project ID
+    :type project_id: str
+    :param recommendation_id: recommendation ID
+    :type recommendation_id: str
+    :return: recommendation object
+    :rtype: Response[dict]
+    """
+    response = get_default_client().get_project_recommendation(project_id, recommendation_id)
+
+    if response.get("error"):
+        return Response(**response)
+
+    return Response(result=response["result"])
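Taken together, the new functions support a create-then-poll recommendation workflow against a project whose logs live in DBFS. A hedged end-to-end sketch, assuming the import paths match the file locations; the project ID and log URL are hypothetical:

from sync.api.projects import (
    create_project_recommendation,
    update_project,
    wait_for_recommendation,
)

project_id = "my-project-id"  # hypothetical

# Point the project at a DBFS cluster log location (hypothetical URL)
update_project(project_id, cluster_log_url="dbfs:/cluster-logs")

# Kick off a recommendation, then block (polling every 10 seconds) until it resolves
rec_response = create_project_recommendation(project_id)
if rec_response.result:
    final = wait_for_recommendation(project_id, rec_response.result)
    if final.result:
        print(final.result["state"])  # "SUCCESS"
    else:
        print(final.error)  # the recommendation reached the FAILURE state
else:
    print(rec_response.error)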
