 64   64       "get_cluster_report",
 65   65       "get_all_cluster_events",
 66   66       "monitor_cluster",
      67  +    "monitor_once",
 67   68       "create_cluster",
 68   69       "get_cluster",
 69   70       "handle_successful_job_run",

 85   86       "apply_project_recommendation",
 86   87   ]
 87   88
 88        -
 89   89   logger = logging.getLogger(__name__)
 90   90
 91   91
@@ -268,7 +268,6 @@ def _create_cluster_report(
268  268
269  269
270  270   def _load_aws_cluster_info(cluster: dict) -> Tuple[Response[dict], Response[dict]]:
271       -
272  271       cluster_info = None
273  272       cluster_id = None
274  273       cluster_log_dest = _cluster_log_destination(cluster)

@@ -312,7 +311,6 @@ def _load_aws_cluster_info(cluster: dict) -> Tuple[Response[dict], Response[dict
312  311
313  312
314  313   def _get_aws_cluster_info(cluster: dict) -> Tuple[Response[dict], Response[dict], Response[dict]]:
315       -
316  314       aws_region_name = DB_CONFIG.aws_region_name
317  315
318  316       cluster_info, cluster_id = _load_aws_cluster_info(cluster)

@@ -394,7 +392,6 @@ def _monitor_cluster(
394  392       kill_on_termination: bool = False,
395  393       write_function=None,
396  394   ) -> None:
397       -
398  395       (log_url, filesystem, bucket, base_prefix) = cluster_log_destination
399  396       # If the event log destination is just a *bucket* without any sub-path, then we don't want to include
400  397       # a leading `/` in our Prefix (which will make it so that we never actually find the event log), so

@@ -458,6 +455,42 @@ def _monitor_cluster(
458  455           sleep(polling_period)
459  456
460  457
     458  +def monitor_once(cluster_id: str, in_progress_cluster={}):
     459  +    all_inst_by_id = in_progress_cluster.get("all_inst_by_id") or {}
     460  +    active_timelines_by_id = in_progress_cluster.get("active_timelines_by_id") or {}
     461  +    retired_timelines = in_progress_cluster.get("retired_timelines") or []
     462  +    recorded_volumes_by_id = in_progress_cluster.get("recorded_volumes_by_id") or {}
     463  +
     464  +    aws_region_name = DB_CONFIG.aws_region_name
     465  +    ec2 = boto.client("ec2", region_name=aws_region_name)
     466  +
     467  +    current_insts = _get_ec2_instances(cluster_id, ec2)
     468  +    recorded_volumes_by_id.update(
     469  +        {v["VolumeId"]: v for v in _get_ebs_volumes_for_instances(current_insts, ec2)}
     470  +    )
     471  +
     472  +    # Record new (or overwrite) existing instances.
     473  +    # Separately record the ids of those that are in the "running" state.
     474  +    running_inst_ids = set({})
     475  +    for inst in current_insts:
     476  +        all_inst_by_id[inst["InstanceId"]] = inst
     477  +        if inst["State"]["Name"] == "running":
     478  +            running_inst_ids.add(inst["InstanceId"])
     479  +
     480  +    active_timelines_by_id, new_retired_timelines = _update_monitored_timelines(
     481  +        running_inst_ids, active_timelines_by_id
     482  +    )
     483  +
     484  +    retired_timelines.extend(new_retired_timelines)
     485  +
     486  +    return {
     487  +        "all_inst_by_id": all_inst_by_id,
     488  +        "active_timelines_by_id": active_timelines_by_id,
     489  +        "retired_timelines": retired_timelines,
     490  +        "recorded_volumes_by_id": recorded_volumes_by_id,
     491  +    }
     492  +
     493  +
461  494   def _define_write_file(file_key, filesystem, bucket, write_function):
462  495       if filesystem == "lambda":
463  496
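The new `monitor_once` performs a single monitoring pass: it describes the cluster's current EC2 instances and EBS volumes, merges them into whatever state is passed in via `in_progress_cluster`, and returns the updated state dict. Below is a minimal sketch of how a caller might drive it in a loop; the `sync.awsdatabricks` import path, poll count, and sleep interval are illustrative assumptions, not part of this change.

```python
# Sketch only. The import path, poll count, and interval are assumptions;
# monitor_once and the keys of its returned state dict come from the diff above.
from time import sleep

from sync.awsdatabricks import monitor_once  # assumed module path


def poll_cluster(cluster_id: str, polls: int = 10, interval_seconds: int = 60) -> dict:
    state: dict = {}
    for _ in range(polls):
        # Each pass re-describes the cluster's EC2 instances and EBS volumes and
        # merges them into the state from earlier passes, so instances that have
        # since terminated stay in "all_inst_by_id" and their finished timelines
        # accumulate in "retired_timelines".
        state = monitor_once(cluster_id, state)
        sleep(interval_seconds)
    return state
```

The mutable default `in_progress_cluster={}` is not a correctness issue here: the argument is only read via `.get(...) or ...`, so the shared default dict is never mutated across calls.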
@@ -499,7 +532,6 @@ def write_file(body: bytes):
499  532
500  533
501  534   def _get_ec2_instances(cluster_id: str, ec2_client: "botocore.client.ec2") -> List[dict]:
502       -
503  535       filters = [
504  536           {"Name": "tag:Vendor", "Values": ["Databricks"]},
505  537           {"Name": "tag:ClusterId", "Values": [cluster_id]},
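The `tag:Vendor` / `tag:ClusterId` filters shown just above are how `_get_ec2_instances` identifies the Databricks cluster's nodes. As a rough sketch of that tag-based lookup in plain boto3 (the function name, pagination, and return shape below are assumptions and may not match the library's actual helper):

```python
import boto3


def describe_databricks_cluster_instances(cluster_id: str, region_name: str) -> list:
    """Sketch of the tag-based EC2 lookup implied by the filters in the diff."""
    ec2 = boto3.client("ec2", region_name=region_name)
    paginator = ec2.get_paginator("describe_instances")
    filters = [
        {"Name": "tag:Vendor", "Values": ["Databricks"]},
        {"Name": "tag:ClusterId", "Values": [cluster_id]},
    ]
    instances = []
    for page in paginator.paginate(Filters=filters):
        for reservation in page["Reservations"]:
            instances.extend(reservation["Instances"])
    return instances
```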