diff --git a/conf.d/docker_daemon.yaml b/conf.d/docker_daemon.yaml index 1c4d76da..8f57dd4a 100644 --- a/conf.d/docker_daemon.yaml +++ b/conf.d/docker_daemon.yaml @@ -20,6 +20,22 @@ init_config: # tls_cacert: /path/to/ca.pem # tls_verify: True + # Initialization retries + # + # If the agent is expected to start before Docker, + # use these settings to configure the retry policy. + # + # init_retry_interval defines how long (in seconds) the docker client + # will wait before retrying initialization. + # Defaults to 0. + # + # init_retry_interval: 20 + # + # init_retries configures how many retries are made before failing permanently. + # Defaults to 0. + # + # init_retries: 5 + instances: - ## Daemon and system configuration ## @@ -37,26 +53,62 @@ instances: # # collect_events: false + # By default, events whose status is one of ['top', 'exec_start', 'exec_create'] are not collected. + # Additional statuses to filter out can be listed here. + # The list of available statuses can be found at https://docs.docker.com/engine/reference/commandline/events/#object-types + # filtered_event_types: + # - 'top' + # - 'exec_start' + # - 'exec_create' + # Collect disk usage per container with docker.container.size_rw and # docker.container.size_rootfs metrics. # Warning: This might take time for Docker daemon to generate, # ensure that `docker ps -a -q` run fast before enabling it. # Defaults to false. # - # collect_container_size: false + # collect_container_size: true + + # Do you use custom cgroups for this particular instance? + # Note: enabling this option modifies the way in which we inspect the containers and causes + # some overhead - if you run a high volume of containers we may time out. + # + # custom_cgroups: false + + # Report docker container healthcheck events as service checks + # Note: enabling this option modifies the way in which we inspect the containers and causes + # some overhead - if you run a high volume of containers we may time out.
+ # Container healthchecks are available starting with Docker 1.12; enabling this option with older + # versions will result in an UNKNOWN state for the service check. + # + # You must whitelist the containers you wish to submit health service checks for. + # Use the same mechanism as the tagging system (see Tag:performance_tags section). + # Example: ["docker_image:tomcat", "container_name:web_front_nginx"] + # + # health_service_check_whitelist: [] + + # Collect the container count tagged by state (running, paused, exited, dead). + # Defaults to false. + # + # collect_container_count: true + + # Collect the volume count for attached and dangling volumes. + # Defaults to false. + # + # collect_volume_count: true # Collect images stats # Number of available active images and intermediate images as gauges. # Defaults to false. # - # collect_images_stats: false + # collect_images_stats: true # Collect disk usage per image with docker.image.size and docker.image.virtual_size metrics. # The check gets this size with the `docker images` command. # Requires collect_images_stats to be enabled. # Defaults to false. # - # collect_image_size: false + # collect_image_size: true # Collect disk metrics (total, used, free) through the docker info command for data and metadata. # This is useful when these values can't be obtained by the disk check. @@ -68,9 +120,18 @@ instances: # # collect_disk_stats: true + # Collect container exit codes and send a critical service check when the exit code is not 0. + # Defaults to false. + # + # collect_exit_codes: true + # Exclude containers based on their tags - # An excluded container will be completely ignored. The rule is a regex on the tags. + # An excluded container will not get any individual container metric reported for it. + # However, it will still appear in the container count, since ignoring it here would give + # a wrong impression about the docker daemon load. + # + # The rule is a regex on the tags. # # How it works: exclude first.
# If a tag matches an exclude rule, it won't be included unless it also matches an include rule. @@ -79,11 +140,14 @@ instances: # exclude: [".*"] # include: ["docker_image:ubuntu", "docker_image:debian"] # - # include all, except ubuntu and debian. - # exclude: ["docker_image:ubuntu", "docker_image:debian"] + # include all, except ubuntu and Kubernetes pause containers. + # exclude: ["docker_image:ubuntu", "image_name:gcr.io/google_containers/pause.*", "image_name:openshift/origin-pod"] # include: [] # - # Default: include all containers + # Default: include all containers except for Kubernetes pause containers. + # Warning: pause container exclusion works only if you deploy the agent the recommended way (in a pod). + # To customize this default behavior, override exclude. + # If you do so, the default exclusion patterns no longer apply and must be added explicitly. @@ -113,7 +177,7 @@ instances: # - container_command: Command ran by the container (example: "echo 1") # - container_id: Id of the container # - # performance_tags: ["container_name", image_name", "image_tag", "docker_image"] + # performance_tags: ["container_name", "image_name", "image_tag", "docker_image"] # Tags for containers count metrics. # Available: ["image_name", "image_tag", "docker_image", "container_command"] @@ -124,3 +188,16 @@ instances: # Default to None # Example: # collect_labels_as_tags: ["com.docker.compose.service", "com.docker.compose.project"] + # List of Docker event attributes to add as tags to the Datadog events. + # Defaults to None. + # + # event_attributes_as_tags: ["signal"] + + ## Rate Filtering + ## + + # Allows ad-hoc spike filtering if the system reports incorrect metrics. + # This will drop points if the computed rate is higher than the cap value. + # capped_metrics: + # docker.cpu.user: 1000 + # docker.cpu.system: 1000