DataDog · hkaj · Oct 5, 2017
@@ -20,6 +20,22 @@ init_config:
   # tls_cacert: /path/to/ca.pem
   # tls_verify: True
 
+  # Initialization retries
+  #
+  # If the agent is expected to start before Docker,
+  # use these settings to configure the retry policy.
+  #
+  # init_retry_interval defines how long (in seconds) the docker client
+  # will wait before retrying initialization.
+  # Defaults to 0.
+  #
+  # init_retry_interval: 20
+  #
+  # init_retries configures how many retries are made before failing permanently.
+  # Defaults to 0.
+  #
+  # init_retries: 5
+
 instances:
   - ## Daemon and system configuration
     ##
@@ -37,26 +53,62 @@ instances:
     #
     # collect_events: false
 
+    # By default, events with the status 'top', 'exec_start' or 'exec_create' are not collected.
+    # Additional statuses to filter out can be listed here.
+    # The list of available statuses can be found at https://docs.docker.com/engine/reference/commandline/events/#object-types
+    # filtered_event_types:
+    #    - 'top'
+    #    - 'exec_start'
+    #    - 'exec_create'
+
     # Collect disk usage per container with docker.container.size_rw and
     # docker.container.size_rootfs metrics.
     # Warning: this might take time for the Docker daemon to generate;
     # ensure that `docker ps -a -q` runs fast before enabling it.
     # Defaults to false.
     #
-    # collect_container_size: false
+    # collect_container_size: true
+
+    # Do you use custom cgroups for this particular instance?
+    # Note: enabling this option modifies the way in which we inspect the containers and causes
+    #       some overhead - if you run a high volume of containers we may timeout.
+    #
+    # custom_cgroups: false
+
+    # Report docker container healthcheck events as service checks
+    # Note: enabling this option modifies the way in which we inspect the containers and causes
+    #       some overhead - if you run a high volume of containers we may timeout.
+    #       Container Healthchecks are available starting with docker 1.12, enabling with older
+    #       versions will result in an UNKNOWN state for the service check.
+    #
+    # You must whitelist the containers you wish to submit health service checks for.
+    # Use the same mechanism as the tagging system (see Tag:performance_tags section).
+    # Example: ["docker_image:tomcat", "container_name:web_front_nginx"]
+    #
+    # health_service_check_whitelist: []
+
+    # Collect the container count tagged by state (running, paused, exited, dead)
+    # Defaults to false.
+    #
+    # collect_container_count: true
+
+    # Collect the volume count for attached and dangling volumes.
+    # Defaults to false.
+    #
+    # collect_volume_count: true
 
     # Collect images stats
     # Number of available active images and intermediate images as gauges.
     # Defaults to false.
     #
-    # collect_images_stats: false
+    # collect_images_stats: true
 
     # Collect disk usage per image with docker.image.size and docker.image.virtual_size metrics.
     # The check gets this size with the `docker images` command.
     # Requires collect_images_stats to be enabled.
     # Defaults to false.
     #
-    # collect_image_size: false
+    # collect_image_size: true
 
     # Collect disk metrics (total, used, free) through the docker info command for data and metadata.
     # This is useful when these values can't be obtained by the disk check.
@@ -68,9 +120,18 @@ instances:
     #
     # collect_disk_stats: true
 
+    # Collect container exit codes and send a critical service check when the exit code is not 0.
+    # Defaults to false.
+    #
+    # collect_exit_codes: true
+
 
     # Exclude containers based on their tags
-    # An excluded container will be completely ignored. The rule is a regex on the tags.
+    # An excluded container will not get any individual container metric reported for it.
+    # However it will still appear in the container count since ignoring it here would give
+    # a wrong impression about the docker daemon load.
+    #
+    # The rule is a regex on the tags.
     #
     # How it works: exclude first.
     # If a tag matches an exclude rule, it won't be included unless it also matches an include rule.
@@ -79,11 +140,14 @@ instances:
     # exclude: [".*"]
     # include: ["docker_image:ubuntu", "docker_image:debian"]
     #
-    # include all, except ubuntu and debian.
-    # exclude: ["docker_image:ubuntu", "docker_image:debian"]
+    # include all, except ubuntu and Kubernetes pause containers.
+    # exclude: ["docker_image:ubuntu", "image_name:gcr.io/google_containers/pause.*", "image_name:openshift/origin-pod"]
     # include: []
     #
-    # Default: include all containers
+    # Default: include all containers except for Kubernetes pause containers.
+    # Warning: pause containers exclusion works only if you deploy the agent the recommended way (in a pod).
+    # To customize this default behavior, override exclude.
+    # If you do so, default exclusion patterns won't apply anymore and will need to be added explicitly.
 
 
 
@@ -113,7 +177,7 @@ instances:
     #   - container_command: Command run by the container (example: "echo 1")
     #   - container_id: Id of the container
     #
-    # performance_tags: ["container_name", image_name", "image_tag", "docker_image"]
+    # performance_tags: ["container_name", "image_name", "image_tag", "docker_image"]
 
     # Tags for containers count metrics.
     # Available: ["image_name", "image_tag", "docker_image", "container_command"]
@@ -124,3 +188,16 @@ instances:
     # Defaults to None.
     # Example:
     # collect_labels_as_tags: ["com.docker.compose.service", "com.docker.compose.project"]
+    # List of docker event attributes to add as tags of the datadog events
+    # Defaults to None.
+    #
+    # event_attributes_as_tags: ["signal"]
+
+    ## Rate Filtering
+    ##
+
+    # Allows ad-hoc spike filtering if the system reports incorrect metrics.
+    # This will drop points if the computed rate is higher than the cap value:
+    # capped_metrics:
+    #   docker.cpu.user: 1000
+    #   docker.cpu.system: 1000