Add promtail chart to collect logs from cluster

This adds the promtail chart to the installation, which allows
collecting the logs that the applications in the cluster write to the
stdout of their containers.

This will only collect logs from pods in the same namespace as the
monitoring setup. A later change will also add logs from Gerrit
instances running in Kubernetes.

Change-Id: I86c5c5470eaa31191fb5ac339ee21dee85106097
Thomas Draebing 2020-05-13 16:08:04 +02:00
parent aab93a806b
commit de8fee4f68
7 changed files with 254 additions and 5 deletions
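
The "same namespace only" behaviour described in the commit message is implemented
by a keep rule in the promtail scrape configuration added with this change. A
condensed sketch of that rule (the full rule set is in the new promtail values file
below; the namespace is filled in via ytt data values):

- action: keep
  regex: #@ data.values.namespace
  source_labels:
    - namespace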

@@ -10,9 +10,10 @@ The setup is provided as a helm chart. It can be installed using Helm
The charts used in this setup are open-source charts and can be
found on GitHub:
- [Prometheus](https://github.com/helm/charts/tree/master/stable/prometheus)
- [Grafana](https://github.com/helm/charts/tree/master/stable/grafana)
- [Loki](https://github.com/grafana/loki/tree/master/production/helm/loki)
- [Prometheus](https://github.com/helm/charts/tree/master/stable/prometheus)
- [Promtail](https://github.com/grafana/loki/tree/master/production/helm/promtail)
This project just provides `values.yaml` files that are already configured to
work with the `metrics-reporter-prometheus` plugin of Gerrit to make the setup

@@ -101,7 +101,8 @@ downloadDashboards:
# podAnnotations: {}
## Pod Labels
# podLabels: {}
podLabels:
  app: grafana
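# Note: the promtail scrape configuration added in this change reads the pod's
# "app" label (__meta_kubernetes_pod_label_app) when building the "job" label
# of the collected log streams.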
podPortName: grafana

charts/promtail/VERSION Normal file

@@ -0,0 +1 @@
0.22.1

@@ -0,0 +1,244 @@
#@ load("@ytt:data", "data")
## Affinity for pod assignment
## ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity
affinity: {}
annotations: {}

# The update strategy to apply to the DaemonSet
##
deploymentStrategy: {}
#  rollingUpdate:
#    maxUnavailable: 1
#  type: RollingUpdate

initContainer:
  enabled: false
  fsInotifyMaxUserInstances: 128

image:
  repository: grafana/promtail
  tag: v1.3.0
  pullPolicy: IfNotPresent

livenessProbe: {}

loki:
  serviceName: #@ "loki-{}".format(data.values.namespace)
  servicePort: 3100
  serviceScheme: http
  user: #@ data.values.loki.username
  password: #@ data.values.loki.password

nameOverride: #@ "promtail-{}".format(data.values.namespace, data.values.namespace)

## Node labels for pod assignment
## ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/
nodeSelector: {}

pipelineStages:
- docker: {}

## Pod Labels
podLabels: {}

podAnnotations:
  prometheus.io/scrape: "true"
  prometheus.io/port: "http-metrics"

## Assign a PriorityClassName to pods if set
# priorityClassName:

rbac:
  create: true
  pspEnabled: true

readinessProbe:
  failureThreshold: 5
  httpGet:
    path: /ready
    port: http-metrics
  initialDelaySeconds: 10
  periodSeconds: 10
  successThreshold: 1
  timeoutSeconds: 1

resources:
  limits:
    cpu: 200m
    memory: 128Mi
  requests:
    cpu: 100m
    memory: 128Mi

# Custom scrape_configs to override the default ones in the configmap
scrapeConfigs:
- job_name: kubernetes-pods-monitoring
  pipeline_stages:
    - docker: {}
  kubernetes_sd_configs:
    - role: pod
  relabel_configs:
    - action: labeldrop
      regex: '__meta_kubernetes_pod_label_app_kubernetes_io_.*'
    - action: labeldrop
      regex: '__meta_kubernetes_pod_label_statefulset_kubernetes_io_.*'
    - action: labeldrop
      regex: '__meta_kubernetes_pod_label_controller_revision_hash'
    - action: labeldrop
      regex: '__meta_kubernetes_pod_label_pod_template_.*'
    - source_labels:
        - __meta_kubernetes_pod_label_name
      target_label: __service__
    - source_labels:
        - __meta_kubernetes_pod_node_name
      target_label: __host__
    - action: labelmap
      regex: __meta_kubernetes_pod_label_(.+)
    - action: replace
      replacement: $1
      separator: /
      source_labels:
        - __meta_kubernetes_namespace
        - __meta_kubernetes_pod_label_app
        - __service__
      target_label: job
    - action: replace
      source_labels:
        - __meta_kubernetes_namespace
      target_label: namespace
    - action: keep
      regex: #@ data.values.namespace
      source_labels:
        - namespace
    - action: replace
      source_labels:
        - release
      target_label: chart_release
    - action: replace
      source_labels:
        - __meta_kubernetes_pod_name
      target_label: instance
    - action: replace
      source_labels:
        - __meta_kubernetes_pod_container_name
      target_label: container_name
    - replacement: /var/log/pods/*$1/*.log
      separator: /
      source_labels:
        - __meta_kubernetes_pod_uid
        - __meta_kubernetes_pod_container_name
      target_label: __path__
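    # Note: the __path__ label set by the last rule tells promtail which files
    # on the node to tail; Kubernetes writes container stdout/stderr logs to
    # per-pod directories under /var/log/pods, which the chart mounts into the
    # promtail pods (see volumes/volumeMounts below).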

# Custom scrape_configs together with the default ones in the configmap
extraScrapeConfigs: []

securityContext:
  readOnlyRootFilesystem: true
  runAsGroup: 0
  runAsUser: 0

serviceAccount:
  create: true
  name: promtail

## Tolerations for pod assignment
## ref: https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/
tolerations:
- key: node-role.kubernetes.io/master
  operator: Exists
  effect: NoSchedule

# Extra volumes to scrape logs from
volumes:
- name: docker
  hostPath:
    path: /var/lib/docker/containers
- name: pods
  hostPath:
    path: /var/log/pods
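# Note: these host paths are where the container runtime and the kubelet place
# the captured stdout/stderr of the containers, which is what promtail tails.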

# Custom volumes together with the default ones
extraVolumes: []

volumeMounts:
- name: docker
  mountPath: /var/lib/docker/containers
  readOnly: true
- name: pods
  mountPath: /var/log/pods
  readOnly: true

# Custom volumeMounts together with the default ones
extraVolumeMounts: []

# Add extra Commandline args while starting up promtail.
# more info : https://github.com/grafana/loki/pull/1530
extraCommandlineArgs: []
# example:
# extraCommandlineArgs:
#   - -client.external-labels=hostname=$(HOSTNAME)

config:
  client:
    # Maximum wait period before sending batch
    batchwait: 1s
    # Maximum batch size to accrue before sending, unit is byte
    batchsize: 102400

    # Maximum time to wait for server to respond to a request
    timeout: 10s
    backoff_config:
      # Initial backoff time between retries
      minbackoff: 100ms
      # Maximum backoff time between retries
      maxbackoff: 5s
      # Maximum number of retries when sending batches, 0 means infinite retries
      maxretries: 20
    # The labels to add to any time series or alerts when communicating with loki
    external_labels: {}
  server:
    http_listen_port: 3101
  positions:
    filename: /run/promtail/positions.yaml
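    # The positions file records how far each log file has already been read,
    # so promtail can resume where it left off after a restart.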
  target_config:
    # Period to resync directories being watched and files being tailed
    sync_period: 10s

serviceMonitor:
  enabled: false
  interval: ""
  additionalLabels: {}
  # scrapeTimeout: 10s

# Extra env variables to pass to the promtail container
env: []

# enable and configure if using the syslog scrape config
syslogService:
  enabled: false
  type: ClusterIP
  port: 1514
  ## Specify the nodePort value for the LoadBalancer and NodePort service types.
  ## ref: https://kubernetes.io/docs/concepts/services-networking/service/#type-nodeport
  ##
  # nodePort:
  ## Provide any additional annotations which may be required. This can be used to
  ## set the LoadBalancer service type to internal only.
  ## ref: https://kubernetes.io/docs/concepts/services-networking/service/#internal-load-balancer
  ##
  annotations: {}
  labels: {}
  ## Use loadBalancerIP to request a specific static IP,
  ## otherwise leave blank
  ##
  loadBalancerIP:
  # loadBalancerSourceRanges: []
  ## Set the externalTrafficPolicy in the Service to either Cluster or Local
  # externalTrafficPolicy: Cluster
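
The #@ lines in this file are ytt directives; rendering the chart values requires
a ytt data-values document that provides the referenced keys. A minimal sketch of
such a file (key names taken from the template above, values are placeholders;
the project's actual configuration file may contain more settings):

#@data/values
---
# Placeholder values for illustration only
namespace: gerrit-monitoring
loki:
  username: admin
  password: secret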

@@ -16,4 +16,5 @@ HELM_CHARTS = {
    "grafana": "stable/grafana",
    "loki": "loki/loki",
    "prometheus": "stable/prometheus",
    "promtail": "loki/promtail",
}

@@ -26,6 +26,7 @@ from ._globals import HELM_CHARTS
TEMPLATES = [
    "charts/namespace.yaml",
    "charts/prometheus",
    "charts/promtail",
    "charts/loki",
    "charts/grafana",
    "promtail",
@@ -79,7 +80,7 @@ def _create_promtail_configs(config, output_dir):
    if not os.path.exists(os.path.join(output_dir, "promtail")):
        os.mkdir(os.path.join(output_dir, "promtail"))
    with open(os.path.join(output_dir, "promtail.yaml")) as f:
    with open(os.path.join(output_dir, "promtailLocalConfig.yaml")) as f:
        for promtail_config in yaml.load_all(f, Loader=yaml.SafeLoader):
            with open(
                os.path.join(
@@ -94,7 +95,7 @@
            ) as f:
                yaml.dump(promtail_config, f)
    os.remove(os.path.join(output_dir, "promtail.yaml"))
    os.remove(os.path.join(output_dir, "promtailLocalConfig.yaml"))
    if not config["tls"]["skipVerify"]:
        try:
@@ -235,7 +236,7 @@ def install(config_manager, output_dir, dryrun, update_repo):
    namespace = config_manager.get_config()["namespace"]
    _create_dashboard_configmaps(output_dir, namespace)
    if os.path.exists(os.path.join(output_dir, "promtail.yaml")):
    if os.path.exists(os.path.join(output_dir, "promtailLocalConfig.yaml")):
        _create_promtail_configs(config, output_dir)
    if not dryrun:
        _download_promtail(output_dir)