diff --git a/README.md b/README.md index ff7f45e..bd93f0f 100644 --- a/README.md +++ b/README.md @@ -10,9 +10,10 @@ The setup is provided as a helm chart. It can be installed using Helm The charts used in this setup are the chart provided in the open source and can be found on GitHub: -- [Prometheus](https://github.com/helm/charts/tree/master/stable/prometheus) - [Grafana](https://github.com/helm/charts/tree/master/stable/grafana) - [Loki](https://github.com/grafana/loki/tree/master/production/helm/loki) +- [Prometheus](https://github.com/helm/charts/tree/master/stable/prometheus) +- [Promtail](https://github.com/grafana/loki/tree/master/production/helm/promtail) This project just provides `values.yaml`-files that are already configured to work with the `metrics-reporter-prometheus`-plugin of Gerrit to make the setup @@ -76,43 +77,43 @@ setup, some configuration is highly dependent on the specific installation. These options have to be configured in the `./config.yaml` before installing and are listed here: -| option | description | -|-----------------------------------------|----------------------------------------------------------------------------------------| -| `gerritServers` | List of Gerrit servers to scrape. For details refer to section [below](#gerritServers) | -| `namespace` | The namespace the charts are installed to | -| `tls.skipVerify` | Whether to skip TLS certificate verification | -| `tls.caCert` | CA certificate used for TLS certificate verification | -| `prometheus.server.host` | Prometheus server ingress hostname | -| `prometheus.server.username` | Username for Prometheus | -| `prometheus.server.password` | Password for Prometheus | -| `prometheus.server.tls.cert` | TLS certificate | -| `prometheus.server.tls.key` | TLS key | -| `prometheus.alertmanager.slack.apiUrl` | API URL of the Slack Webhook | -| `prometheus.alertmanager.slack.channel` | Channel to which the alerts should be posted | -| `loki.host` | Loki ingress hostname | -| `loki.username` | Username for Loki | -| `loki.password` | Password for Loki | -| `loki.s3.protocol` | Protocol used for communicating with S3 | -| `loki.s3.host` | Hostname of the S3 object store | -| `loki.s3.accessToken` | The EC2 accessToken used for authentication with S3 | -| `loki.s3.secret` | The secret associated with the accessToken | -| `loki.s3.bucket` | The name of the S3 bucket | -| `loki.s3.region` | The region in which the S3 bucket is hosted | -| `loki.tls.cert` | TLS certificate | -| `loki.tls.key` | TLS key | -| `grafana.host` | Grafana ingress hostname | -| `grafana.tls.cert` | TLS certificate | -| `grafana.tls.key` | TLS key | -| `grafana.admin.username` | Username for the admin user | -| `grafana.admin.password` | Password for the admin user | -| `grafana.ldap.enabled` | Whether to enable LDAP | -| `grafana.ldap.host` | Hostname of LDAP server | -| `grafana.ldap.port` | Port of LDAP server (Has to be `quoted`!) | -| `grafana.ldap.password` | Password of LDAP server | -| `grafana.ldap.bind_dn` | Bind DN (username) of the LDAP server | -| `grafana.ldap.accountBases` | List of base DNs to discover accounts (Has to have the format `"['a', 'b']"`) | -| `grafana.ldap.groupBases` | List of base DNs to discover groups (Has to have the format `"['a', 'b']"`) | -| `grafana.dashboards.editable` | Whether dashboards can be edited manually in the UI | +| option | description | +|----------------------------------------------------|----------------------------------------------------------------------------------------| +| `gerritServers` | List of Gerrit servers to scrape. For details refer to section [below](#gerritServers) | +| `namespace` | The namespace the charts are installed to | +| `tls.skipVerify` | Whether to skip TLS certificate verification | +| `tls.caCert` | CA certificate used for TLS certificate verification | +| `monitoring.prometheus.server.host` | Prometheus server ingress hostname | +| `monitoring.prometheus.server.username` | Username for Prometheus | +| `monitoring.prometheus.server.password` | Password for Prometheus | +| `monitoring.prometheus.server.tls.cert` | TLS certificate | +| `monitoring.prometheus.server.tls.key` | TLS key | +| `monitoring.prometheus.alertmanager.slack.apiUrl` | API URL of the Slack Webhook | +| `monitoring.prometheus.alertmanager.slack.channel` | Channel to which the alerts should be posted | +| `monitoring.grafana.host` | Grafana ingress hostname | +| `monitoring.grafana.tls.cert` | TLS certificate | +| `monitoring.grafana.tls.key` | TLS key | +| `monitoring.grafana.admin.username` | Username for the admin user | +| `monitoring.grafana.admin.password` | Password for the admin user | +| `monitoring.grafana.ldap.enabled` | Whether to enable LDAP | +| `monitoring.grafana.ldap.host` | Hostname of LDAP server | +| `monitoring.grafana.ldap.port` | Port of LDAP server (Has to be `quoted`!) | +| `monitoring.grafana.ldap.password` | Password of LDAP server | +| `monitoring.grafana.ldap.bind_dn` | Bind DN (username) of the LDAP server | +| `monitoring.grafana.ldap.accountBases` | List of base DNs to discover accounts (Has to have the format `"['a', 'b']"`) | +| `monitoring.grafana.ldap.groupBases` | List of base DNs to discover groups (Has to have the format `"['a', 'b']"`) | +| `monitoring.grafana.dashboards.editable` | Whether dashboards can be edited manually in the UI | +| `logging.loki.host` | Loki ingress hostname | +| `logging.loki.username` | Username for Loki | +| `logging.loki.password` | Password for Loki | +| `logging.loki.s3.protocol` | Protocol used for communicating with S3 | +| `logging.loki.s3.host` | Hostname of the S3 object store | +| `logging.loki.s3.accessToken` | The EC2 accessToken used for authentication with S3 | +| `logging.loki.s3.secret` | The secret associated with the accessToken | +| `logging.loki.s3.bucket` | The name of the S3 bucket | +| `logging.loki.s3.region` | The region in which the S3 bucket is hosted | +| `logging.loki.tls.cert` | TLS certificate | +| `logging.loki.tls.key` | TLS key | ### `gerritServers` diff --git a/cfgmgr/abstract.py b/cfgmgr/abstract.py index 4e5cf61..1b1a3b8 100644 --- a/cfgmgr/abstract.py +++ b/cfgmgr/abstract.py @@ -26,8 +26,8 @@ class AbstractConfigManager(abc.ABC): self.config_path = config_path self.requires_htpasswd = [ - ["loki"], - ["prometheus", "server"], + ["logging", "loki"], + ["monitoring", "prometheus", "server"], ] def get_config(self): diff --git a/charts/grafana/configuration/grafana.ca.secret.yaml b/charts/grafana/configuration/grafana.ca.secret.yaml index 9853ce7..12e86f1 100644 --- a/charts/grafana/configuration/grafana.ca.secret.yaml +++ b/charts/grafana/configuration/grafana.ca.secret.yaml @@ -1,6 +1,6 @@ #@ load("@ytt:data", "data") #@ load("@ytt:base64", "base64") -#@ if data.values.grafana.ldap.enabled and not data.values.tls.skipVerify: +#@ if data.values.monitoring.grafana.ldap.enabled and not data.values.tls.skipVerify: apiVersion: v1 kind: Secret metadata: diff --git a/charts/grafana/configuration/grafana.secret.yaml b/charts/grafana/configuration/grafana.secret.yaml index a6b0338..d74a582 100644 --- a/charts/grafana/configuration/grafana.secret.yaml +++ b/charts/grafana/configuration/grafana.secret.yaml @@ -7,9 +7,9 @@ metadata: name: grafana-credentials namespace: #@ data.values.namespace data: - admin-user: #@ base64.encode(data.values.grafana.admin.username) - admin-password: #@ base64.encode(data.values.grafana.admin.password) - #@ if data.values.grafana.ldap.enabled: + admin-user: #@ base64.encode(data.values.monitoring.grafana.admin.username) + admin-password: #@ base64.encode(data.values.monitoring.grafana.admin.password) + #@ if data.values.monitoring.grafana.ldap.enabled: ldap-toml: #@ base64.encode(format_ldap_toml()) #@ end type: Opaque diff --git a/charts/grafana/configuration/grafana.tls.secret.yaml b/charts/grafana/configuration/grafana.tls.secret.yaml index d32d491..7a21443 100644 --- a/charts/grafana/configuration/grafana.tls.secret.yaml +++ b/charts/grafana/configuration/grafana.tls.secret.yaml @@ -7,5 +7,5 @@ metadata: namespace: #@ data.values.namespace type: kubernetes.io/tls data: - tls.crt: #@ base64.encode(data.values.grafana.tls.cert) - tls.key: #@ base64.encode(data.values.grafana.tls.key) + tls.crt: #@ base64.encode(data.values.monitoring.grafana.tls.cert) + tls.key: #@ base64.encode(data.values.monitoring.grafana.tls.key) diff --git a/charts/grafana/configuration/ldap.lib.txt b/charts/grafana/configuration/ldap.lib.txt index 67a3450..66063ea 100644 --- a/charts/grafana/configuration/ldap.lib.txt +++ b/charts/grafana/configuration/ldap.lib.txt @@ -2,18 +2,18 @@ (@ def format_ldap_toml(): -@) [[servers]] -host = "(@= data.values.grafana.ldap.host @)" -port = (@= data.values.grafana.ldap.port @) +host = "(@= data.values.monitoring.grafana.ldap.host @)" +port = (@= data.values.monitoring.grafana.ldap.port @) use_ssl = true start_tls = false ssl_skip_verify = (@= "{}".format(data.values.tls.skipVerify).lower() @) root_ca_cert = "/etc/secrets/server.ca.crt" -bind_dn = "(@= data.values.grafana.ldap.bind_dn @)" -bind_password = "(@= data.values.grafana.ldap.password @)" +bind_dn = "(@= data.values.monitoring.grafana.ldap.bind_dn @)" +bind_password = "(@= data.values.monitoring.grafana.ldap.password @)" search_filter = "(cn=%s)" -search_base_dns = (@= data.values.grafana.ldap.accountBases @) +search_base_dns = (@= data.values.monitoring.grafana.ldap.accountBases @) group_search_filter = "(cn=%s)" -group_search_base_dns = (@= data.values.grafana.ldap.groupBases @) +group_search_base_dns = (@= data.values.monitoring.grafana.ldap.groupBases @) [[servers.group_mappings]] group_dn = "*" diff --git a/charts/grafana/grafana.yaml b/charts/grafana/grafana.yaml index 64b54de..8913de9 100644 --- a/charts/grafana/grafana.yaml +++ b/charts/grafana/grafana.yaml @@ -101,7 +101,8 @@ downloadDashboards: # podAnnotations: {} ## Pod Labels -# podLabels: {} +podLabels: + app: grafana podPortName: grafana @@ -129,7 +130,7 @@ ingress: labels: {} path: / hosts: - - #@ data.values.grafana.host + - #@ data.values.monitoring.grafana.host ## Extra paths to prepend to every host configuration. This is useful when working with annotation based services. extraPaths: [] # - path: /* @@ -139,7 +140,7 @@ ingress: tls: - secretName: grafana-server-tls hosts: - - #@ data.values.grafana.host + - #@ data.values.monitoring.grafana.host resources: limits: @@ -270,7 +271,7 @@ envRenderSecret: {} ## Additional grafana server secret mounts # Defines additional mounts with secrets. Secrets must be manually created in the namespace. extraSecretMounts: -#@ if data.values.grafana.ldap.enabled and not data.values.tls.skipVerify: +#@ if data.values.monitoring.grafana.ldap.enabled and not data.values.tls.skipVerify: - name: tls-ca mountPath: /etc/secrets secretName: grafana-ca @@ -395,7 +396,7 @@ grafana.ini: ## LDAP Authentication can be enabled with the following values on grafana.ini ## NOTE: Grafana will fail to start if the value for ldap.toml is invalid auth.ldap: - enabled: #@ data.values.grafana.ldap.enabled + enabled: #@ data.values.monitoring.grafana.ldap.enabled allow_sign_up: true config_file: /etc/grafana/ldap.toml @@ -405,7 +406,7 @@ grafana.ini: ## ref: http://docs.grafana.org/installation/configuration/#auth-ldap ## ref: http://docs.grafana.org/installation/ldap/#configuration ldap: - enabled: #@ data.values.grafana.ldap.enabled + enabled: #@ data.values.monitoring.grafana.ldap.enabled # `existingSecret` is a reference to an existing secret containing the ldap configuration # for Grafana in a key `ldap-toml`. existingSecret: "grafana-credentials" @@ -474,7 +475,7 @@ sidecar: # disableDelete to activate a import-only behaviour disableDelete: true # allow updating provisioned dashboards from the UI - allowUiUpdates: #@ data.values.grafana.dashboards.editable + allowUiUpdates: #@ data.values.monitoring.grafana.dashboards.editable datasources: enabled: false ## Method to use to detect ConfigMap changes. With WATCH the sidecar will do a WATCH requests, with SLEEP it will list all ConfigMaps, then sleep for 60 seconds. diff --git a/charts/loki/configuration/loki.basic-auth.secret.yaml b/charts/loki/configuration/loki.basic-auth.secret.yaml index 8bc0b5f..33ecbd8 100644 --- a/charts/loki/configuration/loki.basic-auth.secret.yaml +++ b/charts/loki/configuration/loki.basic-auth.secret.yaml @@ -6,5 +6,5 @@ metadata: name: loki-basic-auth namespace: #@ data.values.namespace data: - auth: #@ base64.encode(data.values.loki.htpasswd) + auth: #@ base64.encode(data.values.logging.loki.htpasswd) type: Opaque diff --git a/charts/loki/configuration/loki.tls.secret.yaml b/charts/loki/configuration/loki.tls.secret.yaml index b1a0db8..3a789ca 100644 --- a/charts/loki/configuration/loki.tls.secret.yaml +++ b/charts/loki/configuration/loki.tls.secret.yaml @@ -7,5 +7,5 @@ metadata: namespace: #@ data.values.namespace type: kubernetes.io/tls data: - tls.crt: #@ base64.encode(data.values.loki.tls.cert) - tls.key: #@ base64.encode(data.values.loki.tls.key) + tls.crt: #@ base64.encode(data.values.logging.loki.tls.cert) + tls.key: #@ base64.encode(data.values.logging.loki.tls.key) diff --git a/charts/loki/loki.yaml b/charts/loki/loki.yaml index f3f63c5..d91343e 100644 --- a/charts/loki/loki.yaml +++ b/charts/loki/loki.yaml @@ -14,13 +14,13 @@ ingress: nginx.ingress.kubernetes.io/auth-realm: 'Authentication Required' # kubernetes.io/tls-acme: "true" hosts: - - host: #@ data.values.loki.host + - host: #@ data.values.logging.loki.host paths: - / tls: - secretName: loki-server-tls hosts: - - #@ data.values.loki.host + - #@ data.values.logging.loki.host ## Affinity for pod assignment ## ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity @@ -87,7 +87,7 @@ config: boltdb: directory: /data/loki/index aws: - s3: #@ "{}://{}:{}@{}/{}".format(data.values.loki.s3.protocol, data.values.loki.s3.accessToken, data.values.loki.s3.secret, data.values.loki.s3.host, data.values.loki.s3.bucket) + s3: #@ "{}://{}:{}@{}/{}".format(data.values.logging.loki.s3.protocol, data.values.logging.loki.s3.accessToken, data.values.logging.loki.s3.secret, data.values.logging.loki.s3.host, data.values.logging.loki.s3.bucket) s3forcepathstyle: true chunk_store_config: max_look_back_period: 0 @@ -243,4 +243,4 @@ extraPorts: [] # Extra env variables to pass to the loki container env: - name: AWS_REGION - value: #@ data.values.loki.s3.region + value: #@ data.values.logging.loki.s3.region diff --git a/charts/prometheus/configuration/prometheus.basic-auth.secret.yaml b/charts/prometheus/configuration/prometheus.basic-auth.secret.yaml index 9c213df..6969fa7 100644 --- a/charts/prometheus/configuration/prometheus.basic-auth.secret.yaml +++ b/charts/prometheus/configuration/prometheus.basic-auth.secret.yaml @@ -6,5 +6,5 @@ metadata: name: prometheus-basic-auth namespace: #@ data.values.namespace data: - auth: #@ base64.encode(data.values.prometheus.server.htpasswd) + auth: #@ base64.encode(data.values.monitoring.prometheus.server.htpasswd) type: Opaque diff --git a/charts/prometheus/configuration/prometheus.secret.yaml b/charts/prometheus/configuration/prometheus.secret.yaml index 07757e9..18ec751 100644 --- a/charts/prometheus/configuration/prometheus.secret.yaml +++ b/charts/prometheus/configuration/prometheus.secret.yaml @@ -17,7 +17,7 @@ data: #@ if not data.values.tls.skipVerify: server.ca.crt: #@ base64.encode(data.values.tls.caCert) - server.crt: #@ base64.encode(data.values.prometheus.server.tls.cert) - server.key: #@ base64.encode(data.values.prometheus.server.tls.key) + server.crt: #@ base64.encode(data.values.monitoring.prometheus.server.tls.cert) + server.key: #@ base64.encode(data.values.monitoring.prometheus.server.tls.key) #@ end type: Opaque diff --git a/charts/prometheus/configuration/prometheus.tls.secret.yaml b/charts/prometheus/configuration/prometheus.tls.secret.yaml index c9a834a..31590fd 100644 --- a/charts/prometheus/configuration/prometheus.tls.secret.yaml +++ b/charts/prometheus/configuration/prometheus.tls.secret.yaml @@ -7,5 +7,5 @@ metadata: namespace: #@ data.values.namespace type: kubernetes.io/tls data: - tls.crt: #@ base64.encode(data.values.prometheus.server.tls.cert) - tls.key: #@ base64.encode(data.values.prometheus.server.tls.key) + tls.crt: #@ base64.encode(data.values.monitoring.prometheus.server.tls.cert) + tls.key: #@ base64.encode(data.values.monitoring.prometheus.server.tls.key) diff --git a/charts/prometheus/prometheus.yaml b/charts/prometheus/prometheus.yaml index 3623c4a..6aa5823 100644 --- a/charts/prometheus/prometheus.yaml +++ b/charts/prometheus/prometheus.yaml @@ -675,7 +675,7 @@ server: ## Must be provided if Ingress is enabled ## hosts: - - #@ data.values.prometheus.server.host + - #@ data.values.monitoring.prometheus.server.host # - prometheus.domain.com # - domain.com/prometheus @@ -692,7 +692,7 @@ server: tls: - secretName: prometheus-server-tls hosts: - - #@ data.values.prometheus.server.host + - #@ data.values.monitoring.prometheus.server.host ## Server Deployment Strategy type # strategy: @@ -1055,12 +1055,12 @@ pushgateway: alertmanagerFiles: alertmanager.yml: global: - slack_api_url: #@ data.values.prometheus.alertmanager.slack.apiUrl + slack_api_url: #@ data.values.monitoring.prometheus.alertmanager.slack.apiUrl receivers: - name: gerrit-admin slack_configs: - - channel: #@ data.values.prometheus.alertmanager.slack.channel + - channel: #@ data.values.monitoring.prometheus.alertmanager.slack.channel send_resolved: true title: "{{ range .Alerts }}{{ .Annotations.summary }}\n{{ end }}" text: "{{ range .Alerts }}{{ .Annotations.description }}\n{{ end }}" diff --git a/charts/promtail/VERSION b/charts/promtail/VERSION new file mode 100644 index 0000000..a723ece --- /dev/null +++ b/charts/promtail/VERSION @@ -0,0 +1 @@ +0.22.1 diff --git a/charts/promtail/promtail.yaml b/charts/promtail/promtail.yaml new file mode 100644 index 0000000..8021291 --- /dev/null +++ b/charts/promtail/promtail.yaml @@ -0,0 +1,300 @@ +#@ load("@ytt:data", "data") + +## Affinity for pod assignment +## ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity +affinity: {} + +annotations: {} + +# The update strategy to apply to the DaemonSet +## +deploymentStrategy: {} +# rollingUpdate: +# maxUnavailable: 1 +# type: RollingUpdate + +initContainer: + enabled: false + fsInotifyMaxUserInstances: 128 + +image: + repository: grafana/promtail + tag: v1.3.0 + pullPolicy: IfNotPresent + +livenessProbe: {} + +loki: + serviceName: #@ "loki-{}".format(data.values.namespace) + servicePort: 3100 + serviceScheme: http + user: #@ data.values.logging.loki.username + password: #@ data.values.logging.loki.password + +nameOverride: #@ "promtail-{}".format(data.values.namespace, data.values.namespace) + +## Node labels for pod assignment +## ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/ +nodeSelector: {} + +pipelineStages: +- docker: {} + +## Pod Labels +podLabels: {} + +podAnnotations: + prometheus.io/scrape: "true" + prometheus.io/port: "http-metrics" + +## Assign a PriorityClassName to pods if set +# priorityClassName: + +rbac: + create: true + pspEnabled: true + +readinessProbe: + failureThreshold: 5 + httpGet: + path: /ready + port: http-metrics + initialDelaySeconds: 10 + periodSeconds: 10 + successThreshold: 1 + timeoutSeconds: 1 + +resources: + limits: + cpu: 200m + memory: 128Mi + requests: + cpu: 100m + memory: 128Mi + +# Custom scrape_configs to override the default ones in the configmap +scrapeConfigs: + - job_name: kubernetes-pods-monitoring + pipeline_stages: + - docker: {} + kubernetes_sd_configs: + - role: pod + relabel_configs: + - action: labeldrop + regex: '__meta_kubernetes_pod_label_app_kubernetes_io_.*' + - action: labeldrop + regex: '__meta_kubernetes_pod_label_statefulset_kubernetes_io_.*' + - action: labeldrop + regex: '__meta_kubernetes_pod_label_controller_revision_hash' + - action: labeldrop + regex: '__meta_kubernetes_pod_label_pod_template_.*' + - source_labels: + - __meta_kubernetes_pod_label_name + target_label: __service__ + - source_labels: + - __meta_kubernetes_pod_node_name + target_label: __host__ + - action: labelmap + regex: __meta_kubernetes_pod_label_(.+) + - action: replace + replacement: $1 + separator: / + source_labels: + - __meta_kubernetes_namespace + - __meta_kubernetes_pod_label_app + - __service__ + target_label: job + - action: replace + source_labels: + - __meta_kubernetes_namespace + target_label: namespace + - action: keep + regex: #@ data.values.namespace + source_labels: + - namespace + - action: replace + source_labels: + - release + target_label: chart_release + - action: replace + source_labels: + - __meta_kubernetes_pod_name + target_label: instance + - action: replace + source_labels: + - __meta_kubernetes_pod_container_name + target_label: container_name + - replacement: /var/log/pods/*$1/*.log + separator: / + source_labels: + - __meta_kubernetes_pod_uid + - __meta_kubernetes_pod_container_name + target_label: __path__ + #@ for gerrit in data.values.gerritServers.kubernetes: + - job_name: #@ "kubernetes-pods-gerrit-{}".format(gerrit.namespace) + pipeline_stages: + - docker: {} + kubernetes_sd_configs: + - role: pod + relabel_configs: + - action: labeldrop + regex: '__meta_kubernetes_pod_label_pod_template_.*' + - source_labels: + - __meta_kubernetes_pod_label_name + target_label: __service__ + - source_labels: + - __meta_kubernetes_pod_node_name + target_label: __host__ + - action: labelmap + regex: __meta_kubernetes_pod_label_(.+) + - action: replace + replacement: $1 + separator: / + source_labels: + - __meta_kubernetes_namespace + - __meta_kubernetes_pod_label_app + - __service__ + target_label: job + - action: replace + source_labels: + - __meta_kubernetes_namespace + target_label: namespace + - action: keep + regex: #@ gerrit.namespace + source_labels: + - namespace + - source_labels: + - #@ "__meta_kubernetes_pod_label_{}".format(gerrit.label.name) + regex: #@ gerrit.label.value + action: keep + - action: replace + source_labels: + - release + target_label: chart_release + - action: replace + source_labels: + - __meta_kubernetes_pod_name + target_label: instance + - action: replace + source_labels: + - __meta_kubernetes_pod_container_name + target_label: container_name + - replacement: /var/log/pods/*$1/*.log + separator: / + source_labels: + - __meta_kubernetes_pod_uid + - __meta_kubernetes_pod_container_name + target_label: __path__ + #@ end + +# Custom scrape_configs together with the default ones in the configmap +extraScrapeConfigs: [] + +securityContext: + readOnlyRootFilesystem: true + runAsGroup: 0 + runAsUser: 0 + +serviceAccount: + create: true + name: promtail + +## Tolerations for pod assignment +## ref: https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/ +tolerations: +- key: node-role.kubernetes.io/master + operator: Exists + effect: NoSchedule + +# Extra volumes to scrape logs from +volumes: +- name: docker + hostPath: + path: /var/lib/docker/containers +- name: pods + hostPath: + path: /var/log/pods + +# Custom volumes together with the default ones +extraVolumes: [] + +volumeMounts: +- name: docker + mountPath: /var/lib/docker/containers + readOnly: true +- name: pods + mountPath: /var/log/pods + readOnly: true + +# Custom volumeMounts together with the default ones +extraVolumeMounts: [] + +# Add extra Commandline args while starting up promtail. +# more info : https://github.com/grafana/loki/pull/1530 + +extraCommandlineArgs: [] +# example: +# extraCommandlineArgs: +# - -client.external-labels=hostname=$(HOSTNAME) + +config: + client: + # Maximum wait period before sending batch + batchwait: 1s + # Maximum batch size to accrue before sending, unit is byte + batchsize: 102400 + + # Maximum time to wait for server to respond to a request + timeout: 10s + + backoff_config: + # Initial backoff time between retries + minbackoff: 100ms + # Maximum backoff time between retries + maxbackoff: 5s + # Maximum number of retries when sending batches, 0 means infinite retries + maxretries: 20 + + # The labels to add to any time series or alerts when communicating with loki + external_labels: {} + + server: + http_listen_port: 3101 + + positions: + filename: /run/promtail/positions.yaml + target_config: + # Period to resync directories being watched and files being tailed + sync_period: 10s + +serviceMonitor: + enabled: false + interval: "" + additionalLabels: {} + # scrapeTimeout: 10s + +# Extra env variables to pass to the promtail container +env: [] + +# enable and configure if using the syslog scrape config +syslogService: + enabled: false + type: ClusterIP + port: 1514 + ## Specify the nodePort value for the LoadBalancer and NodePort service types. + ## ref: https://kubernetes.io/docs/concepts/services-networking/service/#type-nodeport + ## + # nodePort: + ## Provide any additional annotations which may be required. This can be used to + ## set the LoadBalancer service type to internal only. + ## ref: https://kubernetes.io/docs/concepts/services-networking/service/#internal-load-balancer + ## + annotations: {} + labels: {} + ## Use loadBalancerIP to request a specific static IP, + ## otherwise leave blank + ## + loadBalancerIP: + # loadBalancerSourceRanges: [] + ## Set the externalTrafficPolicy in the Service to either Cluster or Local + # externalTrafficPolicy: Cluster diff --git a/config.yaml b/config.yaml index 52e98bd..41f2057 100644 --- a/config.yaml +++ b/config.yaml @@ -19,47 +19,49 @@ namespace: namespace tls: skipVerify: true caCert: -prometheus: - server: - host: prometheus.example.com - username: - password: +monitoring: + prometheus: + server: + host: prometheus.example.com + username: + password: + tls: + cert: + key: + alertmanager: + slack: + apiUrl: https://hooks.slack.com/services/xxx/xxx + channel: '#alerts' + grafana: + host: grafana.example.com tls: cert: key: - alertmanager: - slack: - apiUrl: https://hooks.slack.com/services/xxx/xxx - channel: '#alerts' -loki: - host: loki.example.com - username: - password: - s3: - protocol: https - host: s3.eu-de-1.example.com - accessToken: abcd - secret: "1234" - bucket: bucket - region: eu-de-1 - tls: - cert: - key: -grafana: - host: grafana.example.com - tls: - cert: - key: - admin: - username: admin - password: secret - ldap: - enabled: false - host: - port: "" + admin: + username: admin + password: secret + ldap: + enabled: false + host: + port: "" + password: + bind_dn: + accountBases: "[]" + groupBases: "[]" + dashboards: + editable: false +logging: + loki: + host: loki.example.com + username: password: - bind_dn: - accountBases: "[]" - groupBases: "[]" - dashboards: - editable: false + s3: + protocol: https + host: s3.eu-de-1.example.com + accessToken: abcd + secret: "1234" + bucket: bucket + region: eu-de-1 + tls: + cert: + key: diff --git a/promtail/promtail.yaml b/promtail/promtailLocalConfig.yaml similarity index 92% rename from promtail/promtail.yaml rename to promtail/promtailLocalConfig.yaml index a56e625..d17d038 100644 --- a/promtail/promtail.yaml +++ b/promtail/promtailLocalConfig.yaml @@ -10,15 +10,15 @@ positions: filename: #@ "{}/positions.yaml".format(data.values.gerritServers.other[i].promtail.storagePath) clients: - - url: #@ "https://{}/loki/api/v1/push".format(data.values.loki.host) + - url: #@ "https://{}/loki/api/v1/push".format(data.values.logging.loki.host) tls_config: insecure_skip_verify: #@ data.values.tls.skipVerify #@ if not data.values.tls.skipVerify: ca_file: #@ "{}/promtail.ca.crt".format(data.values.gerritServers.other[i].promtail.storagePath) #@ end basic_auth: - username: #@ data.values.loki.username - password: #@ data.values.loki.password + username: #@ data.values.logging.loki.username + password: #@ data.values.logging.loki.password scrape_configs: - job_name: gerrit_error static_configs: diff --git a/subcommands/_globals.py b/subcommands/_globals.py index 43f5ffe..987b5bd 100644 --- a/subcommands/_globals.py +++ b/subcommands/_globals.py @@ -16,4 +16,5 @@ HELM_CHARTS = { "grafana": "stable/grafana", "loki": "loki/loki", "prometheus": "stable/prometheus", + "promtail": "loki/promtail", } diff --git a/subcommands/install.py b/subcommands/install.py index cc50b95..e5fa1fa 100644 --- a/subcommands/install.py +++ b/subcommands/install.py @@ -14,7 +14,9 @@ import os.path import stat +import shutil import subprocess +import sys import zipfile import requests @@ -26,6 +28,7 @@ from ._globals import HELM_CHARTS TEMPLATES = [ "charts/namespace.yaml", "charts/prometheus", + "charts/promtail", "charts/loki", "charts/grafana", "promtail", @@ -79,7 +82,7 @@ def _create_promtail_configs(config, output_dir): if not os.path.exists(os.path.join(output_dir, "promtail")): os.mkdir(os.path.join(output_dir, "promtail")) - with open(os.path.join(output_dir, "promtail.yaml")) as f: + with open(os.path.join(output_dir, "promtailLocalConfig.yaml")) as f: for promtail_config in yaml.load_all(f, Loader=yaml.SafeLoader): with open( os.path.join( @@ -94,7 +97,7 @@ def _create_promtail_configs(config, output_dir): ) as f: yaml.dump(promtail_config, f) - os.remove(os.path.join(output_dir, "promtail.yaml")) + os.remove(os.path.join(output_dir, "promtailLocalConfig.yaml")) if not config["tls"]["skipVerify"]: try: @@ -145,7 +148,7 @@ def _run_ytt(config, output_dir): command += ["-f", template] command += [ - "--output-directory", + "--output-files", output_dir, "--ignore-unknown-comments", "-f", @@ -229,13 +232,30 @@ def install(config_manager, output_dir, dryrun, update_repo): if not os.path.exists(output_dir): os.mkdir(output_dir) + elif os.listdir(output_dir): + while True: + response = input( + ( + "Output directory already exists. This may lead to file conflicts " + "and unwanted configuration applied to the cluster. Do you want " + "to empty the directory? [y/n] " + ) + ) + if response == "y": + shutil.rmtree(output_dir) + os.mkdir(output_dir) + break + if response == "n": + print("Aborting installation. Please provide empty directory.") + sys.exit(1) + print("Unknown input.") _run_ytt(config, output_dir) namespace = config_manager.get_config()["namespace"] _create_dashboard_configmaps(output_dir, namespace) - if os.path.exists(os.path.join(output_dir, "promtail.yaml")): + if os.path.exists(os.path.join(output_dir, "promtailLocalConfig.yaml")): _create_promtail_configs(config, output_dir) if not dryrun: _download_promtail(output_dir)