fad4eba966
Gerrit instances that are load-balanced cannot easily be scraped by
an external Prometheus, since the request won't end up at a specified
Gerrit instance. A typical setup to solve this issue is to install a
local Prometheus and scrape the local Prometheus from the central
Prometheus. This is a so-called federated setup.
Now such a setup is supported and can be configured.
Change-Id: I0119d3c1d846cd8e975e5732f4d59cf863c6d2b8
1701 lines
55 KiB
YAML
1701 lines
55 KiB
YAML
#@ load("@ytt:data", "data")
|
|
|
|
rbac:
|
|
create: true
|
|
|
|
podSecurityPolicy:
|
|
enabled: true
|
|
|
|
imagePullSecrets:
|
|
# - name: "image-pull-secret"
|
|
|
|
## Define serviceAccount names for components. Defaults to component's fully qualified name.
|
|
##
|
|
serviceAccounts:
|
|
alertmanager:
|
|
create: true
|
|
name:
|
|
annotations: {}
|
|
nodeExporter:
|
|
create: false
|
|
name:
|
|
annotations: {}
|
|
pushgateway:
|
|
create: false
|
|
name:
|
|
annotations: {}
|
|
server:
|
|
create: true
|
|
name:
|
|
annotations: {}
|
|
|
|
alertmanager:
|
|
## If false, alertmanager will not be installed
|
|
##
|
|
enabled: true
|
|
|
|
## Use a ClusterRole (and ClusterRoleBinding)
|
|
## - If set to false - we define a Role and RoleBinding in the defined namespaces ONLY
|
|
## This makes alertmanager work - for users who do not have ClusterAdmin privs, but want alertmanager to operate on their own namespaces, instead of clusterwide.
|
|
useClusterRole: true
|
|
|
|
## Set to a rolename to use existing role - skipping role creation - but still doing serviceaccount and rolebinding to the rolename set here.
|
|
useExistingRole: false
|
|
|
|
## alertmanager container name
|
|
##
|
|
name: alertmanager
|
|
|
|
## alertmanager container image
|
|
##
|
|
image:
|
|
repository: quay.io/prometheus/alertmanager
|
|
tag: v0.21.0
|
|
pullPolicy: IfNotPresent
|
|
|
|
## alertmanager priorityClassName
|
|
##
|
|
priorityClassName: ""
|
|
|
|
## Additional alertmanager container arguments
|
|
##
|
|
extraArgs: {}
|
|
|
|
## Additional InitContainers to initialize the pod
|
|
##
|
|
extraInitContainers: []
|
|
|
|
## The URL prefix at which the container can be accessed. Useful in the case the '-web.external-url' includes a slug
|
|
## so that the various internal URLs remain accessible as they are in the default case.
|
|
## (Optional)
|
|
prefixURL: ""
|
|
|
|
## External URL which can access alertmanager
|
|
baseURL: "http://localhost:9093"
|
|
|
|
## Additional alertmanager container environment variables
|
|
## For instance to add a http_proxy
|
|
##
|
|
extraEnv: {}
|
|
|
|
## Additional alertmanager Secret mounts
|
|
# Defines additional mounts with secrets. Secrets must be manually created in the namespace.
|
|
extraSecretMounts: []
|
|
# - name: secret-files
|
|
# mountPath: /etc/secrets
|
|
# subPath: ""
|
|
# secretName: alertmanager-secret-files
|
|
# readOnly: true
|
|
|
|
## ConfigMap override where fullname is {{.Release.Name}}-{{.Values.alertmanager.configMapOverrideName}}
|
|
## Defining configMapOverrideName will cause templates/alertmanager-configmap.yaml
|
|
## to NOT generate a ConfigMap resource
|
|
##
|
|
configMapOverrideName: ""
|
|
|
|
## The name of a secret in the same kubernetes namespace which contains the Alertmanager config
|
|
## Defining configFromSecret will cause templates/alertmanager-configmap.yaml
|
|
## to NOT generate a ConfigMap resource
|
|
##
|
|
configFromSecret: ""
|
|
|
|
## The configuration file name to be loaded to alertmanager
|
|
## Must match the key within configuration loaded from ConfigMap/Secret
|
|
##
|
|
configFileName: alertmanager.yml
|
|
|
|
ingress:
|
|
## If true, alertmanager Ingress will be created
|
|
##
|
|
enabled: false
|
|
|
|
## alertmanager Ingress annotations
|
|
##
|
|
annotations: {}
|
|
# kubernetes.io/ingress.class: nginx
|
|
# kubernetes.io/tls-acme: 'true'
|
|
|
|
## alertmanager Ingress additional labels
|
|
##
|
|
extraLabels: {}
|
|
|
|
## alertmanager Ingress hostnames with optional path
|
|
## Must be provided if Ingress is enabled
|
|
##
|
|
hosts: []
|
|
# - alertmanager.domain.com
|
|
# - domain.com/alertmanager
|
|
|
|
## Extra paths to prepend to every host configuration. This is useful when working with annotation based services.
|
|
extraPaths: []
|
|
# - path: /*
|
|
# backend:
|
|
# serviceName: ssl-redirect
|
|
# servicePort: use-annotation
|
|
|
|
## alertmanager Ingress TLS configuration
|
|
## Secrets must be manually created in the namespace
|
|
##
|
|
tls: []
|
|
# - secretName: prometheus-alerts-tls
|
|
# hosts:
|
|
# - alertmanager.domain.com
|
|
|
|
## Alertmanager Deployment Strategy type
|
|
# strategy:
|
|
# type: Recreate
|
|
|
|
## Node tolerations for alertmanager scheduling to nodes with taints
|
|
## Ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/
|
|
##
|
|
tolerations: []
|
|
# - key: "key"
|
|
# operator: "Equal|Exists"
|
|
# value: "value"
|
|
# effect: "NoSchedule|PreferNoSchedule|NoExecute(1.6 only)"
|
|
|
|
## Node labels for alertmanager pod assignment
|
|
## Ref: https://kubernetes.io/docs/user-guide/node-selection/
|
|
##
|
|
nodeSelector: {}
|
|
|
|
## Pod affinity
|
|
##
|
|
affinity: {}
|
|
|
|
## PodDisruptionBudget settings
|
|
## ref: https://kubernetes.io/docs/concepts/workloads/pods/disruptions/
|
|
##
|
|
podDisruptionBudget:
|
|
enabled: false
|
|
maxUnavailable: 1
|
|
|
|
## Use an alternate scheduler, e.g. "stork".
|
|
## ref: https://kubernetes.io/docs/tasks/administer-cluster/configure-multiple-schedulers/
|
|
##
|
|
# schedulerName:
|
|
|
|
persistentVolume:
|
|
## If true, alertmanager will create/use a Persistent Volume Claim
|
|
## If false, use emptyDir
|
|
##
|
|
enabled: true
|
|
|
|
## alertmanager data Persistent Volume access modes
|
|
## Must match those of existing PV or dynamic provisioner
|
|
## Ref: http://kubernetes.io/docs/user-guide/persistent-volumes/
|
|
##
|
|
accessModes:
|
|
- ReadWriteOnce
|
|
|
|
## alertmanager data Persistent Volume Claim annotations
|
|
##
|
|
annotations: {}
|
|
|
|
## alertmanager data Persistent Volume existing claim name
|
|
## Requires alertmanager.persistentVolume.enabled: true
|
|
## If defined, PVC must be created manually before volume will be bound
|
|
existingClaim: ""
|
|
|
|
## alertmanager data Persistent Volume mount root path
|
|
##
|
|
mountPath: /data
|
|
|
|
## alertmanager data Persistent Volume size
|
|
##
|
|
size: 2Gi
|
|
|
|
## alertmanager data Persistent Volume Storage Class
|
|
## If defined, storageClassName: <storageClass>
|
|
## If set to "-", storageClassName: "", which disables dynamic provisioning
|
|
## If undefined (the default) or set to null, no storageClassName spec is
|
|
## set, choosing the default provisioner. (gp2 on AWS, standard on
|
|
## GKE, AWS & OpenStack)
|
|
##
|
|
# storageClass: "-"
|
|
|
|
## alertmanager data Persistent Volume Binding Mode
|
|
## If defined, volumeBindingMode: <volumeBindingMode>
|
|
## If undefined (the default) or set to null, no volumeBindingMode spec is
|
|
## set, choosing the default mode.
|
|
##
|
|
# volumeBindingMode: ""
|
|
|
|
## Subdirectory of alertmanager data Persistent Volume to mount
|
|
## Useful if the volume's root directory is not empty
|
|
##
|
|
subPath: ""
|
|
|
|
emptyDir:
|
|
## alertmanager emptyDir volume size limit
|
|
##
|
|
sizeLimit: ""
|
|
|
|
## Annotations to be added to alertmanager pods
|
|
##
|
|
podAnnotations: {}
|
|
## Tell prometheus to use a specific set of alertmanager pods
|
|
## instead of all alertmanager pods found in the same namespace
|
|
## Useful if you deploy multiple releases within the same namespace
|
|
##
|
|
## prometheus.io/probe: alertmanager-teamA
|
|
|
|
## Labels to be added to Prometheus AlertManager pods
|
|
##
|
|
podLabels: {}
|
|
|
|
## Specify if a Pod Security Policy for alertmanager must be created
|
|
## Ref: https://kubernetes.io/docs/concepts/policy/pod-security-policy/
|
|
##
|
|
podSecurityPolicy:
|
|
annotations: {}
|
|
## Specify pod annotations
|
|
## Ref: https://kubernetes.io/docs/concepts/policy/pod-security-policy/#apparmor
|
|
## Ref: https://kubernetes.io/docs/concepts/policy/pod-security-policy/#seccomp
|
|
## Ref: https://kubernetes.io/docs/concepts/policy/pod-security-policy/#sysctl
|
|
##
|
|
# seccomp.security.alpha.kubernetes.io/allowedProfileNames: '*'
|
|
# seccomp.security.alpha.kubernetes.io/defaultProfileName: 'docker/default'
|
|
# apparmor.security.beta.kubernetes.io/defaultProfileName: 'runtime/default'
|
|
|
|
## Use a StatefulSet if replicaCount needs to be greater than 1 (see below)
|
|
##
|
|
replicaCount: 1
|
|
|
|
## Annotations to be added to deployment
|
|
##
|
|
deploymentAnnotations: {}
|
|
|
|
statefulSet:
|
|
## If true, use a statefulset instead of a deployment for pod management.
|
|
## This allows scaling replicas to more than 1 pod
|
|
##
|
|
enabled: false
|
|
|
|
annotations: {}
|
|
labels: {}
|
|
podManagementPolicy: OrderedReady
|
|
|
|
## Alertmanager headless service to use for the statefulset
|
|
##
|
|
headless:
|
|
annotations: {}
|
|
labels: {}
|
|
|
|
## Enabling peer mesh service end points for enabling the HA alert manager
|
|
## Ref: https://github.com/prometheus/alertmanager/blob/master/README.md
|
|
enableMeshPeer: false
|
|
|
|
servicePort: 80
|
|
|
|
## alertmanager resource requests and limits
|
|
## Ref: http://kubernetes.io/docs/user-guide/compute-resources/
|
|
##
|
|
resources:
|
|
limits:
|
|
cpu: 10m
|
|
memory: 32Mi
|
|
requests:
|
|
cpu: 10m
|
|
memory: 32Mi
|
|
|
|
## Security context to be added to alertmanager pods
|
|
##
|
|
securityContext:
|
|
runAsUser: 65534
|
|
runAsNonRoot: true
|
|
runAsGroup: 65534
|
|
fsGroup: 65534
|
|
|
|
service:
|
|
annotations: {}
|
|
labels: {}
|
|
clusterIP: ""
|
|
|
|
## Enabling peer mesh service end points for enabling the HA alert manager
|
|
## Ref: https://github.com/prometheus/alertmanager/blob/master/README.md
|
|
# enableMeshPeer : true
|
|
|
|
## List of IP addresses at which the alertmanager service is available
|
|
## Ref: https://kubernetes.io/docs/user-guide/services/#external-ips
|
|
##
|
|
externalIPs: []
|
|
|
|
loadBalancerIP: ""
|
|
loadBalancerSourceRanges: []
|
|
servicePort: 80
|
|
# nodePort: 30000
|
|
sessionAffinity: None
|
|
type: ClusterIP
|
|
|
|
## Monitors ConfigMap changes and POSTs to a URL
|
|
## Ref: https://github.com/jimmidyson/configmap-reload
|
|
##
|
|
configmapReload:
|
|
prometheus:
|
|
## If false, the configmap-reload container will not be deployed
|
|
##
|
|
enabled: true
|
|
## configmap-reload container name
|
|
##
|
|
name: configmap-reload
|
|
## configmap-reload container image
|
|
##
|
|
image:
|
|
repository: jimmidyson/configmap-reload
|
|
tag: v0.4.0
|
|
pullPolicy: IfNotPresent
|
|
## Additional configmap-reload container arguments
|
|
##
|
|
extraArgs: {}
|
|
## Additional configmap-reload volume directories
|
|
##
|
|
extraVolumeDirs: []
|
|
## Additional configmap-reload mounts
|
|
##
|
|
extraConfigmapMounts: []
|
|
# - name: prometheus-alerts
|
|
# mountPath: /etc/alerts.d
|
|
# subPath: ""
|
|
# configMap: prometheus-alerts
|
|
# readOnly: true
|
|
## configmap-reload resource requests and limits
|
|
## Ref: http://kubernetes.io/docs/user-guide/compute-resources/
|
|
##
|
|
resources: {}
|
|
alertmanager:
|
|
## If false, the configmap-reload container will not be deployed
|
|
##
|
|
enabled: true
|
|
## configmap-reload container name
|
|
##
|
|
name: configmap-reload
|
|
## configmap-reload container image
|
|
##
|
|
image:
|
|
repository: jimmidyson/configmap-reload
|
|
tag: v0.4.0
|
|
pullPolicy: IfNotPresent
|
|
## Additional configmap-reload container arguments
|
|
##
|
|
extraArgs: {}
|
|
## Additional configmap-reload volume directories
|
|
##
|
|
extraVolumeDirs: []
|
|
## Additional configmap-reload mounts
|
|
##
|
|
extraConfigmapMounts: []
|
|
# - name: prometheus-alerts
|
|
# mountPath: /etc/alerts.d
|
|
# subPath: ""
|
|
# configMap: prometheus-alerts
|
|
# readOnly: true
|
|
## configmap-reload resource requests and limits
|
|
## Ref: http://kubernetes.io/docs/user-guide/compute-resources/
|
|
##
|
|
resources: {}
|
|
|
|
kubeStateMetrics:
|
|
## If false, kube-state-metrics sub-chart will not be installed
|
|
##
|
|
enabled: false
|
|
|
|
## kube-state-metrics sub-chart configurable values
|
|
## Please see https://github.com/helm/charts/tree/master/stable/kube-state-metrics
|
|
##
|
|
# kube-state-metrics:
|
|
|
|
nodeExporter:
|
|
## If false, node-exporter will not be installed
|
|
##
|
|
enabled: false
|
|
|
|
## If true, node-exporter pods share the host network namespace
|
|
##
|
|
hostNetwork: true
|
|
|
|
## If true, node-exporter pods share the host PID namespace
|
|
##
|
|
hostPID: true
|
|
|
|
## node-exporter container name
|
|
##
|
|
name: node-exporter
|
|
|
|
## node-exporter container image
|
|
##
|
|
image:
|
|
repository: quay.io/prometheus/node-exporter
|
|
tag: v1.0.1
|
|
pullPolicy: IfNotPresent
|
|
|
|
## Specify if a Pod Security Policy for node-exporter must be created
|
|
## Ref: https://kubernetes.io/docs/concepts/policy/pod-security-policy/
|
|
##
|
|
podSecurityPolicy:
|
|
annotations: {}
|
|
## Specify pod annotations
|
|
## Ref: https://kubernetes.io/docs/concepts/policy/pod-security-policy/#apparmor
|
|
## Ref: https://kubernetes.io/docs/concepts/policy/pod-security-policy/#seccomp
|
|
## Ref: https://kubernetes.io/docs/concepts/policy/pod-security-policy/#sysctl
|
|
##
|
|
# seccomp.security.alpha.kubernetes.io/allowedProfileNames: '*'
|
|
# seccomp.security.alpha.kubernetes.io/defaultProfileName: 'docker/default'
|
|
# apparmor.security.beta.kubernetes.io/defaultProfileName: 'runtime/default'
|
|
|
|
## node-exporter priorityClassName
|
|
##
|
|
priorityClassName: ""
|
|
|
|
## Custom Update Strategy
|
|
##
|
|
updateStrategy:
|
|
type: RollingUpdate
|
|
|
|
## Additional node-exporter container arguments
|
|
##
|
|
extraArgs: {}
|
|
|
|
## Additional InitContainers to initialize the pod
|
|
##
|
|
extraInitContainers: []
|
|
|
|
## Additional node-exporter hostPath mounts
|
|
##
|
|
extraHostPathMounts: []
|
|
# - name: textfile-dir
|
|
# mountPath: /srv/txt_collector
|
|
# hostPath: /var/lib/node-exporter
|
|
# readOnly: true
|
|
# mountPropagation: HostToContainer
|
|
|
|
extraConfigmapMounts: []
|
|
# - name: certs-configmap
|
|
# mountPath: /prometheus
|
|
# configMap: certs-configmap
|
|
# readOnly: true
|
|
|
|
## Node tolerations for node-exporter scheduling to nodes with taints
|
|
## Ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/
|
|
##
|
|
tolerations: []
|
|
# - key: "key"
|
|
# operator: "Equal|Exists"
|
|
# value: "value"
|
|
# effect: "NoSchedule|PreferNoSchedule|NoExecute(1.6 only)"
|
|
|
|
## Node labels for node-exporter pod assignment
|
|
## Ref: https://kubernetes.io/docs/user-guide/node-selection/
|
|
##
|
|
nodeSelector: {}
|
|
|
|
## Annotations to be added to node-exporter pods
|
|
##
|
|
podAnnotations: {}
|
|
|
|
## Labels to be added to node-exporter pods
|
|
##
|
|
pod:
|
|
labels: {}
|
|
|
|
## PodDisruptionBudget settings
|
|
## ref: https://kubernetes.io/docs/concepts/workloads/pods/disruptions/
|
|
##
|
|
podDisruptionBudget:
|
|
enabled: false
|
|
maxUnavailable: 1
|
|
|
|
## node-exporter resource limits & requests
|
|
## Ref: https://kubernetes.io/docs/user-guide/compute-resources/
|
|
##
|
|
resources: {}
|
|
# limits:
|
|
# cpu: 200m
|
|
# memory: 50Mi
|
|
# requests:
|
|
# cpu: 100m
|
|
# memory: 30Mi
|
|
|
|
## Security context to be added to node-exporter pods
|
|
##
|
|
securityContext: {}
|
|
# runAsUser: 0
|
|
|
|
service:
|
|
annotations:
|
|
prometheus.io/scrape: "true"
|
|
labels: {}
|
|
|
|
# Exposed as a headless service:
|
|
# https://kubernetes.io/docs/concepts/services-networking/service/#headless-services
|
|
clusterIP: None
|
|
|
|
## List of IP addresses at which the node-exporter service is available
|
|
## Ref: https://kubernetes.io/docs/user-guide/services/#external-ips
|
|
##
|
|
externalIPs: []
|
|
|
|
hostPort: 9100
|
|
loadBalancerIP: ""
|
|
loadBalancerSourceRanges: []
|
|
servicePort: 9100
|
|
type: ClusterIP
|
|
|
|
server:
|
|
## If false, Prometheus server will not be installed (the container name is set by `name` below)
|
|
##
|
|
enabled: true
|
|
|
|
## Use a ClusterRole (and ClusterRoleBinding)
|
|
## - If set to false - we define a RoleBinding in the defined namespaces ONLY
|
|
##
|
|
## NB: because we need a Role with nonResourceURL's ("/metrics") - you must get someone with Cluster-admin privileges to define this role for you, before running with this setting enabled.
|
|
## This makes Prometheus work for users who do not have ClusterAdmin privs, but want Prometheus to operate on their own namespaces, instead of clusterwide.
|
|
##
|
|
## You MUST also set namespaces to the ones you have access to and want monitored by Prometheus.
|
|
##
|
|
# useExistingClusterRoleName: nameofclusterrole
|
|
## namespaces to monitor (instead of monitoring all - clusterwide). Needed if you want to run without Cluster-admin privileges.
|
|
# namespaces:
|
|
# - yournamespace
|
|
|
|
name: server
|
|
sidecarContainers:
|
|
#@ for instance in data.values.gerritServers.other:
|
|
#@ if instance.healthcheck:
|
|
- name: #@ "{}-health-ping".format(instance.host).replace('.', '-')
|
|
image: curlimages/curl:7.73.0
|
|
command:
|
|
- "watch"
|
|
- "-n"
|
|
- "30"
|
|
args:
|
|
- #@ "curl -Lk https://{}/config/server/healthcheck~status || echo 'Healthcheck failed'".format(instance.host)
|
|
resources:
|
|
limits:
|
|
cpu: 50m
|
|
memory: 128Mi
|
|
requests:
|
|
cpu: 10m
|
|
memory: 32Mi
|
|
#@ end
|
|
#@ end
|
|
|
|
## Prometheus server container image
|
|
##
|
|
image:
|
|
repository: quay.io/prometheus/prometheus
|
|
tag: v2.22.1
|
|
pullPolicy: IfNotPresent
|
|
|
|
## prometheus server priorityClassName
|
|
##
|
|
priorityClassName: ""
|
|
|
|
## EnableServiceLinks indicates whether information about services should be injected
|
|
## into pod's environment variables, matching the syntax of Docker links.
|
|
## WARNING: the field is unsupported and will be skipped in K8s prior to v1.13.0.
|
|
##
|
|
enableServiceLinks: true
|
|
|
|
## The URL prefix at which the container can be accessed. Useful in the case the '-web.external-url' includes a slug
|
|
## so that the various internal URLs remain accessible as they are in the default case.
|
|
## (Optional)
|
|
prefixURL: ""
|
|
|
|
## External URL which can access prometheus
|
|
## May be the same as the Ingress host name
|
|
baseURL: ""
|
|
|
|
## Additional server container environment variables
|
|
##
|
|
## You specify this manually like you would a raw deployment manifest.
|
|
## This means you can bind in environment variables from secrets.
|
|
##
|
|
## e.g. static environment variable:
|
|
## - name: DEMO_GREETING
|
|
## value: "Hello from the environment"
|
|
##
|
|
## e.g. secret environment variable:
|
|
## - name: USERNAME
|
|
## valueFrom:
|
|
## secretKeyRef:
|
|
## name: mysecret
|
|
## key: username
|
|
env: []
|
|
|
|
extraFlags:
|
|
- web.enable-lifecycle
|
|
## web.enable-admin-api flag controls access to the administrative HTTP API which includes functionality such as
|
|
## deleting time series. This is disabled by default.
|
|
# - web.enable-admin-api
|
|
##
|
|
## storage.tsdb.no-lockfile flag controls DB locking
|
|
# - storage.tsdb.no-lockfile
|
|
##
|
|
## storage.tsdb.wal-compression flag enables compression of the write-ahead log (WAL)
|
|
# - storage.tsdb.wal-compression
|
|
|
|
## Path to a configuration file on prometheus server container FS
|
|
configPath: /etc/config/prometheus.yml
|
|
|
|
global:
|
|
## How frequently to scrape targets by default
|
|
##
|
|
scrape_interval: 1m
|
|
## How long until a scrape request times out
|
|
##
|
|
scrape_timeout: 10s
|
|
## How frequently to evaluate rules
|
|
##
|
|
evaluation_interval: 1m
|
|
## https://prometheus.io/docs/prometheus/latest/configuration/configuration/#remote_write
|
|
##
|
|
remoteWrite: []
|
|
## https://prometheus.io/docs/prometheus/latest/configuration/configuration/#remote_read
|
|
##
|
|
remoteRead: []
|
|
|
|
## Additional Prometheus server container arguments
|
|
##
|
|
extraArgs: {}
|
|
|
|
## Additional InitContainers to initialize the pod
|
|
##
|
|
extraInitContainers: []
|
|
|
|
## Additional Prometheus server Volume mounts
|
|
##
|
|
extraVolumeMounts: []
|
|
|
|
## Additional Prometheus server Volumes
|
|
##
|
|
extraVolumes: []
|
|
|
|
## Additional Prometheus server hostPath mounts
|
|
##
|
|
extraHostPathMounts: []
|
|
# - name: certs-dir
|
|
# mountPath: /etc/kubernetes/certs
|
|
# subPath: ""
|
|
# hostPath: /etc/kubernetes/certs
|
|
# readOnly: true
|
|
|
|
extraConfigmapMounts: []
|
|
# - name: certs-configmap
|
|
# mountPath: /prometheus
|
|
# subPath: ""
|
|
# configMap: certs-configmap
|
|
# readOnly: true
|
|
|
|
## Additional Prometheus server Secret mounts
|
|
# Defines additional mounts with secrets. Secrets must be manually created in the namespace.
|
|
extraSecretMounts:
|
|
- name: prometheus-secrets
|
|
mountPath: /etc/secrets
|
|
secretName: prometheus-secrets
|
|
readOnly: true
|
|
|
|
## ConfigMap override where fullname is {{.Release.Name}}-{{.Values.server.configMapOverrideName}}
|
|
## Defining configMapOverrideName will cause templates/server-configmap.yaml
|
|
## to NOT generate a ConfigMap resource
|
|
##
|
|
configMapOverrideName: ""
|
|
|
|
ingress:
|
|
## If true, Prometheus server Ingress will be created
|
|
##
|
|
enabled: true
|
|
|
|
## Prometheus server Ingress annotations
|
|
##
|
|
annotations:
|
|
kubernetes.io/ingress.class: nginx
|
|
nginx.ingress.kubernetes.io/auth-type: basic
|
|
nginx.ingress.kubernetes.io/auth-secret: prometheus-basic-auth
|
|
nginx.ingress.kubernetes.io/auth-realm: 'Authentication Required'
|
|
# kubernetes.io/tls-acme: 'true'
|
|
|
|
## Prometheus server Ingress additional labels
|
|
##
|
|
extraLabels: {}
|
|
|
|
## Prometheus server Ingress hostnames with optional path
|
|
## Must be provided if Ingress is enabled
|
|
##
|
|
hosts:
|
|
- #@ data.values.monitoring.prometheus.server.host
|
|
# - prometheus.domain.com
|
|
# - domain.com/prometheus
|
|
|
|
## Extra paths to prepend to every host configuration. This is useful when working with annotation based services.
|
|
extraPaths: []
|
|
# - path: /*
|
|
# backend:
|
|
# serviceName: ssl-redirect
|
|
# servicePort: use-annotation
|
|
|
|
## Prometheus server Ingress TLS configuration
|
|
## Secrets must be manually created in the namespace
|
|
##
|
|
tls:
|
|
- secretName: prometheus-server-tls
|
|
hosts:
|
|
- #@ data.values.monitoring.prometheus.server.host
|
|
|
|
## Server Deployment Strategy type
|
|
# strategy:
|
|
# type: Recreate
|
|
|
|
## hostAliases allows adding entries to /etc/hosts inside the containers
|
|
hostAliases: []
|
|
# - ip: "127.0.0.1"
|
|
# hostnames:
|
|
# - "example.com"
|
|
|
|
## Node tolerations for server scheduling to nodes with taints
|
|
## Ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/
|
|
##
|
|
tolerations: []
|
|
# - key: "key"
|
|
# operator: "Equal|Exists"
|
|
# value: "value"
|
|
# effect: "NoSchedule|PreferNoSchedule|NoExecute(1.6 only)"
|
|
|
|
## Node labels for Prometheus server pod assignment
|
|
## Ref: https://kubernetes.io/docs/user-guide/node-selection/
|
|
##
|
|
nodeSelector: {}
|
|
|
|
## Pod affinity
|
|
##
|
|
affinity: {}
|
|
|
|
## PodDisruptionBudget settings
|
|
## ref: https://kubernetes.io/docs/concepts/workloads/pods/disruptions/
|
|
##
|
|
podDisruptionBudget:
|
|
enabled: false
|
|
maxUnavailable: 1
|
|
|
|
## Use an alternate scheduler, e.g. "stork".
|
|
## ref: https://kubernetes.io/docs/tasks/administer-cluster/configure-multiple-schedulers/
|
|
##
|
|
# schedulerName:
|
|
|
|
persistentVolume:
|
|
## If true, Prometheus server will create/use a Persistent Volume Claim
|
|
## If false, use emptyDir
|
|
##
|
|
enabled: true
|
|
|
|
## Prometheus server data Persistent Volume access modes
|
|
## Must match those of existing PV or dynamic provisioner
|
|
## Ref: http://kubernetes.io/docs/user-guide/persistent-volumes/
|
|
##
|
|
accessModes:
|
|
- ReadWriteOnce
|
|
|
|
## Prometheus server data Persistent Volume annotations
|
|
##
|
|
annotations: {}
|
|
|
|
## Prometheus server data Persistent Volume existing claim name
|
|
## Requires server.persistentVolume.enabled: true
|
|
## If defined, PVC must be created manually before volume will be bound
|
|
existingClaim: "prometheus-server-pvc"
|
|
|
|
## Prometheus server data Persistent Volume mount root path
|
|
##
|
|
mountPath: /data
|
|
|
|
## Prometheus server data Persistent Volume size
|
|
##
|
|
size: 8Gi
|
|
|
|
## Prometheus server data Persistent Volume Storage Class
|
|
## If defined, storageClassName: <storageClass>
|
|
## If set to "-", storageClassName: "", which disables dynamic provisioning
|
|
## If undefined (the default) or set to null, no storageClassName spec is
|
|
## set, choosing the default provisioner. (gp2 on AWS, standard on
|
|
## GKE, AWS & OpenStack)
|
|
##
|
|
# storageClass: "-"
|
|
|
|
## Prometheus server data Persistent Volume Binding Mode
|
|
## If defined, volumeBindingMode: <volumeBindingMode>
|
|
## If undefined (the default) or set to null, no volumeBindingMode spec is
|
|
## set, choosing the default mode.
|
|
##
|
|
# volumeBindingMode: ""
|
|
|
|
## Subdirectory of Prometheus server data Persistent Volume to mount
|
|
## Useful if the volume's root directory is not empty
|
|
##
|
|
subPath: ""
|
|
|
|
emptyDir:
|
|
## Prometheus server emptyDir volume size limit
|
|
##
|
|
sizeLimit: ""
|
|
|
|
## Annotations to be added to Prometheus server pods
|
|
##
|
|
podAnnotations: {}
|
|
# iam.amazonaws.com/role: prometheus
|
|
|
|
## Labels to be added to Prometheus server pods
|
|
##
|
|
podLabels: {}
|
|
|
|
## Prometheus AlertManager configuration
|
|
##
|
|
alertmanagers: []
|
|
|
|
## Specify if a Pod Security Policy for the Prometheus server must be created
|
|
## Ref: https://kubernetes.io/docs/concepts/policy/pod-security-policy/
|
|
##
|
|
podSecurityPolicy:
|
|
annotations: {}
|
|
## Specify pod annotations
|
|
## Ref: https://kubernetes.io/docs/concepts/policy/pod-security-policy/#apparmor
|
|
## Ref: https://kubernetes.io/docs/concepts/policy/pod-security-policy/#seccomp
|
|
## Ref: https://kubernetes.io/docs/concepts/policy/pod-security-policy/#sysctl
|
|
##
|
|
# seccomp.security.alpha.kubernetes.io/allowedProfileNames: '*'
|
|
# seccomp.security.alpha.kubernetes.io/defaultProfileName: 'docker/default'
|
|
# apparmor.security.beta.kubernetes.io/defaultProfileName: 'runtime/default'
|
|
|
|
## Use a StatefulSet if replicaCount needs to be greater than 1 (see below)
|
|
##
|
|
replicaCount: 1
|
|
|
|
## Annotations to be added to deployment
|
|
##
|
|
deploymentAnnotations: {}
|
|
|
|
statefulSet:
|
|
## If true, use a statefulset instead of a deployment for pod management.
|
|
## This allows scaling replicas to more than 1 pod
|
|
##
|
|
enabled: false
|
|
|
|
annotations: {}
|
|
labels: {}
|
|
podManagementPolicy: OrderedReady
|
|
|
|
## Prometheus server headless service to use for the statefulset
|
|
##
|
|
headless:
|
|
annotations: {}
|
|
labels: {}
|
|
servicePort: 80
|
|
## Enable gRPC port on service to allow auto discovery with thanos-querier
|
|
gRPC:
|
|
enabled: false
|
|
servicePort: 10901
|
|
# nodePort: 10901
|
|
|
|
## Prometheus server readiness and liveness probe initial delay and timeout
|
|
## Ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/
|
|
##
|
|
readinessProbeInitialDelay: 30
|
|
readinessProbePeriodSeconds: 5
|
|
readinessProbeTimeout: 30
|
|
readinessProbeFailureThreshold: 3
|
|
readinessProbeSuccessThreshold: 1
|
|
livenessProbeInitialDelay: 30
|
|
livenessProbePeriodSeconds: 15
|
|
livenessProbeTimeout: 30
|
|
livenessProbeFailureThreshold: 3
|
|
livenessProbeSuccessThreshold: 1
|
|
|
|
## Prometheus server resource requests and limits
|
|
## Ref: http://kubernetes.io/docs/user-guide/compute-resources/
|
|
##
|
|
resources:
|
|
limits:
|
|
cpu: 500m
|
|
memory: 512Mi
|
|
requests:
|
|
cpu: 500m
|
|
memory: 512Mi
|
|
|
|
## Vertical Pod Autoscaler config
|
|
## Ref: https://github.com/kubernetes/autoscaler/tree/master/vertical-pod-autoscaler
|
|
verticalAutoscaler:
|
|
## If true, a VPA object will be created for the controller (either StatefulSet or Deployment, based on above configs)
|
|
enabled: false
|
|
# updateMode: "Auto"
|
|
# containerPolicies:
|
|
# - containerName: 'prometheus-server'
|
|
|
|
## Security context to be added to server pods
|
|
##
|
|
securityContext:
|
|
runAsUser: 65534
|
|
runAsNonRoot: true
|
|
runAsGroup: 65534
|
|
fsGroup: 65534
|
|
|
|
service:
|
|
annotations: {}
|
|
labels: {}
|
|
clusterIP: ""
|
|
|
|
## List of IP addresses at which the Prometheus server service is available
|
|
## Ref: https://kubernetes.io/docs/user-guide/services/#external-ips
|
|
##
|
|
externalIPs: []
|
|
|
|
loadBalancerIP: ""
|
|
loadBalancerSourceRanges: []
|
|
servicePort: 80
|
|
sessionAffinity: None
|
|
type: ClusterIP
|
|
|
|
## Enable gRPC port on service to allow auto discovery with thanos-querier
|
|
gRPC:
|
|
enabled: false
|
|
servicePort: 10901
|
|
# nodePort: 10901
|
|
## If using a statefulSet (statefulSet.enabled=true), configure the
|
|
## service to connect to a specific replica to have a consistent view
|
|
## of the data.
|
|
statefulsetReplica:
|
|
enabled: false
|
|
replica: 0
|
|
|
|
## Prometheus server pod termination grace period
|
|
##
|
|
terminationGracePeriodSeconds: 300
|
|
|
|
## Prometheus data retention period (default if not specified is 15 days)
|
|
##
|
|
retention: "15d"
|
|
|
|
pushgateway:
|
|
## If false, pushgateway will not be installed
|
|
##
|
|
enabled: false
|
|
|
|
## Use an alternate scheduler, e.g. "stork".
|
|
## ref: https://kubernetes.io/docs/tasks/administer-cluster/configure-multiple-schedulers/
|
|
##
|
|
# schedulerName:
|
|
|
|
## pushgateway container name
|
|
##
|
|
name: pushgateway
|
|
|
|
## pushgateway container image
|
|
##
|
|
image:
|
|
repository: prom/pushgateway
|
|
tag: v1.3.0
|
|
pullPolicy: IfNotPresent
|
|
|
|
## pushgateway priorityClassName
|
|
##
|
|
priorityClassName: ""
|
|
|
|
## Additional pushgateway container arguments
|
|
##
|
|
## for example: persistence.file: /data/pushgateway.data
|
|
extraArgs: {}
|
|
|
|
## Additional InitContainers to initialize the pod
|
|
##
|
|
extraInitContainers: []
|
|
|
|
ingress:
|
|
## If true, pushgateway Ingress will be created
|
|
##
|
|
enabled: false
|
|
|
|
## pushgateway Ingress annotations
|
|
##
|
|
annotations: {}
|
|
# kubernetes.io/ingress.class: nginx
|
|
# kubernetes.io/tls-acme: 'true'
|
|
|
|
## pushgateway Ingress hostnames with optional path
|
|
## Must be provided if Ingress is enabled
|
|
##
|
|
hosts: []
|
|
# - pushgateway.domain.com
|
|
# - domain.com/pushgateway
|
|
|
|
## Extra paths to prepend to every host configuration. This is useful when working with annotation based services.
|
|
extraPaths: []
|
|
# - path: /*
|
|
# backend:
|
|
# serviceName: ssl-redirect
|
|
# servicePort: use-annotation
|
|
|
|
## pushgateway Ingress TLS configuration
|
|
## Secrets must be manually created in the namespace
|
|
##
|
|
tls: []
|
|
# - secretName: prometheus-alerts-tls
|
|
# hosts:
|
|
# - pushgateway.domain.com
|
|
|
|
## Node tolerations for pushgateway scheduling to nodes with taints
|
|
## Ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/
|
|
##
|
|
tolerations: []
|
|
# - key: "key"
|
|
# operator: "Equal|Exists"
|
|
# value: "value"
|
|
# effect: "NoSchedule|PreferNoSchedule|NoExecute(1.6 only)"
|
|
|
|
## Node labels for pushgateway pod assignment
|
|
## Ref: https://kubernetes.io/docs/user-guide/node-selection/
|
|
##
|
|
nodeSelector: {}
|
|
|
|
## Annotations to be added to pushgateway pods
|
|
##
|
|
podAnnotations: {}
|
|
|
|
## Labels to be added to pushgateway pods
|
|
##
|
|
podLabels: {}
|
|
|
|
## Specify if a Pod Security Policy for pushgateway must be created
|
|
## Ref: https://kubernetes.io/docs/concepts/policy/pod-security-policy/
|
|
##
|
|
podSecurityPolicy:
|
|
annotations: {}
|
|
## Specify pod annotations
|
|
## Ref: https://kubernetes.io/docs/concepts/policy/pod-security-policy/#apparmor
|
|
## Ref: https://kubernetes.io/docs/concepts/policy/pod-security-policy/#seccomp
|
|
## Ref: https://kubernetes.io/docs/concepts/policy/pod-security-policy/#sysctl
|
|
##
|
|
# seccomp.security.alpha.kubernetes.io/allowedProfileNames: '*'
|
|
# seccomp.security.alpha.kubernetes.io/defaultProfileName: 'docker/default'
|
|
# apparmor.security.beta.kubernetes.io/defaultProfileName: 'runtime/default'
|
|
|
|
replicaCount: 1
|
|
|
|
## Annotations to be added to deployment
|
|
##
|
|
deploymentAnnotations: {}
|
|
## PodDisruptionBudget settings
|
|
## ref: https://kubernetes.io/docs/concepts/workloads/pods/disruptions/
|
|
##
|
|
podDisruptionBudget:
|
|
enabled: false
|
|
maxUnavailable: 1
|
|
|
|
## pushgateway resource requests and limits
|
|
## Ref: http://kubernetes.io/docs/user-guide/compute-resources/
|
|
##
|
|
resources: {}
|
|
# limits:
|
|
# cpu: 10m
|
|
# memory: 32Mi
|
|
# requests:
|
|
# cpu: 10m
|
|
# memory: 32Mi
|
|
|
|
## Security context to be added to push-gateway pods
|
|
##
|
|
securityContext:
|
|
runAsUser: 65534
|
|
runAsNonRoot: true
|
|
|
|
service:
|
|
annotations:
|
|
prometheus.io/probe: pushgateway
|
|
labels: {}
|
|
clusterIP: ""
|
|
|
|
## List of IP addresses at which the pushgateway service is available
|
|
## Ref: https://kubernetes.io/docs/user-guide/services/#external-ips
|
|
##
|
|
externalIPs: []
|
|
|
|
loadBalancerIP: ""
|
|
loadBalancerSourceRanges: []
|
|
servicePort: 9091
|
|
type: ClusterIP
|
|
|
|
## pushgateway Deployment Strategy type
|
|
# strategy:
|
|
# type: Recreate
|
|
|
|
persistentVolume:
|
|
## If true, pushgateway will create/use a Persistent Volume Claim
|
|
##
|
|
enabled: false
|
|
|
|
## pushgateway data Persistent Volume access modes
|
|
## Must match those of existing PV or dynamic provisioner
|
|
## Ref: http://kubernetes.io/docs/user-guide/persistent-volumes/
|
|
##
|
|
accessModes:
|
|
- ReadWriteOnce
|
|
|
|
## pushgateway data Persistent Volume Claim annotations
|
|
##
|
|
annotations: {}
|
|
|
|
## pushgateway data Persistent Volume existing claim name
|
|
## Requires pushgateway.persistentVolume.enabled: true
|
|
## If defined, PVC must be created manually before volume will be bound
|
|
existingClaim: ""
|
|
|
|
## pushgateway data Persistent Volume mount root path
|
|
##
|
|
mountPath: /data
|
|
|
|
## pushgateway data Persistent Volume size
|
|
##
|
|
size: 2Gi
|
|
|
|
## pushgateway data Persistent Volume Storage Class
|
|
## If defined, storageClassName: <storageClass>
|
|
## If set to "-", storageClassName: "", which disables dynamic provisioning
|
|
## If undefined (the default) or set to null, no storageClassName spec is
|
|
## set, choosing the default provisioner. (gp2 on AWS, standard on
|
|
## GKE, AWS & OpenStack)
|
|
##
|
|
# storageClass: "-"
|
|
|
|
## pushgateway data Persistent Volume Binding Mode
|
|
## If defined, volumeBindingMode: <volumeBindingMode>
|
|
## If undefined (the default) or set to null, no volumeBindingMode spec is
|
|
## set, choosing the default mode.
|
|
##
|
|
# volumeBindingMode: ""
|
|
|
|
## Subdirectory of pushgateway data Persistent Volume to mount
|
|
## Useful if the volume's root directory is not empty
|
|
##
|
|
subPath: ""
|
|
|
|
|
|
## Files rendered into the alertmanager ConfigMap.
## Values written as `#@ ...` are filled in by ytt from the data values.
##
alertmanagerFiles:
  alertmanager.yml:
    global:
      slack_api_url: #@ data.values.monitoring.prometheus.alertmanager.slack.apiUrl

    ## Every alert is routed to the single receiver defined below.
    route:
      receiver: gerrit-admin
      group_wait: 10s
      group_interval: 5m
      repeat_interval: 3h

    receivers:
      - name: gerrit-admin
        slack_configs:
          - channel: #@ data.values.monitoring.prometheus.alertmanager.slack.channel
            send_resolved: true
            ## Title and text list the summary / description annotation of
            ## each alert, one per line.
            title: "{{ range .Alerts }}{{ .Annotations.summary }}\n{{ end }}"
            text: "{{ range .Alerts }}{{ .Annotations.description }}\n{{ end }}"
|
|
|
|
## Files rendered into the Prometheus server ConfigMap.
## Lines starting with `#@` are ytt directives evaluated when the template
## is rendered; they must be kept exactly as written.
##
serverFiles:

  ## Alerting rules
  ## Ref: https://prometheus.io/docs/prometheus/latest/configuration/alerting_rules/
  alerting_rules.yml:
    groups:
      - name: Instances
        rules:
          ## A target has reported `up == 0` for 3 minutes.
          - alert: InstanceDown
            expr: up == 0
            for: 3m
            labels:
              severity: critical
            annotations:
              summary: "{{ $labels.instance }} down"
              description: "{{ $labels.instance }} has been unresponsive for 3 min."

          ## The deadlocked-thread gauge has been above zero for 1 minute.
          - alert: ThreadDeadlock
            expr: proc_jvm_thread_num_deadlocked_threads > 0
            for: 1m
            labels:
              severity: critical
            annotations:
              summary: "{{ $labels.instance }} deadlocks"
              description: "{{ $labels.instance }} has deadlocked threads."

          ## More than 200 scheduled SSH batch worker tasks for 5 minutes.
          - alert: SSHBatchUserScheduledTasks
            expr: queue_ssh_batch_worker_scheduled_tasks > 200
            for: 5m
            labels:
              severity: warning
            annotations:
              summary: "{{ $labels.instance }}: High SSH batch user workload"
              description: >
                More than 200 scheduled tasks for SSH batch workers on
                {{ $labels.instance }} for 5 min.

          ## More than 200 scheduled SSH interactive worker tasks for 5 minutes.
          - alert: SSHInteractiveUserScheduledTasks
            expr: queue_ssh_interactive_worker_scheduled_tasks > 200
            for: 5m
            labels:
              severity: warning
            annotations:
              summary: "{{ $labels.instance }}: High SSH interactive user workload"
              description: >
                More than 200 scheduled tasks for SSH interactive workers on
                {{ $labels.instance }} for 5 min.

  ## DEPRECATED DEFAULT VALUE: unless you explicitly name your files, use
  ## alerting_rules.yml instead.
  alerts: {}

  ## Recording rules
  ## Ref: https://prometheus.io/docs/prometheus/latest/configuration/recording_rules/
  recording_rules.yml: {}

  ## DEPRECATED DEFAULT VALUE: unless you explicitly name your files, use
  ## recording_rules.yml instead.
  rules: {}

  prometheus.yml:
    rule_files:
      - /etc/config/recording_rules.yml
      - /etc/config/alerting_rules.yml
      ## The two files below are DEPRECATED and will be removed from this
      ## default values file.
      - /etc/config/rules
      - /etc/config/alerts

    scrape_configs:
      ## One job per entry in gerritServers.kubernetes. Pods are discovered
      ## in the entry's namespace and kept only if the configured pod label,
      ## container name and container port all match.
      #@ for gerrit in data.values.gerritServers.kubernetes:
      - job_name: #@ "gerrit-{}".format(gerrit.namespace)
        scheme: http
        metrics_path: /a/plugins/metrics-reporter-prometheus/metrics
        basic_auth:
          username: #@ gerrit.username
          password_file: #@ "/etc/secrets/.pwd_{}".format(gerrit.namespace)

        kubernetes_sd_configs:
          - role: pod
            namespaces:
              names:
                - #@ gerrit.namespace

        relabel_configs:
          - action: keep
            source_labels:
              - #@ "__meta_kubernetes_pod_label_{}".format(gerrit.label.name)
            regex: #@ gerrit.label.value
          - action: keep
            source_labels:
              - __meta_kubernetes_pod_container_name
            regex: #@ gerrit.containerName
          - action: keep
            source_labels:
              - __meta_kubernetes_pod_container_port_number
            regex: #@ gerrit.port
          ## instance = "kubernetes_<namespace>"
          - action: replace
            source_labels:
              - __meta_kubernetes_namespace
            regex: (.*)
            target_label: instance
            replacement: kubernetes_$1
          ## replica = pod name
          - action: replace
            source_labels:
              - __meta_kubernetes_pod_name
            regex: (.*)
            target_label: replica
            replacement: $1
      #@ end
      ## One job per entry in gerritServers.federatedPrometheus. Scrapes the
      ## /federate endpoint of that Prometheus for series whose `instance`
      ## label matches "<host>.*".
      #@ for prometheus in data.values.gerritServers.federatedPrometheus:
      - job_name: #@ "gerrit-{}".format(prometheus.host)
        honor_labels: true
        scheme: https
        metrics_path: /federate
        params:
          "match[]":
            - #@ '{{instance=~"{}.*"}}'.format(prometheus.host)
        tls_config:
          insecure_skip_verify: #@ data.values.tls.skipVerify
          #@ if not data.values.tls.skipVerify:
          ca_file: /etc/secrets/server.ca.crt
          cert_file: /etc/secrets/server.crt
          key_file: /etc/secrets/server.key
          #@ end
        basic_auth:
          username: #@ prometheus.username
          password_file: #@ "/etc/secrets/.pwd_{}".format(prometheus.host)
        static_configs:
          - targets:
              - #@ '{}:{}'.format(prometheus.host, prometheus.port)
      #@ end
      ## One job per entry in gerritServers.other. The host is scraped
      ## directly over https and labelled replica=main.
      #@ for gerrit in data.values.gerritServers.other:
      - job_name: #@ "gerrit-{}".format(gerrit.host)
        scheme: https
        metrics_path: /a/plugins/metrics-reporter-prometheus/metrics
        tls_config:
          insecure_skip_verify: #@ data.values.tls.skipVerify
          #@ if not data.values.tls.skipVerify:
          ca_file: /etc/secrets/server.ca.crt
          cert_file: /etc/secrets/server.crt
          key_file: /etc/secrets/server.key
          #@ end
        basic_auth:
          username: #@ gerrit.username
          password_file: #@ "/etc/secrets/.pwd_{}".format(gerrit.host)
        static_configs:
          - targets:
              - #@ gerrit.host
            labels:
              replica: main
      #@ end
      ## Prometheus scraping itself; instance is rewritten to
      ## "prometheus-<namespace>".
      - job_name: prometheus
        static_configs:
          - targets:
              - localhost:9090
        relabel_configs:
          - source_labels:
              - __address__
            regex: '.*'
            target_label: instance
            replacement: #@ "prometheus-{}".format(data.values.namespace)
      ## Loki service in the same namespace; instance is rewritten to
      ## "loki-<namespace>".
      - job_name: loki
        static_configs:
          - targets:
              - #@ "loki-{}.{}.svc.cluster.local:3100".format(data.values.namespace, data.values.namespace)
        relabel_configs:
          - source_labels:
              - __address__
            regex: '.*'
            target_label: instance
            replacement: #@ "loki-{}".format(data.values.namespace)
|
|
|
|
# # A scrape configuration for running Prometheus on a Kubernetes cluster.
|
|
# # This uses separate scrape configs for cluster components (i.e. API server, node)
|
|
# # and services to allow each to use different authentication configs.
|
|
# #
|
|
# # Kubernetes labels will be added as Prometheus labels on metrics via the
|
|
# # `labelmap` relabeling action.
|
|
# # Scrape config for API servers.
|
|
# #
|
|
# # Kubernetes exposes API servers as endpoints to the default/kubernetes
|
|
# # service so this uses `endpoints` role and uses relabelling to only keep
|
|
# # the endpoints associated with the default/kubernetes service using the
|
|
# # default named port `https`. This works for single API server deployments as
|
|
# # well as HA API server deployments.
|
|
# - job_name: 'kubernetes-apiservers'
|
|
# kubernetes_sd_configs:
|
|
# - role: endpoints
|
|
# # Default to scraping over https. If required, just disable this or change to
|
|
# # `http`.
|
|
# scheme: https
|
|
# # This TLS & bearer token file config is used to connect to the actual scrape
|
|
# # endpoints for cluster components. This is separate to discovery auth
|
|
# # configuration because discovery & scraping are two separate concerns in
|
|
# # Prometheus. The discovery auth config is automatic if Prometheus runs inside
|
|
# # the cluster. Otherwise, more config options have to be provided within the
|
|
# # <kubernetes_sd_config>.
|
|
# tls_config:
|
|
# ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
|
|
# # If your node certificates are self-signed or use a different CA to the
|
|
# # master CA, then disable certificate verification below. Note that
|
|
# # certificate verification is an integral part of a secure infrastructure
|
|
# # so this should only be disabled in a controlled environment. You can
|
|
# # disable certificate verification by uncommenting the line below.
|
|
# #
|
|
# insecure_skip_verify: true
|
|
# bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
|
|
# # Keep only the default/kubernetes service endpoints for the https port. This
|
|
# # will add targets for each API server which Kubernetes adds an endpoint to
|
|
# # the default/kubernetes service.
|
|
# relabel_configs:
|
|
# - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
|
|
# action: keep
|
|
# regex: default;kubernetes;https
|
|
# - job_name: 'kubernetes-nodes'
|
|
# # Default to scraping over https. If required, just disable this or change to
|
|
# # `http`.
|
|
# scheme: https
|
|
# # This TLS & bearer token file config is used to connect to the actual scrape
|
|
# # endpoints for cluster components. This is separate to discovery auth
|
|
# # configuration because discovery & scraping are two separate concerns in
|
|
# # Prometheus. The discovery auth config is automatic if Prometheus runs inside
|
|
# # the cluster. Otherwise, more config options have to be provided within the
|
|
# # <kubernetes_sd_config>.
|
|
# tls_config:
|
|
# ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
|
|
# # If your node certificates are self-signed or use a different CA to the
|
|
# # master CA, then disable certificate verification below. Note that
|
|
# # certificate verification is an integral part of a secure infrastructure
|
|
# # so this should only be disabled in a controlled environment. You can
|
|
# # disable certificate verification by uncommenting the line below.
|
|
# #
|
|
# insecure_skip_verify: true
|
|
# bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
|
|
# kubernetes_sd_configs:
|
|
# - role: node
|
|
# relabel_configs:
|
|
# - action: labelmap
|
|
# regex: __meta_kubernetes_node_label_(.+)
|
|
# - target_label: __address__
|
|
# replacement: kubernetes.default.svc:443
|
|
# - source_labels: [__meta_kubernetes_node_name]
|
|
# regex: (.+)
|
|
# target_label: __metrics_path__
|
|
# replacement: /api/v1/nodes/$1/proxy/metrics
|
|
# - job_name: 'kubernetes-nodes-cadvisor'
|
|
# # Default to scraping over https. If required, just disable this or change to
|
|
# # `http`.
|
|
# scheme: https
|
|
# # This TLS & bearer token file config is used to connect to the actual scrape
|
|
# # endpoints for cluster components. This is separate to discovery auth
|
|
# # configuration because discovery & scraping are two separate concerns in
|
|
# # Prometheus. The discovery auth config is automatic if Prometheus runs inside
|
|
# # the cluster. Otherwise, more config options have to be provided within the
|
|
# # <kubernetes_sd_config>.
|
|
# tls_config:
|
|
# ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
|
|
# # If your node certificates are self-signed or use a different CA to the
|
|
# # master CA, then disable certificate verification below. Note that
|
|
# # certificate verification is an integral part of a secure infrastructure
|
|
# # so this should only be disabled in a controlled environment. You can
|
|
# # disable certificate verification by uncommenting the line below.
|
|
# #
|
|
# insecure_skip_verify: true
|
|
# bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
|
|
# kubernetes_sd_configs:
|
|
# - role: node
|
|
# # This configuration will work only on kubelet 1.7.3+
|
|
# # As the scrape endpoints for cAdvisor have changed
|
|
# # if you are using older version you need to change the replacement to
|
|
# # replacement: /api/v1/nodes/$1:4194/proxy/metrics
|
|
# # more info here https://github.com/coreos/prometheus-operator/issues/633
|
|
# relabel_configs:
|
|
# - action: labelmap
|
|
# regex: __meta_kubernetes_node_label_(.+)
|
|
# - target_label: __address__
|
|
# replacement: kubernetes.default.svc:443
|
|
# - source_labels: [__meta_kubernetes_node_name]
|
|
# regex: (.+)
|
|
# target_label: __metrics_path__
|
|
# replacement: /api/v1/nodes/$1/proxy/metrics/cadvisor
|
|
# # Scrape config for service endpoints.
|
|
# #
|
|
# # The relabeling allows the actual service scrape endpoint to be configured
|
|
# # via the following annotations:
|
|
# #
|
|
# # * `prometheus.io/scrape`: Only scrape services that have a value of `true`
|
|
# # * `prometheus.io/scheme`: If the metrics endpoint is secured then you will need
|
|
# # to set this to `https` & most likely set the `tls_config` of the scrape config.
|
|
# # * `prometheus.io/path`: If the metrics path is not `/metrics` override this.
|
|
# # * `prometheus.io/port`: If the metrics are exposed on a different port to the
|
|
# # service then set this appropriately.
|
|
# - job_name: 'kubernetes-service-endpoints'
|
|
# kubernetes_sd_configs:
|
|
# - role: endpoints
|
|
# relabel_configs:
|
|
# - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
|
|
# action: keep
|
|
# regex: true
|
|
# - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
|
|
# action: replace
|
|
# target_label: __scheme__
|
|
# regex: (https?)
|
|
# - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
|
|
# action: replace
|
|
# target_label: __metrics_path__
|
|
# regex: (.+)
|
|
# - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
|
|
# action: replace
|
|
# target_label: __address__
|
|
# regex: ([^:]+)(?::\d+)?;(\d+)
|
|
# replacement: $1:$2
|
|
# - action: labelmap
|
|
# regex: __meta_kubernetes_service_label_(.+)
|
|
# - source_labels: [__meta_kubernetes_namespace]
|
|
# action: replace
|
|
# target_label: kubernetes_namespace
|
|
# - source_labels: [__meta_kubernetes_service_name]
|
|
# action: replace
|
|
# target_label: kubernetes_name
|
|
# - source_labels: [__meta_kubernetes_pod_node_name]
|
|
# action: replace
|
|
# target_label: kubernetes_node
|
|
# # Scrape config for slow service endpoints; same as above, but with a larger
|
|
# # timeout and a larger interval
|
|
# #
|
|
# # The relabeling allows the actual service scrape endpoint to be configured
|
|
# # via the following annotations:
|
|
# #
|
|
# # * `prometheus.io/scrape-slow`: Only scrape services that have a value of `true`
|
|
# # * `prometheus.io/scheme`: If the metrics endpoint is secured then you will need
|
|
# # to set this to `https` & most likely set the `tls_config` of the scrape config.
|
|
# # * `prometheus.io/path`: If the metrics path is not `/metrics` override this.
|
|
# # * `prometheus.io/port`: If the metrics are exposed on a different port to the
|
|
# # service then set this appropriately.
|
|
# - job_name: 'kubernetes-service-endpoints-slow'
|
|
# scrape_interval: 5m
|
|
# scrape_timeout: 30s
|
|
# kubernetes_sd_configs:
|
|
# - role: endpoints
|
|
# relabel_configs:
|
|
# - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape_slow]
|
|
# action: keep
|
|
# regex: true
|
|
# - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
|
|
# action: replace
|
|
# target_label: __scheme__
|
|
# regex: (https?)
|
|
# - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
|
|
# action: replace
|
|
# target_label: __metrics_path__
|
|
# regex: (.+)
|
|
# - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
|
|
# action: replace
|
|
# target_label: __address__
|
|
# regex: ([^:]+)(?::\d+)?;(\d+)
|
|
# replacement: $1:$2
|
|
# - action: labelmap
|
|
# regex: __meta_kubernetes_service_label_(.+)
|
|
# - source_labels: [__meta_kubernetes_namespace]
|
|
# action: replace
|
|
# target_label: kubernetes_namespace
|
|
# - source_labels: [__meta_kubernetes_service_name]
|
|
# action: replace
|
|
# target_label: kubernetes_name
|
|
# - source_labels: [__meta_kubernetes_pod_node_name]
|
|
# action: replace
|
|
# target_label: kubernetes_node
|
|
# - job_name: 'prometheus-pushgateway'
|
|
# honor_labels: true
|
|
# kubernetes_sd_configs:
|
|
# - role: service
|
|
# relabel_configs:
|
|
# - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_probe]
|
|
# action: keep
|
|
# regex: pushgateway
|
|
# # Example scrape config for probing services via the Blackbox Exporter.
|
|
# #
|
|
# # The relabeling allows the actual service scrape endpoint to be configured
|
|
# # via the following annotations:
|
|
# #
|
|
# # * `prometheus.io/probe`: Only probe services that have a value of `true`
|
|
# - job_name: 'kubernetes-services'
|
|
# metrics_path: /probe
|
|
# params:
|
|
# module: [http_2xx]
|
|
# kubernetes_sd_configs:
|
|
# - role: service
|
|
# relabel_configs:
|
|
# - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_probe]
|
|
# action: keep
|
|
# regex: true
|
|
# - source_labels: [__address__]
|
|
# target_label: __param_target
|
|
# - target_label: __address__
|
|
# replacement: blackbox
|
|
# - source_labels: [__param_target]
|
|
# target_label: instance
|
|
# - action: labelmap
|
|
# regex: __meta_kubernetes_service_label_(.+)
|
|
# - source_labels: [__meta_kubernetes_namespace]
|
|
# target_label: kubernetes_namespace
|
|
# - source_labels: [__meta_kubernetes_service_name]
|
|
# target_label: kubernetes_name
|
|
# # Example scrape config for pods
|
|
# #
|
|
# # The relabeling allows the actual pod scrape endpoint to be configured via the
|
|
# # following annotations:
|
|
# #
|
|
# # * `prometheus.io/scrape`: Only scrape pods that have a value of `true`
|
|
# # * `prometheus.io/path`: If the metrics path is not `/metrics` override this.
|
|
# # * `prometheus.io/port`: Scrape the pod on the indicated port instead of the default of `9102`.
|
|
# - job_name: 'kubernetes-pods'
|
|
# kubernetes_sd_configs:
|
|
# - role: pod
|
|
# relabel_configs:
|
|
# - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
|
|
# action: keep
|
|
# regex: true
|
|
# - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
|
|
# action: replace
|
|
# target_label: __metrics_path__
|
|
# regex: (.+)
|
|
# - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
|
|
# action: replace
|
|
# regex: ([^:]+)(?::\d+)?;(\d+)
|
|
# replacement: $1:$2
|
|
# target_label: __address__
|
|
# - action: labelmap
|
|
# regex: __meta_kubernetes_pod_label_(.+)
|
|
# - source_labels: [__meta_kubernetes_namespace]
|
|
# action: replace
|
|
# target_label: kubernetes_namespace
|
|
# - source_labels: [__meta_kubernetes_pod_name]
|
|
# action: replace
|
|
# target_label: kubernetes_pod_name
|
|
# - source_labels: [__meta_kubernetes_pod_phase]
|
|
# regex: Pending|Succeeded|Failed
|
|
# action: drop
|
|
# # Example Scrape config for pods which should be scraped slower. A useful example
|
|
# # would be stackdriver-exporter which queries an API on every scrape of the pod
|
|
# #
|
|
# # The relabeling allows the actual pod scrape endpoint to be configured via the
|
|
# # following annotations:
|
|
# #
|
|
# # * `prometheus.io/scrape-slow`: Only scrape pods that have a value of `true`
|
|
# # * `prometheus.io/path`: If the metrics path is not `/metrics` override this.
|
|
# # * `prometheus.io/port`: Scrape the pod on the indicated port instead of the default of `9102`.
|
|
# - job_name: 'kubernetes-pods-slow'
|
|
# scrape_interval: 5m
|
|
# scrape_timeout: 30s
|
|
# kubernetes_sd_configs:
|
|
# - role: pod
|
|
# relabel_configs:
|
|
# - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape_slow]
|
|
# action: keep
|
|
# regex: true
|
|
# - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
|
|
# action: replace
|
|
# target_label: __metrics_path__
|
|
# regex: (.+)
|
|
# - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
|
|
# action: replace
|
|
# regex: ([^:]+)(?::\d+)?;(\d+)
|
|
# replacement: $1:$2
|
|
# target_label: __address__
|
|
# - action: labelmap
|
|
# regex: __meta_kubernetes_pod_label_(.+)
|
|
# - source_labels: [__meta_kubernetes_namespace]
|
|
# action: replace
|
|
# target_label: kubernetes_namespace
|
|
# - source_labels: [__meta_kubernetes_pod_name]
|
|
# action: replace
|
|
# target_label: kubernetes_pod_name
|
|
# - source_labels: [__meta_kubernetes_pod_phase]
|
|
# regex: Pending|Succeeded|Failed
|
|
# action: drop
|
|
|
|
## Additional scrape configs to add to prometheus.yml.
## The value must be a string: when setting it, put a `|` after
## `extraScrapeConfigs:` and write the scrape configs as a block scalar.
## The default is an explicit empty string (nothing is added) rather than an
## implicit null, so the value's type matches what is documented above.
## The commented example adds a prometheus-blackbox-exporter scrape config.
extraScrapeConfigs: ""
  # - job_name: 'prometheus-blackbox-exporter'
  #   metrics_path: /probe
  #   params:
  #     module: [http_2xx]
  #   static_configs:
  #     - targets:
  #       - https://example.com
  #   relabel_configs:
  #     - source_labels: [__address__]
  #       target_label: __param_target
  #     - source_labels: [__param_target]
  #       target_label: instance
  #     - target_label: __address__
  #       replacement: prometheus-blackbox-exporter:9115
|
|
|
|
## Optional alert_relabel_configs, used to avoid duplicate alerts in
## alertmanager. Useful in H/A Prometheus setups that use different external
## labels but the same alerts.
## The default is an explicit empty mapping instead of an implicit null,
## consistent with how other empty maps are written in this file.
alertRelabelConfigs: {}
  # alert_relabel_configs:
  #   - source_labels: [dc]
  #     regex: (.+)\d+
  #     target_label: dc
|
|
|
|
## NetworkPolicy settings
##
networkPolicy:
  ## If true, NetworkPolicy resources are created.
  enabled: true
|
|
|
|
## Namespace to force onto namespaced resources.
## null means no namespace is forced by this values file.
forceNamespace: null
|