[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[taler-grid5k] 91/189: split up proxy monitoring
From: |
gnunet |
Subject: |
[taler-grid5k] 91/189: split up proxy monitoring |
Date: |
Thu, 28 Apr 2022 10:47:41 +0200 |
This is an automated email from the git hooks/post-receive script.
marco-boss pushed a commit to branch master
in repository grid5k.
commit c72b8f16dc2e0a1b51077c599a7c6d7cd7a72b7f
Author: Boss Marco <bossm8@bfh.ch>
AuthorDate: Thu Mar 17 21:55:47 2022 +0100
split up proxy monitoring
---
additional/plots/config.yaml | 3 +--
configs/etc/default/prometheus-nginx-exporter | 2 +-
configs/etc/monitor/node-exporters.yaml.tpl | 2 +-
configs/etc/monitor/prometheus.yaml | 5 ++++-
experiment/scripts/database.sh | 6 +++---
experiment/scripts/helpers.sh | 15 ++++++++++-----
experiment/scripts/monitor.sh | 25 +++++++++++++++++++++++--
experiment/scripts/proxy.sh | 3 ++-
experiment/scripts/run.sh | 4 ++++
9 files changed, 49 insertions(+), 16 deletions(-)
diff --git a/additional/plots/config.yaml b/additional/plots/config.yaml
index 9b063a1..eafcd3a 100644
--- a/additional/plots/config.yaml
+++ b/additional/plots/config.yaml
@@ -33,7 +33,7 @@ dashboards:
height: 400
- uid: rYdddlPWk # Nodes
variables: ['node']
- ignore: 'wallet*|monitor*|bank*|merch*|ns*|proxy*'
+ ignore: 'monitor*|bank*|merch*|ns*|wallet*'
graph:
width: 1200
height: 600
@@ -48,4 +48,3 @@ dashboards:
prometheus:
node_exporter_job_name: nodes
-
diff --git a/configs/etc/default/prometheus-nginx-exporter b/configs/etc/default/prometheus-nginx-exporter
index 57da070..7cf9221 100644
--- a/configs/etc/default/prometheus-nginx-exporter
+++ b/configs/etc/default/prometheus-nginx-exporter
@@ -1,4 +1,4 @@
-ARGS="-nginx.scrape-uri <PROXY_URL_HERE>"
+ARGS="-nginx.scrape-uri http://localhost:80/stub_status"
# Prometheus-nginx-exporter supports the following options:
# -nginx.plus
diff --git a/configs/etc/monitor/node-exporters.yaml.tpl b/configs/etc/monitor/node-exporters.yaml.tpl
index fe8466b..2bcdc08 100644
--- a/configs/etc/monitor/node-exporters.yaml.tpl
+++ b/configs/etc/monitor/node-exporters.yaml.tpl
@@ -35,7 +35,7 @@
- labels:
component: 'proxy'
targets:
- - '${PROXY_DOMAIN}:9100'
+ # <PROXY_NODES_HERE>
- labels:
component: 'wallet'
targets:
diff --git a/configs/etc/monitor/prometheus.yaml b/configs/etc/monitor/prometheus.yaml
index 28072c5..45dca88 100644
--- a/configs/etc/monitor/prometheus.yaml
+++ b/configs/etc/monitor/prometheus.yaml
@@ -19,7 +19,10 @@ scrape_configs:
# Exchange Proxy Exporter
- job_name: 'exchange-proxy'
static_configs:
- - targets: ['127.0.0.1:9113']
+ - labels:
+ component: 'nginx'
+ targets:
+ # <NGINX_EXPORTERS_HERE>
# Promtail with custom metrics
- job_name: 'promtail'
diff --git a/experiment/scripts/database.sh b/experiment/scripts/database.sh
index 3dd376f..7ca125d 100755
--- a/experiment/scripts/database.sh
+++ b/experiment/scripts/database.sh
@@ -130,11 +130,11 @@ function setup_config() {
# Too much results in CPU load
# https://www.postgresql.org/docs/13/runtime-config-resource.html#GUC-EFFECTIVE-IO-CONCURRENCY
- effective_io_concurrency = 100
+ effective_io_concurrency = 200
# Bad when turned off - Recovering db may not be possible
# https://www.postgresql.org/docs/13/runtime-config-wal.html#GUC-FSYNC
- fsync = on
+ fsync = off
# Not so bad as when turning off fsync, but single transactions might get lost on crash - but
# like they would have aborted cleanly
@@ -315,7 +315,7 @@ EOF
case ${1} in
init)
setup_config
- # setup_disks
+ setup_disks
# setup_ram_storage
init_db
setup_pgbouncer
diff --git a/experiment/scripts/helpers.sh b/experiment/scripts/helpers.sh
index 99516f6..369395f 100755
--- a/experiment/scripts/helpers.sh
+++ b/experiment/scripts/helpers.sh
@@ -192,11 +192,12 @@ function stop_numbered_services() {
# Get all Grid5000 hosts which are registered under $1
# Returns only the Grid5000 node - e.g. graoully-1
-# $1: the domain, containing a wildcard at position 2 e.g. x.*.y
+# $1: the domain, containing a wildcard at position $2 e.g. x.*.y
+# $2: the 1-based, dot-separated field index of the wildcard (passed to cut -f)
function get_hosts() {
IFS=$'\n' read -r -d '' -a HOSTS < <(\
dig -t AXFR "${DNS_ZONE}" "@${DNS_HOSTS}" \
- | grep ${1} | awk '{print $1}' | cut -d '.' -f 2 \
+ | grep ${1} | awk '{print $1}' | cut -d '.' -f $2 \
)
echo ${HOSTS[@]}
}
@@ -204,19 +205,23 @@ function get_hosts() {
# Get all Grid5000 hosts which host wallets
# Returns only the Grid5000 node - e.g. graoully-1
function get_wallet_hosts() {
- get_hosts "${WALLET_DOMAIN}"
+ get_hosts "${WALLET_DOMAIN}" 2
}
# Get all Grid5000 hosts which host shards
# Returns only the Grid5000 node - e.g. graoully-1
function get_shard_hosts() {
- get_hosts "${SHARD_DOMAIN}"
+ get_hosts "${SHARD_DOMAIN}" 2
}
# Get all Grid5000 hosts which host secondary exchanges
# Returns only the Grid5000 node - e.g. graoully-1
function get_exchange_hosts() {
- get_hosts "${EXCHANGE_DOMAIN}"
+ get_hosts "${EXCHANGE_DOMAIN}" 2
+}
+
+function get_proxy_hosts() {
+ get_hosts "*.${PROXY_DOMAIN}" 1
}
# Display a help message and exit
diff --git a/experiment/scripts/monitor.sh b/experiment/scripts/monitor.sh
index 5d79980..0c73e25 100755
--- a/experiment/scripts/monitor.sh
+++ b/experiment/scripts/monitor.sh
@@ -90,6 +90,18 @@ function add_wallet_nodes_to_prometheus() {
done
}
+# Add proxy servers to be monitored
+# Requires no argument, since servers are retrieved from dns
+function add_proxy_nodes_to_prometheus() {
+ for HOST in $(get_proxy_hosts); do
+ if ! grep -q "${HOST}.${PROXY_DOMAIN}:9100" /etc/monitor/prometheus.yaml;
+ then
+ sed -i "/<PROXY_NODES_HERE/a \ \ \ \ \ \ -
'${HOST}.${PROXY_DOMAIN}:9100'" \
+ /etc/monitor/prometheus.yaml
+ fi
+ done
+}
+
# Add shard servers to be monitored
# Requires no argument, since servers are retrieved from dns
function add_shard_nodes_to_prometheus() {
@@ -174,16 +186,25 @@ function init_monitor() {
envsubst >> /etc/monitor/prometheus.yaml
add_wallet_nodes_to_prometheus
add_shard_nodes_to_prometheus
+ add_proxy_nodes_to_prometheus
add_exchange_nodes_to_prometheus
fi
fi
+ # Add nginx exporters
+ for HOST in $(get_proxy_hosts); do
+ if ! grep -q "${HOST}.${PROXY_DOMAIN}:9113" /etc/monitor/prometheus.yaml;
+ then
+ sed -i "/<NGINX_EXPORTERS_HERE>/a \ \ \ \ \ \ -
'${HOST}.${PROXY_DOMAIN}:9113'" \
+ /etc/monitor/prometheus.yaml
+ fi
+ done
+
add_exchanges_to_prometheus "0" ${NUM_EXCHANGE_PROCESSES}
configure_prometheus_and_exporters
- systemctl restart prometheus-nginx-exporter \
- prometheus-postgres-exporter \
+ systemctl restart prometheus-postgres-exporter \
prometheus
exit 0
diff --git a/experiment/scripts/proxy.sh b/experiment/scripts/proxy.sh
index 1642981..e159278 100755
--- a/experiment/scripts/proxy.sh
+++ b/experiment/scripts/proxy.sh
@@ -92,7 +92,8 @@ function init_proxy() {
# nginx: [emerg] host not found in upstream "exch.perf.taler" ...
wait_for_keys "${PRIMARY_EXCHANGE}:10000"
- systemctl restart nginx
+ systemctl restart nginx \
+ prometheus-nginx-exporter
}
# Remove N exchanges from the upstream list
diff --git a/experiment/scripts/run.sh b/experiment/scripts/run.sh
index e905587..e8417c4 100644
--- a/experiment/scripts/run.sh
+++ b/experiment/scripts/run.sh
@@ -79,7 +79,11 @@ elif [[ "${HOSTNAME}" =~ ${MONITOR_HOSTS} ]]; then
exec ~/scripts/monitor.sh init
elif [[ "${HOSTNAME}" =~ ${PROXY_HOSTS} ]]; then
set_host ${NODE_NAME}
+ # Single domain for DNS load balancing
set_ddn ${PROXY_DOMAIN}
+ # We need a second domain for monitoring to
+ # be able to get information about all proxies
+ set_ddn ${G5K_HOST}.${PROXY_DOMAIN}
enable_netdelay ${PRIMARY_EXCHANGE}
setup_log
enable_logrotate
--
To stop receiving notification emails like this one, please contact
gnunet@gnunet.org.
- [taler-grid5k] 180/189: add centos db initialization script, (continued)
- [taler-grid5k] 180/189: add centos db initialization script, gnunet, 2022/04/28
- [taler-grid5k] 136/189: update dashboards, gnunet, 2022/04/28
- [taler-grid5k] 163/189: update centos, gnunet, 2022/04/28
- [taler-grid5k] 165/189: remove --no-install-recommends, gnunet, 2022/04/28
- [taler-grid5k] 174/189: add mitigations=off in grid5000 base.yaml as it gets overriden, gnunet, 2022/04/28
- [taler-grid5k] 183/189: fix promtail, gnunet, 2022/04/28
- [taler-grid5k] 184/189: add documentation, gnunet, 2022/04/28
- [taler-grid5k] 138/189: possibility to start more wirewatchers, gnunet, 2022/04/28
- [taler-grid5k] 172/189: fix bind package in centos, gnunet, 2022/04/28
- [taler-grid5k] 178/189: kernel_params used to disable mitigations, gnunet, 2022/04/28
- [taler-grid5k] 91/189: split up proxy monitoring,
gnunet <=
- [taler-grid5k] 167/189: add g5k commit sha again, gnunet, 2022/04/28
- [taler-grid5k] 153/189: show that process is running in persist.sh for long during processes, gnunet, 2022/04/28
- [taler-grid5k] 143/189: fixes, changes, gnunet, 2022/04/28
- [taler-grid5k] 161/189: add centos image, gnunet, 2022/04/28
- [taler-grid5k] 131/189: fix backup script, gnunet, 2022/04/28
- [taler-grid5k] 127/189: adjust database logs, gnunet, 2022/04/28
- [taler-grid5k] 166/189: use centos for db only, gnunet, 2022/04/28
- [taler-grid5k] 141/189: update recovery script, gnunet, 2022/04/28
- [taler-grid5k] 160/189: update image, disable mitigations, gnunet, 2022/04/28
- [taler-grid5k] 154/189: more dynamic rsyslog ports, add some info in setup.sh, gnunet, 2022/04/28