From 42e6480f6602c2baede7237bb68852bce1816135 Mon Sep 17 00:00:00 2001 From: Vchanger Date: Tue, 16 May 2023 14:28:59 +0800 Subject: [PATCH] Add deployment tool and support online/offline mode --- deploy/README.en.md | 36 + deploy/README.md | 157 ++++ deploy/arangodb2es.py | 318 +++++++++ deploy/deploy.sh | 1223 ++++++++++++++++++++++++++++++++ deploy/download_offline_res.sh | 331 +++++++++ 5 files changed, 2065 insertions(+) create mode 100644 deploy/README.en.md create mode 100644 deploy/README.md create mode 100644 deploy/arangodb2es.py create mode 100755 deploy/deploy.sh create mode 100755 deploy/download_offline_res.sh diff --git a/deploy/README.en.md b/deploy/README.en.md new file mode 100644 index 0000000..dbd68e3 --- /dev/null +++ b/deploy/README.en.md @@ -0,0 +1,36 @@ +# A-Ops-Tools + +#### Description +A-Ops测试部署脚本仓库 + +#### Software Architecture +Software architecture description + +#### Installation + +1. xxxx +2. xxxx +3. xxxx + +#### Instructions + +1. xxxx +2. xxxx +3. xxxx + +#### Contribution + +1. Fork the repository +2. Create Feat_xxx branch +3. Commit your code +4. Create Pull Request + + +#### Gitee Feature + +1. You can use Readme\_XXX.md to support different languages, such as Readme\_en.md, Readme\_zh.md +2. Gitee blog [blog.gitee.com](https://blog.gitee.com) +3. Explore open source project [https://gitee.com/explore](https://gitee.com/explore) +4. The most valuable open source project [GVP](https://gitee.com/gvp) +5. The manual of Gitee [https://gitee.com/help](https://gitee.com/help) +6. The most popular members [https://gitee.com/gitee-stars/](https://gitee.com/gitee-stars/) diff --git a/deploy/README.md b/deploy/README.md new file mode 100644 index 0000000..fddcf51 --- /dev/null +++ b/deploy/README.md @@ -0,0 +1,157 @@ +# Gala-Deploy-Tools + +## 介绍 +GALA组件部署工具仓,支持快速部署gala-gopher、gala-ops(gala-spider/gala-inference/gala-anteater)组件、openGauss服务端、kafka/prometheus/arangodb/es/logstash中间件、grafana/前端页面展示相关组件,并同时支持离线/在线部署两种模式。 + +## 约束限制 + +1. 
当前本工具仅支持x86架构与如下OS版本:openEuler 20.03 LTS SP1、openEuler 22.03 LTS、openEuler 22.03 LTS SP1、Kylin V10 +2. 在线部署模式下,本工具运行过程中会从openEuler repo源安装rpm或者从外网下载源码资源,因此内网环境在使用工具前需要提前配置好代理,便于访问外网环境,工具使用结束后建议将代理取消。 +3. gala-gopher、gala-ops组件支持rpm包部署以及容器部署两种方式。在线部署模式下,rpm包部署方式仅支持openEuler 22.03 LTS/openEuler 22.03 LTS SP1 两个版本;离线部署模式下,gala-gopher组件仅支持rpm包部署方式,gala-ops组件在除开openEuler 22.03 LTS/openEuler 22.03 LTS SP1的版本上仅支持容器部署方式。 + +## 环境准备说明 + +准备至少两台符合OS版本与架构要求(见约束限制1)的机器(物理机、虚拟机均可)并保证机器间网络可以正常连通(在线部署模式下需要连接外网),机器规格建议至少为4U8G。 + +- 机器A:**生产节点**,即需要监控运维的目标节点,上面一般运行着业务进程(如数据库、redis、Java因应用),用于部署高保真采集组件gala-gopher。 + **注:如果有多台生产节点,则每个节点上都需要部署gala-gopher。** +- 机器B:**管理节点**,用于部署kafka等中间件以及gala的异常检测、根因定位组件。这些组件的部署相对灵活,可以准备多台管理节点分开部署,只要节点之间网络通即可。 + +## 离线部署教程 + +### 下载部署工具 + +1. 下载离线部署工具压缩包:wget https://gitee.com/openeuler/gala-docs/repository/archive/master.zip ,并将压缩包上传到待部署机器。 +2. 使用unzip解压压缩包后进入deploy目录 + +### 管理节点:部署中间件 + +当前涉及的中间件包括kafka、prometheus、arangodb、elasticsearch/logstash、pyroscope共6个组件,其中elasticsearch和logstash存在依赖关系,需要绑定部署。 + +1. #### 下载离线安装包 + +- kafka + +从Apache官网下载kafka压缩包https://downloads.apache.org/kafka/3.3.2/kafka_2.12-3.3.2.tgz + + + +- elasticsearch/logstash + + + +- pyroscope + +https://dl.pyroscope.io/release/pyroscope-0.37.2-1-x86_64.rpm + + + +执行如下命令安装、配置、启动kafka/prometheus/elasticsearch/logstash/arangodb/pyroscope服务,**-K/-P/-E/-A/-p选项支持分开使用单独部署对应组件**,其中-P用于配置prometheus服务端抓取消息的来源地址(即部署gala-gopher的生产节点)列表,每个地址之间用英文逗号分隔;elasticsearch/logstash由于存在依赖关系,通过-E选项统一控制、绑定安装。 + +```css +sh deploy.sh middleware -K -P -E -A +``` + +选项详细说明 + +| 选项 | 参数说明 | 是否必配 | +| :--------------: | :----------------------------------------------------------: | :------------------------------: | +| -K\|--kafka | 使用该选项用于部署kafka服务器,并配置指定的监听IP地址(一般来说是当前节点的管理IP)。当不使用该选项时,不部署kafka服务 | 需要部署kafka服务时为必 | +| -P\|--prometheus | 使用该选项用于部署prometheus服务器,并配置指定的抓取消息来源地址列表,每个地址之间用英文逗号分隔,地址后可以跟随“:端口号”来指定抓取端口,当不指定时,使用默认端口8888;地址前可以加上”主机名-“来标识该地址。
例如:-P 192.168.0.1,192.168.0.2:18001,vm01-192.168.0.3:18002。当不使用该选项时,不部署prometheus服务 | 需要部署prometheus服务器时为必配 | +| -A\|--arangodb | 使用该选项用于部署并启动arangodb数据库服务,该服务默认监听全IP,因此无需指定监听IP。 | 需要部署arangodb时为必配 | +| -p\|--pyroscope | 使用该选项用于部署并启动pyroscope服务,该服务默认监听全IP,因此无需指定监听IP。 | 需要部署pyroscope服务端时必配 | +| -E\|--elastic | 使用该选项用于部署elasticsearch、logstash服务,并指定logstash读取消息的elasticsearch服务器地址(一般来说是当前节点的管理IP)。当不使用该选项时,不部署elaticsearch服务 | 需要部署elasticsearch为必配 | + +### 生产节点:部署gala-gopher + +执行如下命令安装、配置、启动gala-gopher服务,在线部署模式下,gala-gopher仅支持以rpm包的方式进行安装。 + +```xml +sh deploy.sh gopher -K -p +``` + +选项详细说明: + +| 选项 | 参数说明 | 是否必配 | 离线部署是否支持 | +| :-------------: | :----------------------------------------------------------: | :------: | :--------------: | +| -K\|--kafka | 指定gala-gopher上报采集数据的目标kakfa服务器地址,当不配置该选项时,kafka服务器地址使用localhost | 否 | 是 | +| -p\|--pyroscope | 指定gala-gopher开启火焰图功能后火焰图上传到的pyroscope服务器地址(用于对接前端界面显示),当不配置该选项时,pyroscope服务器地址使用localhost | 否 | 是 | +| --docker | 指定使用容器方式部署gala-gopher,当不配置该选项时,默认采用rpm包部署方式 | 否 | 否 | + +### 管理节点:部署gala-ops + +gala-ops组件支持rpm、容器镜像两种部署方式,部署时需要指定kafka、prometheus、arangodb服务器地址,当不指定时,这些中间件的地址默认使用localhost。 + +- rpm方式(在线模式支持openEuler 22.03 LTS/openEuler 22.03 LTS SP1) + +```shell +sh deploy.sh ops -K -P -A +``` + +- 容器镜像方式: + +```css +sh deploy.sh ops -K -P -A --docker +``` + +选项详细说明: + +| 选项 | 参数说明 | 是否必配 | 离线部署是否支持 | +| :--------------: | :----------------------------------------------------------: | :------: | :--------------: | +| -K\|--kafka | 指定gala-ops读取消息的kakfa服务器地址,当不配置该选项时,kafka服务器地址使用localhost | 否 | 是 | +| -P\|--prometheus | 指定gala-ops读取消息的prometheus服务器地址,当不配置该选项时,prometheus服务器地址使用localhost | 否 | 是 | +| -A\|--arangodb | 指定gala-ops存储关系图数据的的arangodb服务器地址,当不配置该选项时,arangodb服务器地址使用localhost | 否 | 是 | +| --docker | 指定使用容器方式部署gala-ops,当不配置该选项时,默认采用rpm包部署方式 | 否 | 是 | + + + +### 管理节点:部署grafana + +grafana采用容器镜像进行部署,部署完成后可以通过浏览器访问 "http://[部署节点IP]:3000" 来登录grafana页面,默认用户名、密码均为admin。 + +执行如下命令完成部署,grafana会以容器实例方式运行。 + +```xml +sh deploy.sh grafana 
-P -p -E +``` + +选项详细说明: + +| 选项 | 参数说明 | 是否必配 | 离线部署是否支持 | +| :--------------: | :----------------------------------------------------------: | :------: | :--------------: | +| -P\|--prometheus | 指定grafana中的prometheus数据源地址,当不配置该选项时,prometheus数据源使用localhost | 否 | 是 | +| -p\|--pyroscope | 指定grafana中读取火焰图的pyroscope数据源地址,当不配置该选项时,pyroscope数据源使用localhost | 否 | 是 | +| -E\|--elastic | 指定grafana中读取异常检测、拓扑图、根因定位结果的elasticsearch数据源地址。当不使用该选项时,elasticsearch数据源使用localhost | 否 | 是 | + + + +## 在线部署教程 + +### 获取部署脚本 + +与离线部署方式不同,在线部署方式只需要下载单独的[部署脚本](https://gitee.com/openeuler/gala-docs/blob/master/deploy/deploy.sh),无需下载整个工具,可以通过如下命令下载到待部署机器上: + +``` +wget https://gitee.com/Vchanger/gala-docs/raw/master/deploy/deploy.sh +``` + +### 部署说明 + +在线部署的方式与离线部署基本一致,区别在于执行脚本时需要加上--online选项。 + +这里以部署gala-gopher为例: + +- rpm方式(仅支持openEuler 22.03 LTS/openEuler 22.03 LTS SP1) + +```xml +sh deploy.sh gopher -K -p --online +``` + +- 容器镜像方式: + +```css +sh deploy.sh gopher -K -p --docker --online +``` + + + diff --git a/deploy/arangodb2es.py b/deploy/arangodb2es.py new file mode 100644 index 0000000..019a364 --- /dev/null +++ b/deploy/arangodb2es.py @@ -0,0 +1,318 @@ +import datetime +import time +import pytz +import elasticsearch +import json +import sys +import requests + +from arango import ArangoClient +from elasticsearch import helpers +from pyArango.connection import Connection +from pyArango.database import Database + +class ArangoDB: + def __init__(self, url, db, username, passwd): + self.db = Connection(arangoURL=url).databases[db] + + def fetch_data(self, aql): + return self.db.AQLQuery(aql, rawResults=True) + + def has_collection(self, collection): + return self.db.hasCollection(collection) + + def add_index(self, collection, indexList): + return self.db[collection].ensureHashIndex(indexList) + +class ElasticSearch: + def __init__(self, url): + self.es= elasticsearch.Elasticsearch(url, request_timeout=100) + + def bulk_to_es(self, dlist): + actions = [] + count = 0 + + for action in 
dlist: + actions.append(action) + count += 1 + + if len(actions) == 500: + helpers.bulk(self.es, actions) + del actions[0:len(actions)] + + if len(actions) > 0: + helpers.bulk(self.es, actions) + del actions[0:len(actions)] + + return count + + def get_cause_nodes_from_es(self): + max_timestamp = int(time.time()) * 1000 + min_timestamp = max_timestamp - 3 * 60 * 1000 + print(min_timestamp, max_timestamp) + + query = { + "bool": { + "must": [ + {"range": {"Timestamp": {"gte": min_timestamp,"lte": max_timestamp}}} + ] + } + } + resp = self.es.search(index="gala_cause_inference-*", query=query) + hits= resp['hits']['hits'] + timestamp = sys.maxsize + nodes = {} + if len(hits) >= 1: + print('hit len {}'.format(len(hits))) + for hit in hits: + cause_metrics = hit['_source']['Resource']['cause_metrics'] + for cause_metric in cause_metrics: + paths = cause_metric['path'] + if hit['_source']['Timestamp'] < timestamp: + timestamp = hit['_source']['Timestamp'] + nodes = {} + elif hit['_source']['Timestamp'] == timestamp: + # one timestamp multi records + timestamp = hit['_source']['Timestamp'] + else: + continue + for path in paths: + if path['entity_id'] not in nodes: + nodes[path['entity_id']] = {'metric_id': path['metric_id'], 'desc': path['desc'], 'count': 1} + else: + if nodes[path['entity_id']]['desc'] is None or path['desc'] in nodes[path['entity_id']]['desc']: + continue + else: + nodes[path['entity_id']]['metric_id'] += "," + path['metric_id'] + if nodes[path['entity_id']]['count'] == 1: + temp = nodes[path['entity_id']]['desc'] + nodes[path['entity_id']]['desc'] += "1." + temp + nodes[path['entity_id']]['count'] += 1 + nodes[path['entity_id']]['desc'] += '\n' + str(nodes[path['entity_id']]['count'] ) + '.' 
+ path['desc'] + print(path['entity_id'], nodes[path['entity_id']]) + print("-------------------") + print(timestamp) + return {'nodes': nodes, 'timestamp': timestamp} + + def has_record_in_graph(self, ts): + query = { + "bool": { + "must": [ + {"range": {"ts": {"gte": ts, "lte": ts}}} + ] + } + } + resp = self.es.search(index="aops_graph2", query=query) + hits = resp['hits']['hits'] + if len(hits) >= 1: + return True + else: + return False + +class AOps: + def __init__(self): + self.arangodbUrl = 'http://localhost:8529' + self.esUrl = 'http://localhost:9200' + self.promethusUrl = 'http://localhost:9090' + self.db_client = ArangoDB(self.arangodbUrl, 'spider', 'root', '') + self.es_client = ElasticSearch(self.esUrl) + self.edge_collection = ['belongs_to', 'connect', 'has_vhost', 'is_peer', 'runs_on', 'store_in'] + self.bad_nodes = {} + self.hosts_map = {} + + def getHostMapFromPromethus(self): + url = self.promethusUrl + "/api/v1/query?query=gala_gopher_host_value" + rsp = requests.get(url).json() + if 'status' in rsp and rsp['status'] == 'success': + for i in rsp['data']['result']: + self.hosts_map[i['metric']['machine_id']] = i['metric']['job'] + + def get_timestamp(self, ts_sec): + if ts_sec == 0: + cur_ts_sec = int(time.time()) + else: + cur_ts_sec = ts_sec + + aql = "For t in Timestamps FILTER TO_NUMBER(t._key) <= {} SORT t._key DESC LIMIT 1 return t._key".format(cur_ts_sec) + timestamp = self.db_client.fetch_data(aql) + if len(timestamp) != 0: + return timestamp[0] + else: + return 0 + + def get_metrics_str_from_node(self, node): + if node['type'] == 'host': + node['metrics'].pop('value') # host metrics-value no use + + # set bad nodes + node_id = node['_id'].split('/')[1] + if node_id in self.bad_nodes: + node['metrics']['health_status'] = 'False' + node['metrics']['health_desc'] = self.bad_nodes[node_id]['desc'] + node['metrics']['health_metric'] = self.bad_nodes[node_id]['metric_id'] + else: + node['metrics']['health_status'] = 'True' + 
node['metrics']['health_desc'] = '' + node['metrics']['health_metric'] = '' + + # set chao proc node to bad status + if 'comm' in node and 'chaos_os' in node['comm']: + node['metrics']['health_status'] = 'False' + node['metrics']['health_desc'] = '' + node['metrics']['health_metric'] = '' + + return json.dumps(node['metrics']) + + def get_node_info(self, edge_origion, node, dic): + node_type= node['type'] + dic['_source'][edge_origion + '_type'] = node_type + dic['_source'][edge_origion + '_level'] = node['level'] + + if node_type == 'proc': + if 'comm' in node: + dic['_source'][edge_origion + '_comm'] = node['comm'] + node['_key'][len(node['machine_id']):] + else: + dic['_source'][edge_origion + '_comm'] = node['_key'][len(node['machine_id']):] + elif node_type == 'host': + if node['machine_id'] in self.hosts_map.keys(): + dic['_source'][edge_origion + '_comm'] = self.hosts_map[node['machine_id']] + elif 'ceph' in node['hostname']: + dic['_source'][edge_origion + '_comm'] = 'ceph_host_' + node['ip_addr'] + else: + dic['_source'][edge_origion + '_comm'] = node['host_type'] + '_host_' + node['ip_addr'] + elif node_type == 'thread': + dic['_source'][edge_origion + '_comm'] = node['comm'] + node['_key'][len(node['machine_id']):] + elif node_type == 'block': + dic['_source'][edge_origion + '_comm'] = node['disk_name'] + '_' + node['blk_name'] \ + + node['_key'][len(node['machine_id']):] + else: + dic['_source'][edge_origion + '_comm'] = node['_key'][len(node['machine_id']) + 1:] + + + # set bad nodes + node_id = node['_id'].split('/')[1] + if node_id in self.bad_nodes: + dic['_source'][edge_origion + '_status'] = 'bad' + else: + dic['_source'][edge_origion + '_status'] = 'good' + + # set chao proc node to bad status + if 'chaos_os' in dic['_source'][edge_origion + '_comm']: + dic['_source'][edge_origion + '_status'] = 'bad' + + def get_node_by_from(self, edge_from, node, dic): + if node['_id'] == edge_from: + self.get_node_info('src', node, dic) + 
dic['_source']['metric'] = self.get_metrics_str_from_node(node) + else: + self.get_node_info('dst', node, dic) + dic['_source']['dst_metric'] = self.get_metrics_str_from_node(node) + + # filter redis/gaussdb proc, add proc_comm + def filter_proc(self, data): + redis_container_list = [] + gaussdb_container_list = [] + redis_tcp_list = [] + gaussdb_tcp_list = [] + for it in data: + if 'redis' in it['_source']['src_comm']: + it['_source']['proc_comm'] = 'redis' + redis_container_list.append(it['_source']['dst']) + continue + if 'gaussdb' in it['_source']['src_comm']: + it['_source']['proc_comm'] = 'gaussdb' + gaussdb_container_list.append(it['_source']['dst']) + continue + if 'redis' in it['_source']['dst_comm']: + redis_tcp_list.append(it['_source']['src']) + continue + if 'gaussdb' in it['_source']['dst_comm']: + gaussdb_tcp_list.append(it['_source']['src']) + continue + + + for it in data: + if it['_source']['src'] in redis_container_list or it['_source']['src'] in redis_tcp_list: + it['_source']['proc_comm'] = 'redis' + continue + if it['_source']['src'] in gaussdb_container_list or it['_source']['src'] in gaussdb_tcp_list: + it['_source']['proc_comm'] = 'gaussdb' + continue + + def fetch_graph_data(self): + self.getHostMapFromPromethus() + cause_data = self.es_client.get_cause_nodes_from_es() + if cause_data['timestamp'] == sys.maxsize: + ag_ts = self.get_timestamp(0); + ts = int(ag_ts) * 1000 + else: + ag_ts = self.get_timestamp(int(cause_data['timestamp'] / 1000)) + ts = cause_data['timestamp'] + self.bad_nodes = cause_data['nodes'] + print('ts:{} ag_ts:{}'.format(ts, ag_ts)) + results = [] + if self.es_client.has_record_in_graph(ts): + print('{} has recorded'.format(ts)) + return + for edge_collection in self.edge_collection: + if not self.db_client.has_collection(edge_collection): + continue + + aql = "For doc in " + edge_collection + \ + " FILTER " \ + " doc.timestamp ==" + ag_ts + \ + "RETURN doc" + + edges = self.db_client.fetch_data(aql) + + for edge in 
edges: + edge_from = edge['_from'] + edge_to = edge['_to'] + edge_type = edge['type'] + edge_layer = edge['layer'] + + dic = { + "_index": "aops_graph2", + "_source": { + "ts": ts, + "timestamp": datetime.datetime.fromtimestamp(ts / 1000, pytz.utc), + "edge_type": edge_type, + "edge_layer": edge_layer, + "src": edge_from, + "dst": edge_to + } + } + + aql = "For doc in ObserveEntities_" + ag_ts + \ + " Filter doc._id == '" + edge['_from'] + "' ||" + \ + " doc._id == '" + edge['_to'] + "' LIMIT 10 return doc" + nodes = self.db_client.fetch_data(aql) + for node in nodes: + self.get_node_by_from(edge['_from'], node, dic) + results.append(dic) + print("node length", len(results)) + + self.filter_proc(results) + + count = self.es_client.bulk_to_es(results) + print("write to es count is:", count) + + def set_graph_timestamp_index(self): + for edge_collection in self.edge_collection: + if not self.db_client.has_collection(edge_collection): + continue + self.db_client.add_index(edge_collection, ['timestamp']) + +if __name__ == "__main__": + AOps().set_graph_timestamp_index(); + while True: + start_time = int(time.time()) + AOps().fetch_graph_data() + end_time = int(time.time()) + print("fetch graph data cost time:{}".format(end_time - start_time)) + time.sleep(10) + print('-----------------------------------------------') + + + + diff --git a/deploy/deploy.sh b/deploy/deploy.sh new file mode 100755 index 0000000..8335796 --- /dev/null +++ b/deploy/deploy.sh @@ -0,0 +1,1223 @@ +#!/bin/bash + +OS_TYPE="" +OS_VERSION="" +DEPLOY_TYPE="remote" +OFFICIAL_RELEASE="yes" +WORKING_DIR=$(realpath $(dirname $0)) +GALA_DEPLOY_MODE="rpm" +COMPONENT="" + +DOCKER_HUB='hub.oepkgs.net' +DOCKER_HUB_TAG_PREFIX="${DOCKER_HUB}/a-ops" + +LOCAL_DEPLOY_SRCDIR="$WORKING_DIR" +gopher_local_rpm="" + +GOPHER_LOCAL_RPM="" +GOPHER_DOCKER_TAG="" + +REMOTE_REPO_PREFIX="http://mirrors.aliyun.com/openeuler/" + +GS_DATADIR="" +EPOL_REPO="" +EPOL_UPDATE_REPO="" + +KAFKA_PORT=9092 +PROMETHEUS_PORT=9090 
+ES_PORT=9200 +ARANGODB_PORT=8529 +PYROSCOPE_PORT=4040 + +KAFKA_ADDR="localhost:${KAFKA_PORT}" +PROMETHEUS_ADDR="localhost:${PROMETHEUS_PORT}" +ES_ADDR="localhost:${ES_PORT}" +ARANGODB_ADDR="localhost:${ARANGODB_PORT}" +PYROSCOPE_ADDR="localhost:${PYROSCOPE_PORT}" + +PROMETHEUS_SCRAPE_LIST="" + +#=======Common Utils========# +function echo_err_exit() { + echo -e "\e[31m $@ \e[0m" + exit 1; +} + +function echo_info() { + echo -e "\e[32m $@ \e[0m" +} + +function echo_warn() { + echo -e "\e[33m $@ \e[0m" +} + +function print_usage() { + echo "usage : sh deploy.sh [COMPONENT] [OPTION]" + + echo "supported COMPONENT:" + echo " gopher|ops|middleware|opengauss|grafana" + echo "" + echo "gopher options:" + echo " [-K|--kafka ] [-p|--pyroscope ] [--docker]" + echo " [--proxy]" + echo "" + echo "ops options:" + echo " [-K|--kafka ] [-P|--prometheus ] [-A|--arangodb ]" + echo " [--docker]" + echo "" + echo "opengauss options:" + echo " [-D|--datadir ]" + echo "" + echo "middleware options:" + echo " [-K|--kafka ] [-P ]" + echo " [-E|--elastic ] [-A|--arangodb] [-p|--pyroscope]" + echo "" + echo "grafana options:" + echo " [-P|--prometheus ] [-p|--pyroscope ]" + echo " [-E|--elastic ]" + echo "" + echo "Common options:" + echo " --docker Deploy components with docker images, only support gopher" + echo " -S|--srcdir To specify offline resources for installation, only Used in offline deployment" + echo "" +} + +function get_port_from_addr() { + addr=$1 + port="" + + if echo $addr | grep -q ":" ; then + port=${addr##*:} + if [ -z "${port}" ] || ! 
echo $port | grep -q '^[[:digit:]]*$' ; then + echo_err_exit "Invalid port specified: $addr" + fi + fi + echo $port +} + +function get_ip_from_addr() { + addr=$1 + echo ${addr%:*} +} + +function addr_add_port() { + addr="${1}" + default_port="${2}" + + if [ -z "${addr}" ] || [ -z "${default_port}" ] ; then + echo_err_exit "Invalid parameter in addr_add_port()" + fi + + port=$(get_port_from_addr $addr) + if [ -z "${port}" ] ; then + echo "${addr}:${default_port}" + else + echo "${addr}" + fi +} + + +function install_rpm_local_repo() { + rpm="$1" + + [ -z "$LOCAL_DEPLOY_SRCDIR" ] && echo_err_exit "local repo is undefined, aborting!" + + yum install -y $rpm --repofrompath="local_deploy,$LOCAL_DEPLOY_SRCDIR" --nogpgcheck + [ $? -ne 0 ] && echo_err_exit "Error: failed to install $rpm, please check repo!" +} + + +function install_rpm_remote_repo() { + rpm="$1" + repo_path="" + + if [ "x$OS_TYPE" == "xopenEuler" ] ; then + if echo $REMOTE_REPO_PREFIX | grep -q "openEuler-22.03-LTS-SP1" ; then + EPOL_REPO=$REMOTE_REPO_PREFIX/EPOL/main/$(uname -m) + EPOL_UPDATE_REPO=$REMOTE_REPO_PREFIX/EPOL/update/main/$(uname -m) + else + EPOL_REPO=$REMOTE_REPO_PREFIX/EPOL/$(uname -m) + EPOL_UPDATE_REPO=$REMOTE_REPO_PREFIX/EPOL/update/$(uname -m) + fi + + repo_path="--repofrompath=epol_deploy,$EPOL_REPO \ + --repofrompath=epol_update_deploy,$EPOL_UPDATE_REPO \ + --repofrompath=everything_deploy,$REMOTE_REPO_PREFIX/everything/$(uname -m) \ + --repofrompath=update_deploy,$REMOTE_REPO_PREFIX/update/$(uname -m)" + fi + yum install -y $rpm $repo_path --nogpgcheck + [ $? -ne 0 ] && echo_err_exit "Error: failed to install $rpm, please check repo!" +} + +function install_rpm() { + rpm=$1 + if echo $rpm | grep -q ".rpm$" ; then + rpm_name="$(rpm -qpi $rpm | grep Name | awk -F : '{gsub(/[[:blank:]]*/,"",$2);print $2}')" + else + rpm_name="$rpm" + fi + + if [ -n "$rpm_name" ] && rpm -q "$rpm_name" >/dev/null 2>&1 ; then + echo_info "$rpm_name is already installed, skip..." 
+ return + fi + + if [ "$DEPLOY_TYPE" == "local" ] ; then + install_rpm_local_repo $@ + elif [ "$DEPLOY_TYPE" == "remote" ] ; then + install_rpm_remote_repo $rpm_name + else + echo_err_exit "Unsupported repo type, please check!" + fi +} + +function config_docker() { + if ! grep "^INSECURE_REGISTRY" /etc/sysconfig/docker | grep -q "${DOCKER_HUB}" ; then + cat >> /etc/sysconfig/docker << EOF +INSECURE_REGISTRY='--insecure-registry ${DOCKER_HUB}' +EOF + systemctl daemon-reload + systemctl restart docker || echo_err_exit "Error: fail to configure docker" + fi +} + + +function docker_load_image_file() { + image_tarfile="$1" + + docker --version >/dev/null 2>&1 || echo_err_exit "Error: Docker cmd not found, please install docker firstly" + [ ! -f $image_tarfile ] && echo_err_exit "Error: failed to find local image file:" $image_tarfile + docker load -i $image_tarfile + [ $? -ne 0 ] && echo_err_exit "Error: failed to load docker image:" $image_tarfile +} + +function docker_pull_image() { + tag_name="$1" + + docker --version >/dev/null 2>&1 || echo_err_exit "Error: Docker cmd not found, please install docker firstly" + config_docker + docker pull ${DOCKER_HUB_TAG_PREFIX}/"${tag_name}" + [ $? -ne 0 ] && echo_err_exit "Error: failed to pull docker image:" $tag_name +} + + +#=======openGauss Server Deployment=======# +OPNEGAUSS_DEPLOY_SCRIPT='./opengauss/create_master_slave.sh' +function parse_arg_opengauss_server() { + ARGS=`getopt -a -o D: --long datadir: -- "$@"` + [ $? -ne 0 ] && (print_usage; exit 1) + eval set -- "${ARGS}" + while true + do + case $1 in + -D|--datadir) + GS_DATADIR="${2}" + shift;; + --) + shift + break;; + *) + print_usage + exit 1;; + esac + shift + done +} + +function create_opengauss_master_slave() { + GS_PASSWORD=Aops@123 + OG_SUBNET="172.11.0.0/24" + IPPREFIX=172.11.0. 
+ START=101 + HOST_PORT=5432 + LOCAL_PORT=5434 + + MASTER_NODENAME=opengauss_master + SLAVE_NODENAME=opengauss_slave + nums_of_slave=1 + + docker stop ${MASTER_NODENAME} 2>/dev/null ||: + docker rm ${MASTER_NODENAME} 2>/dev/null ||: + for ((i=1;i<=nums_of_slave;i++)) ; do + docker stop ${SLAVE_NODENAME}${i} 2>/dev/null ||: + docker rm ${SLAVE_NODENAME}${i} 2>/dev/null ||: + done + docker network rm opengaussnetwork 2>/dev/null + + docker network create --subnet=$OG_SUBNET opengaussnetwork \ + || { + echo "" + echo "ERROR: OpenGauss Database Network was NOT successfully created." + echo "HINT: opengaussnetwork Maybe Already Exsist Please Execute 'docker network rm opengaussnetwork' " + exit 1 + } + echo "OpenGauss Database Network Created." + + conninfo="" + for ((i=1;i<=nums_of_slave;i++)) + do + ip=`expr $START + $i` + hport=`expr $HOST_PORT + 1000 \* $i` + lport=`expr $LOCAL_PORT + 1000 \* $i` + conninfo+="replconninfo$i = 'localhost=$IPPREFIX$START localport=$LOCAL_PORT localservice=$HOST_PORT remotehost=$IPPREFIX$ip remoteport=$lport remoteservice=$hport'\n" + done + echo -e $conninfo + + for ((i=0;i<=nums_of_slave;i++)) + do + if [ $i == 0 ]; then + hport=$HOST_PORT + lport=$LOCOL_PORT + ip=$START + nodeName=$MASTER_NODENAME + conn=$conninfo + role="primary" + else + hport=`expr $HOST_PORT + 1000 \* $i` + lport=`expr $LOCAL_PORT + 1000 \* $i` + ip=`expr $START + $i` + nodeName=$SLAVE_NODENAME$i + conn="replconninfo1 = 'localhost=$IPPREFIX$ip localport=$lport localservice=$hport remotehost=$IPPREFIX$START remoteport=$LOCAL_PORT remoteservice=$HOST_PORT'\n" + role="standby" + fi + docker run --network opengaussnetwork --ip $IPPREFIX$ip --privileged=true \ + --name $nodeName -h $nodeName -p $hport:$hport -d \ + -e GS_PORT=$hport \ + -e OG_SUBNET=$OG_SUBNET \ + -e GS_PASSWORD=$GS_PASSWORD \ + -e NODE_NAME=$nodeName \ + -e REPL_CONN_INFO="$conn" \ + -v $GS_DATADIR/$nodeName:/var/lib/opengauss \ + hub.oepkgs.net/a-ops/opengauss:3.0.0 -M $role \ + || echo_err_exit 
"ERROR: OpenGauss Database $role Docker Container was NOT successfully created." + + echo_info "OpenGauss Database $role Docker Container created." + sleep 30 + done +} + +function deploy_opengauss_server() { + echo_info "======Deploying openGauss Server======" + [ "$DEPLOY_TYPE" == "local" ] && echo_err_exit "openGauss server now not support offline deployment, aborting" + + if [ -n "${GS_DATADIR}" ] && [ ! -d "${GS_DATADIR}" ] ; then + echo_err_exit "Invalid openGauss data dir" + fi + + echo -e "[1] Pulling opengauss docker image" + docker_pull_image "opengauss:3.0.0" + + echo -e "\n[2] Creating opengauss master and slave container" + create_opengauss_master_slave + + echo -e "\n[4] Creating opengauss database and user" + systemctl restart docker # prevent iptables-related issues + container_id=$(docker ps | grep -w opengauss_master | awk '{print $1}') + docker exec -it ${container_id} /bin/bash -c "su - omm -c \"echo create database tpccdb\; > ~/tmp.gsql\"" + docker exec -it ${container_id} /bin/bash -c "su - omm -c \"echo create user tpcc with password \'tpcc_123456\'\; >> ~/tmp.gsql\"" + docker exec -it ${container_id} /bin/bash -c "su - omm -c \"echo grant all privilege to tpcc\; >> ~/tmp.gsql\"" + docker exec -it ${container_id} /bin/bash -c "su - omm -c \"echo create user opengauss_exporter with monadmin password \'opengauss_exporter123\'\; >> ~/tmp.gsql\"" + docker exec -it ${container_id} /bin/bash -c "su - omm -c \"echo grant usage on schema dbe_perf to opengauss_exporter\; >> ~/tmp.gsql\"" + docker exec -it ${container_id} /bin/bash -c "su - omm -c \"echo grant select on pg_stat_replication to opengauss_exporter\; >> ~/tmp.gsql\"" + + docker exec -it ${container_id} /bin/bash -c "su - omm -c \"gsql -f ~/tmp.gsql >/dev/null\"" + echo_info "======Deploying openGauss Server Done!======" +} + +#=======Gopher Deployment=======# +GOPHER_CONF='/etc/gala-gopher/gala-gopher.conf' +GOPHER_APP_CONF='/etc/gala-gopher/gala-gopher-app.conf' 
+PG_STAT_CONF='/etc/gala-gopher/extend_probes/pg_stat_probe.conf' +STACKPROBE_CONF='/etc/gala-gopher/extend_probes/stackprobe.conf' +function parse_arg_gopher() { + ARGS=`getopt -a -o K:p: --long kafka:,pyroscope:,docker -- "$@"` + [ $? -ne 0 ] && (print_usage; exit 1) + eval set -- "${ARGS}" + while true + do + case $1 in + -K|--kafka) + KAFKA_ADDR=$(addr_add_port $2 ${KAFKA_PORT}) + shift;; + -p|--pyroscope) + PYROSCOPE_ADDR=$(addr_add_port $2 ${PYROSCOPE_PORT}) + shift;; + --docker) + GALA_DEPLOY_MODE="docker" + ;; + --) + shift + break;; + *) + print_usage + exit 1;; + esac + shift + done +} + +download_gopher_deps() { + DOWNLOAD_DIR=$1 + + wget https://mirrors.aliyun.com/openeuler/openEuler-20.03-LTS-SP3/update/x86_64/Packages/libbpf-0.3-4.oe1.x86_64.rpm -P ${DOWNLOAD_DIR} --no-check-certificate + wget https://mirrors.aliyun.com/openeuler/openEuler-22.03-LTS-SP1/EPOL/main/x86_64/Packages/flamegraph-1.0-1.oe2203sp1.noarch.rpm -P ${DOWNLOAD_DIR} --no-check-certificate + wget http://121.36.84.172/dailybuild/openEuler-20.03-LTS-SP1/openEuler-20.03-LTS-SP1/EPOL/main/x86_64/Packages/cadvisor-0.37.0-2.oe1.x86_64.rpm -P ${DOWNLOAD_DIR} --no-check-certificate + wget https://mirrors.aliyun.com/openeuler/openEuler-22.03-LTS-SP1/everything/x86_64/Packages/cjson-1.7.15-1.oe2203sp1.x86_64.rpm -P ${DOWNLOAD_DIR} --no-check-certificate + wget http://121.36.84.172/dailybuild/openEuler-20.03-LTS-SP1/openEuler-20.03-LTS-SP1/EPOL/main/x86_64/Packages/python3-libconf-2.0.1-1.oe1.noarch.rpm -P ${DOWNLOAD_DIR} --no-check-certificate +} + +download_gopher() { + echo_info "- Download gala-gopher rpm" + DOWNLOAD_DIR=$1 + + if [ "$OS_VERSION" == "openEuler-22.03-LTS-SP1" ] ; then + yumdownloader --repofrompath="gala_eur,https://eur.openeuler.openatom.cn/results/Vchanger/gala/openeuler-22.03_LTS_SP1-x86_64/" gala-gopher \ + --destdir=${DOWNLOAD_DIR} -b + gopher_local_rpm=$(ls ${DOWNLOAD_DIR}/gala-gopher*oe2203sp1.*.rpm) + yum_download $gopher_local_rpm + elif [ "$OS_VERSION" == 
"openEuler-22.03-LTS" ] ; then + yumdownloader --repofrompath="gala_eur,https://eur.openeuler.openatom.cn/results/Vchanger/gala-oe2203/openeuler-22.03_LTS_SP1-x86_64/" gala-gopher \ + --destdir=${DOWNLOAD_DIR} -b + gopher_local_rpm=$(ls ${DOWNLOAD_DIR}/gala-gopher*oe2203.*.rpm) + yum_download $gopher_local_rpm + elif [ "$OS_VERSION" == "openEuler-20.03-LTS-SP1" ] ; then + yumdownloader --repofrompath="gala_eur,https://eur.openeuler.openatom.cn/results/Vchanger/gala-oe2003sp1/openeuler-20.03_LTS_SP3-x86_64/" gala-gopher \ + --destdir=${DOWNLOAD_DIR} -b + gopher_local_rpm=$(ls ${DOWNLOAD_DIR}/gala-gopher*oe1.*.rpm) + download_gopher_deps ${DOWNLOAD_DIR} + elif [ "$OS_VERSION" == "kylin" ] ; then + yumdownloader --repofrompath="gala_eur,https://eur.openeuler.openatom.cn/results/Vchanger/gala-kylin/openeuler-20.03_LTS_SP3-x86_64/" gala-gopher \ + --destdir=${DOWNLOAD_DIR} -b + gopher_local_rpm=$(ls ${DOWNLOAD_DIR}/gala-gopher*ky10.*.rpm) + download_gopher_deps ${DOWNLOAD_DIR} + else + echo_err_exit "Unsupported openEuler version, aborting!" 
+ fi +} + +function deploy_gopher_rpm() { + echo -e "[1] Installing gala-gopher" + if [ "$DEPLOY_TYPE" == "local" ]; then + install_rpm_local_repo gala-gopher + else + mkdir -p ${WORKING_DIR}/gala-gopher-rpms + download_gopher ${WORKING_DIR}/gala-gopher-rpms + if [ "$OS_VERSION" == "openEuler-22.03-LTS-SP1" ] || [ "$OS_VERSION" == "openEuler-22.03-LTS" ] ; then + install_rpm_remote_repo $gopher_local_rpm + else + install_rpm log4cplus + install_rpm python3-requests + yum install ${WORKING_DIR}/gala-gopher-rpms/python3-libconf-2.0.1-1.oe1.noarch.rpm \ + ${WORKING_DIR}/gala-gopher-rpms/cadvisor-0.37.0-2.oe1.x86_64.rpm \ + ${WORKING_DIR}/gala-gopher-rpms/flamegraph-1.0-1.oe2203sp1.noarch.rpm \ + ${WORKING_DIR}/gala-gopher-rpms/libbpf-0.3-4.oe1.x86_64.rpm \ + $gopher_local_rpm -y + fi + fi + + echo -e "\n[2] Configuring gala-gopher" + # kafka broker + sed -i "s#kafka_broker =.*#kafka_broker = \"${KAFKA_ADDR}\"#g" ${GOPHER_CONF} + + # pg_stat_probe.conf + line=$(grep -n ' -' ${PG_STAT_CONF} | head -1 | cut -f1 -d':') + sed -i "$((line+1)),\$d" ${PG_STAT_CONF} + cat >> ${PG_STAT_CONF} << EOF + ip: "172.11.0.101" + port: "5432" + dbname: "postgres" + user: "opengauss_exporter" + password: "opengauss_exporter123" +EOF + + # add guassdb to app whitelist + if ! 
grep -q 'comm = "gaussdb"' ${GOPHER_APP_CONF} ; then + sed -i "/^(/a\ \t{\n\t\tcomm = \"gaussdb\",\n\t\tcmdline = \"\";\n\t}," ${GOPHER_APP_CONF} + fi + + # stackprobe.conf + sed -i "/name = \"stackprobe\"/{n;n;n;s/switch =.*/switch = \"on\"/g;}" ${GOPHER_CONF} + sed -i "s/pyroscope_server.*/pyroscope_server = \"${PYROSCOPE_ADDR}\";/g" ${STACKPROBE_CONF} + + echo -e "\n[3] Starting gala-gopher service" + systemctl restart gala-gopher || echo_err_exit "Error: fail to start gala-gopher.service" +} + +function prepare_docker_gopher_conf() { + mkdir -p /opt/gala/gopher_conf/extend_probes + + wget https://gitee.com/openeuler/gala-gopher/raw/master/config/gala-gopher.conf \ + -O /opt/gala/gopher_conf/gala-gopher.conf --no-check-certificate + [ $? -ne 0 ] && echo_err_exit "Failed to download gala-gopher.conf" + + wget https://gitee.com/openeuler/gala-gopher/raw/master/config/gala-gopher-app.conf \ + -O /opt/gala/gopher_conf/gala-gopher-app.conf --no-check-certificate + [ $? -ne 0 ] && echo_err_exit "Failed to download gala-gopher-app.conf" + + wget https://gitee.com/openeuler/gala-gopher/raw/master/src/probes/extends/ebpf.probe/src/stackprobe/conf/stackprobe.conf \ + -O /opt/gala/gopher_conf/extend_probes/stackprobe.conf --no-check-certificate + [ $? 
-ne 0 ] && echo_err_exit "Failed to download gala-gopher stackprobe.conf" +} + +function deploy_gopher_docker() { + container_name="aops-gala-gopher" + + [ "$DEPLOY_TYPE" == "local" ] && echo_err_exit "gala-gopher deployment in docker mode is only supported online" + echo -e "[1] Pulling/Loading gala-gopher docker image for ${GOPHER_DOCKER_TAG}" + gopher_tag="gala-gopher-$(uname -m):${GOPHER_DOCKER_TAG}" + docker_pull_image "${gopher_tag}" + + echo -e "\n[2] Configuring gala-gopher" + prepare_docker_gopher_conf + # kafka broker + sed -i "s#kafka_broker =.*#kafka_broker = \"${KAFKA_ADDR}\"#g" /opt/gala/gopher_conf/gala-gopher.conf + + # pg_stat_probe.conf + cat > /opt/gala/gopher_conf/extend_probes/pg_stat_probe.conf << EOF +servers: + - + ip: "172.11.0.101" + port: "5432" + dbname: "postgres" + user: "opengauss_exporter" + password: "opengauss_exporter123" +EOF + # add guassdb to app whitelist + if ! grep -q 'comm = "gaussdb"' /opt/gala/gopher_conf/gala-gopher-app.conf ; then + sed -i "/^(/a\ \t{\n\t\tcomm = \"gaussdb\",\n\t\tcmdline = \"\";\n\t}," /opt/gala/gopher_conf/gala-gopher-app.conf + fi + + # stackprobe.conf + sed -i "/name = \"stackprobe\"/{n;n;n;s/switch =.*/switch = \"on\"/g;}" /opt/gala/gopher_conf/gala-gopher.conf + sed -i "s/pyroscope_server.*/pyroscope_server = \"${PYROSCOPE_ADDR}\";/g" /opt/gala/gopher_conf/extend_probes/stackprobe.conf + + echo -e "\n[3] Creating gala-gopher container" + # Stop gala-gopher service to prevent port conflict + systemctl stop gala-gopher 2>/dev/null + docker stop ${container_name} 2>/dev/null ; docker rm ${container_name} 2>/dev/null + docker run -d --name ${container_name} --privileged \ + -v /etc:/etc -v /var:/var -v /boot:/boot:ro \ + -v /lib/modules:/lib/modules:ro -v /usr/src:/usr/src:ro \ + -v /opt/gala/gopher_conf/:/gala-gopher/user_conf/ \ + -v /sys/kernel/debug:/sys/kernel/debug -v /sys/fs/bpf:/sys/fs/bpf \ + --pid=host --network=host ${DOCKER_HUB_TAG_PREFIX}/"${gopher_tag}" + [ $? 
-ne 0 ] && echo_err_exit "Error: fail to run gala-gopher container" +} + +function deploy_gopher() { + echo_info "======Deploying gala-gopher(${GALA_DEPLOY_MODE})======" + if [ "x${GALA_DEPLOY_MODE}" == "xrpm" ] ; then + deploy_gopher_rpm + elif [ "x${GALA_DEPLOY_MODE}" == "xdocker" ] ; then + deploy_gopher_docker + else + echo_err_exit "Unsupported deploy mode, must be rpm or docker" + fi + echo_info "======Deploying gala-gopher Done!======" +} + + +#=======Ops Deployment=======# +ANTEATER_CONF='/etc/gala-anteater/config/gala-anteater.yaml' +SPIDER_CONF='/etc/gala-spider/gala-spider.yaml' +INFERENCE_CONF='/etc/gala-inference/gala-inference.yaml' +ANTEATER_KAFKA_IP="localhost" +ANTEATER_PROMETHEUS_IP="localhost" +function parse_arg_ops() { + ARGS=`getopt -a -o K:P:A:S: --long kafka:,prometheus:,arangodb:,docker,srcdir: -- "$@"` + [ $? -ne 0 ] && (print_usage; exit 1) + eval set -- "${ARGS}" + while true + do + case $1 in + -K|--kafka) + KAFKA_ADDR=$(addr_add_port $2 ${KAFKA_PORT}) + KAFKA_PORT=$(get_port_from_addr ${KAFKA_ADDR}) + ANTEATER_KAFKA_IP=$(get_ip_from_addr ${KAFKA_ADDR}) + shift;; + -P|--prometheus) + PROMETHEUS_ADDR=$(addr_add_port $2 ${PROMETHEUS_PORT}) + PROMETHEUS_PORT=$(get_port_from_addr ${PROMETHEUS_ADDR}) + ANTEATER_PROMETHEUS_IP=$(get_ip_from_addr ${PROMETHEUS_ADDR}) + shift;; + -A|--arangodb) + ARANGODB_ADDR=$(addr_add_port $2 ${ARANGODB_PORT}) + shift;; + -S|--srcdir) + DEPLOY_TYPE="local" + LOCAL_DEPLOY_SRCDIR=$(realpath $2) + shift;; + --docker) + GALA_DEPLOY_MODE="docker" + ;; + --) + shift + break;; + *) + print_usage + exit 1;; + esac + shift + done +} + +function deploy_ops_rpm() { + echo -e "[1] Installing gala-ops" + + install_rpm gala-ops + + echo -e "\n[2] Configuring gala-ops" + sed -i "/^Kafka:/{n;s/server:.*/server: \"${ANTEATER_KAFKA_IP}\"/g;}" ${ANTEATER_CONF} + sed -i "/^Kafka:/{n;n;s/port:.*/port: \"${KAFKA_PORT}\"/g;}" ${ANTEATER_CONF} + sed -i "/^Prometheus:/{n;s/server:.*/server: \"${ANTEATER_PROMETHEUS_IP}\"/g;}" 
${ANTEATER_CONF} + sed -i "/^Prometheus:/{n;n;s/port:.*/port: \"${PROMETHEUS_PORT}\"/g;}" ${ANTEATER_CONF} + + sed -i "/^prometheus:/{n;s/base_url:.*/base_url: \"http:\/\/${PROMETHEUS_ADDR}\/\"/g;}" ${SPIDER_CONF} + sed -i "/^kafka:/{n;s/server:.*/server: \"${KAFKA_ADDR}\"/g;}" ${SPIDER_CONF} + sed -i "/db_conf:/{n;s/url:.*/url: \"http:\/\/${ARANGODB_ADDR}\"/g;}" ${SPIDER_CONF} + sed -i "s/ log_level:.*/ log_level: DEBUG/g" ${SPIDER_CONF} + + sed -i "/^prometheus:/{n;s/base_url:.*/base_url: \"http:\/\/${PROMETHEUS_ADDR}\/\"/g;}" ${INFERENCE_CONF} + sed -i "/^kafka:/{n;s/server:.*/server: \"${KAFKA_ADDR}\"/g;}" ${INFERENCE_CONF} + sed -i "/^arangodb:/{n;s/url:.*/url: \"http:\/\/${ARANGODB_ADDR}\"/g;}" ${INFERENCE_CONF} + sed -i "s/ log_level:.*/ log_level: DEBUG/g" ${INFERENCE_CONF} + + echo -e "\n[3] Starting gala-ops service" + systemctl restart gala-anteater || echo_err_exit "Error: fail to start gala-anteater service" + systemctl restart gala-spider gala-inference || echo_err_exit "Error: fail to start gala-spider or gala-inference service" +} + +function prepare_docker_anteater_config() { + mkdir -p /opt/gala/anteater_conf + if [ "$DEPLOY_TYPE" == "local" ] ; then + [ ! -f "$LOCAL_DEPLOY_SRCDIR/gala-anteater.yaml" ] && echo_err_exit "Failed to find gala-anteater local yaml file" + \cp -f $LOCAL_DEPLOY_SRCDIR/gala-anteater.yaml /opt/gala/anteater_conf/gala-anteater.yaml + else + wget https://gitee.com/openeuler/gala-anteater/raw/master/config/gala-anteater.yaml \ + -O $docker_anteater_conf --no-check-certificate + [ $? 
-ne 0 ] && echo_err_exit "Failed to download gala-anteater.yaml" + fi +} + +function deploy_ops_docker() { + echo -e "[1] Pulling gala-spider/gala-inference/gala-anteater docker image" + spider_tag="gala-spider-$(uname -m):1.0.1" + infer_tag="gala-inference-$(uname -m):1.0.1" + anteater_tag="gala-anteater-$(uname -m):1.0.1" + + if [ "$DEPLOY_TYPE" == "local" ] ; then + docker_load_image_file "$LOCAL_DEPLOY_SRCDIR/gala-spider-$(uname -m).tar" + docker_load_image_file "$LOCAL_DEPLOY_SRCDIR/gala-inference-$(uname -m).tar" + docker_load_image_file "$LOCAL_DEPLOY_SRCDIR/gala-anteater-$(uname -m).tar" + elif [ "$DEPLOY_TYPE" == "remote" ] ; then + docker_pull_image "${spider_tag}" + docker_pull_image "${infer_tag}" + docker_pull_image "${anteater_tag}" + fi + + echo -e "\n[2] Creating gala-spider/gala-inference/gala-anteater container" + docker stop gala-spider-demo 2>/dev/null ; docker rm gala-spider-demo 2>/dev/null + docker run -d --name gala-spider-demo \ + -e prometheus_server=${PROMETHEUS_ADDR} \ + -e arangodb_server=${ARANGODB_ADDR} \ + -e kafka_server=${KAFKA_ADDR} \ + -e log_level=DEBUG --network host ${DOCKER_HUB_TAG_PREFIX}/"${spider_tag}" + [ $? -ne 0 ] && echo_err_exit "Error: fail to run gala-spider container" + + docker stop gala-inference-demo 2>/dev/null ; docker rm gala-inference-demo 2>/dev/null + docker run -d --name gala-inference-demo \ + -e prometheus_server=${PROMETHEUS_ADDR} \ + -e arangodb_server=${ARANGODB_ADDR} \ + -e kafka_server=${KAFKA_ADDR} \ + -e log_level=DEBUG --network host ${DOCKER_HUB_TAG_PREFIX}/"${infer_tag}" + [ $? 
-ne 0 ] && echo_err_exit "Error: fail to run gala-inference container" + + + prepare_docker_anteater_config + docker_anteater_conf="/opt/gala/anteater_conf/gala-anteater.yaml" + sed -i "/^Kafka:/{n;s/server:.*/server: \"${ANTEATER_KAFKA_IP}\"/g;}" $docker_anteater_conf + sed -i "/^Kafka:/{n;n;s/port:.*/port: \"${KAFKA_PORT}\"/g;}" $docker_anteater_conf + sed -i "/^Prometheus:/{n;s/server:.*/server: \"${ANTEATER_PROMETHEUS_IP}\"/g;}" $docker_anteater_conf + sed -i "/^Prometheus:/{n;n;s/port:.*/port: \"${PROMETHEUS_PORT}\"/g;}" $docker_anteater_conf + + docker stop gala-anteater-demo 2>/dev/null ; docker rm gala-anteater-demo 2>/dev/null + docker run -d --name gala-anteater-demo \ + -v $docker_anteater_conf:/etc/gala-anteater/config/gala-anteater.yaml \ + --network host ${DOCKER_HUB_TAG_PREFIX}/"${anteater_tag}" + [ $? -ne 0 ] && echo_err_exit "Error: fail to run gala-anteater container" +} + + +function deploy_ops() { + echo_info "======Deploying gala-ops(${GALA_DEPLOY_MODE})======" + if [ "x${GALA_DEPLOY_MODE}" == "xrpm" ] ; then + if [ "$OFFICIAL_RELEASE" == "no" ] ; then + echo_err_exit "gala-ops deployment in rpm mode is not supported on" $OS_VERSION + fi + deploy_ops_rpm + elif [ "x${GALA_DEPLOY_MODE}" == "xdocker" ] ; then + deploy_ops_docker + else + echo_err_exit "Unsupported deploy mode, must be rpm or docker" + fi + echo_info "======Deploying gala-ops Done!======" +} + +#=======Middleware Deployment=======# +middleware_deploy_list="" +function parse_arg_middleware() { + ARGS=`getopt -a -o K:P:E:S:Ap --long kafka:,prometheus:,elastic:,arangodb,pyroscope,srcdir: -- "$@"` + [ $? 
-ne 0 ] && (print_usage; exit 1) + eval set -- "${ARGS}" + while true + do + case $1 in + -K|--kafka) + KAFKA_ADDR=$(addr_add_port "$2" ${KAFKA_PORT}) + middleware_deploy_list="${middleware_deploy_list}kafka " + shift;; + -P|--prometheus) + PROMETHEUS_SCRAPE_LIST="$2" + middleware_deploy_list="${middleware_deploy_list}prometheus " + shift;; + -E|--elastic) + ES_ADDR=$(addr_add_port "$2" ${ES_PORT}) + middleware_deploy_list="${middleware_deploy_list}elasticsearch " + shift;; + -p|--pyroscope) + middleware_deploy_list="${middleware_deploy_list}pyroscope " + ;; + -A|--arangodb) + middleware_deploy_list="${middleware_deploy_list}arangodb " + ;; + -S|--srcdir) + DEPLOY_TYPE="local" + LOCAL_DEPLOY_SRCDIR=$(realpath $2) + shift;; + --) + shift + break;; + *) + print_usage + exit 1;; + esac + shift + done +} + +KAFKA_VERSION='kafka_2.13-2.8.2' +KAFKA_WORKDIR="/opt/${KAFKA_VERSION}/" +function deploy_kafka() { + echo -e "[-] Deploy kafka" + echo -e "Installing..." + if [ ! -d ${KAFKA_WORKDIR} ] ; then + if ! which java > /dev/null 2>&1 ; then + install_rpm java-1.8.0-openjdk + fi + + if [ "$DEPLOY_TYPE" == "local" ] ; then + KAFKA_LOCAL_TARBALL="$LOCAL_DEPLOY_SRCDIR/${KAFKA_VERSION}.tgz" + [ ! -f "$KAFKA_LOCAL_TARBALL" ] && echo_err_exit "Error: fail to find kafka local tarball" + else + KAFKA_LOCAL_TARBALL="./${KAFKA_VERSION}.tgz" + if [ ! -f "$KAFKA_LOCAL_TARBALL" ] ; then + wget https://mirrors.aliyun.com/apache/kafka/2.8.2/${KAFKA_VERSION}.tgz --no-check-certificate + [ $? -ne 0 ] && echo_err_exit "Error: fail to download kafka tarball from official website, check proxy!" + fi + fi + tar xzf ${KAFKA_LOCAL_TARBALL} -C /opt + fi + + echo -e "Configuring..." + sed -i "0,/.*listeners=.*/s//listeners=PLAINTEXT:\/\/${KAFKA_ADDR}/" ${KAFKA_WORKDIR}/config/server.properties + + echo -e "Starting..." + cd ${KAFKA_WORKDIR} + ./bin/kafka-server-stop.sh + ./bin/zookeeper-server-stop.sh + ./bin/zookeeper-server-start.sh config/zookeeper.properties >/dev/null 2>&1 & + i=0 + while ! 
netstat -tunpl | grep ':2181' | grep -q 'LISTEN' ; do
        sleep 5
        let i+=5
        # Give zookeeper up to 60s to open its client port before giving up.
        if [ $i -ge 60 ] ; then
            echo_err_exit "Fail to start zookeeper, aborting"
        fi
    done
    ./bin/kafka-server-start.sh config/server.properties >/dev/null 2>&1 &
    cd - >/dev/null
}

PROMETHEUS_CONF='/etc/prometheus/prometheus.yml'
# Install prometheus2 (local rpm in offline mode, repo otherwise), rewrite the
# scrape_configs section of prometheus.yml from PROMETHEUS_SCRAPE_LIST
# (comma-separated "<name>-<ip>[:<port>]" entries, port defaults to 8888),
# then restart the service.
function deploy_prometheus2() {
    echo -e "[-] Deploy prometheus2"
    echo -e "Installing..."
    if ! rpm -qa | grep -q "prometheus2" 2>/dev/null ; then
        if [ "$DEPLOY_TYPE" == "local" ] ; then
            # FIX: the original assigned the literal string "ls <glob>" to the
            # variable, so the -f test and yum install always got a bogus path.
            # Expand the glob and take the first matching rpm instead.
            PROMETHEUS_LOCAL_RPM=$(ls $LOCAL_DEPLOY_SRCDIR/prometheus2*.$(uname -m).rpm 2>/dev/null | head -n1)
            [ ! -f "$PROMETHEUS_LOCAL_RPM" ] && echo_err_exit "Error: fail to find prometheus2 local rpm"
            yum install -y $PROMETHEUS_LOCAL_RPM
        else
            install_rpm prometheus2
        fi
    fi

    echo -e "Configuring..."
    # Back up the stock config, then delete everything after the
    # scrape_configs line so the job entries appended below fully replace the
    # old scrape targets.
    \cp -f ${PROMETHEUS_CONF} "${PROMETHEUS_CONF}.bak"
    line=$(grep -n scrape_configs ${PROMETHEUS_CONF} | cut -f1 -d':')
    sed -i "$((line+1)),\$d" ${PROMETHEUS_CONF}
    scrape_array=(${PROMETHEUS_SCRAPE_LIST//,/ })

    for var in ${scrape_array[@]}
    do
        port=""
        job_name=${var%:*}
        if echo $var | grep -q ":" ; then
            port=${var##*:}
        fi
        port=${port:-8888}
        # The scrape address is the part of the job name after the last '-'.
        scrape_addr=${job_name##*-}

        cat >> ${PROMETHEUS_CONF} << EOF
  - job_name: "${job_name}"
    static_configs:
      - targets: ["${scrape_addr}:${port}"]

EOF
    done

    echo -e "Starting..." 
+ systemctl restart prometheus.service || echo_err_exit "Error: fail to start prometheus.service"
}

# Run arangodb as a docker container. Reuses an existing "aops-arangodb"
# container when present; otherwise loads the image from the offline srcdir
# (local mode) or pulls it from the hub, then starts it with authentication
# disabled (ARANGO_NO_AUTH=yes).
function deploy_arangodb_docker() {
    container_name="aops-arangodb"
    if docker inspect ${container_name} >/dev/null 2>&1 ; then
        echo -e "arangodb container has already been created, running"
        docker start ${container_name} || echo_err_exit "Error: fail to run arangodb container"
        return
    fi

    echo -e "Pulling/Loading arangodb docker images"
    arangodb_tag="arangodb-$(uname -m)"
    if [ "$DEPLOY_TYPE" == "local" ] ; then
        docker_load_image_file "$LOCAL_DEPLOY_SRCDIR/arangodb-$(uname -m).tar"
    else
        docker_pull_image "${arangodb_tag}"
    fi

    echo -e "Creating and running arangodb container"
    docker run -d --name ${container_name} -p $ARANGODB_PORT:$ARANGODB_PORT -e ARANGO_NO_AUTH=yes ${DOCKER_HUB_TAG_PREFIX}/${arangodb_tag}
    [ $? -ne 0 ] && echo_err_exit "Error: fail to run arangodb container"
}

ARANGODB_CONF='/etc/arangodb3/arangod.conf'
# Deploy arangodb. Docker mode is used when --docker was given or in offline
# mode; rpm mode is only attempted on x86_64 official releases, and falls back
# to docker otherwise.
function deploy_arangodb() {
    echo -e "[-] Deploy arangodb"
    if [ "${GALA_DEPLOY_MODE}" == "docker" ] || [ "$DEPLOY_TYPE" == "local" ] ; then
        deploy_arangodb_docker
        return
    fi

    if [ $(uname -m) != 'x86_64' ] ; then
        echo_err_exit "Arangodb only available on x86_64 in rpm mode, try adding --docker"
    fi

    if [ "$OFFICIAL_RELEASE" == "no" ] ; then
        echo_warn "arangodb deployment in rpm mode is not supported on $OS_VERSION, try deploying with docker"
        deploy_arangodb_docker
        return
    fi

    echo -e "Installing..."
    install_rpm arangodb3

    echo -e "Configuring..."
    # Turn off arangodb authentication so spider/inference can connect without
    # credentials.
    sed -i 's/authentication =.*/authentication = false/g' ${ARANGODB_CONF}

    echo -e "Starting..." 
+ systemctl restart arangodb3.service || echo_err_exit "Error: fail to start arangodb3 service" +} + +function deploy_elasticsearch() { + echo_info "======Deploying Elasticsearch======" + echo -e "[1] Downloading es tarball" + + if [ "$DEPLOY_TYPE" == "local" ] ; then + ES_LOCAL_TARBALL="$LOCAL_DEPLOY_SRCDIR/elasticsearch-8.5.3-linux-$(uname -m).tar.gz" + [ ! -f "$ES_LOCAL_TARBALL" ] && echo_err_exit "Error: fail to find es local tarball" + else + ES_LOCAL_TARBALL="./elasticsearch-8.5.3-linux-$(uname -m).tar.gz" + if [ ! -f "$ES_LOCAL_TARBALL" ] ; then + wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-8.5.3-linux-$(uname -m).tar.gz --no-check-certificate + [ $? -ne 0 ] && echo_err_exit "Error: fail to download elasticsearch rpm from official website, check proxy!" + fi + fi + + echo -e "\n[2] Creating elasticsearch-used user/group" + groupadd elastic + useradd -g elastic elastic + \cp -f ${ES_LOCAL_TARBALL} /home/elastic + chown elastic:elastic /home/elastic/elasticsearch-8.5.3-linux-$(uname -m).tar.gz + + echo -e "\n[3] Starting es process" + kill -9 $(ps -ef | grep elasticsearch-8.5.3 | awk '{if($3==1) print $2}') 2>/dev/null + su - elastic -c "tar xzfm elasticsearch-8.5.3-linux-$(uname -m).tar.gz && \ + cd elasticsearch-8.5.3 && \ + ES_JAVA_OPTS=\"-Xms1g -Xmx1g\" ./bin/elasticsearch -E xpack.security.enabled=false -E http.host=0.0.0.0 -d" + echo_info "======Deploying Elasticsearch Done======" + deploy_logstash +} + +function deploy_logstash() { + echo_info "======Deploying Logstash======" + echo -e "[1] Downloading logstash rpm and install" + + if rpm -qa | grep -q logstash ; then + echo "logstash is already installed, skip installing..." + else + LOGSTASH_LOCAL_RPM="$LOCAL_DEPLOY_SRCDIR/logstash-8.5.3-$(uname -m).rpm" + if [ "$DEPLOY_TYPE" == "local" ] ; then + [ ! -f "$LOGSTASH_LOCAL_RPM" ] && echo_err_exit "Error: fail to find logstash local rpm" + else + LOGSTASH_LOCAL_RPM="./logstash-8.5.3-$(uname -m).rpm" + if [ ! 
-f "$LOGSTASH_LOCAL_RPM" ] ; then + wget https://mirrors.aliyun.com/elasticstack/8.x/yum/8.5.3/logstash-8.5.3-$(uname -m).rpm --no-check-certificate + [ $? -ne 0 ] && echo_err_exit "Error: fail to download logstash rpm from official website, check proxy!" + fi + fi + + yum install ${LOGSTASH_LOCAL_RPM} -y || echo_err_exit "Error: fail to install $LOGSTASH_LOCAL_RPM" + fi + + echo -e "\n[2] Configure logstash" + rm -f /etc/logstash/logstash-sample.conf + cat > /etc/logstash/conf.d/kafka2es.conf << EOF +input { + kafka { + bootstrap_servers => "${KAFKA_ADDR}" + topics => ["gala_anteater_hybrid_model", "gala_cause_inference"] + group_id => "hh_group" + client_id => "hh_client" + decorate_events => "true" + } +} + +filter { + json { + source => "message" + } + + date { + match => ["Timestamp", "UNIX_MS"] + target => "@timestamp" + } +} + +output { + elasticsearch { + hosts => "${ES_ADDR}" + index => "%{[@metadata][kafka][topic]}-%{+YYYY.MM.dd}" + } +} +EOF + + echo -e "\n[3] Starting logstash service" + cd /usr/share/logstash + kill -9 $(ps -ef | grep logstash | grep 'kafka2es.conf' | awk '{print $2}') 2>/dev/null + nohup ./bin/logstash -f /etc/logstash/conf.d/kafka2es.conf & + cd - > /dev/null + echo_info "======Deploying Logstash Done======" +} + +function deploy_pyroscope() { + if ps -ef | grep -v grep | grep -q 'pyroscope server' ; then + echo_info "pyroscope is already running, skip..." + return + fi + + if which pyroscope >/dev/null; then + nohup pyroscope server & + return + fi + + if [ "$DEPLOY_TYPE" == "local" ] ; then + PYROSCOPE_LOCAL_RPM="$LOCAL_DEPLOY_SRCDIR/pyroscope-0.37.2-1-$(uname -m).rpm" + [ ! -f "$PYROSCOPE_LOCAL_RPM" ] && echo_err_exit "Error: fail to find pyroscope local rpm" + else + PYROSCOPE_LOCAL_RPM="./pyroscope-0.37.2-1-$(uname -m).rpm" + if [ ! -f "$PYROSCOPE_LOCAL_RPM" ] ; then + wget https://dl.pyroscope.io/release/pyroscope-0.37.2-1-$(uname -m).rpm --no-check-certificate + [ $? 
-ne 0 ] && echo_err_exit "Error: fail to download pyroscope rpm from official website, check proxy!" + fi + fi + + yum install ${PYROSCOPE_LOCAL_RPM} -y || echo_err_exit "Error: fail to install $PYROSCOPE_LOCAL_RPM" + + nohup pyroscope server & +} + +function deploy_middleware() { + echo_info "======Deploying MiddleWare======" + [[ "${middleware_deploy_list}" =~ "kafka" ]] && deploy_kafka + [[ "${middleware_deploy_list}" =~ "prometheus" ]] && deploy_prometheus2 + [[ "${middleware_deploy_list}" =~ "arangodb" ]] && deploy_arangodb + [[ "${middleware_deploy_list}" =~ "elasticsearch" ]] && deploy_elasticsearch + [[ "${middleware_deploy_list}" =~ "pyroscope" ]] && deploy_pyroscope + + echo_info "======Deploying MiddleWare Done!======" +} + +#=======Grafana Deployment=======# +function parse_arg_grafana() { + ARGS=`getopt -a -o P:p:E:A:S: --long prometheus:,pyroscope:,elastic:,arangodb:srcdir: -- "$@"` + [ $? -ne 0 ] && ( print_usage; exit 1 ) + eval set -- "${ARGS}" + while true + do + case $1 in + -P|--prometheus) + PROMETHEUS_ADDR=$(addr_add_port "$2" ${PROMETHEUS_PORT}) + shift;; + -p|--pyroscope) + PYROSCOPE_ADDR=$(addr_add_port "$2" ${PYROSCOPE_PORT}) + shift;; + -E|--elastic) + ES_ADDR=$(addr_add_port "$2" ${ES_PORT}) + shift;; + -A|--arangodb) + ARANGODB_ADDR=$(addr_add_port "$2" ${ARANGODB_PORT}) + shift;; + -S|--srcdir) + DEPLOY_TYPE="local" + LOCAL_DEPLOY_SRCDIR=$(realpath $2) + shift;; + --) + shift + break;; + *) + print_usage + exit 1;; + esac + shift + done +} + +function get_grafana_datasourcesID_byName() { + d_name="$1" + + d_id=$(curl -X GET -H "Content-Type: application/json" http://admin:admin@localhost:3000/api/datasources/id/${d_name} 2>/dev/null) + if echo $d_id | grep -q 'not found' ; then + echo_err_exit "Failed to find datasource ${d_name} in grafana" + fi + + d_id=${d_id##*:} + d_id=${d_id%?} + echo $d_id +} + + +function deploy_grafana() { + container_name="aops-grafana" + + echo_info "======Deploying Grafana======" + + if docker inspect 
${container_name} >/dev/null 2>&1 ; then + docker stop ${container_name} + docker rm ${container_name} + fi + + echo -e "\n[1] Pulling/Loading grafana docker image" + if [ "$DEPLOY_TYPE" == "local" ] ; then + docker_load_image_file "$LOCAL_DEPLOY_SRCDIR/grafana-$(uname -m).tar" + elif [ "$DEPLOY_TYPE" == "remote" ] ; then + docker_pull_image "grafana" + fi + + echo -e "\n[2] Creating grafana container" + docker stop ${container_name} 2>/dev/null ; docker rm ${container_name} 2>/dev/null + docker run -d --name ${container_name} --network host ${DOCKER_HUB_TAG_PREFIX}/grafana + [ $? -ne 0 ] && echo_err_exit "Error: fail to run grafana container" + + echo -e "\n[3] Configuring datasources" + i=0 + while ! netstat -tunpl | grep ':3000' | grep 'LISTEN' | grep -q 'grafana' ; do + sleep 1 + let i+=1 + if [ $i -ge 10 ] ; then + echo_err_exit "Fail to connect grafana, check container status" + fi + done + + name="Prometheus-dfs" + id=$(get_grafana_datasourcesID_byName ${name}) + result=$(curl -X PUT -H "Content-Type: application/json" -d '{"id":'${id}',"name":"'${name}'","type":"prometheus", +"access":"proxy","url":"http://'${PROMETHEUS_ADDR}'","user":"","database":"", +"basicAuth":false,"isDefault":true,"jsonData":{"httpMethod":"POST"},"readOnly":false}' \ +http://admin:admin@localhost:3000/api/datasources/${id} 2>/dev/null) + if ! echo $result | grep -q 'Datasource updated' ; then + echo_err_exit "Fail to update ${name} datesource in grafana" + fi + + name="pyroscope-datasource" + id=$(get_grafana_datasourcesID_byName ${name}) + result=$(curl -X PUT -H "Content-Type: application/json" -d '{"id":'${id}',"name":"'${name}'","type":"pyroscope-datasource", +"access":"proxy","url":"","user":"","database":"","basicAuth":false,"isDefault":false, +"jsonData":{"path":"http://'${PYROSCOPE_ADDR}'"},"readOnly":false}' \ +http://admin:admin@localhost:3000/api/datasources/${id} 2>/dev/null) + if ! 
echo $result | grep -q 'Datasource updated' ; then + echo_err_exit "Fail to update ${name} datesource in grafana" + fi + + name="Elasticsearch-anteater_hybrid_model" + id=$(get_grafana_datasourcesID_byName ${name}) + result=$(curl -X PUT -H "Content-Type: application/json" -d '{"id":'${id}',"name":"'${name}'","type":"elasticsearch", +"access":"proxy","url":"http://'${ES_ADDR}'","user":"", +"database":"[gala_anteater_hybrid_model-]YYYY.MM.DD","basicAuth":false,"isDefault":false, +"jsonData":{"includeFrozen":false,"interval":"Daily","logLevelField":"","logMessageField":"","maxConcurrentShardRequests":5,"timeField":"@timestamp"}, +"readOnly":false}' http://admin:admin@localhost:3000/api/datasources/${id} 2>/dev/null) + if ! echo $result | grep -q 'Datasource updated' ; then + echo_err_exit "Fail to update ${name} datesource in grafana" + fi + + name="Elasticsearch-cause_inference" + id=$(get_grafana_datasourcesID_byName ${name}) + result=$(curl -X PUT -H "Content-Type: application/json" -d '{"id":'${id}',"name":"'${name}'","type":"elasticsearch", +"access":"proxy","url":"http://'${ES_ADDR}'","user":"", +"database":"[gala_cause_inference-]YYYY.MM.DD","basicAuth":false,"isDefault":false, +"jsonData":{"includeFrozen":false,"interval":"Daily","logLevelField":"","logMessageField":"","maxConcurrentShardRequests":5,"timeField":"@timestamp"}, +"readOnly":false}' http://admin:admin@localhost:3000/api/datasources/${id} 2>/dev/null) + if ! 
echo $result | grep -q 'Datasource updated' ; then + echo_err_exit "Fail to update ${name} datesource in grafana" + fi + + name="Elasticsearch-cause_inference_top" + id=$(get_grafana_datasourcesID_byName ${name}) + result=$(curl -X PUT -H "Content-Type: application/json" -d '{"id":'${id}',"name":"'${name}'","type":"elasticsearch", +"access":"proxy","url":"http://'${ES_ADDR}'","user":"", +"database":"[gala_cause_inference-]YYYY.MM.DD","basicAuth":false,"isDefault":false, +"jsonData":{"includeFrozen":false,"interval":"Daily","logLevelField":"","logMessageField":"_source.Resource.top1","maxConcurrentShardRequests":5,"timeField":"@timestamp"}, +"readOnly":false}' http://admin:admin@localhost:3000/api/datasources/${id} 2>/dev/null) + if ! echo $result | grep -q 'Datasource updated' ; then + echo_err_exit "Fail to update ${name} datesource in grafana" + fi + + name="Elasticsearch-graph" + id=$(get_grafana_datasourcesID_byName ${name}) + result=$(curl -X PUT -H "Content-Type: application/json" -d '{"id":'${id}',"name":"'${name}'","type":"elasticsearch", +"access":"proxy","url":"http://'${ES_ADDR}'","user":"", +"database":"aops_graph2","basicAuth":false,"isDefault":false, +"jsonData":{"includeFrozen":false,"logLevelField":"","logMessageField":"_source","maxConcurrentShardRequests":5,"timeField":"timestamp"}, +"readOnly":false}' http://admin:admin@localhost:3000/api/datasources/${id} 2>/dev/null) + if ! echo $result | grep -q 'Datasource updated' ; then + echo_err_exit "Fail to update ${name} datesource in grafana" + fi + + # Create topo graph es resources + curl -X PUT "${ES_ADDR}/aops_graph2?pretty" >/dev/null 2>&1 + + # Running daemon that transfrom arangodb to es + if ! 
which pip3 >/dev/null ; then
        install_rpm python3-pip
    fi
    # Install the python deps of arangodb2es.py: offline from the local wheel
    # dir, otherwise from the aliyun pypi mirror.
    if [ "$DEPLOY_TYPE" == "local" ] ; then
        pushd $LOCAL_DEPLOY_SRCDIR
        pip3 install -q elasticsearch python-arango pytz pyArango --no-index --find-links=./
        # FIX: capture pip's status before popd runs; the original tested $?
        # after the if/fi, which in this branch held popd's (always 0) status
        # and so never caught a failed install.
        pip_rv=$?
        popd
    else
        pip3 install -q elasticsearch python-arango pytz pyArango -i http://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com
        pip_rv=$?
    fi
    [ $pip_rv -ne 0 ] && echo_err_exit "Fail to pip install dependencies for arangodb2es.py"

    # Point the arangodb->es bridge at the deployed middleware, then (re)start
    # it in the background.
    mkdir -p /opt/gala
    \cp -f ${WORKING_DIR}/arangodb2es.py /opt/gala
    sed -i "s/self.arangodbUrl =.*/self.arangodbUrl = 'http:\/\/${ARANGODB_ADDR}'/g" /opt/gala/arangodb2es.py
    sed -i "s/self.esUrl =.*/self.esUrl = 'http:\/\/${ES_ADDR}'/g" /opt/gala/arangodb2es.py
    sed -i "s/self.promethusUrl =.*/self.promethusUrl = 'http:\/\/${PROMETHEUS_ADDR}'/g" /opt/gala/arangodb2es.py

    kill -9 $(ps -ef | grep arangodb2es.py | grep -v grep | awk '{print $2}') 2>/dev/null
    python3 /opt/gala/arangodb2es.py >/dev/null 2>&1 &

    echo_info "======Deploying Grafana Done======"
}

#=======Main=======#
# Map the installed openEuler release to the gopher docker tag and the remote
# repo path; sets OFFICIAL_RELEASE=no where gala-ops rpms are unavailable.
function detect_openEuler_version() {
    # NOTE(review): relies on /etc/openEuler-latest existing with a KEY=VALUE
    # first line; the 2>/dev/null redirect only silences awk, not cat —
    # confirm behavior on minimal installs.
    OS_VERSION=$(cat /etc/openEuler-latest | head -n1 | awk -F= '{print $2}' 2> /dev/null)
    if [ "$OS_VERSION" == "openEuler-22.03-LTS-SP1" ] ; then
        GOPHER_DOCKER_TAG="22.03-lts-sp1"
        REMOTE_REPO_PREFIX="$REMOTE_REPO_PREFIX/$OS_VERSION"
    elif [ "$OS_VERSION" == "openEuler-22.03-LTS" ] ; then
        GOPHER_DOCKER_TAG="22.03-lts"
        REMOTE_REPO_PREFIX="$REMOTE_REPO_PREFIX/openEuler-22.03-LTS-SP1"
        OFFICIAL_RELEASE="no"
    elif [ "$OS_VERSION" == "openEuler-20.03-LTS-SP1" ] ; then
        GOPHER_DOCKER_TAG="20.03-lts-sp1"
        REMOTE_REPO_PREFIX="$REMOTE_REPO_PREFIX/$OS_VERSION"
        OFFICIAL_RELEASE="no"
    else
        echo_err_exit "Unsupported openEuler version, aborting!"
    fi
}

function detect_os() {
    if [ "$(uname -m)" != "x86_64" ] ; then
        echo_err_exit "Deployment Tool now only support x86_64, aborting!" 
+ fi + + OS_TYPE=$(cat /etc/os-release | grep '^ID=' | awk -F '\"' '{print $2}') + [ -z "$OS_TYPE" ] && echo_err_exit "Unsupport OS type, aborting!" + + if [ "x$OS_TYPE" == "xopenEuler" ] ; then + detect_openEuler_version + elif [ "x$OS_TYPE" == "xkylin" ] ; then + OS_VERSION="$OS_TYPE" + REMOTE_REPO_PREFIX="$REMOTE_REPO_PREFIX/openEuler-20.03-LTS" + OFFICIAL_RELEASE="no" + GOPHER_DOCKER_TAG="kylin-v10" + elif [ "x$OS_TYPE" == "xeuleros" ] ; then + # TODO: support euleros + OS_VERSION="$OS_TYPE" + echo_err_exit "Unsupport OS type, aborting" + else + echo_err_exit "Unsupport OS type, aborting" + fi +} + + +COMPONENT="$1" +shift + +detect_os +case "x$COMPONENT" in + xgopher) + parse_arg_gopher $@ + deploy_gopher + ;; + xops) + parse_arg_ops $@ + deploy_ops + ;; + xopengauss) + parse_arg_opengauss_server $@ + deploy_opengauss_server + ;; + xmiddleware) + parse_arg_middleware $@ + deploy_middleware + ;; + xgrafana) + parse_arg_grafana $@ + deploy_grafana + ;; + x) + echo "Must specify a componet to be deployed!" 
+ print_usage + exit 1 + ;; + *) + echo "Unsupport component:" $COMPONENT + print_usage + exit 1 + ;; +esac diff --git a/deploy/download_offline_res.sh b/deploy/download_offline_res.sh new file mode 100755 index 0000000..838e5e6 --- /dev/null +++ b/deploy/download_offline_res.sh @@ -0,0 +1,331 @@ +#!/bin/bash +set +e + +OS_TYPE="" +OS_VERSION="" +WORKING_DIR=$(realpath $(dirname $0)) +DOWNLOAD_DIR="" +DOWNLOAD_DIR_MW=$WORKING_DIR/gala_deploy_middleware +DOWNLOAD_DIR_OPS=$WORKING_DIR/gala_deploy_ops +DOWNLOAD_DIR_GRAFANA=$WORKING_DIR/gala_deploy_grafana +DOWNLOAD_DIR_GOPHER=$WORKING_DIR/gala_deploy_gopher +DOCKER_HUB='hub.oepkgs.net' +DOCKER_HUB_TAG_PREFIX="${DOCKER_HUB}/a-ops" +DOWNLOAD_COMPONENT_LIST="" +REMOTE_REPO_PREFIX="http://mirrors.aliyun.com/openeuler/" +EPOL_REPO="" +EPOL_UPDATE_REPO="" + +function echo_err_exit() { + echo -e "\e[31m $@ \e[0m" + exit 1; +} + +function echo_info() { + echo -e "\e[32m $@ \e[0m" +} + +function echo_warn() { + echo -e "\e[33m $@ \e[0m" +} + +function yum_download() { + rpm="$1" + repo_path="" + + if echo $REMOTE_REPO_PREFIX | grep -q "openEuler-22.03-LTS-SP1" ; then + EPOL_REPO=$REMOTE_REPO_PREFIX/EPOL/main/$(uname -m) + EPOL_UPDATE_REPO=$REMOTE_REPO_PREFIX/EPOL/update/main/$(uname -m) + else + EPOL_REPO=$REMOTE_REPO_PREFIX/EPOL/$(uname -m) + EPOL_UPDATE_REPO=$REMOTE_REPO_PREFIX/EPOL/update/$(uname -m) + fi + + repo_path="--repofrompath=epol_deploy,$EPOL_REPO \ + --repofrompath=epol_update_deploy,$EPOL_UPDATE_REPO \ + --repofrompath=everything_deploy,$REMOTE_REPO_PREFIX/everything/$(uname -m) \ + --repofrompath=update_deploy,$REMOTE_REPO_PREFIX/update/$(uname -m)" + + echo $repo_path + yumdownloader --resolve $rpm $repo_path --destdir=${DOWNLOAD_DIR} --nogpgcheck -b + [ $? -ne 0 ] && echo_err_exit "Error: failed to download $rpm, please check repo!" 
+}
+
+# Install an rpm from the configured openEuler repos (EPOL/everything/update),
+# skipping if an installed package name already matches.
+function install_rpm() {
+    rpm="$1"
+    repo_path=""
+
+    if rpm -qa | grep -q $rpm 2>/dev/null ; then
+        return
+    fi
+
+    # 22.03-LTS-SP1 moved EPOL under an extra "main" path component.
+    if echo $REMOTE_REPO_PREFIX | grep -q "openEuler-22.03-LTS-SP1" ; then
+        EPOL_REPO=$REMOTE_REPO_PREFIX/EPOL/main/$(uname -m)
+        EPOL_UPDATE_REPO=$REMOTE_REPO_PREFIX/EPOL/update/main/$(uname -m)
+    else
+        EPOL_REPO=$REMOTE_REPO_PREFIX/EPOL/$(uname -m)
+        EPOL_UPDATE_REPO=$REMOTE_REPO_PREFIX/EPOL/update/$(uname -m)
+    fi
+
+    repo_path="--repofrompath=epol_deploy,$EPOL_REPO \
+        --repofrompath=epol_update_deploy,$EPOL_UPDATE_REPO \
+        --repofrompath=everything_deploy,$REMOTE_REPO_PREFIX/everything/$(uname -m) \
+        --repofrompath=update_deploy,$REMOTE_REPO_PREFIX/update/$(uname -m)"
+
+    echo $repo_path
+    yum install -y $rpm $repo_path --nogpgcheck
+    [ $? -ne 0 ] && echo_err_exit "Error: failed to install $rpm, please check repo!"
+}
+
+# Allow the insecure oepkgs registry in /etc/sysconfig/docker (idempotent) and
+# restart the docker daemon when the entry was added.
+function config_docker() {
+    if ! grep "^INSECURE_REGISTRY" /etc/sysconfig/docker | grep -q "${DOCKER_HUB}" ; then
+        cat >> /etc/sysconfig/docker << EOF
+INSECURE_REGISTRY='--insecure-registry ${DOCKER_HUB}'
+EOF
+        systemctl daemon-reload
+        systemctl restart docker || echo_err_exit "Error: fail to configure docker"
+    fi
+}
+
+# Pull a tag from the oepkgs hub, installing and configuring docker first if
+# the client is missing.
+function docker_pull_image() {
+    tag_name="$1"
+
+    if ! docker --version >/dev/null 2>&1 ; then
+        echo_warn "Docker cmd not found, try installing docker firstly"
+        yum install docker -y
+        # FIX: the original wrote "[ $? -ne 0 ] ; echo_err_exit ...", which
+        # ran echo_err_exit unconditionally — aborting even after a successful
+        # docker install. The exit must be gated on the test with &&.
+        [ $? -ne 0 ] && echo_err_exit "Error: fail to install docker"
+    fi
+    config_docker
+    docker pull ${DOCKER_HUB_TAG_PREFIX}/"${tag_name}"
+    [ $? -ne 0 ] && echo_err_exit "Error: failed to pull docker image:" $tag_name
+}
+
+
+download_ops_image() {
+    echo_info "- Download gala-anteater docker image"
+    if [ ! 
-f ${DOWNLOAD_DIR}/gala-anteater-$(uname -m).tar ] ; then + echo " [1] Executing \"docker pull ${DOCKER_HUB_TAG_PREFIX}/gala-anteater-$(uname -m)\"" + docker_pull_image "gala-anteater-$(uname -m):1.0.1" + + echo " [2] Executing \"docker save -o ${DOWNLOAD_DIR}/gala-anteater-$(uname -m).tar ${DOCKER_HUB_TAG_PREFIX}/gala-anteater-$(uname -m):1.0.1\"" + docker save -o ${DOWNLOAD_DIR}/gala-anteater-$(uname -m).tar ${DOCKER_HUB_TAG_PREFIX}/gala-anteater-$(uname -m):1.0.1 + fi + + echo_info "- Download gala-spider docker image" + if [ ! -f ${DOWNLOAD_DIR}/gala-spider-$(uname -m).tar ] ; then + echo " [1] Executing \"docker pull ${DOCKER_HUB_TAG_PREFIX}/gala-spider-$(uname -m)\"" + docker_pull_image "gala-spider-$(uname -m):1.0.1" + + echo " [2] Executing \"docker save -o ${DOWNLOAD_DIR}/gala-spider-$(uname -m).tar ${DOCKER_HUB_TAG_PREFIX}/gala-spider-$(uname -m):1.0.1\"" + docker save -o ${DOWNLOAD_DIR}/gala-spider-$(uname -m).tar ${DOCKER_HUB_TAG_PREFIX}/gala-spider-$(uname -m):1.0.1 + fi + + echo_info "- Download gala-inference docker image" + if [ ! -f ${DOWNLOAD_DIR}/gala-inference-$(uname -m).tar ] ; then + echo " [1] Executing \"docker pull ${DOCKER_HUB_TAG_PREFIX}/gala-inference-$(uname -m)\"" + docker_pull_image "gala-inference-$(uname -m):1.0.1" + + echo " [2] Executing \"docker save -o ${DOWNLOAD_DIR}/gala-inference-$(uname -m).tar ${DOCKER_HUB_TAG_PREFIX}/gala-inference-$(uname -m):1.0.1\"" + docker save -o ${DOWNLOAD_DIR}/gala-inference-$(uname -m).tar ${DOCKER_HUB_TAG_PREFIX}/gala-inference-$(uname -m):1.0.1 + fi + + wget https://gitee.com/openeuler/gala-anteater/raw/master/config/gala-anteater.yaml -O ${DOWNLOAD_DIR}/gala-anteater.yaml --no-check-certificate +} + +download_kafka_tarball() { + KAFKA_VERSION='kafka_2.13-2.8.2' + echo_info "- Download $KAFKA_VERSION tarball" + if [ ! 
-f ${DOWNLOAD_DIR}/${KAFKA_VERSION}.tgz ] ; then + echo " Executing \"wget https://mirrors.aliyun.com/apache/kafka/2.8.2/${KAFKA_VERSION}.tgz\"" + wget https://mirrors.aliyun.com/apache/kafka/2.8.2/${KAFKA_VERSION}.tgz -P ${DOWNLOAD_DIR} --no-check-certificate + fi + + if [ ! -f ${DOWNLOAD_DIR}/java-1.8.0-openjdk*.rpm ] ; then + yum_download java-1.8.0-openjdk + install_rpm createrepo + createrepo ${DOWNLOAD_DIR} + fi +} + + +download_prometheus() { + echo_info "- Download prometheus2 rpm" + if [ ! -f ${DOWNLOAD_DIR}/prometheus2*.$(uname -m).rpm ] ; then + echo " Executing \"yumdownloader prometheus2\"" + yum_download prometheus2 + fi +} + +download_es_logstash() { + echo_info "- Download elasticsearch(8.5.3) tarball" + if [ ! -f ${DOWNLOAD_DIR}/elasticsearch-8.5.3-linux-$(uname -m).tar.gz ] ; then + echo " Executing \"wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-8.5.3-linux-$(uname -m).tar.gz\"" + wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-8.5.3-linux-$(uname -m).tar.gz -P ${DOWNLOAD_DIR} --no-check-certificate + fi + + echo_info "- Download logstash(8.5.3) rpm" + if [ ! -f ${DOWNLOAD_DIR}/logstash-8.5.3-$(uname -m).rpm ] ; then + echo " Executing \"wget https://mirrors.aliyun.com/elasticstack/8.x/yum/8.5.3/logstash-8.5.3-$(uname -m).rpm\"" + wget https://mirrors.aliyun.com/elasticstack/8.x/yum/8.5.3/logstash-8.5.3-$(uname -m).rpm -P ${DOWNLOAD_DIR} --no-check-certificate + fi +} + +download_arangodb_image() { + echo_info "- Download arangodb docker image" + if [ ! 
-f ${DOWNLOAD_DIR}/arangodb-$(uname -m).tar ] ; then + echo " [1] Executing \"docker pull ${DOCKER_HUB_TAG_PREFIX}/arangodb-$(uname -m)\"" + docker_pull_image "arangodb-$(uname -m)" + + echo " [2] Executing \"docker save -o ${DOWNLOAD_DIR}/arangodb-$(uname -m).tar ${DOCKER_HUB_TAG_PREFIX}/arangodb-$(uname -m)\"" + docker save -o ${DOWNLOAD_DIR}/arangodb-$(uname -m).tar ${DOCKER_HUB_TAG_PREFIX}/arangodb-$(uname -m) + fi +} + +download_pyroscope() { + echo_info "- Download pyroscope rpm" + if [ ! -f ${DOWNLOAD_DIR}/pyroscope-0.37.2-1-$(uname -m).rpm ] ; then + echo " Executing \"wget https://dl.pyroscope.io/release/pyroscope-0.37.2-1-$(uname -m).rpm\"" + wget https://dl.pyroscope.io/release/pyroscope-0.37.2-1-$(uname -m).rpm -P ${DOWNLOAD_DIR} --no-check-certificate + fi +} + + +download_grafana_image() { + echo_info "- Download grafana docker image" + if [ ! -f ${DOWNLOAD_DIR}/grafana-$(uname -m).tar ] ; then + echo " [1] Executing \"docker pull ${DOCKER_HUB_TAG_PREFIX}/grafana:latest\"" + docker_pull_image "grafana" + + echo " [2] Executing \"docker save -o ${DOWNLOAD_DIR}/grafana-$(uname -m).tar ${DOCKER_HUB_TAG_PREFIX}/grafana\"" + docker save -o ${DOWNLOAD_DIR}/grafana-$(uname -m).tar ${DOCKER_HUB_TAG_PREFIX}/grafana + fi + + echo_info "- Download python dep for arangodb2es.py" + if ! 
which pip3 >/dev/null ; then + yum install -y python3-pip + fi + pip3 download elasticsearch python-arango pytz pyArango \ + -d ${DOWNLOAD_DIR} -i http://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com +} + +download_gopher_deps() { + wget https://mirrors.aliyun.com/openeuler/openEuler-20.03-LTS-SP3/update/x86_64/Packages/libbpf-0.3-4.oe1.x86_64.rpm -P ${DOWNLOAD_DIR} --no-check-certificate + wget https://mirrors.aliyun.com/openeuler/openEuler-22.03-LTS-SP1/EPOL/main/x86_64/Packages/flamegraph-1.0-1.oe2203sp1.noarch.rpm -P ${DOWNLOAD_DIR} --no-check-certificate + wget http://121.36.84.172/dailybuild/openEuler-20.03-LTS-SP1/openEuler-20.03-LTS-SP1/EPOL/main/x86_64/Packages/cadvisor-0.37.0-2.oe1.x86_64.rpm -P ${DOWNLOAD_DIR} --no-check-certificate + wget https://mirrors.aliyun.com/openeuler/openEuler-22.03-LTS-SP1/everything/x86_64/Packages/cjson-1.7.15-1.oe2203sp1.x86_64.rpm -P ${DOWNLOAD_DIR} --no-check-certificate + wget http://121.36.84.172/dailybuild/openEuler-20.03-LTS-SP1/openEuler-20.03-LTS-SP1/EPOL/main/x86_64/Packages/python3-libconf-2.0.1-1.oe1.noarch.rpm -P ${DOWNLOAD_DIR} --no-check-certificate + yum_download log4cplus + yum_download python3-requests +} + +download_gopher() { + echo_info "- Download gala-gopher rpm" + gopher_local_rpm="" + + if [ "$OS_VERSION" == "openEuler-22.03-LTS-SP1" ] ; then + yumdownloader --repofrompath="gala_eur,https://eur.openeuler.openatom.cn/results/Vchanger/gala/openeuler-22.03_LTS_SP1-x86_64/" gala-gopher \ + --destdir=${DOWNLOAD_DIR} -b + gopher_local_rpm=$(ls ${DOWNLOAD_DIR}/gala-gopher*oe2203sp1.*.rpm) + yum_download $gopher_local_rpm + elif [ "$OS_VERSION" == "openEuler-22.03-LTS" ] ; then + yumdownloader --repofrompath="gala_eur,https://eur.openeuler.openatom.cn/results/Vchanger/gala-oe2203/openeuler-22.03_LTS_SP1-x86_64/" gala-gopher \ + --destdir=${DOWNLOAD_DIR} -b + gopher_local_rpm=$(ls ${DOWNLOAD_DIR}/gala-gopher*oe2203.*.rpm) + yum_download $gopher_local_rpm + elif [ "$OS_VERSION" == 
"openEuler-20.03-LTS-SP1" ] ; then + yumdownloader --repofrompath="gala_eur,https://eur.openeuler.openatom.cn/results/Vchanger/gala-oe2003sp1/openeuler-20.03_LTS_SP3-x86_64/" gala-gopher \ + --destdir=${DOWNLOAD_DIR} -b + gopher_local_rpm=$(ls ${DOWNLOAD_DIR}/gala-gopher*oe1.*.rpm) + download_gopher_deps + elif [ "$OS_VERSION" == "kylin" ] ; then + yumdownloader --repofrompath="gala_eur,https://eur.openeuler.openatom.cn/results/Vchanger/gala-kylin/openeuler-20.03_LTS_SP3-x86_64/" gala-gopher \ + --destdir=${DOWNLOAD_DIR} -b + gopher_local_rpm=$(ls ${DOWNLOAD_DIR}/gala-gopher*ky10.*.rpm) + download_gopher_deps + else + echo_err_exit "Unsupported openEuler version, aborting!" + fi + + createrepo ${DOWNLOAD_DIR} +} + +function detect_openEuler_version() { + OS_VERSION=$(cat /etc/openEuler-latest | head -n1 | awk -F= '{print $2}' 2> /dev/null) + if [ "$OS_VERSION" == "openEuler-22.03-LTS-SP1" ] ; then + REMOTE_REPO_PREFIX="$REMOTE_REPO_PREFIX/$OS_VERSION" + elif [ "$OS_VERSION" == "openEuler-22.03-LTS" ] ; then + REMOTE_REPO_PREFIX="$REMOTE_REPO_PREFIX/openEuler-22.03-LTS-SP1" + elif [ "$OS_VERSION" == "openEuler-20.03-LTS-SP1" ] ; then + REMOTE_REPO_PREFIX="$REMOTE_REPO_PREFIX/$OS_VERSION" + else + echo_err_exit "Unsupported openEuler version, aborting!" + fi +} + +function detect_os() { + if [ "$(uname -m)" != "x86_64" ] ; then + echo_err_exit "Deployment Tool now only support x86_64, aborting!" + fi + + OS_TYPE=$(cat /etc/os-release | grep '^ID=' | awk -F '\"' '{print $2}') + [ -z "$OS_TYPE" ] && echo_err_exit "Unsupport OS type, aborting!" 
+ + if [ "x$OS_TYPE" == "xopenEuler" ] ; then + detect_openEuler_version + elif [ "x$OS_TYPE" == "xkylin" ] ; then + OS_VERSION="$OS_TYPE" + REMOTE_REPO_PREFIX="$REMOTE_REPO_PREFIX/openEuler-20.03-LTS-SP1" + OFFICIAL_RELEASE="no" + else + echo_err_exit "Unsupport OS type, aborting" + fi +} + +detect_os +component=$1 +case "x$component" in + xgopher) + DOWNLOAD_COMPONENT_LIST="${DOWNLOAD_COMPONENT_LIST}gopher " + DOWNLOAD_DIR=$DOWNLOAD_DIR_GOPHER + ;; + xops) + DOWNLOAD_COMPONENT_LIST="${DOWNLOAD_COMPONENT_LIST}ops " + DOWNLOAD_DIR=$DOWNLOAD_DIR_OPS + ;; + xmiddleware) + DOWNLOAD_COMPONENT_LIST="${DOWNLOAD_COMPONENT_LIST}middleware " + DOWNLOAD_DIR=$DOWNLOAD_DIR_MW + ;; + xgrafana) + DOWNLOAD_COMPONENT_LIST="${DOWNLOAD_COMPONENT_LIST}grafana " + DOWNLOAD_DIR=$DOWNLOAD_DIR_GRAFANA + ;; + x) + echo_err_exit "Must specify download component" + ;; + *) + echo_err_exit "Unsupport component:" $component + ;; +esac + + +mkdir -p ${DOWNLOAD_DIR} +if [[ "${DOWNLOAD_COMPONENT_LIST}" =~ "middleware" ]] ; then + download_kafka_tarball + download_prometheus + download_es_logstash + download_arangodb_image + download_pyroscope +fi + +if [[ "${DOWNLOAD_COMPONENT_LIST}" =~ "grafana" ]] ; then + download_grafana_image +fi + +if [[ "${DOWNLOAD_COMPONENT_LIST}" =~ "ops" ]] ; then + download_ops_image +fi + +if [[ "${DOWNLOAD_COMPONENT_LIST}" =~ "gopher" ]] ; then + download_gopher +fi -- Gitee