diff --git a/Bigdata/accumulo/2.1.3/24.03-lts-sp1/Dockerfile b/Bigdata/accumulo/2.1.3/24.03-lts-sp1/Dockerfile index 9842eb85284c39efbd12b8629345a9564f85a8c5..9411ed326281034acabbcb04350ddbff320b1534 100644 --- a/Bigdata/accumulo/2.1.3/24.03-lts-sp1/Dockerfile +++ b/Bigdata/accumulo/2.1.3/24.03-lts-sp1/Dockerfile @@ -3,18 +3,22 @@ FROM $BASE ARG VERSION=2.1.3 -# install accumulo -RUN curl -fSL -o accumulo.tar.gz https://dlcdn.apache.org/accumulo/${VERSION}/accumulo-${VERSION}-bin.tar.gz; \ - mkdir -p /usr/local/accumulo && \ - tar -zxf accumulo.tar.gz -C /usr/local/accumulo --strip-components=1 && \ - rm -rf accumulo.tar.gz -ENV PATH=$PATH:/usr/local/accumulo/bin +# install -y openjdk +COPY ssh/* /etc/ssh/ +RUN yum install -y java-11-openjdk-devel openssh openssh-clients sudo hostname && \ + yum clean all && \ + ssh-keygen -A && ssh-keygen -t ed25519 -P '' -f /root/.ssh/id_ed25519 && \ + cat /root/.ssh/id_ed25519.pub > /root/.ssh/authorized_keys && \ + chmod 0400 /root/.ssh/authorized_keys +ENV JAVA_HOME=/usr/lib/jvm/java-17-openjdk # install zookeeper RUN curl -fSL -o zookeeper.tar.gz https://archive.apache.org/dist/zookeeper/zookeeper-3.9.3/apache-zookeeper-3.9.3-bin.tar.gz; \ mkdir -p /usr/local/zookeeper && \ tar -zxf zookeeper.tar.gz -C /usr/local/zookeeper --strip-components=1 && \ - rm -rf zookeeper.tar.gz + rm -rf zookeeper.tar.gz && \ + cd /usr/local/zookeeper/conf && \ + touch zoo.cfg && echo -e 'tickTime=2000\ndataDir=/var/lib/zookeeper\nclientPort=2181\nadmin.serverPort=8081' > zoo.cfg ENV PATH=$PATH:/usr/local/zookeeper/bin ENV ZOOKEEPER_HOME=/usr/local/zookeeper/ @@ -26,10 +30,19 @@ RUN curl -fSL -o hadoop.tar.gz https://dlcdn.apache.org/hadoop/common/hadoop-${H rm -rf hadoop.tar.gz ENV PATH=$PATH:/usr/local/hadoop/bin ENV HADOOP_HOME=/usr/local/hadoop/ +COPY hadoop/* $HADOOP_HOME/etc/hadoop/ -# install -y openjdk -RUN yum install -y java-17-openjdk-devel hostname && \ - yum clean all -ENV JAVA_HOME=/usr/lib/jvm/java-17-openjdk +# install accumulo +RUN curl -fSL -o accumulo.tar.gz https://dlcdn.apache.org/accumulo/${VERSION}/accumulo-${VERSION}-bin.tar.gz; \ + mkdir -p /usr/local/accumulo && \ + tar -zxf accumulo.tar.gz -C /usr/local/accumulo --strip-components=1 && \ + rm -rf accumulo.tar.gz +ENV PATH=$PATH:/usr/local/accumulo/bin +ENV ACCUMULO_HOME=/usr/local/accumulo/ +COPY properties/* $ACCUMULO_HOME/conf/ + +COPY entrypoint.sh / +RUN chmod +x /entrypoint.sh -CMD ["accumulo", "version"] \ No newline at end of file +ENTRYPOINT ["/entrypoint.sh"] +CMD ["init", "--upload-accumulo-props"] \ No newline at end of file diff --git a/Bigdata/accumulo/2.1.3/24.03-lts-sp1/entrypoint.sh b/Bigdata/accumulo/2.1.3/24.03-lts-sp1/entrypoint.sh new file mode 100644 index 0000000000000000000000000000000000000000..de9cb3b8462b9958eedb7508000d1a16679e38fe --- /dev/null +++ b/Bigdata/accumulo/2.1.3/24.03-lts-sp1/entrypoint.sh @@ -0,0 +1,23 @@ +#!/bin/bash + +# Start SSH service +/usr/sbin/sshd + +# Format HDFS +echo "Starting Zookeeper..." +"$ZOOKEEPER_HOME"/bin/zkServer.sh start + +# Format HDFS +echo "Formatting HDFS NameNode..." +hdfs namenode -format + +# Run Hadoop +echo "Starting Hadoop..." 
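+# start-all.sh brings up the HDFS daemons (NameNode, DataNode, SecondaryNameNode)
+# and the YARN daemons (ResourceManager, NodeManager), connecting over SSH to the
+# hosts listed in the workers file (localhost by default in this image)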
+$HADOOP_HOME/sbin/start-all.sh start +echo "Start History Server" +$HADOOP_HOME/sbin/mr-jobhistory-daemon.sh --config $HADOOP_HOME/etc/hadoop start historyserver + +# Run accumulo with CMD +accumulo "$@" + +tail -f /dev/null \ No newline at end of file diff --git a/Bigdata/accumulo/2.1.3/24.03-lts-sp1/hadoop/capacity-scheduler.xml b/Bigdata/accumulo/2.1.3/24.03-lts-sp1/hadoop/capacity-scheduler.xml new file mode 100644 index 0000000000000000000000000000000000000000..5eb4cddb0b3f079a35b06de37056322da06acaf7 --- /dev/null +++ b/Bigdata/accumulo/2.1.3/24.03-lts-sp1/hadoop/capacity-scheduler.xml @@ -0,0 +1,65 @@ + + + + + + yarn.scheduler.capacity.default.minimum-user-limit-percent + 100 + + + yarn.scheduler.capacity.maximum-am-resource-percent + 0.5 + + + yarn.scheduler.capacity.maximum-applications + 10000 + + + yarn.scheduler.capacity.node-locality-delay + 40 + + + yarn.scheduler.capacity.resource-calculator + org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator + + + yarn.scheduler.capacity.root.capacity + 100 + + + yarn.scheduler.capacity.root.default.capacity + 100 + + + yarn.scheduler.capacity.root.default.maximum-am-resource-percent + 0.5 + + + yarn.scheduler.capacity.root.default.maximum-capacity + 100 + + + yarn.scheduler.capacity.root.default.state + RUNNING + + + yarn.scheduler.capacity.root.default.user-limit-factor + 1 + + + yarn.scheduler.capacity.root.queues + default + + \ No newline at end of file diff --git a/Bigdata/accumulo/2.1.3/24.03-lts-sp1/hadoop/core-site.xml b/Bigdata/accumulo/2.1.3/24.03-lts-sp1/hadoop/core-site.xml new file mode 100644 index 0000000000000000000000000000000000000000..1821f54490e8bca27ecfef8c6589d123cd2d59b2 --- /dev/null +++ b/Bigdata/accumulo/2.1.3/24.03-lts-sp1/hadoop/core-site.xml @@ -0,0 +1,36 @@ + + + + + + + + + hadoop.tmp.dir + /data/hadoop + + + fs.defaultFS + hdfs://localhost:8020 + + + hadoop.http.staticuser.user + root + + + fs.hdfs.impl + org.apache.hadoop.hdfs.DistributedFileSystem + + \ No newline at end of file diff --git a/Bigdata/accumulo/2.1.3/24.03-lts-sp1/hadoop/hadoop-env.sh b/Bigdata/accumulo/2.1.3/24.03-lts-sp1/hadoop/hadoop-env.sh new file mode 100644 index 0000000000000000000000000000000000000000..3c57ab9c5ad5e25791831501351b207720a08d5a --- /dev/null +++ b/Bigdata/accumulo/2.1.3/24.03-lts-sp1/hadoop/hadoop-env.sh @@ -0,0 +1,8 @@ + +export JAVA_HOME=/usr/lib/jvm/java-11-openjdk +export HADOOP_OS_TYPE=${HADOOP_OS_TYPE:-$(uname -s)} +export HDFS_NAMENODE_USER="root" +export HDFS_DATANODE_USER="root" +export HDFS_SECONDARYNAMENODE_USER="root" +export YARN_RESOURCEMANAGER_USER="root" +export YARN_NODEMANAGER_USER="root" \ No newline at end of file diff --git a/Bigdata/accumulo/2.1.3/24.03-lts-sp1/hadoop/hdfs-site.xml b/Bigdata/accumulo/2.1.3/24.03-lts-sp1/hadoop/hdfs-site.xml new file mode 100644 index 0000000000000000000000000000000000000000..73d9f71ae7b0420f0e97436b8deb0a7dea72ff4d --- /dev/null +++ b/Bigdata/accumulo/2.1.3/24.03-lts-sp1/hadoop/hdfs-site.xml @@ -0,0 +1,24 @@ + + + + + + + + + dfs.replication + 1 + + \ No newline at end of file diff --git a/Bigdata/accumulo/2.1.3/24.03-lts-sp1/hadoop/mapred-site.xml b/Bigdata/accumulo/2.1.3/24.03-lts-sp1/hadoop/mapred-site.xml new file mode 100644 index 0000000000000000000000000000000000000000..1a9adfda2ec5d1e31ac9135195f69de972ea9084 --- /dev/null +++ b/Bigdata/accumulo/2.1.3/24.03-lts-sp1/hadoop/mapred-site.xml @@ -0,0 +1,28 @@ + + + + + + + + + mapreduce.framework.name + yarn + + + mapreduce.application.classpath + 
$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/*:$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/lib/* + + \ No newline at end of file diff --git a/Bigdata/accumulo/2.1.3/24.03-lts-sp1/hadoop/yarn-site.xml b/Bigdata/accumulo/2.1.3/24.03-lts-sp1/hadoop/yarn-site.xml new file mode 100644 index 0000000000000000000000000000000000000000..5957ce2e16a277516fe5688bc581522e1aeff2e0 --- /dev/null +++ b/Bigdata/accumulo/2.1.3/24.03-lts-sp1/hadoop/yarn-site.xml @@ -0,0 +1,61 @@ + + + + + + yarn.nodemanager.disk-health-checker.max-disk-utilization-per-disk-percentage + 98 + + + yarn.nodemanager.aux-services + mapreduce_shuffle + + + yarn.nodemanager.env-whitelist + JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_HOME,PATH,LANG,TZ,HADOOP_MAPRED_HOME + + + yarn.resourcemanager.scheduler.class + org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler + + + yarn.log-aggregation-enable + true + + + yarn.log.dir + /data/hadoop + + + yarn.log.server.url + http://localhost:19888/jobhistory/logs + + + yarn.scheduler.minimum-allocation-vcores + 1 + + + yarn.scheduler.maximum-allocation-vcores + 1 + + + yarn.scheduler.minimum-allocation-mb + 256 + + + yarn.scheduler.maximum-allocation-mb + 4096 + + \ No newline at end of file diff --git a/Bigdata/accumulo/2.1.3/24.03-lts-sp1/properties/accumulo.properties b/Bigdata/accumulo/2.1.3/24.03-lts-sp1/properties/accumulo.properties new file mode 100644 index 0000000000000000000000000000000000000000..2ba912a5bd1b9d4100b6cd73fe268a7a3d61e100 --- /dev/null +++ b/Bigdata/accumulo/2.1.3/24.03-lts-sp1/properties/accumulo.properties @@ -0,0 +1,39 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This is the main configuration file for Apache Accumulo. Available configuration properties can be +# found in the Accumulo documentation on the Accumulo project website (https://accumulo.apache.org/) +# Link for Accumulo 2.0: https://accumulo.apache.org/docs/2.0/administration/properties + +## Sets location in HDFS where Accumulo will store data +instance.volumes=hdfs://localhost:8020/accumulo + +## Sets location of Zookeepers +instance.zookeeper.host=localhost:2181 + +## Change secret before initialization. 
All Accumulo servers must have same secret +instance.secret=DEFAULT + +## Set to false if 'accumulo-util build-native' fails +tserver.memory.maps.native.enabled=true + +## Trace user +trace.user=root + +## Trace password +trace.password=secret + +## Search for available port if default is unavailable +tserver.port.search=true diff --git a/Bigdata/accumulo/2.1.3/24.03-lts-sp1/properties/log4j-monitor.properties b/Bigdata/accumulo/2.1.3/24.03-lts-sp1/properties/log4j-monitor.properties new file mode 100644 index 0000000000000000000000000000000000000000..2057ede4205ccd5cab6d3ce5f0c6da41e9dd51b1 --- /dev/null +++ b/Bigdata/accumulo/2.1.3/24.03-lts-sp1/properties/log4j-monitor.properties @@ -0,0 +1,34 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +## Log4j 1.2 file that configures logging for Accumulo Monitor +## The system properties referenced below are configured by accumulo-env.sh + +## Define a console appender +log4j.appender.console=org.apache.log4j.ConsoleAppender +log4j.appender.console.Target=System.out +log4j.appender.console.Threshold=ALL +log4j.appender.console.layout.ConversionPattern=%d{ISO8601} [%-8c{2}] %-5p: %m%n +log4j.appender.console.layout=org.apache.log4j.PatternLayout + +## Define an appender for the Accumulo Monitor to log to its own web GUI +log4j.appender.gui=org.apache.accumulo.server.monitor.LogService +log4j.appender.gui.Threshold=WARN + +## Append monitor logs to its own web GUI +log4j.logger.org.apache.accumulo=INHERITED, gui + +## Append most logs to file +log4j.rootLogger=INFO, console diff --git a/Bigdata/accumulo/2.1.3/24.03-lts-sp1/properties/log4j-service.properties b/Bigdata/accumulo/2.1.3/24.03-lts-sp1/properties/log4j-service.properties new file mode 100644 index 0000000000000000000000000000000000000000..458804108e8f5342fa1dd92f8df2e554ad02bebf --- /dev/null +++ b/Bigdata/accumulo/2.1.3/24.03-lts-sp1/properties/log4j-service.properties @@ -0,0 +1,49 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +## Log4j 1.2 file that configures logging for all Accumulo services (Master, TabletServer, GC, and Tracer) except Monitor +## The system properties referenced below are configured by accumulo-env.sh + +## Define a console appender +log4j.appender.console=org.apache.log4j.ConsoleAppender +log4j.appender.console.Target=System.out +log4j.appender.console.Threshold=ALL +log4j.appender.console.layout.ConversionPattern=%d{ISO8601} [%-8c{2}] %-5p: %m%n +log4j.appender.console.layout=org.apache.log4j.PatternLayout + +## Define an appender to send important logs to the the primary Accumulo Monitor +## The primary monitor is the one currently holding a shared lock in ZooKeeper, +## and is typically the one that started first. +log4j.appender.monitor=org.apache.accumulo.monitor.util.AccumuloMonitorAppender +log4j.appender.monitor.Threshold=WARN + +## Change this log level from OFF to one of the following to enable audit logging: +## INFO +## enables audit logging (inherit appenders from root logger) +## INFO, audit +## enables audit logging using the audit log appender +## (requires audit log file appender above to be uncommented) +log4j.logger.org.apache.accumulo.audit=OFF + +## Append logs to the primary Accumulo Monitor +log4j.logger.org.apache.accumulo=INHERITED, monitor + +## Constrain some particularly spammy loggers +log4j.logger.org.apache.accumulo.core.file.rfile.bcfile=INFO +log4j.logger.org.mortbay.log=WARN +log4j.logger.org.apache.zookeeper=ERROR + +## Append most logs to console +log4j.rootLogger=INFO, console diff --git a/Bigdata/accumulo/2.1.3/24.03-lts-sp1/ssh/ssh_config b/Bigdata/accumulo/2.1.3/24.03-lts-sp1/ssh/ssh_config new file mode 100644 index 0000000000000000000000000000000000000000..79611d11237584106f2f3a08c13a4659664afebc --- /dev/null +++ b/Bigdata/accumulo/2.1.3/24.03-lts-sp1/ssh/ssh_config @@ -0,0 +1,19 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +Host * + StrictHostKeyChecking no \ No newline at end of file diff --git a/Bigdata/accumulo/2.1.3/24.03-lts-sp1/ssh/sshd_config b/Bigdata/accumulo/2.1.3/24.03-lts-sp1/ssh/sshd_config new file mode 100644 index 0000000000000000000000000000000000000000..8a7c77b72137a4ca5e2c8a83c704c55666791dce --- /dev/null +++ b/Bigdata/accumulo/2.1.3/24.03-lts-sp1/ssh/sshd_config @@ -0,0 +1,18 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +ListenAddress 0.0.0.0 \ No newline at end of file diff --git a/Bigdata/accumulo/README.md b/Bigdata/accumulo/README.md new file mode 100644 index 0000000000000000000000000000000000000000..04d110ac36597ec7ada45b8a8c611cee143bfb0e --- /dev/null +++ b/Bigdata/accumulo/README.md @@ -0,0 +1,70 @@ +# Quick reference + +- The official accumulo docker image. + +- Maintained by: [openEuler CloudNative SIG](https://gitee.com/openeuler/cloudnative). + +- Where to get help: [openEuler CloudNative SIG](https://gitee.com/openeuler/cloudnative), [openEuler](https://gitee.com/openeuler/community). +# Accumulo | openEuler +Current accumulo docker images are built on the [openEuler](https://repo.openeuler.org/). This repository is free to use and exempted from per-user rate limits. + +Apache Accumulo is a sorted, distributed key/value store that provides robust, scalable data storage and retrieval. With Apache Accumulo, users can store and manage large data sets across a cluster. + +Learn more on [accumulo website](https://accumulo.apache.org/). + +# Supported tags and respective Dockerfile links +The tag of each accumulo docker image is consist of the version of accumulo and the version of basic image. The details are as follows +| Tags | Currently | Architectures| +|--|--|--| +|[2.1.3-oe2403sp1](https://gitee.com/openeuler/openeuler-docker-images/blob/master/Bigdata/accumulo/2.1.3/24.03-lts-sp1/Dockerfile)| Apache accumulo 2.1.3 on openEuler 24.03-LTS-SP1 | amd64, arm64 | + +# Usage +Deploy a accumulo instance with pre-installed hadoop and zookeeper components by following command: +```bash +# Start Accumulo +docker run -it \ + --name accumulo \ + openeuler/accumulo:latest +``` +The following message indicates that the accumulo is ready : +``` +************************************************************/ +Starting Hadoop... +Starting namenodes on [localhost] +localhost: Warning: Permanently added 'localhost' (ED25519) to the list of known hosts. +Starting datanodes +Starting secondary namenodes [183cfa448d9e] +183cfa448d9e: Warning: Permanently added '183cfa448d9e' (ED25519) to the list of known hosts. +2025-06-27 09:44:28,879 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable +Starting resourcemanager +Starting nodemanagers +Start History Server +WARNING: Use of this script to start the MR JobHistory daemon is deprecated. +WARNING: Attempting to execute replacement "mapred --daemon start" instead. +2025-06-27T09:44:36,603 [conf.SiteConfiguration] INFO : Found Accumulo configuration on classpath at /usr/local/accumulo/conf/accumulo.properties +2025-06-27T09:44:36,758 [util.NativeCodeLoader] WARN : Unable to load native-hadoop library for your platform... 
using builtin-java classes where applicable +2025-06-27T09:44:37,033 [fs.VolumeManagerImpl] WARN : dfs.datanode.synconclose set to false in hdfs-site.xml: data loss is possible on hard system reset or power loss +2025-06-27T09:44:37,033 [init.Initialize] INFO : Hadoop Filesystem is hdfs://localhost:8020 +2025-06-27T09:44:37,033 [init.Initialize] INFO : Accumulo data dirs are [[hdfs://localhost:8020/accumulo]] +2025-06-27T09:44:37,033 [init.Initialize] INFO : Zookeeper server is localhost:2181 +2025-06-27T09:44:37,033 [init.Initialize] INFO : Checking if Zookeeper is available. If this hangs, then you need to make sure zookeeper is running + + +Warning!!! Your instance secret is still set to the default, this is not secure. We highly recommend you change it. + + +You can change the instance secret in accumulo by using: + bin/accumulo org.apache.accumulo.server.util.ChangeSecret +You will also need to edit your secret in your configuration file by adding the property instance.secret to your accumulo.properties. Without this accumulo will not operate correctly +Instance name : + +``` + +To stop and remove the container, use these commands. +``` +docker stop accumulo +docker rm accumulo +``` + +# Question and answering +If you have any questions or want to use some special features, please submit an issue or a pull request on [openeuler-docker-images](https://gitee.com/openeuler/openeuler-docker-images). \ No newline at end of file diff --git a/Bigdata/accumulo/doc/image-info.yml b/Bigdata/accumulo/doc/image-info.yml new file mode 100644 index 0000000000000000000000000000000000000000..231bdd2d3e8f7a9b6be8ffca403f6c1bdf8d761d --- /dev/null +++ b/Bigdata/accumulo/doc/image-info.yml @@ -0,0 +1,92 @@ +name: accumulo +category: bigdata +description: Apache Accumulo 是一个排序的分布式键/值存储,提供强大且可扩展的数据存储和检索功能。借助 Apache Accumulo,用户可以跨集群存储和管理大型数据集。 +environment: | + 本应用在Docker环境中运行,安装Docker执行如下命令 + ``` + yum install -y docker + ``` +tags: | + accumulo镜像的Tag由其版本信息和基础镜像版本信息组成,详细内容如下 + + | Tags | Currently | Architectures| + |------|-----------|---------------| + |[2.1.3-oe2403sp1](https://gitee.com/openeuler/openeuler-docker-images/blob/master/Bigdata/accumulo/2.1.3/24.03-lts-sp1/Dockerfile)| Apache accumulo 2.1.3 on openEuler 24.03-LTS-SP1 | amd64, arm64 | + +download: | + 拉取镜像到本地 + ``` + docker pull openeuler/accumulo:{Tag} + ``` + +usage: | + - 启动容器 + + `openeuler/accumulo`镜像预装了hadoop、zookeeper,用户可以无需依赖其他容器,直接启动accumulo实例: + + ```bash + docker run -it \ + --name accumulo \ + openeuler/accumulo:{Tag} + ``` + + 出现如下信息则说明accumulo已经部署成功: + ``` + ************************************************************/ + Starting Hadoop... + Starting namenodes on [localhost] + localhost: Warning: Permanently added 'localhost' (ED25519) to the list of known hosts. + Starting datanodes + Starting secondary namenodes [183cfa448d9e] + 183cfa448d9e: Warning: Permanently added '183cfa448d9e' (ED25519) to the list of known hosts. + 2025-06-27 09:44:28,879 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable + Starting resourcemanager + Starting nodemanagers + Start History Server + WARNING: Use of this script to start the MR JobHistory daemon is deprecated. + WARNING: Attempting to execute replacement "mapred --daemon start" instead. 
+ 2025-06-27T09:44:36,603 [conf.SiteConfiguration] INFO : Found Accumulo configuration on classpath at /usr/local/accumulo/conf/accumulo.properties + 2025-06-27T09:44:36,758 [util.NativeCodeLoader] WARN : Unable to load native-hadoop library for your platform... using builtin-java classes where applicable + 2025-06-27T09:44:37,033 [fs.VolumeManagerImpl] WARN : dfs.datanode.synconclose set to false in hdfs-site.xml: data loss is possible on hard system reset or power loss + 2025-06-27T09:44:37,033 [init.Initialize] INFO : Hadoop Filesystem is hdfs://localhost:8020 + 2025-06-27T09:44:37,033 [init.Initialize] INFO : Accumulo data dirs are [[hdfs://localhost:8020/accumulo]] + 2025-06-27T09:44:37,033 [init.Initialize] INFO : Zookeeper server is localhost:2181 + 2025-06-27T09:44:37,033 [init.Initialize] INFO : Checking if Zookeeper is available. If this hangs, then you need to make sure zookeeper is running + + + Warning!!! Your instance secret is still set to the default, this is not secure. We highly recommend you change it. + + + You can change the instance secret in accumulo by using: + bin/accumulo org.apache.accumulo.server.util.ChangeSecret + You will also need to edit your secret in your configuration file by adding the property instance.secret to your accumulo.properties. Without this accumulo will not operate correctly + Instance name : + + ``` + + - 容器交互 + + 容器启动默认执行`accumulo init --upload-accumulo-props`, 用户可根据需求通过CMD改变执行的命令,或启动时重新设置entrypoint,进入容器后进行交互: + ```bash + docker run -it \ + --name accumulo \ + --entrypoint=bash \ + openeuler/accumulo:{Tag} + ``` + + - 停止服务 + + 使用如下命令终止容器: + ``` + docker stop accumulo + docker rm accumulo + ``` + +license: Apache-2.0 license +similar_packages: + - Apache HBase: 基于 Hadoop 的分布式、可扩展的 NoSQL 数据库,适合处理大规模、实时随机读写操作。 + - Cassandra: 由 Apache 提供支持的分布式 NoSQL 数据库,用于处理大量数据,具有高可用性和无单点故障。 +dependency: + - openjdk + - hadoop + - zookeeper \ No newline at end of file diff --git a/Bigdata/accumulo/doc/picture/logo.png b/Bigdata/accumulo/doc/picture/logo.png new file mode 100644 index 0000000000000000000000000000000000000000..5b0f6b434571b37da9a86d9d38d09297b4363fac Binary files /dev/null and b/Bigdata/accumulo/doc/picture/logo.png differ diff --git a/Bigdata/atlas/2.4.0/24.03-lts-sp1/Dockerfile b/Bigdata/atlas/2.4.0/24.03-lts-sp1/Dockerfile index e2a5fce68025cfd6af8fff55ed22282a17347e23..449ce8e401da20f7113f1308bc3b6798d0d01ced 100644 --- a/Bigdata/atlas/2.4.0/24.03-lts-sp1/Dockerfile +++ b/Bigdata/atlas/2.4.0/24.03-lts-sp1/Dockerfile @@ -3,13 +3,28 @@ FROM $BASE ARG VERSION=2.4.0 -RUN yum install -y git maven java-17-openjdk-devel make gcc g++ +RUN yum install -y git maven java-1.8.0-openjdk-devel make gcc g++ hostname && \ + yum clean all -RUN git clone -b release-${VERSION} https://github.com/apache/atlas.git - -WORKDIR /atlas ENV MAVEN_OPTS="-Xms8g -Xmx8g" -RUN mvn clean install -DskipTests -T 2C -RUN mvn clean package -Pdist -DskipTests +RUN git clone -b release-${VERSION} https://github.com/apache/atlas.git && \ + cd /atlas && \ + mvn clean install -DskipTests -T 2C && \ + mvn clean \ + -Dhttps.protocols=TLSv1.2 \ + -DskipTests \ + -Drat.skip=true \ + package -Pdist,embedded-hbase-solr && \ + mkdir -p /atlas-server && \ + tar -xzvf /atlas/distro/target/apache-atlas-${VERSION}-server.tar.gz -C /atlas-server --strip-components=1 && \ + echo -e 'export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk' >> /atlas-server/conf/atlas-env.sh && \ + mkdir -p /atlas-server/hbase/conf/ && \ + cp /atlas/dev-support/atlas-docker/scripts/hbase-site.xml 
/atlas-server/hbase/conf/ && \ + rm -rf /atlas + +COPY atlas_config.patch /atlas-server/ +WORKDIR /atlas-server +RUN yum install -y patch && \ + patch -p1 < atlas_config.patch -CMD ["bash"] \ No newline at end of file +EXPOSE 21000 \ No newline at end of file diff --git a/Bigdata/atlas/2.4.0/24.03-lts-sp1/atlas_config.patch b/Bigdata/atlas/2.4.0/24.03-lts-sp1/atlas_config.patch new file mode 100644 index 0000000000000000000000000000000000000000..94e0341ca11435cbebf0c31e7e0a76021d25b1e7 --- /dev/null +++ b/Bigdata/atlas/2.4.0/24.03-lts-sp1/atlas_config.patch @@ -0,0 +1,64 @@ +--- a/bin/atlas_config.py 2025-06-28 10:16:09.154265752 +0000 ++++ b/bin/atlas_config.py 2025-06-28 10:00:31.479221040 +0000 +@@ -529,15 +529,17 @@ + + def wait_for_startup(confdir, wait): + count = 0 ++ started = False + host = get_atlas_url_host(confdir) + port = get_atlas_url_port(confdir) +- while True: ++ pid_file = pidFile(atlasDir()) ++ while not started: + try: + s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + s.settimeout(1) + s.connect((host, int(port))) + s.close() +- break ++ started = True + except Exception as e: + # Wait for 1 sec before next ping + sys.stdout.write('.') +@@ -546,6 +548,14 @@ + + if count > wait: + s.close() ++ sys.stdout.write('\nAtlas Web-UI startup timed out! But, wait for it...') ++ sys.stdout.flush() ++ break ++ ++ if not os.path.exists(pid_file): ++ sys.stdout.write('\nApache Atlas startup failed!\nCheck logs: /apache-atlas/logs/application.log') ++ sys.stdout.flush() ++ exit() + break + + count = count + 1 +@@ -584,14 +594,14 @@ + + if zk_url is None: + if port is None: +- cmd = [os.path.join(dir, solrScript), action] ++ cmd = [os.path.join(dir, solrScript), action, '-force'] + else: +- cmd = [os.path.join(dir, solrScript), action, '-p', str(port)] ++ cmd = [os.path.join(dir, solrScript), action, '-force', '-p', str(port)] + else: + if port is None: +- cmd = [os.path.join(dir, solrScript), action, '-z', zk_url] ++ cmd = [os.path.join(dir, solrScript), action, '-force', '-z', zk_url] + else: +- cmd = [os.path.join(dir, solrScript), action, '-z', zk_url, '-p', port] ++ cmd = [os.path.join(dir, solrScript), action, '-force', '-z', zk_url, '-p', port] + + if homedir is not None: + if not os.path.exists(homedir) : +@@ -614,7 +624,7 @@ + if IS_WINDOWS: + solrScript = "solr.cmd" + +- cmd = [os.path.join(dir, solrScript), 'create', '-c', index, '-d', confdir, '-shards', solrShards(), '-replicationFactor', solrReplicationFactor()] ++ cmd = [os.path.join(dir, solrScript), 'create', '-c', index, '-d', confdir, '-shards', solrShards(), '-replicationFactor', solrReplicationFactor(), '-force'] + + return runProcess(cmd, logdir, False, wait) diff --git a/Bigdata/atlas/README.md b/Bigdata/atlas/README.md new file mode 100644 index 0000000000000000000000000000000000000000..452e9cbf632b6d6c4cc052087dbd9ffb2f9af969 --- /dev/null +++ b/Bigdata/atlas/README.md @@ -0,0 +1,54 @@ +# Quick reference + +- The official atlas docker image. + +- Maintained by: [openEuler CloudNative SIG](https://gitee.com/openeuler/cloudnative). + +- Where to get help: [openEuler CloudNative SIG](https://gitee.com/openeuler/cloudnative), [openEuler](https://gitee.com/openeuler/community). +# atlas | openEuler +Current atlas docker images are built on the [openEuler](https://repo.openeuler.org/). This repository is free to use and exempted from per-user rate limits. 
+ +Atlas is a scalable and extensible set of core foundational governance services – enabling enterprises to effectively and efficiently meet their compliance requirements within Hadoop and allows integration with the whole enterprise data ecosystem. + +Apache Atlas provides open metadata management and governance capabilities for organizations to build a catalog of their data assets, classify and govern these assets and provide collaboration capabilities around these data assets for data scientists, analysts and the data governance team. + +Learn more on [atlas website](https://atlas.apache.org/). + +# Supported tags and respective Dockerfile links +The tag of each atlas docker image is consist of the version of atlas and the version of basic image. The details are as follows +| Tags | Currently | Architectures| +|--|--|--| +|[2.4.0-oe2403sp1](https://gitee.com/openeuler/openeuler-docker-images/blob/master/Bigdata/atlas/2.4.0/24.03-lts-sp1/Dockerfile)| Apache Atlas 2.4.0 on openEuler 24.03-LTS-SP1 | amd64, arm64 | + +# Usage +Start a atlas instance by following command: +```bash +docker run -it \ + --name atlas \ + -p 21000:21000 \ + openeuler/atlas:latest +``` + +Running Apache Atlas with Local Apache HBase & Apache Solr +``` +bin/atlas_start.py +``` + +To verify if Apache Atlas server is up and running, run curl command as shown below: +``` +curl -u {username}:{password} http://localhost:21000/api/atlas/admin/version +``` +This will return result like +``` +{"Description":"Metadata Management and Data Governance Platform over Hadoop","Version":"2.2.0","Name":"apache-atlas"} +``` + +Access Apache Atlas UI using a browser: `http://localhost:21000`, the default credentials: `admin / admin`. + +To stop Apache Atlas, run following command: +``` +bin/atlas_stop.py +``` + +# Question and answering +If you have any questions or want to use some special features, please submit an issue or a pull request on [openeuler-docker-images](https://gitee.com/openeuler/openeuler-docker-images). 
\ No newline at end of file diff --git a/Bigdata/atlas/doc/image-info.yml b/Bigdata/atlas/doc/image-info.yml new file mode 100644 index 0000000000000000000000000000000000000000..33f64a8654b3e0a2bfa0fe5a2999d76a50ea1cad --- /dev/null +++ b/Bigdata/atlas/doc/image-info.yml @@ -0,0 +1,67 @@ +name: atlas +category: bigdata +description: Atlas 是一套可扩展且易于扩展的核心基础治理服务,使企业能够有效且高效地满足 Hadoop 内部的合规性要求,并允许与整个企业数据生态系统集成。Apache Atlas 为组织提供开放的元数据管理和治理功能,用于构建其数据资产目录、对这些资产进行分类和管理,并为数据科学家、分析师和数据治理团队提供围绕这些数据资产的协作能力。 +environment: | + 本应用在Docker环境中运行,安装Docker执行如下命令 + ``` + yum install -y docker + ``` +tags: | + atlas镜像的Tag由其版本信息和基础镜像版本信息组成,详细内容如下 + + | Tags | Currently | Architectures| + |------|-----------|---------------| + |[2.4.0-oe2403sp1](https://gitee.com/openeuler/openeuler-docker-images/blob/master/Bigdata/atlas/2.4.0/24.03-lts-sp1/Dockerfile)| Apache Atlas 2.4.0 on openEuler 24.03-LTS-SP1 | amd64, arm64 | + +download: | + 拉取镜像到本地 + ``` + docker pull openeuler/atlas:{Tag} + ``` + +usage: | + - 启动容器 + + ```bash + docker run -it \ + --name atlas \ + -p 21000:21000 \ + openeuler/atlas:{Tag} + ``` + + 使用容器内的Apache HBase和Apache Solr运行Apache Atlas: + ``` + export MANAGE_LOCAL_HBASE=true + export MANAGE_LOCAL_SOLR=true + bin/atlas_start.py + ``` + + 使用如下命令验证Atlas是否已经正常运行: + ``` + # 运行命令 + curl -u {username}:{password} http://localhost:21000/api/atlas/admin/version + + # 返回如下信息则正常 + {"Description":"Metadata Management and Data Governance Platform over Hadoop","Version":"2.2.0","Name":"apache-atlas"} + ``` + 默认登录用户名`{username}:{password}`为:`admin:admin` + + - webUI访问: + + 浏览器地址栏输入:`http://localhost:21000`来可视化管理Altas, 默认登录用户名/密码:`admin/admin`。 + + - 停止服务 + + 使用如下命令停止Atlas服务: + ``` + bin/atlas_stop.py + ``` + +license: Apache-2.0 license +similar_packages: + - Apache HBase: 基于 Hadoop 的分布式、可扩展的 NoSQL 数据库,适合处理大规模、实时随机读写操作。 + - Cassandra: 由 Apache 提供支持的分布式 NoSQL 数据库,用于处理大量数据,具有高可用性和无单点故障。 +dependency: + - openjdk + - hadoop + - zookeeper \ No newline at end of file diff --git a/Bigdata/atlas/doc/picture/logo.png b/Bigdata/atlas/doc/picture/logo.png new file mode 100644 index 0000000000000000000000000000000000000000..7fc7392f681b5c6208fd6a0259d3675edacca37f Binary files /dev/null and b/Bigdata/atlas/doc/picture/logo.png differ diff --git a/Bigdata/avro/README.md b/Bigdata/avro/README.md new file mode 100644 index 0000000000000000000000000000000000000000..43b58342f6d27add6adfc8061c21fc4217944219 --- /dev/null +++ b/Bigdata/avro/README.md @@ -0,0 +1,32 @@ +# Quick reference + +- The official avro docker image. + +- Maintained by: [openEuler CloudNative SIG](https://gitee.com/openeuler/cloudnative). + +- Where to get help: [openEuler CloudNative SIG](https://gitee.com/openeuler/cloudnative), [openEuler](https://gitee.com/openeuler/community). +# avro | openEuler +Current avro docker images are built on the [openEuler](https://repo.openeuler.org/). This repository is free to use and exempted from per-user rate limits. + +Apache Avro is the leading serialization format for record data, and first choice for streaming data pipelines. + +Learn more on [avro website](https://avro.apache.org/). + +# Supported tags and respective Dockerfile links +The tag of each avro docker image is consist of the version of avro and the version of basic image. 
The details are as follows +| Tags | Currently | Architectures| +|--|--|--| +|[1.12.0-oe2403sp1](https://gitee.com/openeuler/openeuler-docker-images/blob/master/Bigdata/avro/1.12.0/24.03-lts-sp1/Dockerfile)| Apache Avro 1.12.0 on openEuler 24.03-LTS-SP1 | amd64, arm64 | + +# Usage +Avro containers should be used as a devel environment, start a avro instance by following command: +```bash +docker run -it \ + --name avro \ + openeuler/avro:latest +``` + +Users can develope their projects based on avro. + +# Question and answering +If you have any questions or want to use some special features, please submit an issue or a pull request on [openeuler-docker-images](https://gitee.com/openeuler/openeuler-docker-images). \ No newline at end of file diff --git a/Bigdata/avro/doc/image-info.yml b/Bigdata/avro/doc/image-info.yml new file mode 100644 index 0000000000000000000000000000000000000000..cc628ebbcb57ba32f35ab8d47af7918cf89bb6f0 --- /dev/null +++ b/Bigdata/avro/doc/image-info.yml @@ -0,0 +1,37 @@ +name: avro +category: bigdata +description: Apache Avro 是记录数据的领先序列化格式,也是流数据管道的首选。 +environment: | + 本应用在Docker环境中运行,安装Docker执行如下命令 + ``` + yum install -y docker + ``` +tags: | + avro镜像的Tag由其版本信息和基础镜像版本信息组成,详细内容如下 + + | Tags | Currently | Architectures| + |------|-----------|---------------| + |[1.12.0-oe2403sp1](https://gitee.com/openeuler/openeuler-docker-images/blob/master/Bigdata/avro/1.12.0/24.03-lts-sp1/Dockerfile)| Apache Asvro 1.12.0 on openEuler 24.03-LTS-SP1 | amd64, arm64 | + +download: | + 拉取镜像到本地 + ``` + docker pull openeuler/avro:{Tag} + ``` + +usage: | + - 启动容器 + + ```bash + docker run -it \ + --name avro \ + openeuler/avro:{Tag} + ``` + 用户可以`avro`容器为基础环境,开发基于Avro的项目。 + +license: Apache-2.0 license +similar_packages: + - Thrift: 由 Facebook 开发,是一个跨语言的服务框架,包含数据序列化和远程过程调用(RPC)功能。 +dependency: + - openjdk + - python \ No newline at end of file diff --git a/Bigdata/avro/doc/picture/logo.png b/Bigdata/avro/doc/picture/logo.png new file mode 100644 index 0000000000000000000000000000000000000000..531d33865fe6517a6764340a6f78d279f4b1f282 Binary files /dev/null and b/Bigdata/avro/doc/picture/logo.png differ diff --git a/Bigdata/consul/README.md b/Bigdata/consul/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b79ea609e37faa4998d4f5b3f9fc6e70376f1154 --- /dev/null +++ b/Bigdata/consul/README.md @@ -0,0 +1,32 @@ +# Quick reference + +- The official consul docker image. + +- Maintained by: [openEuler CloudNative SIG](https://gitee.com/openeuler/cloudnative). + +- Where to get help: [openEuler CloudNative SIG](https://gitee.com/openeuler/cloudnative), [openEuler](https://gitee.com/openeuler/community). +# Consul | openEuler +Current consul docker images are built on the [openEuler](https://repo.openeuler.org/). This repository is free to use and exempted from per-user rate limits. + +Consul is a distributed, highly available, and data center aware solution to connect and configure applications across dynamic, distributed infrastructure. + +Learn more on [consul website](https://www.consul.io/). + +# Supported tags and respective Dockerfile links +The tag of each consul docker image is consist of the version of consul and the version of basic image. 
The details are as follows +| Tags | Currently | Architectures| +|--|--|--| +|[1.20.5-oe2403sp1](https://gitee.com/openeuler/openeuler-docker-images/blob/master/Bigdata/consul/1.20.5/24.03-lts-sp1/Dockerfile)| Consul 1.20.5 on openEuler 24.03-LTS-SP1 | amd64, arm64 | + +# Usage +Start a consul instance by following command: +```bash +docker run -it \ + -p 8500:8500 \ + openeuler/consul:latest + {command} +``` +Please replace `{command}` by your requirements. + +# Question and answering +If you have any questions or want to use some special features, please submit an issue or a pull request on [openeuler-docker-images](https://gitee.com/openeuler/openeuler-docker-images). \ No newline at end of file diff --git a/Bigdata/consul/doc/image-info.yml b/Bigdata/consul/doc/image-info.yml new file mode 100644 index 0000000000000000000000000000000000000000..9f5b2226001e12d20a4c522f0290780eda9aebd8 --- /dev/null +++ b/Bigdata/consul/doc/image-info.yml @@ -0,0 +1,65 @@ +name: consul +category: bigdata +description: Consul 是一种分布式、高可用性和数据中心感知的解决方案,用于跨动态分布式基础设施连接和配置应用程序。 +environment: | + 本应用在Docker环境中运行,安装Docker执行如下命令 + ``` + yum install -y docker + ``` +tags: | + consul镜像的Tag由其版本信息和基础镜像版本信息组成,详细内容如下 + + | Tags | Currently | Architectures| + |------|-----------|---------------| + |[1.20.5-oe2403sp1](https://gitee.com/openeuler/openeuler-docker-images/blob/master/Bigdata/consul/1.20.5/24.03-lts-sp1/Dockerfile)| Consul 1.20.5 on openEuler 24.03-LTS-SP1 | amd64, arm64 | + +download: | + 拉取镜像到本地 + ``` + docker pull openeuler/consul:{Tag} + ``` + +usage: | + - 启动容器 + + ```bash + docker run -it \ + --name my-consul \ + -p 8500:8500 \ + openeuler/consul:{Tag} + {command} + ``` + + 用户可根据自身需求替换`{command}`,可选: + ``` + Usage: consul [--version] [--help] [] + + Available commands are: + acl Interact with Consul's ACLs + agent Runs a Consul agent + catalog Interact with the catalog + config Interact with Consul's Centralized Configurations + connect Interact with Consul Connect + debug Records a debugging archive for operators + event Fire a new event + exec Executes a command on Consul nodes + force-leave Forces a member of the cluster to enter the "left" state + info Provides debugging information for operators. + intention Interact with Connect service intentions + join Tell Consul agent to join cluster + keygen Generates a new encryption key + ... + ``` + + - 访问容器 + + ```bash + docker exec -it my-consul /bin/bash + ``` + 进入容器即可修改相关配置,进行开发调试,参考[developer.hashicorp.com](https://developer.hashicorp.com/consul) + +license: Business Source License 1.1 +similar_packages: + - N/A +dependency: + - golang \ No newline at end of file diff --git a/Bigdata/consul/doc/picture/logo.jpg b/Bigdata/consul/doc/picture/logo.jpg new file mode 100644 index 0000000000000000000000000000000000000000..ea9ce1c3a18e5ffa86e0c14256302e8d8a194fa2 Binary files /dev/null and b/Bigdata/consul/doc/picture/logo.jpg differ diff --git a/Bigdata/doris/README.md b/Bigdata/doris/README.md new file mode 100644 index 0000000000000000000000000000000000000000..833827aa8348eff8f954cc06243d2a04792b6b60 --- /dev/null +++ b/Bigdata/doris/README.md @@ -0,0 +1,29 @@ +# Quick reference + +- The official doris docker image. + +- Maintained by: [openEuler CloudNative SIG](https://gitee.com/openeuler/cloudnative). + +- Where to get help: [openEuler CloudNative SIG](https://gitee.com/openeuler/cloudnative), [openEuler](https://gitee.com/openeuler/community). +# doris | openEuler +Current doris docker images are built on the [openEuler](https://repo.openeuler.org/). 
This repository is free to use and exempted from per-user rate limits. + +Apache Doris is an easy-to-use, high-performance and real-time analytical database based on MPP architecture, known for its extreme speed and ease of use. It only requires a sub-second response time to return query results under massive data and can support not only high-concurrency point query scenarios but also high-throughput complex analysis scenarios. + +Learn more on [doris website](https://doris.apache.org/). + +# Supported tags and respective Dockerfile links +The tag of each doris docker image is consist of the version of doris and the version of basic image. The details are as follows +| Tags | Currently | Architectures| +|--|--|--| +|[2.1.9-oe2403sp1](https://gitee.com/openeuler/openeuler-docker-images/blob/master/Bigdata/doris/2.1.9/24.03-lts-sp1/Dockerfile)| doris 2.1.9 on openEuler 24.03-LTS-SP1 | amd64, arm64 | + +# Usage +Start a doris instance by following command: +```bash +docker run -it openeuler/doris:latest +``` +Please use doris according to [Doris Doc](https://doris.apache.org/). + +# Question and answering +If you have any questions or want to use some special features, please submit an issue or a pull request on [openeuler-docker-images](https://gitee.com/openeuler/openeuler-docker-images). \ No newline at end of file diff --git a/Bigdata/doris/doc/image-info.yml b/Bigdata/doris/doc/image-info.yml new file mode 100644 index 0000000000000000000000000000000000000000..74d904ae1bb54997e43fd22e6156604e4f255812 --- /dev/null +++ b/Bigdata/doris/doc/image-info.yml @@ -0,0 +1,34 @@ +name: doris +category: bigdata +description: Apache Doris 是一款基于 MPP 架构的易用型高性能实时分析型数据库,以极速和易用著称,在海量数据下仅需亚秒级响应时间即可返回查询结果,不仅能够支持高并发的点查询场景,也能够支持高吞吐量的复杂分析场景。 +environment: | + 本应用在Docker环境中运行,安装Docker执行如下命令 + ``` + yum install -y docker + ``` +tags: | + doris镜像的Tag由其版本信息和基础镜像版本信息组成,详细内容如下 + + | Tags | Currently | Architectures| + |------|-----------|---------------| + |[2.1.9-oe2403sp1](https://gitee.com/openeuler/openeuler-docker-images/blob/master/Bigdata/doris/2.1.9/24.03-lts-sp1/Dockerfile)| doris 2.1.9 on openEuler 24.03-LTS-SP1 | amd64, arm64 | + +download: | + 拉取镜像到本地 + ``` + docker pull openeuler/doris:{Tag} + ``` + +usage: | + - 启动容器 + + ```bash + docker run -it openeuler/doris:{Tag} + ``` + 进入容器后请参考[Doris Doc](https://doris.apache.org/)按使用场景操作。 + +license: Apache-2.0 license +similar_packages: + - N/A +dependency: + - N/A \ No newline at end of file diff --git a/Bigdata/doris/doc/picture/logo.png b/Bigdata/doris/doc/picture/logo.png new file mode 100644 index 0000000000000000000000000000000000000000..acc7406b998e09d2434998d79b0017fd144285f1 Binary files /dev/null and b/Bigdata/doris/doc/picture/logo.png differ diff --git a/Bigdata/hadoop/3.4.1/24.03-lts-sp1/Dockerfile b/Bigdata/hadoop/3.4.1/24.03-lts-sp1/Dockerfile index 3eea02420248d05bf245657ecdccdf77528b2de4..22417ded30bd2c3792bf85e0c12ff9107968bb21 100644 --- a/Bigdata/hadoop/3.4.1/24.03-lts-sp1/Dockerfile +++ b/Bigdata/hadoop/3.4.1/24.03-lts-sp1/Dockerfile @@ -3,16 +3,23 @@ FROM $BASE ARG VERSION=3.4.1 +COPY ssh/* /etc/ssh/ +COPY entrypoint.sh / +RUN yum install -y java-11-openjdk-devel openssh openssh-clients sudo hostname && \ + yum clean all && \ + ssh-keygen -A && ssh-keygen -t ed25519 -P '' -f /root/.ssh/id_ed25519 && \ + cat /root/.ssh/id_ed25519.pub > /root/.ssh/authorized_keys && \ + chmod 0400 /root/.ssh/authorized_keys && \ + chmod +x /entrypoint.sh + RUN curl -fSL -o hadoop.tar.gz 
https://dlcdn.apache.org/hadoop/common/hadoop-${VERSION}/hadoop-${VERSION}.tar.gz; \ mkdir -p /usr/local/hadoop && \ tar -zxf hadoop.tar.gz -C /usr/local/hadoop --strip-components=1 && \ rm -rf hadoop.tar.gz - ENV PATH=$PATH:/usr/local/hadoop/bin +ENV HADOOP_HOME=/usr/local/hadoop +COPY hadoop/* $HADOOP_HOME/etc/hadoop/ -RUN yum install -y java-11-openjdk-devel && \ - yum clean all - -ENV JAVA_HOME=/usr/lib/jvm/java-11-openjdk - -ENTRYPOINT ["hadoop", "version"] \ No newline at end of file +EXPOSE 9870 8088 19888 50070 50075 50010 50020 50090 +ENTRYPOINT [ "/entrypoint.sh" ] +CMD [ "start" ] \ No newline at end of file diff --git a/Bigdata/hadoop/3.4.1/24.03-lts-sp1/entrypoint.sh b/Bigdata/hadoop/3.4.1/24.03-lts-sp1/entrypoint.sh new file mode 100644 index 0000000000000000000000000000000000000000..8f5400ad04ccbd32712e4966b3a0d141f16eb01d --- /dev/null +++ b/Bigdata/hadoop/3.4.1/24.03-lts-sp1/entrypoint.sh @@ -0,0 +1,16 @@ +#!/bin/bash + +# Start SSH service +/usr/sbin/sshd + +# Format HDFS +echo "Formatting HDFS NameNode..." +hdfs namenode -format + +# Run Hadoop with CMD arguments +$HADOOP_HOME/sbin/start-all.sh "$@" + +echo "Start History Server" +$HADOOP_HOME/sbin/mr-jobhistory-daemon.sh --config $HADOOP_HOME/etc/hadoop start historyserver + +tail -f /dev/null \ No newline at end of file diff --git a/Bigdata/hadoop/3.4.1/24.03-lts-sp1/hadoop/capacity-scheduler.xml b/Bigdata/hadoop/3.4.1/24.03-lts-sp1/hadoop/capacity-scheduler.xml new file mode 100644 index 0000000000000000000000000000000000000000..5eb4cddb0b3f079a35b06de37056322da06acaf7 --- /dev/null +++ b/Bigdata/hadoop/3.4.1/24.03-lts-sp1/hadoop/capacity-scheduler.xml @@ -0,0 +1,65 @@ + + + + + + yarn.scheduler.capacity.default.minimum-user-limit-percent + 100 + + + yarn.scheduler.capacity.maximum-am-resource-percent + 0.5 + + + yarn.scheduler.capacity.maximum-applications + 10000 + + + yarn.scheduler.capacity.node-locality-delay + 40 + + + yarn.scheduler.capacity.resource-calculator + org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator + + + yarn.scheduler.capacity.root.capacity + 100 + + + yarn.scheduler.capacity.root.default.capacity + 100 + + + yarn.scheduler.capacity.root.default.maximum-am-resource-percent + 0.5 + + + yarn.scheduler.capacity.root.default.maximum-capacity + 100 + + + yarn.scheduler.capacity.root.default.state + RUNNING + + + yarn.scheduler.capacity.root.default.user-limit-factor + 1 + + + yarn.scheduler.capacity.root.queues + default + + \ No newline at end of file diff --git a/Bigdata/hadoop/3.4.1/24.03-lts-sp1/hadoop/core-site.xml b/Bigdata/hadoop/3.4.1/24.03-lts-sp1/hadoop/core-site.xml new file mode 100644 index 0000000000000000000000000000000000000000..2be9b2429a1800d41801b8e5cfbc7b7d26c23b4a --- /dev/null +++ b/Bigdata/hadoop/3.4.1/24.03-lts-sp1/hadoop/core-site.xml @@ -0,0 +1,36 @@ + + + + + + + + + hadoop.tmp.dir + /data/hadoop + + + fs.defaultFS + hdfs://localhost:9000 + + + hadoop.http.staticuser.user + root + + + fs.hdfs.impl + org.apache.hadoop.hdfs.DistributedFileSystem + + \ No newline at end of file diff --git a/Bigdata/hadoop/3.4.1/24.03-lts-sp1/hadoop/hadoop-env.sh b/Bigdata/hadoop/3.4.1/24.03-lts-sp1/hadoop/hadoop-env.sh new file mode 100644 index 0000000000000000000000000000000000000000..3c57ab9c5ad5e25791831501351b207720a08d5a --- /dev/null +++ b/Bigdata/hadoop/3.4.1/24.03-lts-sp1/hadoop/hadoop-env.sh @@ -0,0 +1,8 @@ + +export JAVA_HOME=/usr/lib/jvm/java-11-openjdk +export HADOOP_OS_TYPE=${HADOOP_OS_TYPE:-$(uname -s)} +export HDFS_NAMENODE_USER="root" +export 
HDFS_DATANODE_USER="root" +export HDFS_SECONDARYNAMENODE_USER="root" +export YARN_RESOURCEMANAGER_USER="root" +export YARN_NODEMANAGER_USER="root" \ No newline at end of file diff --git a/Bigdata/hadoop/3.4.1/24.03-lts-sp1/hadoop/hdfs-site.xml b/Bigdata/hadoop/3.4.1/24.03-lts-sp1/hadoop/hdfs-site.xml new file mode 100644 index 0000000000000000000000000000000000000000..73d9f71ae7b0420f0e97436b8deb0a7dea72ff4d --- /dev/null +++ b/Bigdata/hadoop/3.4.1/24.03-lts-sp1/hadoop/hdfs-site.xml @@ -0,0 +1,24 @@ + + + + + + + + + dfs.replication + 1 + + \ No newline at end of file diff --git a/Bigdata/hadoop/3.4.1/24.03-lts-sp1/hadoop/mapred-site.xml b/Bigdata/hadoop/3.4.1/24.03-lts-sp1/hadoop/mapred-site.xml new file mode 100644 index 0000000000000000000000000000000000000000..1a9adfda2ec5d1e31ac9135195f69de972ea9084 --- /dev/null +++ b/Bigdata/hadoop/3.4.1/24.03-lts-sp1/hadoop/mapred-site.xml @@ -0,0 +1,28 @@ + + + + + + + + + mapreduce.framework.name + yarn + + + mapreduce.application.classpath + $HADOOP_MAPRED_HOME/share/hadoop/mapreduce/*:$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/lib/* + + \ No newline at end of file diff --git a/Bigdata/hadoop/3.4.1/24.03-lts-sp1/hadoop/yarn-site.xml b/Bigdata/hadoop/3.4.1/24.03-lts-sp1/hadoop/yarn-site.xml new file mode 100644 index 0000000000000000000000000000000000000000..5957ce2e16a277516fe5688bc581522e1aeff2e0 --- /dev/null +++ b/Bigdata/hadoop/3.4.1/24.03-lts-sp1/hadoop/yarn-site.xml @@ -0,0 +1,61 @@ + + + + + + yarn.nodemanager.disk-health-checker.max-disk-utilization-per-disk-percentage + 98 + + + yarn.nodemanager.aux-services + mapreduce_shuffle + + + yarn.nodemanager.env-whitelist + JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_HOME,PATH,LANG,TZ,HADOOP_MAPRED_HOME + + + yarn.resourcemanager.scheduler.class + org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler + + + yarn.log-aggregation-enable + true + + + yarn.log.dir + /data/hadoop + + + yarn.log.server.url + http://localhost:19888/jobhistory/logs + + + yarn.scheduler.minimum-allocation-vcores + 1 + + + yarn.scheduler.maximum-allocation-vcores + 1 + + + yarn.scheduler.minimum-allocation-mb + 256 + + + yarn.scheduler.maximum-allocation-mb + 4096 + + \ No newline at end of file diff --git a/Bigdata/hadoop/3.4.1/24.03-lts-sp1/ssh/ssh_config b/Bigdata/hadoop/3.4.1/24.03-lts-sp1/ssh/ssh_config new file mode 100644 index 0000000000000000000000000000000000000000..79611d11237584106f2f3a08c13a4659664afebc --- /dev/null +++ b/Bigdata/hadoop/3.4.1/24.03-lts-sp1/ssh/ssh_config @@ -0,0 +1,19 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +Host * + StrictHostKeyChecking no \ No newline at end of file diff --git a/Bigdata/hadoop/3.4.1/24.03-lts-sp1/ssh/sshd_config b/Bigdata/hadoop/3.4.1/24.03-lts-sp1/ssh/sshd_config new file mode 100644 index 0000000000000000000000000000000000000000..8a7c77b72137a4ca5e2c8a83c704c55666791dce --- /dev/null +++ b/Bigdata/hadoop/3.4.1/24.03-lts-sp1/ssh/sshd_config @@ -0,0 +1,18 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +ListenAddress 0.0.0.0 \ No newline at end of file diff --git a/Bigdata/hadoop/README.md b/Bigdata/hadoop/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b9f73104b4cc7e6fc55dca0a16d85588dbd44074 --- /dev/null +++ b/Bigdata/hadoop/README.md @@ -0,0 +1,53 @@ +# Quick reference + +- The official hadoop docker image. + +- Maintained by: [openEuler CloudNative SIG](https://gitee.com/openeuler/cloudnative). + +- Where to get help: [openEuler CloudNative SIG](https://gitee.com/openeuler/cloudnative), [openEuler](https://gitee.com/openeuler/community). +# hadoop | openEuler +Current hadoop docker images are built on the [openEuler](https://repo.openeuler.org/). This repository is free to use and exempted from per-user rate limits. + +The Apache Hadoop software library is a framework that allows for the distributed processing of large data sets across clusters of computers using simple programming models. + +Learn more on [hadoop website](https://hadoop.apache.org/). + +# Supported tags and respective Dockerfile links +The tag of each hadoop docker image is consist of the version of hadoop and the version of basic image. The details are as follows +| Tags | Currently | Architectures| +|--|--|--| +|[3.4.1-oe2403sp1](https://gitee.com/openeuler/openeuler-docker-images/blob/master/Bigdata/hadoop/3.4.1/24.03-lts-sp1/Dockerfile)| Apache hadoop 3.4.1 on openEuler 24.03-LTS-SP1 | amd64, arm64 | + +# Usage +Deploy a hadoop instance by following command: +```bash +docker run -d \ + --name hadoop \ + --hostname localhost \ + -p 9870:9870 \ + -p 8088:8088 \ + -p 19888:19888 \ + openeuler/hadoop:latest + +docker logs --follow hadoop +``` +The following message indicates that the hadoop is ready : +``` +Starting resourcemanager +Starting nodemanagers +``` +After that, please press Ctrl + C to exit docker logs, and visit hadoop web UI. +| Service Name | URL | +|----------------|-------------------------| +|NameNode | http://localhost:9870⁠ | +|ResourceManager | http://localhost:8088⁠ | +|JobHistory | http://localhost:19888⁠ | + +To stop and remove the container, use these commands. 
+``` +docker stop hadoop +docker rm hadoop +``` + +# Question and answering +If you have any questions or want to use some special features, please submit an issue or a pull request on [openeuler-docker-images](https://gitee.com/openeuler/openeuler-docker-images). \ No newline at end of file diff --git a/Bigdata/hadoop/doc/image-info.yml b/Bigdata/hadoop/doc/image-info.yml new file mode 100644 index 0000000000000000000000000000000000000000..aea20c1896e4d04d28dd5a86680abc329228cddd --- /dev/null +++ b/Bigdata/hadoop/doc/image-info.yml @@ -0,0 +1,70 @@ +name: hadoop +category: bigdata +description: Apache Hadoop 软件库是一个框架,它允许使用简单的编程模型在计算机集群之间分布式处理大型数据集。 +environment: | + 本应用在Docker环境中运行,安装Docker执行如下命令 + ``` + yum install -y docker + ``` +tags: | + hadoop镜像的Tag由其版本信息和基础镜像版本信息组成,详细内容如下 + + | Tags | Currently | Architectures| + |------|-----------|---------------| + |[3.4.1-oe2403sp1](https://gitee.com/openeuler/openeuler-docker-images/blob/master/Bigdata/hadoop/3.4.1/24.03-lts-sp1/Dockerfile)| Apache hadoop 3.4.1 on openEuler 24.03-LTS-SP1 | amd64, arm64 | + +download: | + 拉取镜像到本地 + ``` + docker pull openeuler/hadoop:{Tag} + ``` + +usage: | + - 启动容器 + + ```bash + docker run -d \ + --name hadoop \ + --hostname localhost \ + -p 9870:9870 \ + -p 8088:8088 \ + -p 19888:19888 \ + openeuler/hadoop:latest + ``` + + - 查看运行日志 + + ``` + docker logs --follow hadoop + ``` + + 出现如下信息则说明hadoop已经部署成功: + ``` + Starting resourcemanager + Starting nodemanagers + ``` + + - 访问服务 + + hadoop部署成功后,使用`Ctrl + C`退出容器的日志, 并使用如下链接访问hadoop web UI: + + | Service Name | URL | + |----------------|-------------------------| + |NameNode | http://localhost:9870⁠ | + |ResourceManager | http://localhost:8088⁠ | + |JobHistory | http://localhost:19888⁠ | + + - 停止服务 + + 使用如下命令终止容器: + ``` + docker stop hadoop + docker rm hadoop + ``` + +license: Apache-2.0 license +similar_packages: + - Apache Doris: 原名 Palo,MPP 架构的 OLAP 数据库,支持高并发点查询和实时分析。 +dependency: + - openjdk + - ssh \ No newline at end of file diff --git a/Bigdata/hadoop/doc/picture/logo.jpg b/Bigdata/hadoop/doc/picture/logo.jpg new file mode 100644 index 0000000000000000000000000000000000000000..9fceb3201f13a7d0260603a019f0c1cd9421fc8b Binary files /dev/null and b/Bigdata/hadoop/doc/picture/logo.jpg differ