美文网首页
clickhouse集群搭建

clickhouse集群搭建

作者: 后知不觉1 | 来源:发表于2025-04-29 19:28 被阅读0次

1. 配置说明

名称 ip 说明
node1 192.168.1.2
node2 192.168.1.3
node3 192.168.1.4

2. 调整内核参数

# 设置文件数,线程数
ansible cluster -m shell -a "mv /etc/security/limits.d/20-nproc.conf /etc/security/20-nproc.confbak"
ansible cluster -m shell -a "cp /etc/security/limits.conf  /etc/security/limits.confbak"
ansible cluster -m shell -a "echo -e '*    soft   nproc     524288\n*    hard  nproc     524288\n*    soft    nofile     524288\n*    hard    nofile     524288\n*    soft   sigpending    1030568\n*    hard  sigpending     1030568\n' >> /etc/security/limits.conf"

# 关闭swap
ansible cluster -m shell -a "sed -i '/swap/s/^/#/' /etc/fstab"
ansible cluster -m shell -a "swapoff -a"

3. zookeeper部署

注意部署 zk 时设置 JVM 启动内存大小,配置文件如下

clientPort=2181
initLimit=10
autopurge.purgeInterval=24
syncLimit=5
tickTime=3000
dataDir=/data1/hadoop/zookeeper
autopurge.snapRetainCount=30
server.1=hwy-sfcloud-nn-dev-01.sfsscn.com:2888:3888
server.2=hwy-sfcloud-nn-dev-02.sfsscn.com:2888:3888
server.3=hwy-sfcloud-nn-dev-03.sfsscn.com:2888:3888

4. clickhouse部署

4.1 下载

阿里云上选择稳定版本的包,历史原因选择21.4.5.46的版本

https://mirrors.aliyun.com/clickhouse/tgz/stable/?spm=a2c6h.25603864.0.0.39c06dc5VbEXtV
wget https://mirrors.aliyun.com/clickhouse/tgz/stable/clickhouse-client-21.4.5.46.tgz
wget https://mirrors.aliyun.com/clickhouse/tgz/stable/clickhouse-common-static-21.4.5.46.tgz
wget https://mirrors.aliyun.com/clickhouse/tgz/stable/clickhouse-common-static-dbg-21.4.5.46.tgz
wget https://mirrors.aliyun.com/clickhouse/tgz/stable/clickhouse-server-21.4.5.46.tgz
4.2 解压安装

安装在/opt目录,当前下载在/opt/clickhouse目录
执行有顺序,会初始化环境,创建clickhouse用户,创建默认的配置目录

ansible cluster -m shell -a "ls /opt/clickhouse|xargs -i{} tar -xvf {} -C /opt/clickhouse/"
ansible cluster -m shell -a "bash /opt/clickhouse/clickhouse-common-static-21.4.5.46/install/doinst.sh"
ansible cluster -m shell -a "bash /opt/clickhouse/clickhouse-common-static-dbg-21.4.5.46/install/doinst.sh"
ansible cluster -m shell -a "bash /opt/clickhouse/clickhouse-server-21.4.5.46/install/doinst.sh"
ansible cluster -m shell -a "bash /opt/clickhouse/clickhouse-client-21.4.5.46/install/doinst.sh"
4.3 配置

这里配置两个集群

  1. default 3分片2副本,用于提高吞吐;2. c3_all 1分片3副本,特殊的表用于关联查询

config.xml

<?xml version="1.0"?>
<yandex>
    <logger>
        <level>notice</level>
        <log>/data01/clickhouse/clickhouse-server.log</log>
        <errorlog>/data01/clickhouse/clickhouse-server.err.log</errorlog>
        <size>1024M</size>
        <count>10</count>
    </logger>
    <listen_host>0.0.0.0</listen_host>
    <http_port>8123</http_port>
    <tcp_port>9000</tcp_port>
    <interserver_http_port>9009</interserver_http_port>


    <max_connections>4096</max_connections>
    <keep_alive_timeout>3</keep_alive_timeout>
    <max_session_timeout>3600</max_session_timeout>
    <default_session_timeout>60</default_session_timeout>
    <max_concurrent_queries>600</max_concurrent_queries>

    <uncompressed_cache_size>8589934592</uncompressed_cache_size>
    <mark_cache_size>5368709120</mark_cache_size>

    <storage_configuration>
        <disks>
            <!-- 默认磁盘配置 -->
            <default>
                <path>/var/lib/clickhouse/</path>
            </default>

            <!-- 附加磁盘1 -->
            <disk1>
                <path>/data01/clickhouse_disk1/</path>
                <keep_free_space_bytes>2147483648</keep_free_space_bytes> <!-- 保留1GB空闲空间 -->
            </disk1>

            <!-- 附加磁盘2 -->
            <disk2>
                <path>/data02/clickhouse_disk2/</path>
                <keep_free_space_bytes>2147483648</keep_free_space_bytes> <!-- 保留2GB空闲空间 -->
            </disk2>
        </disks>

        <policies>
            <!-- 轮询策略 -->
            <round_robin>
                <volumes>
                    <default>
                        <disk>default</disk>
                        <disk>disk1</disk>
                        <disk>disk2</disk>
                    </default>
                </volumes>
            </round_robin>

            <!-- 热冷数据分层策略 -->
            <tiered>
                <volumes>
                    <hot>
                        <!-- default库是高速盘 -->
                        <disk>default</disk>
                        <max_data_part_size_bytes>1073741824</max_data_part_size_bytes> <!-- 1GB以下存热数据 -->
                    </hot>
                    <cold>
                        <disk>disk1</disk>
                        <disk>disk2</disk>
                    </cold>
                </volumes>
            </tiered>
        </policies>
    </storage_configuration>
    <!-- path 为默认数据目录;注意此处与 storage_configuration 中 default 磁盘的 path 不一致,数据落盘以存储策略为准,建议保持两者一致 -->
    <path>/data/clickhouse</path>  
    <tmp_path>/data01/clickhouse/tmp_data/</tmp_path>
    <user_files_path>/data/clickhouse/user_files/</user_files_path>
    <format_schema_path>/data/clickhouse/format_schemas/</format_schema_path>

    <!-- 尽管同层级没有相关配置,ClickHouse 也会自动查找 config.d、conf.d 下的配置 -->
    <users_config>users.xml</users_config>
    <default_profile>default</default_profile>
    <system_profile>default</system_profile>
    <default_database>default</default_database>

    <distributed_ddl>
        <path>/clickhouse/basic_monitor/task_queue/ddl</path>
    </distributed_ddl>

    <builtin_dictionaries_reload_interval>3600</builtin_dictionaries_reload_interval>
    <dictionaries_config>*_dictionary.xml</dictionaries_config>

    <max_table_size_to_drop>53687091200</max_table_size_to_drop>
    <max_partition_size_to_drop>0</max_partition_size_to_drop>

    <query_log>
        <database>system</database>
        <table>query_log</table>
        <partition_by>toYYYYMM(event_date)</partition_by>
        <flush_interval_milliseconds>7500</flush_interval_milliseconds>
    </query_log>

    <merge_tree>
        <use_minimalistic_part_header_in_zookeeper>1</use_minimalistic_part_header_in_zookeeper>
        <parts_to_delay_insert>300</parts_to_delay_insert>
        <parts_to_throw_insert>600</parts_to_throw_insert>
        <max_delay_to_insert>2</max_delay_to_insert>
        <max_part_loading_threads>48</max_part_loading_threads>
    </merge_tree>

    <prometheus>
        <endpoint>/metrics</endpoint>
        <port>8001</port>
        <metrics>true</metrics>
        <events>true</events>
        <asynchronous_metrics>true</asynchronous_metrics>
    </prometheus>

    <mlock_executable>true</mlock_executable>

    <remote_servers incl="clickhouse_remote_servers"/>
    <include_from>/etc/clickhouse-server/config.d/cluster.xml</include_from>
    <!-- 尽管同层级没有 zk 相关配置,会自动查找 config.d、conf.d 下的配置 -->
    <zookeeper incl="zookeeper-servers" optional="true"/>
    <!-- 尽管同层级没有 macros 相关配置,会自动查找 config.d、conf.d 下的配置 -->
    <macros incl="macros" optional="true"/>
    <compression incl="clickhouse_compression" optional="true"/>

</yandex>

config.d/cluster.xml

<yandex>
<clickhouse_remote_servers>
    <default>
        <shard>
            <internal_replication>true</internal_replication>
            <replica>
                <host>node1</host>
                <port>9000</port>
                <user>admin</user>
                <password>123</password>
            </replica>
            <replica>
                <host>node2</host>
                <port>9000</port>
                <user>admin</user>
                <password>123</password>
            </replica>
        </shard>
        <shard>
            <internal_replication>true</internal_replication>
            <replica>
                <host>node2</host>
                <port>9000</port>
                <user>admin</user>
                <password>123</password>
            </replica>
            <replica>
                <host>node3</host>
                <port>9000</port>
                <user>admin</user>
                <password>123</password>
            </replica>
        </shard>
        <shard>
            <internal_replication>true</internal_replication>
            <replica>
                <host>node1</host>
                <port>9000</port>
                <user>admin</user>
                <password>123</password>
            </replica>
            <replica>
                <host>node3</host>
                <port>9000</port>
                <user>admin</user>
                <password>123</password>
            </replica>
        </shard>
    </default>
    <c3_all>
        <shard>
            <internal_replication>true</internal_replication>
            <replica>
                <host>node1</host>
                <port>9000</port>
                <user>admin</user>
                <password>123</password>
             </replica>
            <replica>
                <host>node2</host>
                <port>9000</port>
                <user>admin</user>
                <password>123</password>
            </replica>
            <replica>
                <host>node3</host>
                <port>9000</port>
                <user>admin</user>
                <password>123</password>
            </replica>
         </shard>
    </c3_all>
</clickhouse_remote_servers>
</yandex>

users.xml(即 config.xml 中 users_config 指定的文件)

<?xml version="1.0"?>
<yandex>
    <profiles>
        <default>
          <max_memory_usage>21474836480</max_memory_usage>
          <max_threads>4</max_threads>
          <distributed_product_mode>local</distributed_product_mode>
          <max_partitions_per_insert_block>10000</max_partitions_per_insert_block>
          <log_queries>1</log_queries>
          <use_uncompressed_cache>1</use_uncompressed_cache>
          <load_balancing>random</load_balancing>
          <poll_interval>1</poll_interval>
          <idle_connection_timeout>2</idle_connection_timeout>
          <max_execution_time>600</max_execution_time>
          <input_format_skip_unknown_fields>1</input_format_skip_unknown_fields>
          <send_progress_in_http_headers>0</send_progress_in_http_headers>
          <insert_allow_materialized_columns>1</insert_allow_materialized_columns>
          <distributed_ddl_task_timeout>300</distributed_ddl_task_timeout>
          <os_thread_priority>-3</os_thread_priority>
          <background_pool_size>10</background_pool_size>
          <replication_alter_partitions_sync>2</replication_alter_partitions_sync>
        </default>
        <readonly>
            <readonly>2</readonly>
        </readonly>
        <ts>
            <max_query_size>1000000</max_query_size>
        </ts>

    </profiles>

    <!-- Users and ACL. -->
    <users>
        <default>
            <password>123</password>
            <networks incl="networks" replace="replace">
                <ip>0.0.0.0/0</ip>
            </networks>
            <profile>default</profile>
            <quota>default</quota>
        </default>
        <admin>
            <password>123</password>
            <networks incl="networks" replace="replace">
               <ip>0.0.0.0/0</ip>
            </networks>
            <profile>default</profile>
            <quota>default</quota>
        </admin>
    </users>

    <quotas>
        <default>
            <interval>
                <duration>3600</duration>
                <queries>0</queries>
                <errors>0</errors>
                <result_rows>0</result_rows>
                <read_rows>0</read_rows>
                <execution_time>0</execution_time>
            </interval>
        </default>
    </quotas>
</yandex>

config.d/macros.xml
宏每台机器上配置是不一样的

按照上述集群宏分布应该如下

集群名称 node1(分片 副本) node2(分片 副本) node3(分片 副本) 备注
default (01 01),(03 01) (01 02),(02 01) (02 02),(03 02) 3分片2副本
c3_all (01 01) (01 02) (01 03) 1分片3副本
<yandex>
<macros>
    <shard>01</shard>
    <replica>02</replica>
    <c3_all_replica>02</c3_all_replica>
    <c3_all_shard>all</c3_all_shard>
</macros>
</yandex>

config.d/zk.xml

<yandex>
<zookeeper>
    <node>
        <host>node1</host>
        <port>2181</port>
    </node>
    <node>
        <host>node2</host>
        <port>2181</port>
    </node>
    <node>
        <host>node3</host>
        <port>2181</port>
    </node>
    <operation_timeout_ms>10000</operation_timeout_ms>
</zookeeper>
</yandex>

5. 启动clickhouse

ansible cluster -m shell -a "systemctl restart clickhouse-server"

6 验证

6.1、分布式库验证
# 登录
clickhouse-client --host node1 --port 9000 --user admin --password 123

# 创建分布式库
CREATE DATABASE test123 ON CLUSTER 'default';

# 登录 另一台
clickhouse-client --host node2 --port 9000 --user admin --password 123
# 查看库
show databases;
#能查看到则正常
6.2、分布式表验证
# 登录
clickhouse-client --host node1 --port 9000 --user admin --password 123

# 创建本地表
CREATE TABLE test123.test2_local_table ON CLUSTER 'default' (id UInt32,event_date Date,value String) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/{table}/{shard}', '{replica}') ORDER BY (id) SETTINGS index_granularity = 8192;

CREATE TABLE test123.test2_table
ENGINE = Distributed(default,test123,test2_local_table,rand());


# 登录 另一台
clickhouse-client --host node2 --port 9000 --user admin --password 123
# 查看库
use test123;
show tables;
#能查看到则正常
6.3、创建全副本表验证
# 登录
clickhouse-client --host node1 --port 9000 --user admin --password 123

# 创建本地表
CREATE TABLE test123.test123_local_table ON CLUSTER 'c3_all' (id UInt32,event_date Date,value String) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/{table}/{c3_all_shard}', '{c3_all_replica}') ORDER BY (id);

INSERT INTO test123.test123_local_table VALUES (1, '2023-01-01', 'value1'),(2, '2023-01-02', 'value2');


# 登录 另一台
clickhouse-client --host node2 --port 9000 --user admin --password 123
# 查看库
select * from test123.test123_local_table
#能查看到则正常

相关文章

网友评论

      本文标题:clickhouse集群搭建

      本文链接:https://www.haomeiwen.com/subject/ltyfijtx.html