2021-05-31

Easy Tarantool and ClickHouse Replication Setup

These two are currently my favorite databases because of their speed and features, Tarantool (200K tps) for OLTP, and ClickHouse (600K insert/s, 100 qps) for OLAP. Today we will learn how to setup a multi-master replication on each database:

# docker-compose.yml # tarantool
version: '3.3'
services:
  tt1: # master
    image: tarantool/tarantool:2.7.2 # x.x.2+ = stable
    volumes:
      - ./scripts/:/opt/scripts
    command: tarantool /opt/scripts/app.lua
    environment:
      - TARANTOOL_USER_NAME=tester
      - TARANTOOL_USER_PASSWORD=tester
    ports:
      - 13301:3301
  tt2: # slave1
    image: tarantool/tarantool:2.7.2
    volumes:
      - ./scripts/:/opt/scripts
    command: tarantool /opt/scripts/app.lua
    environment:
      - TARANTOOL_USER_NAME=tester
      - TARANTOOL_USER_PASSWORD=tester
    ports:
      - 23301:3301
  tt3: # slave2
    image: tarantool/tarantool:2.7.2
    volumes:
      - ./scripts/:/opt/scripts
    command: tarantool /opt/scripts/app.lua
    environment:
      - TARANTOOL_USER_NAME=tester
      - TARANTOOL_USER_PASSWORD=tester
    ports:
      - 33301:3301

# scripts/app.lua # need to be set on different port/file if not using docker-compose
# for multi-master, you should not use counter data type or it would be out of sync/conflict
# so it's better to use master-slave (2 read_only replica)
box.cfg{
    listen = 3301,
    replication = {
        'replicator:password@tt1:3301', -- master URI
        'replicator:password@tt2:3301', -- replica 1 URI
        'replicator:password@tt3:3301', -- replica 2 URI
    },
    read_only = false # set to true for replica 1 and 2 if you want master-slave
}
box.once("schema", function()
    box.schema.user.create('replicator', {password = 'password'})
    box.schema.user.grant('replicator', 'replication') -- grant replication role
    box.schema.space.create("test")
    box.space.test:create_index("primary")
    print('box.once executed on master')
end)

# start it
docker-compose up

# create table and insert on master
tarantoolctl connect tester:tester@127.0.0.1:13301
connected to 127.0.0.1:13301

127.0.0.1:13301> box.execute [[ create table test1(id int primary key, name string) ]]
---
- row_count: 1
...

127.0.0.1:13301> box.execute [[ insert into test1(id,name) values(1,'test') ]]
---
- row_count: 1
...

# check on slave cluster node
tarantoolctl connect tester:tester@127.0.0.1:23301
connected to 127.0.0.1:23301
127.0.0.1:23301> box.execute [[ select * FROM test1 ]]
---
- metadata:
  - name: ID
    type: integer
  - name: NAME
    type: string
  rows:
  - [1, 'test']
...



That's it, very easy right? Now for the ClickHouse

# docker-compose.yml # clickhouse
version: '3.3'
services:  ch1:
    image: yandex/clickhouse-server
    restart: always
    volumes:
      - ./config.xml:/etc/clickhouse-server/config.d/local.xml
      - ./macro1.xml:/etc/clickhouse-server/config.d/macros.xml
      - ./data/1:/var/lib/clickhouse    
    ports:
      - '18123:8123'
      - '19000:9000'
      - '19009:9009'
    ulimits:
      nproc: 65536
      nofile:
        soft: 252144
        hard: 252144
  ch2:
    image: yandex/clickhouse-server
    restart: always
    volumes:
      - ./config.xml:/etc/clickhouse-server/config.d/local.xml
      - ./macro2.xml:/etc/clickhouse-server/config.d/macros.xml
      - ./data/2:/var/lib/clickhouse
    ports:
      - '28123:8123'
      - '29000:9000'
      - '29009:9009'
    ulimits:
      nproc: 65536
      nofile:
        soft: 252144
        hard: 252144
  ch3:
    image: yandex/clickhouse-server
    restart: always
    volumes:
      - ./config.xml:/etc/clickhouse-server/config.d/local.xml
      - ./macro3.xml:/etc/clickhouse-server/config.d/macros.xml
      - ./data/3:/var/lib/clickhouse
    ports:
      - '38123:8123'
      - '39000:9000'
      - '39009:9009'
    ulimits:
      nproc: 65536
      nofile:
        soft: 252144
        hard: 252144
  zookeeper:
    image: zookeeper

# config.xml
<yandex>
    <remote_servers>
        <replicated>
            <shard>
                <internal_replication>true</internal_replication>
                <replica>
                    <host>ch1</host>
                    <port>9000</port>
                </replica>
                <replica>
                    <host>ch2</host>
                    <port>9000</port>
                </replica>
                <replica>
                    <host>ch3</host>
                    <port>9000</port>
                </replica>
            </shard>
        </replicated>
    </remote_servers>
    <zookeeper>
        <node>
            <host>zookeeper</host>
            <port>2181</port>
        </node>
    </zookeeper>
</yandex>

# macroXX.xml # replace XX with 1, 2, or 3
<yandex>
    <macros replace="replace">
        <cluster>cluster1</cluster>
        <replica>chXX</replica>
    </macros>
</yandex>

# start it
docker-compose up

# create table and insert on first cluster node
clickhouse-client --port 19000
SELECT * FROM system.clusters;
CREATE DATABASE db1 ON CLUSTER replicated;
SHOW DATABASES;
USE db1;

CREATE TABLE IF NOT EXISTS db1.table1 ON CLUSTER replicated
( id UInt64
, dt Date
, val UInt64
) ENGINE = ReplicatedReplacingMergeTree('/clickhouse/{cluster}/tables/table1',
'{replica}')
PARTITION BY modulo( id, 1000 )
ORDER BY (dt);

INSERT INTO db1.
table1
(id, dt, val)
VALUES (1,'2021-05-31',2);

# check on second cluster node
clickhouse-client --port 29000
SELECT * FROM db1.sbr2;

┌─id─┬─dt
─┬─val─┐
│  1 │ 2021-05-31 │   2 │
└────┴────────────┴─────┘
↘ Progress: 1.00 rows, 42.00 B (132.02 rows/s., 5.54 KB/s.)  99%
1 rows in set. Elapsed: 0.008 sec.


That's it, you now can have both database on single computer to start testing the replication or for development (it's recommended to downscale it just to single replica tho).