diff --git a/Examples.md b/Examples.md index 9d7a626b..2b985ddb 100644 --- a/Examples.md +++ b/Examples.md @@ -1,6 +1,6 @@ # Use cases of clickhouse-backup -# How to convert MergeTree to ReplicatedMegreTree +## How to convert MergeTree to ReplicatedMergeTree 1. Create backup ``` clickhouse-backup create --table='my_db.my_table' my_backup @@ -15,22 +15,22 @@ clickhouse-backup restore my_backup ``` -# How to store backups on NFS, backup drive or another server via SFTP +## How to store backups on NFS, backup drive or another server via SFTP Use 'rsync' 'rsync' supports hard links with means that backup on remote server or mounted fs will be stored as efficiently as in the '/var/lib/clickhouse/backup'. You can create daily backup by clickhouse-backup and sync backup folder to mounted fs with this command: `rsync -a -H --delete --progress --numeric-ids --update /var/lib/clickhouse/backup/ /mnt/data/clickhouse-backup/` or similar for sync over ssh. In this case rsync will copy only difference between backups. -# How to move data to another clickhouse server +## How to move data to another clickhouse server See abowe -# How to reduce number of partitions +## How to reduce number of partitions ... -# How to monitor that backups created and uploaded correctly +## How to monitor that backups created and uploaded correctly Use services like https://healthchecks.io or https://deadmanssnitch.com. -# How to backup sharded cluster with Ansible +## How to backup sharded cluster with Ansible On the first day of month full backup will be uploaded and increments on the others days. ```yaml @@ -70,9 +70,9 @@ On the first day of month full backup will be uploaded and increments on the oth - uri: url="https://hc-ping.com/{{ healthchecksio_clickhouse_upload_id }}/fail" ``` -# How to backup database with several terabytes of data +## How to backup database with several terabytes of data You can use clickhouse-backup for creating periodical backups and keep it local. 
It protect you from destructive operations. In addition you may create instance of ClickHouse on another DC and have it fresh by clickhouse-copier it protect you from hardware or DC failures. -# How to use clickhouse-backup in Kubernetes +## How to use clickhouse-backup in Kubernetes ... diff --git a/pkg/chbackup/clickhouse.go b/pkg/chbackup/clickhouse.go index 0b761bbd..379f3c29 100644 --- a/pkg/chbackup/clickhouse.go +++ b/pkg/chbackup/clickhouse.go @@ -143,8 +143,8 @@ func (ch *ClickHouse) FreezeTableOldWay(table Table) error { var partitions []struct { PartitionID string `db:"partition_id"` } - q := "SELECT DISTINCT partition_id FROM `system`.`parts` WHERE database='?' AND table='?'" - if err := ch.conn.Select(&partitions, q, table.Database, table.Name); err != nil { + q := fmt.Sprintf("SELECT DISTINCT partition_id FROM `system`.`parts` WHERE database='%s' AND table='%s'", table.Database, table.Name) + if err := ch.conn.Select(&partitions, q); err != nil { return fmt.Errorf("can't get partitions for \"%s.%s\" with %v", table.Database, table.Name, err) } log.Printf("Freeze '%v.%v'", table.Database, table.Name) diff --git a/test/integration/integration_test.go b/test/integration/integration_test.go index bf5efcd4..d8e832c6 100644 --- a/test/integration/integration_test.go +++ b/test/integration/integration_test.go @@ -20,7 +20,7 @@ import ( const dbName = "_test.ДБ_" -type TestDataStuct struct { +type TestDataStruct struct { Database string Table string Schema string @@ -29,53 +29,53 @@ type TestDataStuct struct { OrderBy string } -var testData = []TestDataStuct{ - TestDataStuct{ +var testData = []TestDataStruct{ + TestDataStruct{ Database: dbName, Table: ".inner.table1", Schema: "(Date Date, TimeStamp DateTime, Log String) ENGINE = MergeTree(Date, (TimeStamp, Log), 8192)", Rows: []map[string]interface{}{ - map[string]interface{}{"Date": toDate("2018-10-23"), "TimeStamp": toTS("2018-10-23 07:37:14"), "Log": "One"}, - map[string]interface{}{"Date": 
toDate("2018-10-23"), "TimeStamp": toTS("2018-10-23 07:37:15"), "Log": "Two"}, - map[string]interface{}{"Date": toDate("2018-10-24"), "TimeStamp": toTS("2018-10-24 07:37:16"), "Log": "Three"}, - map[string]interface{}{"Date": toDate("2018-10-24"), "TimeStamp": toTS("2018-10-24 07:37:17"), "Log": "Four"}, - map[string]interface{}{"Date": toDate("2019-10-25"), "TimeStamp": toTS("2019-01-25 07:37:18"), "Log": "Five"}, - map[string]interface{}{"Date": toDate("2019-10-25"), "TimeStamp": toTS("2019-01-25 07:37:19"), "Log": "Six"}, + {"Date": toDate("2018-10-23"), "TimeStamp": toTS("2018-10-23 07:37:14"), "Log": "One"}, + {"Date": toDate("2018-10-23"), "TimeStamp": toTS("2018-10-23 07:37:15"), "Log": "Two"}, + {"Date": toDate("2018-10-24"), "TimeStamp": toTS("2018-10-24 07:37:16"), "Log": "Three"}, + {"Date": toDate("2018-10-24"), "TimeStamp": toTS("2018-10-24 07:37:17"), "Log": "Four"}, + {"Date": toDate("2019-10-25"), "TimeStamp": toTS("2019-01-25 07:37:18"), "Log": "Five"}, + {"Date": toDate("2019-10-25"), "TimeStamp": toTS("2019-01-25 07:37:19"), "Log": "Six"}, }, Fields: []string{"Date", "TimeStamp", "Log"}, OrderBy: "TimeStamp", }, - TestDataStuct{ + TestDataStruct{ Database: dbName, Table: "2. 
Таблица №2", Schema: "(id UInt64, User String) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 8192", Rows: []map[string]interface{}{ - map[string]interface{}{"id": uint64(1), "User": "Alice"}, - map[string]interface{}{"id": uint64(2), "User": "Bob"}, - map[string]interface{}{"id": uint64(3), "User": "John"}, - map[string]interface{}{"id": uint64(4), "User": "Frank"}, - map[string]interface{}{"id": uint64(5), "User": "Nancy"}, - map[string]interface{}{"id": uint64(6), "User": "Brandon"}, + {"id": uint64(1), "User": "Alice"}, + {"id": uint64(2), "User": "Bob"}, + {"id": uint64(3), "User": "John"}, + {"id": uint64(4), "User": "Frank"}, + {"id": uint64(5), "User": "Nancy"}, + {"id": uint64(6), "User": "Brandon"}, }, Fields: []string{"id", "User"}, OrderBy: "id", }, - TestDataStuct{ + TestDataStruct{ Database: dbName, Table: "table3", Schema: "(TimeStamp DateTime, Item String, Date Date MATERIALIZED toDate(TimeStamp)) ENGINE = MergeTree() PARTITION BY Date ORDER BY TimeStamp SETTINGS index_granularity = 8192", Rows: []map[string]interface{}{ - map[string]interface{}{"TimeStamp": toTS("2018-10-23 07:37:14"), "Item": "One"}, - map[string]interface{}{"TimeStamp": toTS("2018-10-23 07:37:15"), "Item": "Two"}, - map[string]interface{}{"TimeStamp": toTS("2018-10-24 07:37:16"), "Item": "Three"}, - map[string]interface{}{"TimeStamp": toTS("2018-10-24 07:37:17"), "Item": "Four"}, - map[string]interface{}{"TimeStamp": toTS("2019-01-25 07:37:18"), "Item": "Five"}, - map[string]interface{}{"TimeStamp": toTS("2019-01-25 07:37:19"), "Item": "Six"}, + {"TimeStamp": toTS("2018-10-23 07:37:14"), "Item": "One"}, + {"TimeStamp": toTS("2018-10-23 07:37:15"), "Item": "Two"}, + {"TimeStamp": toTS("2018-10-24 07:37:16"), "Item": "Three"}, + {"TimeStamp": toTS("2018-10-24 07:37:17"), "Item": "Four"}, + {"TimeStamp": toTS("2019-01-25 07:37:18"), "Item": "Five"}, + {"TimeStamp": toTS("2019-01-25 07:37:19"), "Item": "Six"}, }, Fields: []string{"TimeStamp", "Item"}, OrderBy: 
"TimeStamp", }, - TestDataStuct{ + TestDataStruct{ Database: dbName, Table: "table4", Schema: "(id UInt64, Col1 String, Col2 String, Col3 String, Col4 String, Col5 String) ENGINE = MergeTree PARTITION BY id ORDER BY (id, Col1, Col2, Col3, Col4, Col5) SETTINGS index_granularity = 8192", @@ -89,44 +89,77 @@ var testData = []TestDataStuct{ Fields: []string{"id", "Col1", "Col2", "Col3", "Col4", "Col5"}, OrderBy: "id", }, + TestDataStruct{ + Database: dbName, + Table: "yuzhichang_table2", + Schema: "(order_id String, order_time DateTime, amount Float64) ENGINE = MergeTree() PARTITION BY toYYYYMM(order_time) ORDER BY (order_time, order_id)", + Rows: []map[string]interface{}{ + {"order_id": "1", "order_time": toTS("2010-01-01 00:00:00"), "amount": 1.0}, + {"order_id": "2", "order_time": toTS("2010-02-01 00:00:00"), "amount": 2.0}, + }, + Fields: []string{"order_id", "order_time", "amount"}, + OrderBy: "order_id", + }, + TestDataStruct{ + Database: dbName, + Table: "yuzhichang_table3", + Schema: "(order_id String, order_time DateTime, amount Float64) ENGINE = MergeTree() PARTITION BY toYYYYMMDD(order_time) ORDER BY (order_time, order_id)", + Rows: []map[string]interface{}{ + {"order_id": "1", "order_time": toTS("2010-01-01 00:00:00"), "amount": 1.0}, + {"order_id": "2", "order_time": toTS("2010-02-01 00:00:00"), "amount": 2.0}, + }, + Fields: []string{"order_id", "order_time", "amount"}, + OrderBy: "order_id", + }, + TestDataStruct{ + Database: dbName, + Table: "yuzhichang_table4", + Schema: "(order_id String, order_time DateTime, amount Float64) ENGINE = MergeTree() ORDER BY (order_time, order_id)", + Rows: []map[string]interface{}{ + {"order_id": "1", "order_time": toTS("2010-01-01 00:00:00"), "amount": 1.0}, + {"order_id": "2", "order_time": toTS("2010-02-01 00:00:00"), "amount": 2.0}, + }, + Fields: []string{"order_id", "order_time", "amount"}, + OrderBy: "order_id", + }, } -var incrementData = []TestDataStuct{ - TestDataStuct{ +var incrementData = []TestDataStruct{ + 
TestDataStruct{ Database: dbName, Table: ".inner.table1", Schema: "(Date Date, TimeStamp DateTime, Log String) ENGINE = MergeTree(Date, (TimeStamp, Log), 8192)", Rows: []map[string]interface{}{ - map[string]interface{}{"Date": toDate("2019-10-26"), "TimeStamp": toTS("2019-01-26 07:37:19"), "Log": "Seven"}, + {"Date": toDate("2019-10-26"), "TimeStamp": toTS("2019-01-26 07:37:19"), "Log": "Seven"}, }, Fields: []string{"Date", "TimeStamp", "Log"}, OrderBy: "TimeStamp", }, - TestDataStuct{ + TestDataStruct{ Database: dbName, Table: "2. Таблица №2", Schema: "(id UInt64, User String) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 8192", Rows: []map[string]interface{}{ - map[string]interface{}{"id": uint64(7), "User": "Alice"}, - map[string]interface{}{"id": uint64(8), "User": "Bob"}, - map[string]interface{}{"id": uint64(9), "User": "John"}, - map[string]interface{}{"id": uint64(10), "User": "Frank"}, + {"id": uint64(7), "User": "Alice"}, + {"id": uint64(8), "User": "Bob"}, + {"id": uint64(9), "User": "John"}, + {"id": uint64(10), "User": "Frank"}, }, Fields: []string{"id", "User"}, OrderBy: "id", }, - TestDataStuct{ + TestDataStruct{ Database: dbName, Table: "table3", Schema: "(TimeStamp DateTime, Item String, Date Date MATERIALIZED toDate(TimeStamp)) ENGINE = MergeTree() PARTITION BY Date ORDER BY TimeStamp SETTINGS index_granularity = 8192", Rows: []map[string]interface{}{ - map[string]interface{}{"TimeStamp": toTS("2019-01-26 07:37:18"), "Item": "Seven"}, - map[string]interface{}{"TimeStamp": toTS("2019-01-27 07:37:19"), "Item": "Eight"}, + {"TimeStamp": toTS("2019-01-26 07:37:18"), "Item": "Seven"}, + {"TimeStamp": toTS("2019-01-27 07:37:19"), "Item": "Eight"}, }, Fields: []string{"TimeStamp", "Item"}, OrderBy: "TimeStamp", }, - TestDataStuct{ + TestDataStruct{ Database: dbName, Table: "table4", Schema: "(id UInt64, Col1 String, Col2 String, Col3 String, Col4 String, Col5 String) ENGINE = MergeTree PARTITION BY id ORDER BY (id, Col1, Col2, Col3, Col4, 
Col5) SETTINGS index_granularity = 8192", @@ -140,6 +173,39 @@ var incrementData = []TestDataStuct{ Fields: []string{"id", "Col1", "Col2", "Col3", "Col4", "Col5"}, OrderBy: "id", }, + TestDataStruct{ + Database: dbName, + Table: "yuzhichang_table2", + Schema: "(order_id String, order_time DateTime, amount Float64) ENGINE = MergeTree() PARTITION BY toYYYYMM(order_time) ORDER BY (order_time, order_id)", + Rows: []map[string]interface{}{ + {"order_id": "3", "order_time": toTS("2010-03-01 00:00:00"), "amount": 3.0}, + {"order_id": "4", "order_time": toTS("2010-04-01 00:00:00"), "amount": 4.0}, + }, + Fields: []string{"order_id", "order_time", "amount"}, + OrderBy: "order_id", + }, + TestDataStruct{ + Database: dbName, + Table: "yuzhichang_table3", + Schema: "(order_id String, order_time DateTime, amount Float64) ENGINE = MergeTree() PARTITION BY toYYYYMMDD(order_time) ORDER BY (order_time, order_id)", + Rows: []map[string]interface{}{ + {"order_id": "3", "order_time": toTS("2010-03-01 00:00:00"), "amount": 3.0}, + {"order_id": "4", "order_time": toTS("2010-04-01 00:00:00"), "amount": 4.0}, + }, + Fields: []string{"order_id", "order_time", "amount"}, + OrderBy: "order_id", + }, + TestDataStruct{ + Database: dbName, + Table: "yuzhichang_table4", + Schema: "(order_id String, order_time DateTime, amount Float64) ENGINE = MergeTree() ORDER BY (order_time, order_id)", + Rows: []map[string]interface{}{ + {"order_id": "3", "order_time": toTS("2010-03-01 00:00:00"), "amount": 3.0}, + {"order_id": "4", "order_time": toTS("2010-04-01 00:00:00"), "amount": 4.0}, + }, + Fields: []string{"order_id", "order_time", "amount"}, + OrderBy: "order_id", + }, } func testRestoreLegacyBackupFormat(t *testing.T) { @@ -296,7 +362,7 @@ func (ch *TestClickHouse) connect() error { return ch.chbackup.Connect() } -func (ch *TestClickHouse) createTestData(data TestDataStuct) error { +func (ch *TestClickHouse) createTestData(data TestDataStruct) error { if err := 
ch.chbackup.CreateDatabase(data.Database); err != nil { return err } @@ -335,7 +401,7 @@ func (ch *TestClickHouse) dropDatabase(database string) error { return err } -func (ch *TestClickHouse) checkData(t *testing.T, data TestDataStuct) error { +func (ch *TestClickHouse) checkData(t *testing.T, data TestDataStruct) error { fmt.Printf("Check '%d' rows in '%s.%s'\n", len(data.Rows), data.Database, data.Table) rows, err := ch.chbackup.GetConn().Queryx(fmt.Sprintf("SELECT * FROM `%s`.`%s` ORDER BY %s", data.Database, data.Table, data.OrderBy)) if err != nil {