#!/usr/bin/env bash
set -e

# COLUMNS is also a variable Bash maintains itself: with `checkwinsize` set
# (the default in recent Bash) the shell may overwrite it with the terminal
# width after an external command finishes. Turn that off so the column list
# computed below is still intact when the benchmark cases read it.
shopt -u checkwinsize

# Raw dataset and the derived copy that carries an extra leading `id` column.
SOURCE_RAW=/opt/ontime/ontime.csv
#SOURCE=/opt/ontime/ontime9M_id.csv
#SOURCE=/opt/ontime/ontime9M_id.Native
SOURCE=/opt/ontime/ontime_id.csv
if [[ ! -f $SOURCE ]]; then
    echo "Inserting id field from $SOURCE_RAW to $SOURCE ..."
    # Build the file under a temporary name and rename it only on success, so
    # an interrupted or failed conversion cannot leave a truncated $SOURCE that
    # the -f check above would silently accept on the next run.
    tail -n +2 "$SOURCE_RAW" | ./add_id_to_csv > "$SOURCE.tmp"
    mv -- "$SOURCE.tmp" "$SOURCE"
fi

# Table structure ("name Type, name Type, ...") with the id column prepended.
STRUCT="id UInt32, $(cat ontime.struct)"
# Column names only: every odd space-separated token of STRUCT, comma-joined,
# with the trailing comma cut off.
COLUMNS=$(echo "$STRUCT" | tr " " "\n" | awk 'NR % 2 == 1' | tr "\n" "," | head -c -1)
# How many leading columns of STRUCT/COLUMNS the benchmark actually uses.
ACTIVE_COLUMNS=11

# Extra clickhouse-client options applied to the timed queries.
ST_OPTIONS=""
#ST_OPTIONS="--max_threads=1 --background_pool_size=1" # increase std. dev. of measurements

db="test"
table_name="ontime"
table="test.ontime"

# Run a query over the source dataset with clickhouse-local.
# Globals:   SOURCE (read) - input file, parsed as CSV
#            STRUCT (read) - column structure of that file
# Arguments: the query text; every argument is forwarded after --query
# Outputs:   the query result on stdout, requested in Native format
read_src_data() {
	local -a reader=(
		clickhouse-local -s
		--file "$SOURCE"
		--input-format CSV
		--structure "$STRUCT"
		-of Native
	)
	"${reader[@]}" --query "$@"
	#clickhouse-local -s --file "$SOURCE" --input-format Native --structure "$STRUCT" -of Native --query "$@"
}

# Write the two merge_tree config overrides used by the benchmark.
# Arguments: $1 - value placed inside <enable_vertical_merge_algorithm>
# Effects:   (re)writes two files under /etc/clickhouse-server/conf.d via
#            `sudo tee`; the second file always sets
#            vertical_merge_algorithm_min_rows_to_activate to 0.
set_vertical_alg() {
	local conf_dir=/etc/clickhouse-server/conf.d
	local enable_xml="<yandex><merge_tree><enable_vertical_merge_algorithm>$1</enable_vertical_merge_algorithm></merge_tree></yandex>"
	local min_rows_xml="<yandex><merge_tree><vertical_merge_algorithm_min_rows_to_activate>0</vertical_merge_algorithm_min_rows_to_activate></merge_tree></yandex>"

	printf '%s\n' "$enable_xml" \
		| sudo tee "$conf_dir/enable_vertical_merge_algorithm.xml" >/dev/null
	printf '%s\n' "$min_rows_xml" \
		| sudo tee "$conf_dir/vertical_merge_algorithm_min_rows_to_activate.xml" >/dev/null
}

# Restart clickhouse-server with the requested vertical-merge setting.
# Arguments: $1 - value handed to set_vertical_alg (the script passes 0 or 1)
# Effects:   stops the service, rewrites the config overrides, starts the
#            service again, then runs ./wait_clickhouse_server (helper not
#            shown here; presumably it blocks until the server is reachable).
set_and_restart() {
	sudo service clickhouse-server stop >/dev/null
	# Quoted so the value reaches set_vertical_alg as exactly one argument,
	# without word splitting or glob expansion.
	set_vertical_alg "$1"
	sudo service clickhouse-server start >/dev/null
	./wait_clickhouse_server
}

# Print the first N comma-separated fields of a list.
# Arguments: $1 - comma-separated list (expected to be a single line)
#            $2 - number of leading fields to keep
# Outputs:   the truncated list on stdout; if the list has fewer than N
#            fields it is printed unchanged
get_n_columns() {
	# Both expansions are quoted: unquoted, the list would be word-split and
	# glob-expanded by the shell (a bare `*` token would turn into file names
	# and runs of spaces would collapse) before cut ever saw it.
	printf '%s\n' "$1" | cut -d ',' -f "-$2"
}

# Print statistics about the benchmark table's active parts.
# Globals: table_name, db (read)
# Outputs: one TSKV row with parts (count), marks_avg (rounded to 2 places),
#          marks_min and marks_max, taken from system.parts
parts_stat() {
	local sql="SELECT count() as parts, round(avg(marks), 2) AS marks_avg,"
	sql+=" min(marks) AS marks_min, max(marks) AS marks_max"
	sql+=" FROM system.parts"
	sql+=" WHERE active AND table='$table_name' AND database='$db'"
	sql+=" FORMAT TSKV"
	clickhouse-client --query "$sql"
}

# Print how many active parts the benchmark table currently has.
# Globals: table_name, db (read)
# Outputs: the count() result from system.parts on stdout
parts_count() {
	local filter="active AND table='$table_name' AND database='$db'"
	clickhouse-client --query "SELECT count() FROM system.parts WHERE $filter"
}

# Print how many merges system.merges lists for the benchmark table.
# Globals: table_name, db (read)
# Outputs: the count() result on stdout
merges_count() {
	local filter="table='$table_name' AND database='$db'"
	clickhouse-client --query "SELECT count() FROM system.merges WHERE $filter"
}

# Block until no merges are running for the benchmark table.
# Polls merges_count once per second.
# The loop ends when the reported count is "0" or when the query prints
# nothing at all. Testing only for a non-empty answer is not enough: a server
# that reports an idle table as "0" would keep the loop spinning forever.
# (Some server versions/settings return an empty result instead of 0 for an
# aggregate over no rows, so both forms are treated as "idle".)
wait_merges() {
	local active
	while active=$(merges_count) && [[ -n $active && $active != 0 ]]; do
		sleep 1
	done
}

# Ask the kernel to drop its caches by writing 3 to /proc/sys/vm/drop_caches.
# The write happens inside a root shell started through sudo, because the
# redirection itself needs the elevated privileges.
drop_cache() {
	local root_cmd='echo 3 >/proc/sys/vm/drop_caches'
	sudo sh -c "$root_cmd"
}

# Print the description of the most recent merge found in the server log.
# Outputs: space-separated fields 12 and onward of the last log line that
#          contains "(Merger): Merging"; nothing if there is no such line
get_last_merge_info() {
	local server_log=/var/log/clickhouse-server/clickhouse-server.log
	cat "$server_log" \
		| grep "(Merger): Merging" \
		| tail -n 1 \
		| cut -d " " -f 12-
}

# Print the duration reported for the most recent finished merge.
# Outputs: space-separated field 21 of the last server-log line that contains
#          "(Merger): Merge sorted"; nothing if there is no such line
get_last_merge_time() {
	local server_log=/var/log/clickhouse-server/clickhouse-server.log
	cat "$server_log" \
		| grep "(Merger): Merge sorted" \
		| tail -n 1 \
		| cut -d " " -f 21
}

# Print the total merge time recorded in the server log.
# Takes space-separated field 21 from every log line containing
# "(Merger): Merge sorted" and lets clickhouse-local sum the values as
# Float64, rounded to 3 decimal places.
total_merge_time_from_log() {
	local server_log=/var/log/clickhouse-server/clickhouse-server.log
	local sum_query="SELECT round(sum(d), 3) FROM table"
	cat "$server_log" \
		| grep "(Merger): Merge sorted" \
		| cut -d " " -f 21 \
		| clickhouse-local -s -S "d Float64" --query "$sum_query"
}

# Print the VmPeak value of the running clickhouse-server divided by 1024.
# The PID is read from the server's pid file and the value from that
# process's /proc/<pid>/status entry.
get_max_clickhouse_server_memory() {
	local pid_file=/var/run/clickhouse-server/clickhouse-server.pid
	cat "/proc/$(cat "$pid_file")/status" \
		| awk '/VmPeak/ { print $2 / 1024 }' #MiB
}

# Run OPTIMIZE TABLE repeatedly and report the accumulated client time.
# Globals:   table, ST_OPTIONS (read)
#            NUM_ROUNDS, t_optimize_total (written)
# Arguments: $1 - number of rounds; 29 when missing or empty
# Outputs:   part statistics before and after, plus the summed time, on stdout
optimize_rounds() {
	NUM_ROUNDS=${1:-29}
	drop_cache

	printf 'OPTIMIZE before: %s\n' "$(parts_stat)"

	t_optimize_total=0
	for i in $(seq 1 $NUM_ROUNDS); do
		# --time reports the elapsed time on stderr, hence the 2>&1 capture.
		# ST_OPTIONS is deliberately unquoted so it can expand to several options.
		local t=$(clickhouse-client --time ${ST_OPTIONS} --query "OPTIMIZE TABLE $table" 2>&1)
		#echo "$t $(get_last_merge_time) $(get_last_merge_info)"
		t_optimize_total=$(printf '%s\n' "$t_optimize_total + $t" | bc -l)
	done

	printf 'OPTIMIZE after : %s\n' "$(parts_stat)"
	printf 'OPTIMIZE time  : %s\n' "$t_optimize_total"
}

# Run one benchmark case end to end and record its timings.
# Globals:   table, t_insert (read; t_insert is set by the case function)
#            case_func, t_merges, insert_times, merges_times (written)
# Arguments: $1 - case number; the function case_<N> is invoked
# Outputs:   part statistics after the insert on stdout; the case function's
#            own output is redirected to stderr
run_case() {
	case_func="case_$1"

	# Restart the server with the log file removed, so that
	# total_merge_time_from_log below reads a log written after this point.
	sudo service clickhouse-server stop >/dev/null
	sudo rm -f /var/log/clickhouse-server/clickhouse-server.log
	sudo service clickhouse-server start >/dev/null
	./wait_clickhouse_server

	clickhouse-client --query "DROP TABLE IF EXISTS $table"
	drop_cache

	# Called in the current shell so the t_insert it assigns stays visible here.
	$case_func >&2
	wait_merges
	t_merges=$(total_merge_time_from_log)

	printf 'After INSERT: %s\n' "$(parts_stat)"
	#echo "Merges time : $t_merges"

	#optimize_rounds
	#optimize_times="$optimize_times $t_optimize_total"

	# Append this case's numbers to the space-separated result lists.
	insert_times+=" $t_insert"
	merges_times+=" $t_merges"

	clickhouse-client --query "DROP TABLE IF EXISTS $table"
}

# Run benchmark cases 1, 2 and 3 and print the collected timings.
# Globals:   STRUCT, COLUMNS, ACTIVE_COLUMNS (read)
#            cur_struct, cur_columns, t_insert, insert_times, merges_times,
#            optimize_times (written; the time lists are reset first)
# Outputs:   "INSERT times" and "Merges times" summary lines on stdout
run_cases() {
	# Restrict both the table structure and the column list to the first
	# ACTIVE_COLUMNS entries.
	cur_struct=$(get_n_columns "$STRUCT" $ACTIVE_COLUMNS)
	cur_columns=$(get_n_columns "$COLUMNS" $ACTIVE_COLUMNS)

	t_insert=0
	insert_times=""
	merges_times=""
	optimize_times=""

	local case_no
	for case_no in 1 2 3; do
		run_case "$case_no"
	done

	printf 'INSERT times  : %s\n' "$insert_times"
	printf 'Merges times  : %s\n' "$merges_times"
	#echo "OPTIMIZE times: $optimize_times"
}

# Benchmark case 1: table keyed by (FlightDate) only.
# Globals:   table, cur_struct, cur_columns, ST_OPTIONS (read)
#            t_insert (written: captured output of the timed INSERT client)
# Outputs:   the case banner on stdout
case_1() {
	echo "Case #1. Trivial. All parts not intersected by PK."

	local ddl="CREATE TABLE $table ($cur_struct) ENGINE = MergeTree(FlightDate, (FlightDate), 8192)"
	clickhouse-client --query "$ddl"

	# --time reports on stderr, so only the client's stderr is folded into the
	# capture. ST_OPTIONS is deliberately unquoted so it can carry several options.
	t_insert=$(
		read_src_data "SELECT $cur_columns FROM table" \
			| clickhouse-client --time ${ST_OPTIONS} --query "INSERT INTO $table FORMAT Native" 2>&1
	)
}

# Benchmark case 2: table keyed by (intHash32(id), FlightDate).
# Globals:   table, cur_struct, cur_columns, ST_OPTIONS (read)
#            t_insert (written: captured output of the timed INSERT client)
# Outputs:   the case banner on stdout
case_2() {
	echo "Case #2. Strong mixture. Each new (merged) row comes from new part."

	local ddl="CREATE TABLE $table ($cur_struct) ENGINE = MergeTree(FlightDate, (intHash32(id), FlightDate), 8192)"
	clickhouse-client --query "$ddl"

	# --time reports on stderr, so only the client's stderr is folded into the
	# capture. ST_OPTIONS is deliberately unquoted so it can carry several options.
	t_insert=$(
		read_src_data "SELECT $cur_columns FROM table" \
			| clickhouse-client --time ${ST_OPTIONS} --query "INSERT INTO $table FORMAT Native" 2>&1
	)
}

# Benchmark case 3: table keyed by (bitAnd(id, 15), intHash32(bitShiftRight(id, 4))).
# Globals:   table, cur_struct, cur_columns, ST_OPTIONS (read)
#            t_insert (written: captured output of the timed INSERT client)
# Outputs:   the case banner on stdout
case_3() {
	echo "Case #3. Chunked mixture. Merged row with dozens of its neighbors come from the same part."

	local ddl="CREATE TABLE $table ($cur_struct) ENGINE = MergeTree(FlightDate, (bitAnd(id, 15), intHash32(bitShiftRight(id, 4))), 8192)"
	clickhouse-client --query "$ddl"

	# --time reports on stderr, so only the client's stderr is folded into the
	# capture. ST_OPTIONS is deliberately unquoted so it can carry several options.
	t_insert=$(
		read_src_data "SELECT $cur_columns FROM table" \
			| clickhouse-client --time ${ST_OPTIONS} --query "INSERT INTO $table FORMAT Native" 2>&1
	)
}


# The script elevates with sudo wherever it needs to, so a non-root run only
# gets a warning. The check has to be a string comparison: inside [[ ]] the
# -ne operator is arithmetic, which evaluated both names to 0 and meant the
# warning could never be printed.
if [[ $(whoami) != "root" ]]; then
	echo "Run script as root" >&2
fi

# First pass: vertical merge algorithm enabled.
echo "### Vertical ###"
set_and_restart 1
run_cases
vertical_optimize_times="$optimize_times"
vertical_merges_times="$merges_times"

echo

# Second pass: vertical merge algorithm disabled.
echo "### Horizontal ###"
set_and_restart 0
run_cases
horizontal_optimize_times="$optimize_times"
horizontal_merges_times="$merges_times"

echo

# Final side-by-side summary. The time lists are left unquoted on purpose:
# they start with a space, and word splitting normalises the spacing.
echo "#V" "Merges:" ${vertical_merges_times}   #"Optimize:" ${vertical_optimize_times}
echo "#H" "Merges:" ${horizontal_merges_times} #"Optimize:" ${horizontal_optimize_times}
