# LogicTest: 5node-dist 5node-dist-opt 5node-dist-metadata 5node-dist-disk

# Set up the `data` table used by most of the queries in this file. All four
# columns are part of the primary key, so every (a, b, c, d) combination is a
# distinct row.
statement ok
CREATE TABLE data (a INT, b INT, c FLOAT, d DECIMAL, PRIMARY KEY (a, b, c, d))

# Prevent the merge queue from immediately discarding our splits.
statement ok
SET CLUSTER SETTING kv.range_merge.queue_enabled = false;

# Split into ten parts.
# The nine split points (1 through 9) yield ten ranges.
statement ok
ALTER TABLE data SPLIT AT SELECT i FROM generate_series(1, 9) AS g(i)

# Relocate the ten parts to the five nodes.
# The range containing key i gets the single-replica set {i%5+1}, so the
# ranges cycle through 1..5 twice (see the placement check that follows).
statement ok
ALTER TABLE data EXPERIMENTAL_RELOCATE
  SELECT ARRAY[i%5+1], i FROM generate_series(0, 9) AS g(i)

# Generate all combinations of values 1 to 10.
# This is the cross product of four series 1..10, i.e. 10^4 = 10,000 rows;
# c and d are cast to the FLOAT and DECIMAL column types.
statement ok
INSERT INTO data SELECT a, b, c::FLOAT, d::DECIMAL FROM
   generate_series(1, 10) AS a(a),
   generate_series(1, 10) AS b(b),
   generate_series(1, 10) AS c(c),
   generate_series(1, 10) AS d(d)

# Verify data placement.
# Each of the ten ranges is expected to have exactly one replica, which is also
# its leaseholder, cycling through 1..5 in key order.
query TTTI colnames
SELECT start_key, end_key, replicas, lease_holder FROM [SHOW EXPERIMENTAL_RANGES FROM TABLE data]
----
start_key  end_key  replicas  lease_holder
NULL       /1       {1}       1
/1         /2       {2}       2
/2         /3       {3}       3
/3         /4       {4}       4
/4         /5       {5}       5
/5         /6       {1}       1
/6         /7       {2}       2
/7         /8       {3}       3
/8         /9       {4}       4
/9         NULL     {5}       5

# Single aggregate over the whole table. Each value 1..10 of a appears in
# 1,000 rows, so sum(a) = 55 * 1000.
query R
SELECT sum(a) FROM data
----
55000

# Aggregate over an expression. The expression maps every row to a distinct
# integer in 0..9999, so the sum is 9999 * 10000 / 2.
query R
SELECT sum((a-1)*1000 + (b-1)*100 + (c::INT-1)*10 + (d-1)) FROM data
----
49995000

# Several different aggregates over the same column.
query RII
SELECT sum(a), count(a), max(a) FROM data
----
55000 10000 10

# Several different aggregates over the same expression.
query RII
SELECT sum(a+b), count(a+b), max(a+b) FROM data
----
110000 10000 20

# Same total as the 0..9999 sum above, but computed as an expression combining
# four separate aggregates.
query R
SELECT sum((a-1)*1000) + sum((b-1)*100) + sum((c::INT-1)*10) + sum(d-1) FROM data
----
49995000

# A different aggregate on each of the four columns.
query RIRI
SELECT sum(a), min(b), max(c), count(d) FROM data
----
55000 1 10 10000

# Each column averages 5.5, so the four-column sum averages 22.
query R
SELECT avg(a+b+c::INT+d) FROM data
----
22

# stddev/variance mixed with another aggregate; results are rounded to one
# decimal place.
query RR
SELECT sum(a), round(stddev(b), 1) FROM data
----
55000 2.9

query RR
SELECT sum(a), round(variance(b), 1) FROM data
----
55000 8.3

# Unrounded stddev and variance over an expression spanning all four columns.
query R
SELECT stddev(a+b+c::INT+d) FROM data
----
5.7448498962142608187

query R
SELECT variance(a+b+c::INT+d) FROM data
----
33.0033003300330033

# Many aggregates of different kinds in a single query.
query RRRRRRR
SELECT sum(a), avg(b), sum(c), avg(d), stddev(a), variance(b), sum(a+b+c::INT+d) FROM data
----
55000 5.5 55000 5.5 2.8724249481071304094 8.2508250825082508251 220000

query RIRIRRR
SELECT sum(a), min(b), max(c), count(d), avg(a+b+c::INT+d), stddev(a+b), variance(c::INT+d) FROM data
----
55000 1 10 10000 22 4.0622223185119375800 16.50165016501650165

# Aggregates with FILTER clauses alongside unfiltered aggregates.
query RRRIRRRR
SELECT sum(a), stddev(a), avg(a) FILTER (WHERE a > 5), count(b), avg(b), variance(b) FILTER (WHERE b < 8), sum(b) FILTER (WHERE b < 8), stddev(b) FILTER (WHERE b > 2) FROM data
----
55000 2.8724249481071304094 8 10000 5.5 4.0005715102157451064 28000 2.2914310663953007487

# DISTINCT aggregate mixed with a regular and a filtered aggregate. The filter
# a > 0 matches every row, so the filtered variance equals the unfiltered one.
query RRR
SELECT sum(a), avg(DISTINCT a), variance(a) FILTER (WHERE a > 0) FROM data
----
55000 5.5 8.2508250825082508251

# Five different aggregates over the same column.
query RRIRR
SELECT sum(a), avg(a), count(a), stddev(a), variance(a) FROM data
----
55000 5.5 10000 2.8724249481071304094 8.2508250825082508251

# The same aggregates repeated several times in the select list.
query RRRRR
SELECT sum(a), avg(b), sum(a), sum(a), avg(b) FROM data
----
55000 5.5 55000 55000 5.5

# avg and sum over the FLOAT (c) and DECIMAL (d) columns.
query RRRR
SELECT avg(c), sum(c), avg(d), sum(d) FROM data
----
5.5 55000 5.5 55000

# HAVING without GROUP BY. min(b) is 1, so the single group is filtered out and
# no rows are expected.
query II
SELECT max(a), min(b) FROM data HAVING min(b) > 2
----


# DISTINCT on a single column; a takes the values 1..10.
query I rowsort
SELECT DISTINCT (a) FROM data
----
1
2
3
4
5
6
7
8
9
10

# DISTINCT inside an aggregate. The unquoted identifiers A and B resolve to
# columns a and b.
query R
SELECT SUM (DISTINCT A) FROM data
----
55

query RR
SELECT SUM (DISTINCT A), SUM (DISTINCT B) from data
----
55 55

# DISTINCT with a filter and an ORDER BY. c is at most 10, so only (a, b) pairs
# with a + b >= 17 can satisfy a + b + c = 27.
query II
SELECT DISTINCT a, b FROM data WHERE (a + b + c::INT) = 27 ORDER BY a,b
----
7   10
8   9
8   10
9   8
9   9
9   10
10  7
10  8
10  9
10  10

# Same rows as the previous query, ordered by b first.
query II
SELECT DISTINCT a, b FROM data WHERE (a + b + c::INT) = 27 ORDER BY b,a
----
10  7
9   8
10  8
8   9
9   9
10  9
7  10
8  10
9  10
10 10

# GROUP BY two columns with an expression combining two aggregates. Each (c, d)
# group holds the 100 (a, b) pairs, so sum(a+c::INT) = 550 + 100*c and
# avg(b+d) = 5.5 + d.
query RRR
SELECT c, d, sum(a+c::INT) + avg(b+d) FROM data GROUP BY c, d ORDER BY c, d
----
1   1   656.5
1   2   657.5
1   3   658.5
1   4   659.5
1   5   660.5
1   6   661.5
1   7   662.5
1   8   663.5
1   9   664.5
1   10  665.5
2   1   756.5
2   2   757.5
2   3   758.5
2   4   759.5
2   5   760.5
2   6   761.5
2   7   762.5
2   8   763.5
2   9   764.5
2   10  765.5
3   1   856.5
3   2   857.5
3   3   858.5
3   4   859.5
3   5   860.5
3   6   861.5
3   7   862.5
3   8   863.5
3   9   864.5
3   10  865.5
4   1   956.5
4   2   957.5
4   3   958.5
4   4   959.5
4   5   960.5
4   6   961.5
4   7   962.5
4   8   963.5
4   9   964.5
4   10  965.5
5   1   1056.5
5   2   1057.5
5   3   1058.5
5   4   1059.5
5   5   1060.5
5   6   1061.5
5   7   1062.5
5   8   1063.5
5   9   1064.5
5   10  1065.5
6   1   1156.5
6   2   1157.5
6   3   1158.5
6   4   1159.5
6   5   1160.5
6   6   1161.5
6   7   1162.5
6   8   1163.5
6   9   1164.5
6   10  1165.5
7   1   1256.5
7   2   1257.5
7   3   1258.5
7   4   1259.5
7   5   1260.5
7   6   1261.5
7   7   1262.5
7   8   1263.5
7   9   1264.5
7   10  1265.5
8   1   1356.5
8   2   1357.5
8   3   1358.5
8   4   1359.5
8   5   1360.5
8   6   1361.5
8   7   1362.5
8   8   1363.5
8   9   1364.5
8   10  1365.5
9   1   1456.5
9   2   1457.5
9   3   1458.5
9   4   1459.5
9   5   1460.5
9   6   1461.5
9   7   1462.5
9   8   1463.5
9   9   1464.5
9   10  1465.5
10  1   1556.5
10  2   1557.5
10  3   1558.5
10  4   1559.5
10  5   1560.5
10  6   1561.5
10  7   1562.5
10  8   1563.5
10  9   1564.5
10  10  1565.5

# Test plans with empty streams.
# Tables `one` and `two` are each split at keys 0 and 99. All ten inserted rows
# (keys 1..10) fall in the middle range [/0, /99), so the two outer ranges of
# each table hold no rows.
# NOTE(review): presumably those row-less outer ranges are what produce the
# empty streams -- confirm against the planner.
statement ok
CREATE TABLE one (k INT PRIMARY KEY, v INT)

statement ok
ALTER TABLE one SPLIT AT VALUES (0), (99)

# Place the range containing key 0 (the middle range) on {1}.
statement ok
ALTER TABLE one EXPERIMENTAL_RELOCATE VALUES (ARRAY[1], 0)

statement ok
INSERT INTO one VALUES (1,1), (2,2), (3,3), (4,4), (5,5), (6,6), (7,7), (8,8), (9,9), (10,10)

statement ok
CREATE TABLE two (k INT PRIMARY KEY, v INT);

statement ok
ALTER TABLE two SPLIT AT VALUES (0), (99)

# Place the range containing key 0 (the middle range) on {2}.
statement ok
ALTER TABLE two EXPERIMENTAL_RELOCATE VALUES (ARRAY[2], 0)

statement ok
INSERT INTO two VALUES (1,1), (2,2), (3,3), (4,4), (5,5), (6,6), (7,7), (8,8), (9,9), (10,10)

# Verify placement: only the middle range of each table was relocated.
query TTTI colnames
SELECT start_key, end_key, replicas, lease_holder FROM [SHOW EXPERIMENTAL_RANGES FROM TABLE one]
----
start_key  end_key  replicas  lease_holder
NULL       /0       {5}       5
/0         /99      {1}       1
/99        NULL     {5}       5

query TTTI colnames
SELECT start_key, end_key, replicas, lease_holder FROM [SHOW EXPERIMENTAL_RANGES FROM TABLE two]
----
start_key  end_key  replicas  lease_holder
NULL       /0       {5}       5
/0         /99      {2}       2
/99        NULL     {5}       5

# Three-way cross join of 10-row tables: 10 * 10 * 10 = 1000 rows.
query I
SELECT count(*) FROM one AS a, one AS b, two AS c
----
1000

# HAVING on an aggregate that does not appear in the select list. Every d group
# has sum(a+b) = 11000, so all ten groups pass the filter. All result rows are
# identical, so their order does not matter.
query RRR
SELECT sum(a), sum(b), sum(c) FROM data GROUP BY d HAVING sum(a+b) > 10
----
5500  5500  5500
5500  5500  5500
5500  5500  5500
5500  5500  5500
5500  5500  5500
5500  5500  5500
5500  5500  5500
5500  5500  5500
5500  5500  5500
5500  5500  5500


# HAVING that compares the two grouping columns; only the ten groups with
# c = d remain.
query RR rowsort
SELECT avg(a+b), c FROM data GROUP BY c, d HAVING c = d
----
11  1
11  2
11  3
11  4
11  5
11  6
11  7
11  8
11  9
11  10

# FILTER clauses that reference columns other than the aggregated ones. For
# d = 1 no row satisfies a < d, so that filtered sum is NULL.
query RRR rowsort
SELECT sum(a+b), sum(a+b) FILTER (WHERE a < d), sum(a+b) FILTER (WHERE a = c) FROM data GROUP BY d
----
11000  NULL  1100
11000  650   1100
11000  1400  1100
11000  3200  1100
11000  2250  1100
11000  4250  1100
11000  5400  1100
11000  6650  1100
11000  8000  1100
11000  9450  1100

# Same query but restricted to a single range; no local aggregation stage.
# With a = 1, the filter a < d holds for every d except 1.
query RRR rowsort
SELECT sum(a+b), sum(a+b) FILTER (WHERE a < d), sum(a+b) FILTER (WHERE a = c) FROM data WHERE a = 1 GROUP BY d
----
650  NULL  65
650  650   65
650  650   65
650  650   65
650  650   65
650  650   65
650  650   65
650  650   65
650  650   65
650  650   65

# Queries over VALUES clauses rather than tables.
# A bare VALUES clause with mixed column types.
query IIRT
VALUES (1, 2, 1.0, 'string1'), (4, 3, 2.3, 'string2')
----
1 2 1.0 string1
4 3 2.3 string2

# Aggregation over a filtered VALUES clause; the filter b > 3 keeps the rows
# (4, 5, 6) and (7, 8, 0).
query IIR
SELECT max(t.a), min(t.b), avg(t.c) FROM (VALUES (1, 2, 3), (4, 5, 6), (7, 8, 0)) AS t(a, b, c) WHERE b > 3
----
7 5 3

# Join between two VALUES clauses; only a = 1 appears on both sides.
query ITIR
SELECT * FROM (VALUES (1, '222'), (2, '444')) t1(a,b) JOIN (VALUES (1, 100.0), (3, 32.0)) t2(a,b) ON t1.a = t2.a
----
1 222 1 100.0

# Aggregation over a table that contains a NULL in a non-key column.
statement ok
CREATE TABLE nullables (a INT, b INT, c INT, PRIMARY KEY (a))

statement ok
INSERT INTO nullables VALUES (1,1,1)

# This row has a NULL in b.
statement ok
INSERT INTO nullables VALUES (2,NULL,1)

# Both rows share c = 1, and count(*) counts the row with the NULL as well.
query II
SELECT c, count(*) FROM nullables GROUP BY c;
----
1 2

# Array and JSON aggregation over ordered subqueries. The expected output
# lists the elements in the subquery's ORDER BY order.
query T
SELECT array_agg(a) FROM (SELECT a FROM data WHERE b = 1 AND c = 1.0 AND d = 1.0 ORDER BY a)
----
{1,2,3,4,5,6,7,8,9,10}

# Ordering on an expression: the 100 products a*b for a, b in 1..10, ascending.
query T
SELECT array_agg(ab) FROM (SELECT a*b AS ab FROM data WHERE c = 1.0 AND d = 1.0 ORDER BY a*b)
----
{1,2,2,3,3,4,4,4,5,5,6,6,6,6,7,7,8,8,8,8,9,9,9,10,10,10,10,12,12,12,12,14,14,15,15,16,16,16,18,18,18,18,20,20,20,20,21,21,24,24,24,24,25,27,27,28,28,30,30,30,30,32,32,35,35,36,36,36,40,40,40,40,42,42,45,45,48,48,49,50,50,54,54,56,56,60,60,63,63,64,70,70,72,72,80,80,81,90,90,100}

query T
SELECT json_agg(a) FROM (SELECT a FROM data WHERE b = 1 AND c = 1.0 AND d = 1.0 ORDER BY a)
----
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

query T
SELECT jsonb_agg(a) FROM (SELECT a FROM data WHERE b = 1 AND c = 1.0 AND d = 1.0 ORDER BY a)
----
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

# Test that orderings on GROUP BY columns are propagated through aggregations.
# sorted_data has a primary key on a and a secondary index foo on b.
statement ok
CREATE TABLE sorted_data (a INT PRIMARY KEY, b INT, c FLOAT, INDEX foo(b))

# Ten rows with a = 1..10. Note that b = 9 occurs twice (a = 3 and a = 7), so
# b has only nine distinct values.
statement ok
INSERT INTO sorted_data VALUES
(1, 4, 5.0),
(2, 3, 3.4),
(3, 9, 2.2),
(4, 13, 1.99),
(5, 2, 5.7),
(6, 7, 6.2),
(7, 9, 8.9),
(8, 1, 1.22),
(9, -2, 23.0),
(10, 100, -3.1)

# Split into ten parts.
statement ok
ALTER TABLE sorted_data SPLIT AT SELECT i FROM generate_series(1, 9) AS g(i)

# Relocate the ten parts to the five nodes.
# Uses the same i%5+1 assignment as the `data` table.
statement ok
ALTER TABLE sorted_data EXPERIMENTAL_RELOCATE
  SELECT ARRAY[i%5+1], i FROM generate_series(0, 9) AS g(i)

# The ordering is on all the GROUP BY columns, and isn't preserved after the
# aggregation.
# No ORDER BY is requested, so the query is checked with rowsort.
query II rowsort
SELECT a, max(b) FROM sorted_data GROUP BY a
----
6   7
7   9
10  100
1   4
2   3
3   9
4   13
5   2
8   1
9   -2

# The ordering is on all the GROUP BY columns, and is preserved after the
# aggregation.
query II
SELECT a, max(b) FROM sorted_data GROUP BY a ORDER BY a
----
1   4
2   3
3   9
4   13
5   2
6   7
7   9
8   1
9   -2
10  100

# The ordering is on some of the GROUP BY columns, and isn't preserved after
# the aggregation.
query RII rowsort
SELECT c, min(b), a FROM sorted_data GROUP BY a, c
----
8.9   9    7
1.99  13   4
1.22  1    8
3.4   3    2
2.2   9    3
-3.1  100  10
23    -2   9
5     4    1
5.7   2    5
6.2   7    6

# The ordering is on some of the GROUP BY columns, and is preserved after
# the aggregation.
query RII
SELECT c, min(b), a FROM sorted_data GROUP BY a, c ORDER BY a
----
5     4    1
3.4   3    2
2.2   9    3
1.99  13   4
5.7   2    5
6.2   7    6
8.9   9    7
1.22  1    8
23    -2   9
-3.1  100  10

# If the underlying ordering isn't from the primary index, it needs to be hinted
# for now.
# b = 9 occurs in two rows (c = 2.2 and c = 8.9), so there are nine groups and
# the b = 9 group reports max(c) = 8.9.
query IR rowsort
SELECT b, max(c) FROM sorted_data@foo GROUP BY b
----
-2   23
1    1.22
2    5.7
3    3.4
4    5
7    6.2
9    8.9
13   1.99
100  -3.1

# Test that a merge join is used on two aggregate subqueries with orderings on
# the GROUP BY columns. Note that an ORDER BY is not necessary on the
# subqueries.
# Only values that occur both as an a and as a b (1, 2, 3, 4, 7, 9) produce
# join rows.
query IRIR rowsort
SELECT * FROM (SELECT a, max(c) FROM sorted_data GROUP BY a) JOIN (SELECT b, min(c) FROM sorted_data@foo GROUP BY b) ON a = b
----
1  5     1  1.22
2  3.4   2  5.7
3  2.2   3  3.4
4  1.99  4  5
9  23    9  2.2
7  8.9   7  6.2

# Test that zeroNode is being handled correctly.
# The constant-FALSE filter leaves no input rows, so sum(a) is expected to be
# NULL.
query R
SELECT sum(a) FROM data WHERE FALSE
----
NULL
