# LogicTest: 5node-dist 5node-dist-opt 5node-dist-metadata

########################
#  LOOKUP JOIN FORCED  #
########################
# Turn on the experimental_force_lookup_join session setting so that, per the
# section header, the joins below are planned as lookup joins.
statement ok
SET experimental_force_lookup_join = true;

# Four-column table whose primary key spans every column.
statement ok
CREATE TABLE data (a INT, b INT, c INT, d INT, PRIMARY KEY (a, b, c, d))

# Prevent the merge queue from immediately discarding our splits.
statement ok
SET CLUSTER SETTING kv.range_merge.queue_enabled = false;

# Split into ten parts (nine split points, at 1 through 9).
statement ok
ALTER TABLE data SPLIT AT SELECT i FROM generate_series(1, 9) AS g(i)

# Relocate the ten parts to the five nodes; the part containing key i is
# assigned to i%5+1.
statement ok
ALTER TABLE data EXPERIMENTAL_RELOCATE
  SELECT ARRAY[i%5+1], i FROM generate_series(0, 9) AS g(i)

# Generate all combinations of values 1 to 10 (10^4 = 10000 rows).
statement ok
INSERT INTO data SELECT a, b, c, d FROM
   generate_series(1, 10) AS a(a),
   generate_series(1, 10) AS b(b),
   generate_series(1, 10) AS c(c),
   generate_series(1, 10) AS d(d)

# Left-hand table for the distsql_lookup_test_* joins below. Note the NULL in
# column b of the last row.
statement ok
CREATE TABLE distsql_lookup_test_1 (a INT, b INT, c INT, PRIMARY KEY (a, c));
INSERT INTO distsql_lookup_test_1 VALUES (1, 1, 2), (2, 1, 1), (2, NULL, 2)

# Table that is joined against, keyed on (f, e). Values are inserted in
# (d, e, f) column order, and the last row has a NULL in the non-key column d.
statement ok
CREATE TABLE distsql_lookup_test_2 (d INT, e INT, f INT, PRIMARY KEY (f, e));
INSERT INTO distsql_lookup_test_2 VALUES (1, 1, 2), (2, 1, 1), (NULL, 2, 1)

# Table with a secondary index g_idx on g and a single row whose g is NULL.
statement ok
CREATE TABLE distsql_lookup_test_3 (g INT, h INT, INDEX g_idx (g));
INSERT INTO distsql_lookup_test_3 VALUES (NULL, 1)

# Set up the statistics as if the first table is much smaller than the others.
# This will make lookup join into the second table be the best plan.
# TODO(radu): we have to use very small row counts because of the poor row
# count estimation for joins (left-rows * right-rows / 10).
statement ok
ALTER TABLE distsql_lookup_test_1 INJECT STATISTICS '[
  {
    "columns": ["a"],
    "created_at": "2018-01-01 1:00:00.00000+00:00",
    "row_count": 1,
    "distinct_count": 1
  }
]'

# Injected statistics claim 10 rows for distsql_lookup_test_2.
statement ok
ALTER TABLE distsql_lookup_test_2 INJECT STATISTICS '[
  {
    "columns": ["f"],
    "created_at": "2018-01-01 1:00:00.00000+00:00",
    "row_count": 10,
    "distinct_count": 10
  }
]'

# Injected statistics claim 10 rows for distsql_lookup_test_3.
statement ok
ALTER TABLE distsql_lookup_test_3 INJECT STATISTICS '[
  {
    "columns": ["g"],
    "created_at": "2018-01-01 1:00:00.00000+00:00",
    "row_count": 10,
    "distinct_count": 10
  }
]'

# Basic join on f = b. The (2, NULL, 2) row of distsql_lookup_test_1 produces no
# output because its b is NULL.
query IIIIII rowsort
SELECT * FROM distsql_lookup_test_1 JOIN distsql_lookup_test_2 ON f = b
----
1  1  2  2     1  1
2  1  1  2     1  1
1  1  2  NULL  2  1
2  1  1  NULL  2  1

# Same join with extra filters on both sides in the WHERE clause.
query IIIIII rowsort
SELECT * FROM distsql_lookup_test_1 JOIN distsql_lookup_test_2 ON f = b WHERE a > 1 AND e > 1
----
2  1  1  NULL  2  1

# Same filters moved into the ON clause; the expected result is unchanged.
query IIIIII rowsort
SELECT * FROM distsql_lookup_test_1 JOIN distsql_lookup_test_2 ON f = b AND a > 1 AND e > 1
----
2  1  1  NULL  2  1

# Filter right side of a lookup join with a restriction on an indexed column.
query IIIIII rowsort
SELECT * FROM distsql_lookup_test_1 JOIN distsql_lookup_test_2 ON f = a WHERE f > 1
----
2  1  1  1  1  2
2  NULL  2  1  1  2

# Test lookup join with restriction relating the left and right side (in the
# WHERE clause).
query IIIIII rowsort
SELECT * FROM distsql_lookup_test_1 JOIN distsql_lookup_test_2 ON f = b WHERE a >= e
----
1  1  2  2  1  1
2  1  1  2  1  1
2  1  1  NULL  2  1

# Test lookup join with restriction relating the left and right side, this time
# placed in the ON clause.
query IIIIII rowsort
SELECT * FROM distsql_lookup_test_1 JOIN distsql_lookup_test_2 ON f = b AND a >= e
----
1  1  2  2  1  1
2  1  1  2  1  1
2  1  1  NULL  2  1

# Test lookup join with selecting a subset of the columns.
query III rowsort
SELECT a, b, e FROM distsql_lookup_test_1 JOIN distsql_lookup_test_2 ON f = b WHERE a >= e
----
1  1  1
2  1  1
2  1  2

# Test lookup join on NULL column. (https://github.com/cockroachdb/cockroach/issues/27032)
# The only row in distsql_lookup_test_3 has a NULL g, so no rows are expected.
query I
SELECT h FROM distsql_lookup_test_1 JOIN distsql_lookup_test_3@g_idx ON b = g
----

# Test left lookup join on NULL column.
query II rowsort
SELECT b, h FROM distsql_lookup_test_1 LEFT JOIN distsql_lookup_test_3@g_idx ON b = g;
----
1     NULL
1     NULL
NULL  NULL

# Test left lookup join where all input rows are NULL.
query II
SELECT b, h FROM distsql_lookup_test_1 LEFT JOIN distsql_lookup_test_3@g_idx ON b = g WHERE b IS NULL
----
NULL  NULL

# Ensure join performs properly on input that has more than 100 rows.
query I
SELECT count(*) FROM data as d1 NATURAL JOIN data as d2
----
10000

# A lookup join that doesn't fully constrain the lookup side uses PrefixEnd, not
# the interleave marker, to form the lookup span.
query IIIIII
SET TRACING = on,kv; SELECT * FROM distsql_lookup_test_1 JOIN distsql_lookup_test_2 ON f = b WHERE a > 1 AND e > 1; SET TRACING=off
----
2  1  1  NULL  2  1

# Check the spans scanned by the traced statement above.
# NOTE(review): the hard-coded table IDs 54 and 55 presumably refer to
# distsql_lookup_test_1 and distsql_lookup_test_2; they depend on the order in
# which tables are created in this file -- confirm before reordering.
query T rowsort
SELECT message FROM [SHOW KV TRACE FOR SESSION] WHERE
message LIKE 'Scan%'
----
Scan /Table/54/{1/2-2}
Scan /Table/55/1/{1-2}

# A lookup join that fully constrains the lookup side uses the interleave marker
# to form the lookup span.
query IIIIII
SET TRACING = on,kv; SELECT * FROM distsql_lookup_test_1 JOIN distsql_lookup_test_2 ON f = b AND e = a; SET TRACING=off
----
1  1  2  2     1  1
2  1  1  NULL  2  1

# Check the spans scanned by the traced statement above; the lookup spans end
# in the /# marker.
query T rowsort
SELECT message FROM [SHOW KV TRACE FOR SESSION] WHERE
message LIKE 'Scan%'
----
Scan /Table/54/{1-2}
Scan /Table/55/1/1/1{-/#}, /Table/55/1/1/2{-/#}

# foo declares no primary key and contains duplicate values of a.
statement ok
CREATE TABLE foo (a int, b int); INSERT INTO foo VALUES (0, 1), (0, 2), (1, 1)

# bar is keyed on a, the column it shares with foo.
statement ok
CREATE TABLE bar (a int PRIMARY KEY, c int); INSERT INTO bar VALUES (0, 1), (1, 2), (2, 1)

# Natural join on the shared column a; bar's a = 2 row has no match in foo.
query III rowsort
SELECT * FROM foo NATURAL JOIN bar
----
0  1  1
0  2  1
1  1  2

# books is keyed on (title, edition); only 'Intro to Algo' has editions on
# different shelves.
statement ok
CREATE TABLE books (title STRING, edition INT, shelf INT, PRIMARY KEY (title, edition));
INSERT INTO books VALUES
  ('SICP', 1, 2),
  ('Intro to Algo', 1, 1),
  ('Intro to Algo', 2, 1),
  ('Intro to Algo', 3, 2),
  ('Art of Computer Programming', 1, 2),
  ('Art of Computer Programming', 2, 2)

# authors declares no primary key; its book column holds values matching
# books.title.
statement ok
CREATE TABLE authors (name STRING, book STRING);
INSERT INTO authors VALUES
  ('Hal Abelson', 'SICP'),
  ('Geral Jay Sussman', 'SICP'),
  ('Thomas H Cormen', 'Intro to Algo'),
  ('Charles E Leiserson', 'Intro to Algo'),
  ('Ronald Rivest', 'Intro to Algo'),
  ('Clifford Stein', 'Intro to Algo'),
  ('Donald Knuth', 'Art of Computer Programming')

# Self-join on title to find titles whose editions sit on different shelves.
query T rowsort
SELECT DISTINCT(b1.title) FROM books as b1 JOIN books as b2 ON b1.title = b2.title WHERE b1.shelf <> b2.shelf
----
Intro to Algo

# Filter on a column that is not returned or in the equality columns.
query T rowsort
SELECT DISTINCT(b1.title) FROM books as b1 JOIN books as b2 USING(title) WHERE b1.shelf <> b2.shelf
----
Intro to Algo

# Three-way join: authors of titles whose editions sit on different shelves.
query T rowsort
SELECT DISTINCT(authors.name) FROM authors, books AS b1, books AS b2 WHERE b1.title = b2.title AND authors.book = b1.title AND b1.shelf <> b2.shelf
----
Thomas H Cormen
Charles E Leiserson
Ronald Rivest
Clifford Stein

# Ensure lookup join preserves sort from the left side.
# NOTE(review): this query uses `rowsort`, which presumably makes the
# comparison order-insensitive, so the output ordering itself does not appear
# to be asserted here -- confirm.
query T rowsort
SELECT DISTINCT(a.name) FROM (SELECT * FROM authors ORDER BY name) AS a JOIN books AS b1 ON a.book = b1.title
----
Charles E Leiserson
Clifford Stein
Ronald Rivest
Thomas H Cormen
Donald Knuth
Geral Jay Sussman
Hal Abelson

# Table keyed on a DECIMAL column.
statement ok
CREATE TABLE players (id DECIMAL PRIMARY KEY, name STRING, team INT); INSERT INTO players VALUES (1.0, 'ready', 0)

# Self-join on the DECIMAL primary key; the single row must match itself.
query B
SELECT p1.team = p2.team FROM players p1 JOIN players p2 ON p1.id = p2.id;
----
true

####################################
#  LOOKUP JOIN ON SECONDARY INDEX  #
####################################

# Create a table with a secondary index which stores another column: index bc
# is on b and stores c, so it covers b and c but not d.
statement ok
CREATE TABLE multiples (a INT, b INT, c INT, d INT, PRIMARY KEY (a, b), INDEX bc (b) STORING (c))

# Prevent the merge queue from immediately discarding our splits.
statement ok
SET CLUSTER SETTING kv.range_merge.queue_enabled = false;

# Split into ten parts (nine split points, at 1 through 9).
statement ok
ALTER TABLE multiples SPLIT AT SELECT i FROM generate_series(1, 9) AS g(i)

# Relocate the ten parts to the five nodes.
statement ok
ALTER TABLE multiples EXPERIMENTAL_RELOCATE
  SELECT ARRAY[i%5+1], i FROM generate_series(0, 9) AS g(i)

# Generate 10 rows of the form (x, 2x, 3x, 4x) for x from 1 to 10.
statement ok
INSERT INTO multiples SELECT x, 2*x, 3*x, 4*x FROM
  generate_series(1, 10) AS a(x)

# Lookup join on covering secondary index
query II rowsort
SELECT t1.a, t2.c FROM multiples t1 JOIN multiples@bc t2 ON t1.a = t2.b
----
2   3
4   6
6   9
8   12
10  15

# Lookup join on non-covering secondary index
# The index join should be subsumed by joinreader, which takes care of the
# primary index lookups.
query II rowsort
SELECT t1.a, t2.d FROM multiples t1 JOIN multiples@bc t2 ON t1.a = t2.b
----
2   4
4   8
6   12
8   16
10  20

# The final primary index lookup uses the interleave marker to constrain its
# span, because it was fully specified and might contain interleaves.
statement ok
SET TRACING = on,kv; SELECT t1.a, t2.d FROM multiples t1 JOIN multiples@bc t2 ON t1.a = t2.b AND t1.a=2; SET TRACING=off

# Check the spans scanned by the traced statement above.
# NOTE(review): the hard-coded table ID 62 presumably refers to multiples and
# depends on the order in which tables are created in this file -- confirm
# before reordering.
query T rowsort
SELECT message FROM [SHOW KV TRACE FOR SESSION] WHERE
message LIKE 'Scan%'
----
Scan /Table/62/1/{2-3}
Scan /Table/62/2/{2-3}
Scan /Table/62/1/1/2{-/#}

############################
#  LEFT OUTER LOOKUP JOIN  #
############################
# Left join against primary index
# Left rows whose b exceeds 10 have no matching a and are NULL-extended.
query II rowsort
SELECT t1.b, t2.a FROM multiples t1 LEFT JOIN multiples t2 ON t1.b = t2.a
----
2 2
4 4
6 6
8 8
10 10
12 NULL
14 NULL
16 NULL
18 NULL
20 NULL

# Left join against covering secondary index
query II rowsort
SELECT t1.c, t2.c FROM multiples t1 LEFT JOIN multiples@bc t2 ON t1.c = t2.b
----
3   NULL
6   9
9   NULL
12  18
15  NULL
18  27
21  NULL
24  NULL
27  NULL
30  NULL

# Left join against non-covering secondary index
query II rowsort
SELECT t1.c, t2.d FROM multiples t1 LEFT JOIN multiples@bc t2 ON t1.c = t2.b
----
3   NULL
6   12
9   NULL
12  24
15  NULL
18  36
21  NULL
24  NULL
27  NULL
30  NULL

# Left join with ON filter on covering index
# The t1.c = 18 row becomes NULL-extended because its match has t2.c = 27.
query II rowsort
SELECT t1.c, t2.c FROM multiples t1 LEFT JOIN multiples@bc t2 ON t1.c = t2.b AND t2.c < 20
----
3   NULL
6   9
9   NULL
12  18
15  NULL
18  NULL
21  NULL
24  NULL
27  NULL
30  NULL

# Left join with ON filter on non-covering index
# The t1.c = 18 row becomes NULL-extended because its match has t2.d = 36.
query II rowsort
SELECT t1.c, t2.d FROM multiples t1 LEFT JOIN multiples@bc t2 ON t1.c = t2.b AND t2.d < 30
----
3   NULL
6   12
9   NULL
12  24
15  NULL
18  NULL
21  NULL
24  NULL
27  NULL
30  NULL

# Left join with the right-side filter in the WHERE clause instead of the ON
# clause; the NULL-extended rows do not pass the filter and are dropped.
query II rowsort
SELECT t1.c, t2.d FROM multiples t1 LEFT JOIN multiples@bc t2 ON t1.c = t2.b WHERE t2.d < 30
----
6   12
12  24

# Regression test for #30812: make sure that lookup joins with limit don't
# omit the final row.
# NOTE(review): the issue number above (#30812) does not match the table names
# below (t_30182_*); one of the two is likely a transposition -- confirm.

# Left ("l") table, keyed on id.
statement ok
CREATE TABLE t_30182_l (id INT PRIMARY KEY, name TEXT NOT NULL)

# Right ("r") table; l_id holds values matching t_30182_l.id but is not
# declared as a foreign key.
statement ok
CREATE TABLE t_30182_r (id INT PRIMARY KEY, name TEXT NOT NULL, l_id int)

statement ok
INSERT INTO t_30182_l (id, name) values(1, 'a')

statement ok
INSERT INTO t_30182_r values(1, 'foo', 1);

# Exactly one row matches, and LIMIT 1 must still return it.
query ITI
SELECT r.* FROM t_30182_r r
JOIN t_30182_l l on r.l_id = l.id
WHERE r.name = 'foo'
LIMIT 1
----
1 foo 1
