# LogicTest: fakedist-opt local-opt

statement ok
CREATE TABLE abc (a INT, b INT, c INT, PRIMARY KEY (a, c));
INSERT INTO abc VALUES (1, 1, 2), (2, 1, 1), (2, NULL, 2)

statement ok
CREATE TABLE def (d INT, e INT, f INT, PRIMARY KEY (f, e));
INSERT INTO def VALUES (1, 1, 2), (2, 1, 1), (NULL, 2, 1)

statement ok
CREATE TABLE gh (g INT, h INT, INDEX g_idx (g));
INSERT INTO gh VALUES (NULL, 1)

# Set up the statistics as if the first table is much smaller than the second.
# This will make lookup join into the second table be the best plan.
# TODO(radu): we have to use very small row counts because of the poor row
# count estimation for joins (left-rows * right-rows / 10).
statement ok
ALTER TABLE abc INJECT STATISTICS '[
  {
    "columns": ["a"],
    "created_at": "2018-01-01 1:00:00.00000+00:00",
    "row_count": 100,
    "distinct_count": 100
  }
]'

statement ok
ALTER TABLE def INJECT STATISTICS '[
  {
    "columns": ["f"],
    "created_at": "2018-01-01 1:00:00.00000+00:00",
    "row_count": 10000,
    "distinct_count": 10000
  }
]'

statement ok
ALTER TABLE gh INJECT STATISTICS '[
  {
    "columns": ["g"],
    "created_at": "2018-01-01 1:00:00.00000+00:00",
    "row_count": 10000,
    "distinct_count": 10000
  }
]'

query IIIIII rowsort
SELECT * FROM abc JOIN def ON f = b
----
1  1  2  2     1  1
2  1  1  2     1  1
1  1  2  NULL  2  1
2  1  1  NULL  2  1

query IIIIII rowsort
SELECT * FROM abc JOIN def ON f = b WHERE a > 1 AND e > 1
----
2  1  1  NULL  2  1

query IIIIII rowsort
SELECT * FROM abc JOIN def ON f = b AND a > 1 AND e > 1
----
2  1  1  NULL  2  1

# Filter right side of a lookup join with a restriction on an indexed column.
query IIIIII rowsort
SELECT * FROM abc JOIN def ON f = a WHERE f > 1
----
2  1  1  1  1  2
2  NULL  2  1  1  2

# Test lookup join with restriction relating the left and right side.
query IIIIII rowsort
SELECT * FROM abc JOIN def ON f = b WHERE a >= e
----
1  1  2  2  1  1
2  1  1  2  1  1
2  1  1  NULL  2  1

# Test lookup join with restriction relating the left and right side.
query IIIIII rowsort
SELECT * FROM abc JOIN def ON f = b AND a >= e
----
1  1  2  2  1  1
2  1  1  2  1  1
2  1  1  NULL  2  1

# Test lookup join with selecting a subset of the columns.
query III rowsort
SELECT a, b, e FROM abc JOIN def ON f = b WHERE a >= e
----
1  1  1
2  1  1
2  1  2

# Test lookup join on NULL column. (https://github.com/cockroachdb/cockroach/issues/27032)
query I
SELECT h FROM abc JOIN gh ON b = g
----

statement ok
CREATE TABLE data (a INT, b INT, c INT, d INT, PRIMARY KEY (a, b, c, d))

# Generate all combinations of values 1 to 10.
statement ok
INSERT INTO data SELECT a, b, c, d FROM
   generate_series(1, 10) AS a(a),
   generate_series(1, 10) AS b(b),
   generate_series(1, 10) AS c(c),
   generate_series(1, 10) AS d(d)

# Ensure lookup join performs properly on input that has more than 100 rows.
query I
SELECT count(*) FROM (SELECT * FROM data WHERE c = 1) AS l NATURAL JOIN data AS r
----
1000

statement ok
CREATE TABLE foo (a int, b int); INSERT INTO foo VALUES (0, 1), (0, 2), (1, 1)

statement ok
CREATE TABLE bar (a int PRIMARY KEY, c int); INSERT INTO bar VALUES (0, 1), (1, 2), (2, 1)

query III rowsort
SELECT * FROM foo NATURAL JOIN bar
----
0  1  1
0  2  1
1  1  2

statement ok
CREATE TABLE books (title STRING, edition INT, shelf INT, PRIMARY KEY (title, edition));
INSERT INTO books VALUES
  ('SICP', 1, 2),
  ('Intro to Algo', 1, 1),
  ('Intro to Algo', 2, 1),
  ('Intro to Algo', 3, 2),
  ('Art of Computer Programming', 1, 2),
  ('Art of Computer Programming', 2, 2)

statement ok
CREATE TABLE books2 (title STRING, edition INT, shelf INT, PRIMARY KEY (title, edition));
INSERT INTO books2 VALUES
  ('SICP', 1, 2),
  ('Intro to Algo', 1, 1),
  ('Intro to Algo', 2, 1),
  ('Intro to Algo', 3, 2),
  ('Art of Computer Programming', 1, 2),
  ('Art of Computer Programming', 2, 2)

statement ok
ALTER TABLE books INJECT STATISTICS '[
  {
    "columns": ["title"],
    "created_at": "2018-01-01 1:00:00.00000+00:00",
    "row_count": 100,
    "distinct_count": 100
  }
]'

statement ok
ALTER TABLE books2 INJECT STATISTICS '[
  {
    "columns": ["title"],
    "created_at": "2018-01-01 1:00:00.00000+00:00",
    "row_count": 10000,
    "distinct_count": 1000
  }
]'

statement ok
CREATE TABLE authors (name STRING, book STRING);
INSERT INTO authors VALUES
  ('Hal Abelson', 'SICP'),
  ('Geral Jay Sussman', 'SICP'),
  ('Thomas H Cormen', 'Intro to Algo'),
  ('Charles E Leiserson', 'Intro to Algo'),
  ('Ronald Rivest', 'Intro to Algo'),
  ('Clifford Stein', 'Intro to Algo'),
  ('Donald Knuth', 'Art of Computer Programming')

statement ok
ALTER TABLE authors INJECT STATISTICS '[
  {
    "columns": ["name"],
    "created_at": "2018-01-01 1:00:00.00000+00:00",
    "row_count": 100,
    "distinct_count": 100
  }
]'

# Filter on a column that is not returned or in the equality columns.
query T rowsort
SELECT DISTINCT b1.title FROM books as b1 JOIN books2 as b2 ON b1.title = b2.title WHERE b1.shelf <> b2.shelf
----
Intro to Algo

query T rowsort
SELECT DISTINCT authors.name FROM books AS b1, books2 as b2, authors WHERE b1.title = b2.title AND authors.book = b1.title AND b1.shelf <> b2.shelf
----
Thomas H Cormen
Charles E Leiserson
Ronald Rivest
Clifford Stein

# Ensure lookup join preserves ordering from the left side.
query T
SELECT a.name FROM authors AS a JOIN books2 AS b2 ON a.book = b2.title ORDER BY a.name
----
Charles E Leiserson
Charles E Leiserson
Charles E Leiserson
Clifford Stein
Clifford Stein
Clifford Stein
Donald Knuth
Donald Knuth
Geral Jay Sussman
Hal Abelson
Ronald Rivest
Ronald Rivest
Ronald Rivest
Thomas H Cormen
Thomas H Cormen
Thomas H Cormen

####################################
#  LOOKUP JOIN ON SECONDARY INDEX  #
####################################

statement ok
CREATE TABLE small (a INT PRIMARY KEY, b INT, c INT, d INT)

statement ok
CREATE TABLE large (a INT, b INT, c INT, d INT, PRIMARY KEY (a, b), INDEX bc (b) STORING (c))

# Generate 10 rows for both tables.
statement ok
INSERT INTO small SELECT x, 2*x, 3*x, 4*x FROM
  generate_series(1, 10) AS a(x)

statement ok
INSERT INTO large SELECT x, 2*x, 3*x, 4*x FROM
  generate_series(1, 10) AS a(x)

statement ok
ALTER TABLE small INJECT STATISTICS '[
  {
    "columns": ["a"],
    "created_at": "2018-01-01 1:00:00.00000+00:00",
    "row_count": 100,
    "distinct_count": 100
  }
]'

statement ok
ALTER TABLE large INJECT STATISTICS '[
  {
    "columns": ["a"],
    "created_at": "2018-01-01 1:00:00.00000+00:00",
    "row_count": 10000,
    "distinct_count": 10000
  }
]'

# Lookup join on covering secondary index
query II rowsort
SELECT small.a, large.c FROM small JOIN large ON small.a = large.b
----
2   3
4   6
6   9
8   12
10  15

# Lookup join on non-covering secondary index
query II rowsort
SELECT small.a, large.d FROM small JOIN large ON small.a = large.b
----
2   4
4   8
6   12
8   16
10  20

############################
#  LEFT OUTER LOOKUP JOIN  #
############################

# Left join against primary index
query II rowsort
SELECT small.b, large.a FROM small LEFT JOIN large ON small.b = large.a
----
2 2
4 4
6 6
8 8
10 10
12 NULL
14 NULL
16 NULL
18 NULL
20 NULL

# Left join should preserve input order.
query II
SELECT t1.a, t2.b FROM small t1 LEFT JOIN large t2 ON t1.a = t2.a AND t2.b % 6 = 0 ORDER BY t1.a
----
1   NULL
2   NULL
3   6
4   NULL
5   NULL
6   12
7   NULL
8   NULL
9   18
10  NULL

# Left join against covering secondary index
query II rowsort
SELECT small.c, large.c FROM small LEFT JOIN large ON small.c = large.b
----
3   NULL
6   9
9   NULL
12  18
15  NULL
18  27
21  NULL
24  NULL
27  NULL
30  NULL

# Left join against non-covering secondary index
query II rowsort
SELECT small.c, large.d FROM small LEFT JOIN large ON small.c = large.b
----
3   NULL
6   12
9   NULL
12  24
15  NULL
18  36
21  NULL
24  NULL
27  NULL
30  NULL

# Left join with ON filter on covering index
query II rowsort
SELECT small.c, large.c FROM small LEFT JOIN large ON small.c = large.b AND large.c < 20
----
3   NULL
6   9
9   NULL
12  18
15  NULL
18  NULL
21  NULL
24  NULL
27  NULL
30  NULL

## Left join with ON filter on non-covering index
query II rowsort
SELECT small.c, large.d FROM small LEFT JOIN large ON small.c = large.b AND large.d < 30
----
3   NULL
6   12
9   NULL
12  24
15  NULL
18  NULL
21  NULL
24  NULL
27  NULL
30  NULL

# Lookup joins against interleaved tables. Regression test for #28981.
# This is now tested more thoroughly by joinreader_test.go.

statement ok
CREATE TABLE parent (a INT, b INT, PRIMARY KEY(a, b))

statement ok
CREATE TABLE child (a INT, b INT, c INT, PRIMARY KEY(a, b, c)) INTERLEAVE IN PARENT parent(a, b)

statement ok
CREATE TABLE source (a INT)

statement ok
ALTER TABLE source INJECT STATISTICS '[
  {
    "columns": ["a"],
    "created_at": "2018-01-01 1:00:00.00000+00:00",
    "row_count": 1,
    "distinct_count": 1
  }
]'

statement ok
ALTER TABLE child INJECT STATISTICS '[
  {
    "columns": ["a", "b", "c"],
    "created_at": "2018-01-01 1:00:00.00000+00:00",
    "row_count": 10,
    "distinct_count": 10
  }
]'

statement ok
INSERT INTO child VALUES(1, 2, 3)

statement ok
INSERT INTO source VALUES(1)

query IIII
SELECT * FROM source JOIN child ON source.a = child.a
----
1 1 2 3

###########################################################
#  LOOKUP JOINS ON IMPLICIT INDEX KEY COLUMNS             #
#  https://github.com/cockroachdb/cockroach/issues/31777  #
###########################################################
statement ok
CREATE TABLE t (a INT, b INT, c INT, d INT, e INT)

statement ok
CREATE TABLE u (a INT, b INT, c INT, d INT, e INT, PRIMARY KEY (a DESC, b, c))

statement ok
INSERT INTO t VALUES
  (1, 2, 3, 4, 5)

statement ok
INSERT INTO u VALUES
  (1, 2, 3, 4, 5),
  (2, 3, 4, 5, 6),
  (3, 4, 5, 6, 7)

# Test index with all primary key columns implicit.
statement ok
CREATE INDEX idx ON u (d)

query I
SELECT u.a FROM t JOIN u ON t.d = u.d AND t.a = u.a WHERE t.e = 5
----
1

# Test unique version of same index. (Lookup join should not use column a.)
statement ok
DROP INDEX u@idx

statement ok
CREATE UNIQUE INDEX idx ON u (d)

query I
SELECT u.a FROM t JOIN u ON t.d = u.d AND t.a = u.a WHERE t.e = 5
----
1

# Test index with first primary key column explicit and the rest implicit.
statement ok
DROP INDEX u@idx CASCADE

statement ok
CREATE INDEX idx ON u (d, a)

query I
SELECT u.a FROM t JOIN u ON t.d = u.d AND t.a = u.a AND t.b = u.b WHERE t.e = 5
----
1

# Test index with middle primary key column explicit and the rest implicit.
statement ok
DROP INDEX u@idx

statement ok
CREATE INDEX idx ON u (d, b)

query I
SELECT u.a FROM t JOIN u ON t.d = u.d AND t.a = u.a AND t.b = u.b WHERE t.e = 5
----
1

# Test index with last primary key column explicit and the rest implicit.
statement ok
DROP INDEX u@idx

statement ok
CREATE INDEX idx ON u (d, c)

query I
SELECT u.a FROM t JOIN u ON t.d = u.d AND t.a = u.a AND t.d = u.d WHERE t.e = 5
----
1

# Regression test for #33342. We want the ordering in the lookup join node to
# be +f not +a; this is achieved by making def show up in the query first (and
# thus get smaller opt.ColumnIDs).
query IIIIII colnames,partialsort(4)
SELECT * FROM def JOIN abc ON a=f ORDER BY a
----
d     e  f  a  b     c
2     1  1  1  1     2
NULL  2  1  1  1     2
1     1  2  2  NULL  2
1     1  2  2  1     1
